In [28]:
import csv
from openpyxl import Workbook
import re
import requests
from bs4 import BeautifulSoup
import datetime
import pandas as pd


class DaumNewsScrap():
    def __init__(self, query, start=20200109000000, end=20200205000000, page=1):
        self.query = query
        self.page = page
        self.start = start
        self.end = end
        self.url1 = "https://search.daum.net/search?w=news&enc=utf8&cluster=y&cluster_page=1&q={query}"
        self.url2 = "&p={page}&period=u&sd={start}&ed={end}"
        self.url_total = self.url1 + self.url2
        
        # 전체 스크랩 데이터 숫자 판단
        DaumNewsScrap.number_of_news(self)

    def page_url(self, page=1):
        """
        각 페이지 url 설정하는 부분, Daum에서 키워드 검색 후 뉴스 탭에서 페이지를 한 페이지씩 넘어가기 위해
        """
        self.url = self.url_total.format(query=self.query, page=page,
                                         start=self.start, end=self.end)
        return self.url

    def number_of_news(self):
        """
        검색어에 대한 뉴스 총 개수 확인,
        뉴스 페이지 당 10개씩 뉴스 기사가 업로드 되므로
        total_num//10 만큼 페이지를 돌아가면됨
        :return: total_num
        """

        url = DaumNewsScrap.page_url(self)
        page = requests.get(url).text
        soup = BeautifulSoup(page, 'html.parser')

        pat = re.compile("<a")
        count_pat = re.compile("약* ([0-9\,]*)건")
        number = soup.find_all('span', id='resultCntArea')[0].text
        total_num = count_pat.findall(number)[0]
        self.total_num = int(total_num.replace(",", ""))
        return self.total_num

    def news_scrap(self):
        """
        뉴스 스크랩 한 후 엑셀파일에 작성하는 부분
        csv나 encoding 잘 모르겠어서 그냥 xlsx 로 저장했음
        :return: 최종 결과물 파일 저장
        """
        # 총 반복 횟수 설정
        number_of_repeat = self.total_num // 10
        
        cnt = 0
        df_format = pd.DataFrame({'회사':[], '날짜' : [], '언론사' : [], '내용' : []})

        # 본격 적인 뉴스 스크랩
        news_total_scrap = ""
        for i in range(1, number_of_repeat):
            DaumNewsScrap.page_url(self, i)
            link = self.url
            soup = BeautifulSoup((requests.get(link).text), 'html.parser')
            url_soups = soup.select('span.f_nb.date > a.f_nb')
            # url_soups에 10개의 뉴스 기사의 링크가 들어감

            print(i, '/', number_of_repeat - 1)  # 진행 정도
            for url in url_soups:
                # 뉴스기사의 url, 각 뉴스 기사 링크로 들어가서 기사 파싱해오기
                url = url.get('href')
                soup = BeautifulSoup(requests.get(url).text, 'html.parser')

                # 날짜 구하기
                try:
                    date_find = soup.select('div > span > span:nth-child(2)')[0].text
                except:
                    date_find = soup.select('div > span > span')[0].text

                try:
                    re_date = re.compile("(\d\d\d\d\.\d\d\.\d\d)")
                    date = re_date.findall(date_find)[0]
                    date = datetime.datetime.strptime(date, '%Y.%m.%d')
                except:
                    date = 0

                # 언론사명 구하기
                media_name = soup.select('div > em > a > img')[0].get('alt')

                # 기사내용 긁어오기
                contents = soup.find_all('p')
                temp = []
                for sentence in contents:
                    temp.append(sentence.text)  # 한 기사 통쨰로 만들기

                this_news =  "".join(temp)                
                this_news_filter = re.sub('[^가-힣0-9a-zA-Z\@\%\.]', ' ',this_news )
                df = pd.DataFrame({'회사' : [self.query], '날짜' : [date], '언론사' : [media_name], '내용' : [this_news_filter]})
                df_format = df_format.append(df)
                news_total_scrap += this_news_filter
            cnt +=1
            if cnt % 500 == 0:
                df_format.reset_index(inplace=True)
                del df_format['index']
                df_format.to_csv("NewsScrapData2.csv", mode='a',encoding='euc-kr', header=False) 
                df_format = pd.DataFrame({'회사':[], '날짜' : [], '언론사' : [], '내용' : []})
                print("임시저장 완료")
        df_format.reset_index(inplace=True)
        del df_format['index']
        df_format.to_csv("NewsScrapData2.csv", mode='a',encoding='euc-kr', header=False)
        print(i,self.query, "저장 완료")
        return news_total_scrap

In [29]:
# file.csv 200개 종목명 저장된 파일
path = r"C:\Users\Administrator\Desktop\Untitled\file.csv"

df_top200 = pd.read_csv(path, encoding='euc-kr')

# 데이터 기본 저장 틀 제작
df_format = pd.DataFrame({'회사' : [],'날짜' : [], '언론사' : [], '내용' : []})
df_format.to_csv("NewsScrapData2.csv", mode='w',encoding='euc-kr')

for company in df_top200['company'][2:]:
    print(company, 'scrap is start')
    daum = DaumNewsScrap(company)
    py_scrap = daum.news_scrap()
print("Done")

삼성바이오로직스 scrap is start
1 / 313
2 / 313
3 / 313
4 / 313
5 / 313
6 / 313
7 / 313
8 / 313
9 / 313
10 / 313
11 / 313
12 / 313
13 / 313
14 / 313
15 / 313
16 / 313
17 / 313
18 / 313
19 / 313
20 / 313
21 / 313
22 / 313
23 / 313
24 / 313
25 / 313
26 / 313
27 / 313
28 / 313
29 / 313
30 / 313
31 / 313
32 / 313
33 / 313
34 / 313
35 / 313
36 / 313
37 / 313
38 / 313
39 / 313
40 / 313
41 / 313
42 / 313
43 / 313
44 / 313
45 / 313
46 / 313
47 / 313
48 / 313
49 / 313
50 / 313
51 / 313
52 / 313
53 / 313
54 / 313
55 / 313
56 / 313
57 / 313
58 / 313
59 / 313
60 / 313
61 / 313
62 / 313
63 / 313
64 / 313
65 / 313
66 / 313
67 / 313
68 / 313
69 / 313
70 / 313
71 / 313
72 / 313
73 / 313
74 / 313
75 / 313
76 / 313
77 / 313
78 / 313
79 / 313
80 / 313
81 / 313
82 / 313
83 / 313
84 / 313
85 / 313
86 / 313
87 / 313
88 / 313
89 / 313
90 / 313
91 / 313
92 / 313
93 / 313
94 / 313
95 / 313
96 / 313
97 / 313
98 / 313
99 / 313
100 / 313
101 / 313
102 / 313
103 / 313
104 / 313
105 / 313
106 / 313
107 / 313
108 / 313
109 

477 / 3099
478 / 3099
479 / 3099
480 / 3099
481 / 3099
482 / 3099
483 / 3099
484 / 3099
485 / 3099
486 / 3099
487 / 3099
488 / 3099
489 / 3099
490 / 3099
491 / 3099
492 / 3099
493 / 3099
494 / 3099
495 / 3099
496 / 3099
497 / 3099
498 / 3099
499 / 3099
500 / 3099
임시저장 완료
501 / 3099
502 / 3099
503 / 3099
504 / 3099
505 / 3099
506 / 3099
507 / 3099
508 / 3099
509 / 3099
510 / 3099
511 / 3099
512 / 3099
513 / 3099
514 / 3099
515 / 3099
516 / 3099
517 / 3099
518 / 3099
519 / 3099
520 / 3099
521 / 3099
522 / 3099
523 / 3099
524 / 3099
525 / 3099
526 / 3099
527 / 3099
528 / 3099
529 / 3099
530 / 3099
531 / 3099
532 / 3099
533 / 3099
534 / 3099
535 / 3099
536 / 3099
537 / 3099
538 / 3099
539 / 3099
540 / 3099
541 / 3099
542 / 3099
543 / 3099
544 / 3099
545 / 3099
546 / 3099
547 / 3099
548 / 3099
549 / 3099
550 / 3099
551 / 3099
552 / 3099
553 / 3099
554 / 3099
555 / 3099
556 / 3099
557 / 3099
558 / 3099
559 / 3099
560 / 3099
561 / 3099
562 / 3099
563 / 3099
564 / 3099
565 / 3099
566 / 3099
56

1203 / 3099
1204 / 3099
1205 / 3099
1206 / 3099
1207 / 3099
1208 / 3099
1209 / 3099
1210 / 3099
1211 / 3099
1212 / 3099
1213 / 3099
1214 / 3099
1215 / 3099
1216 / 3099
1217 / 3099
1218 / 3099
1219 / 3099
1220 / 3099
1221 / 3099
1222 / 3099
1223 / 3099
1224 / 3099
1225 / 3099
1226 / 3099
1227 / 3099
1228 / 3099
1229 / 3099
1230 / 3099
1231 / 3099
1232 / 3099
1233 / 3099
1234 / 3099
1235 / 3099
1236 / 3099
1237 / 3099
1238 / 3099
1239 / 3099
1240 / 3099
1241 / 3099
1242 / 3099
1243 / 3099
1244 / 3099
1245 / 3099
1246 / 3099
1247 / 3099
1248 / 3099
1249 / 3099
1250 / 3099
1251 / 3099
1252 / 3099
1253 / 3099
1254 / 3099
1255 / 3099
1256 / 3099
1257 / 3099
1258 / 3099
1259 / 3099
1260 / 3099
1261 / 3099
1262 / 3099
1263 / 3099
1264 / 3099
1265 / 3099
1266 / 3099
1267 / 3099
1268 / 3099
1269 / 3099
1270 / 3099
1271 / 3099
1272 / 3099
1273 / 3099
1274 / 3099
1275 / 3099
1276 / 3099
1277 / 3099
1278 / 3099
1279 / 3099
1280 / 3099
1281 / 3099
1282 / 3099
1283 / 3099
1284 / 3099
1285 / 3099
1286

1886 / 3099
1887 / 3099
1888 / 3099
1889 / 3099
1890 / 3099
1891 / 3099
1892 / 3099
1893 / 3099
1894 / 3099
1895 / 3099
1896 / 3099
1897 / 3099
1898 / 3099
1899 / 3099
1900 / 3099
1901 / 3099
1902 / 3099
1903 / 3099
1904 / 3099
1905 / 3099
1906 / 3099
1907 / 3099
1908 / 3099
1909 / 3099
1910 / 3099
1911 / 3099
1912 / 3099
1913 / 3099
1914 / 3099
1915 / 3099
1916 / 3099
1917 / 3099
1918 / 3099
1919 / 3099
1920 / 3099
1921 / 3099
1922 / 3099
1923 / 3099
1924 / 3099
1925 / 3099
1926 / 3099
1927 / 3099
1928 / 3099
1929 / 3099
1930 / 3099
1931 / 3099
1932 / 3099
1933 / 3099
1934 / 3099
1935 / 3099
1936 / 3099
1937 / 3099
1938 / 3099
1939 / 3099
1940 / 3099
1941 / 3099
1942 / 3099
1943 / 3099
1944 / 3099
1945 / 3099
1946 / 3099
1947 / 3099
1948 / 3099
1949 / 3099
1950 / 3099
1951 / 3099
1952 / 3099
1953 / 3099
1954 / 3099
1955 / 3099
1956 / 3099
1957 / 3099
1958 / 3099
1959 / 3099
1960 / 3099
1961 / 3099
1962 / 3099
1963 / 3099
1964 / 3099
1965 / 3099
1966 / 3099
1967 / 3099
1968 / 3099
1969

2569 / 3099
2570 / 3099
2571 / 3099
2572 / 3099
2573 / 3099
2574 / 3099
2575 / 3099
2576 / 3099
2577 / 3099
2578 / 3099
2579 / 3099
2580 / 3099
2581 / 3099
2582 / 3099
2583 / 3099
2584 / 3099
2585 / 3099
2586 / 3099
2587 / 3099
2588 / 3099
2589 / 3099
2590 / 3099
2591 / 3099
2592 / 3099
2593 / 3099
2594 / 3099
2595 / 3099
2596 / 3099
2597 / 3099
2598 / 3099
2599 / 3099
2600 / 3099
2601 / 3099
2602 / 3099
2603 / 3099
2604 / 3099
2605 / 3099
2606 / 3099
2607 / 3099
2608 / 3099
2609 / 3099
2610 / 3099
2611 / 3099
2612 / 3099
2613 / 3099
2614 / 3099
2615 / 3099
2616 / 3099
2617 / 3099
2618 / 3099
2619 / 3099
2620 / 3099
2621 / 3099
2622 / 3099
2623 / 3099
2624 / 3099
2625 / 3099
2626 / 3099
2627 / 3099
2628 / 3099
2629 / 3099
2630 / 3099
2631 / 3099
2632 / 3099
2633 / 3099
2634 / 3099
2635 / 3099
2636 / 3099
2637 / 3099
2638 / 3099
2639 / 3099
2640 / 3099
2641 / 3099
2642 / 3099
2643 / 3099
2644 / 3099
2645 / 3099
2646 / 3099
2647 / 3099
2648 / 3099
2649 / 3099
2650 / 3099
2651 / 3099
2652

173 / 1349
174 / 1349
175 / 1349
176 / 1349
177 / 1349
178 / 1349
179 / 1349
180 / 1349
181 / 1349
182 / 1349
183 / 1349
184 / 1349
185 / 1349
186 / 1349
187 / 1349
188 / 1349
189 / 1349
190 / 1349
191 / 1349
192 / 1349
193 / 1349
194 / 1349
195 / 1349
196 / 1349
197 / 1349
198 / 1349
199 / 1349
200 / 1349
201 / 1349
202 / 1349
203 / 1349
204 / 1349
205 / 1349
206 / 1349
207 / 1349
208 / 1349
209 / 1349
210 / 1349
211 / 1349
212 / 1349
213 / 1349
214 / 1349
215 / 1349
216 / 1349
217 / 1349
218 / 1349
219 / 1349
220 / 1349
221 / 1349
222 / 1349
223 / 1349
224 / 1349
225 / 1349
226 / 1349
227 / 1349
228 / 1349
229 / 1349
230 / 1349
231 / 1349
232 / 1349
233 / 1349
234 / 1349
235 / 1349
236 / 1349
237 / 1349
238 / 1349
239 / 1349
240 / 1349
241 / 1349
242 / 1349
243 / 1349
244 / 1349
245 / 1349
246 / 1349
247 / 1349
248 / 1349
249 / 1349
250 / 1349
251 / 1349
252 / 1349
253 / 1349
254 / 1349
255 / 1349
256 / 1349
257 / 1349
258 / 1349
259 / 1349
260 / 1349
261 / 1349
262 / 1349
263 / 1349

919 / 1349
920 / 1349
921 / 1349
922 / 1349
923 / 1349
924 / 1349
925 / 1349
926 / 1349
927 / 1349
928 / 1349
929 / 1349
930 / 1349
931 / 1349
932 / 1349
933 / 1349
934 / 1349
935 / 1349
936 / 1349
937 / 1349
938 / 1349
939 / 1349
940 / 1349
941 / 1349
942 / 1349
943 / 1349
944 / 1349
945 / 1349
946 / 1349
947 / 1349
948 / 1349
949 / 1349
950 / 1349
951 / 1349
952 / 1349
953 / 1349
954 / 1349
955 / 1349
956 / 1349
957 / 1349
958 / 1349
959 / 1349
960 / 1349
961 / 1349
962 / 1349
963 / 1349
964 / 1349
965 / 1349
966 / 1349
967 / 1349
968 / 1349
969 / 1349
970 / 1349
971 / 1349
972 / 1349
973 / 1349
974 / 1349
975 / 1349
976 / 1349
977 / 1349
978 / 1349
979 / 1349
980 / 1349
981 / 1349
982 / 1349
983 / 1349
984 / 1349
985 / 1349
986 / 1349
987 / 1349
988 / 1349
989 / 1349
990 / 1349
991 / 1349
992 / 1349
993 / 1349
994 / 1349
995 / 1349
996 / 1349
997 / 1349
998 / 1349
999 / 1349
1000 / 1349
임시저장 완료
1001 / 1349
1002 / 1349
1003 / 1349
1004 / 1349
1005 / 1349
1006 / 1349
1007 / 1349
1008 

320 / 381
321 / 381
322 / 381
323 / 381
324 / 381
325 / 381
326 / 381
327 / 381
328 / 381
329 / 381
330 / 381
331 / 381
332 / 381
333 / 381
334 / 381
335 / 381
336 / 381
337 / 381
338 / 381
339 / 381
340 / 381
341 / 381
342 / 381
343 / 381
344 / 381
345 / 381
346 / 381
347 / 381
348 / 381
349 / 381
350 / 381
351 / 381
352 / 381
353 / 381
354 / 381
355 / 381
356 / 381
357 / 381
358 / 381
359 / 381
360 / 381
361 / 381
362 / 381
363 / 381
364 / 381
365 / 381
366 / 381
367 / 381
368 / 381
369 / 381
370 / 381
371 / 381
372 / 381
373 / 381
374 / 381
375 / 381
376 / 381
377 / 381
378 / 381
379 / 381
380 / 381
381 / 381
LG화학 저장 완료
현대모비스 scrap is start
1 / 724
2 / 724
3 / 724
4 / 724
5 / 724
6 / 724
7 / 724
8 / 724
9 / 724
10 / 724
11 / 724
12 / 724
13 / 724
14 / 724
15 / 724
16 / 724
17 / 724
18 / 724
19 / 724
20 / 724
21 / 724
22 / 724
23 / 724
24 / 724
25 / 724
26 / 724
27 / 724
28 / 724
29 / 724
30 / 724
31 / 724
32 / 724
33 / 724
34 / 724
35 / 724
36 / 724
37 / 724
38 / 724
39 / 724
40 / 7

43 / 315
44 / 315
45 / 315
46 / 315
47 / 315
48 / 315
49 / 315
50 / 315
51 / 315
52 / 315
53 / 315
54 / 315
55 / 315
56 / 315
57 / 315
58 / 315
59 / 315
60 / 315
61 / 315
62 / 315
63 / 315
64 / 315
65 / 315
66 / 315
67 / 315
68 / 315
69 / 315
70 / 315
71 / 315
72 / 315
73 / 315
74 / 315
75 / 315
76 / 315
77 / 315
78 / 315
79 / 315
80 / 315
81 / 315
82 / 315
83 / 315
84 / 315
85 / 315
86 / 315
87 / 315
88 / 315
89 / 315
90 / 315
91 / 315
92 / 315
93 / 315
94 / 315
95 / 315
96 / 315
97 / 315
98 / 315
99 / 315
100 / 315
101 / 315
102 / 315
103 / 315
104 / 315
105 / 315
106 / 315
107 / 315
108 / 315
109 / 315
110 / 315
111 / 315
112 / 315
113 / 315
114 / 315
115 / 315
116 / 315
117 / 315
118 / 315
119 / 315
120 / 315
121 / 315
122 / 315
123 / 315
124 / 315
125 / 315
126 / 315
127 / 315
128 / 315
129 / 315
130 / 315
131 / 315
132 / 315
133 / 315
134 / 315
135 / 315
136 / 315
137 / 315
138 / 315
139 / 315
140 / 315
141 / 315
142 / 315
143 / 315
144 / 315
145 / 315
146 / 315
147 / 315
148 / 3

160 / 248
161 / 248
162 / 248
163 / 248
164 / 248
165 / 248
166 / 248
167 / 248
168 / 248
169 / 248
170 / 248
171 / 248
172 / 248
173 / 248
174 / 248
175 / 248
176 / 248
177 / 248
178 / 248
179 / 248
180 / 248
181 / 248
182 / 248
183 / 248
184 / 248
185 / 248
186 / 248
187 / 248
188 / 248
189 / 248
190 / 248
191 / 248
192 / 248
193 / 248
194 / 248
195 / 248
196 / 248
197 / 248
198 / 248
199 / 248
200 / 248
201 / 248
202 / 248
203 / 248
204 / 248
205 / 248
206 / 248
207 / 248
208 / 248
209 / 248
210 / 248
211 / 248
212 / 248
213 / 248
214 / 248
215 / 248
216 / 248
217 / 248
218 / 248
219 / 248
220 / 248
221 / 248
222 / 248
223 / 248
224 / 248
225 / 248
226 / 248
227 / 248
228 / 248
229 / 248
230 / 248
231 / 248
232 / 248
233 / 248
234 / 248
235 / 248
236 / 248
237 / 248
238 / 248
239 / 248
240 / 248
241 / 248
242 / 248
243 / 248
244 / 248
245 / 248
246 / 248
247 / 248
248 / 248
LG생활건강 저장 완료
POSCO scrap is start
1 / 667
2 / 667
3 / 667
4 / 667
5 / 667
6 / 667
7 / 667
8 / 667
9 / 667
10 /

76 / 256
77 / 256
78 / 256
79 / 256
80 / 256
81 / 256
82 / 256
83 / 256
84 / 256
85 / 256
86 / 256
87 / 256
88 / 256
89 / 256
90 / 256
91 / 256
92 / 256
93 / 256
94 / 256
95 / 256
96 / 256
97 / 256
98 / 256
99 / 256
100 / 256
101 / 256
102 / 256
103 / 256
104 / 256
105 / 256
106 / 256
107 / 256
108 / 256
109 / 256
110 / 256
111 / 256
112 / 256
113 / 256
114 / 256
115 / 256
116 / 256
117 / 256
118 / 256
119 / 256
120 / 256
121 / 256
122 / 256
123 / 256
124 / 256
125 / 256
126 / 256
127 / 256
128 / 256
129 / 256
130 / 256
131 / 256
132 / 256
133 / 256
134 / 256
135 / 256
136 / 256
137 / 256
138 / 256
139 / 256
140 / 256
141 / 256
142 / 256
143 / 256
144 / 256
145 / 256
146 / 256
147 / 256
148 / 256
149 / 256
150 / 256
151 / 256
152 / 256
153 / 256
154 / 256
155 / 256
156 / 256
157 / 256
158 / 256
159 / 256
160 / 256
161 / 256
162 / 256
163 / 256
164 / 256
165 / 256
166 / 256
167 / 256
168 / 256
169 / 256
170 / 256
171 / 256
172 / 256
173 / 256
174 / 256
175 / 256
176 / 256
177 / 256
178 

IndexError: list index out of range

In [30]:
daum.url

'https://search.daum.net/search?w=news&enc=utf8&cluster=y&cluster_page=1&q=SK텔레콤&p=3&period=u&sd=20200109000000&ed=20200205000000'

In [54]:
link = daum.url
soup = BeautifulSoup((requests.get(link).text), 'html.parser')
url_soups = soup.select('span.f_nb.date > a.f_nb')
        
for url in url_soups:
    # 뉴스기사의 url, 각 뉴스 기사 링크로 들어가서 기사 파싱해오기
    print(url)
    url = url.get('href')
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')

    # 날짜 구하기
    try:
        date_find = soup.select('div > span > span:nth-child(2)')[0].text
    except:
        date_find = soup.select('div > span > span')[0].text

    try:
        re_date = re.compile("(\d\d\d\d\.\d\d\.\d\d)")
        date = re_date.findall(date_find)[0]
        date = datetime.datetime.strptime(date, '%Y.%m.%d')
    except:
        date = 0

    # 언론사명 구하기
    media_name = soup.select('div > em > a > img')[0].get('alt')

    # 기사내용 긁어오기
    contents = soup.find_all('p')
    temp = []
    for sentence in contents:
        temp.append(sentence.text)  # 한 기사 통쨰로 만들기

    this_news =  "".join(temp)                
    this_news_filter = re.sub('[^가-힣0-9a-zA-Z\@\%\.]', ' ',this_news )
    df = pd.DataFrame({'회사' : ['test'], '날짜' : [date], '언론사' : [media_name], '내용' : [this_news_filter]})
    df_format = df_format.append(df)

<a "="" class="f_nb" href="http://v.media.daum.net/v/20200121091731074" onclick='smartLog(this, "dc=NNS&amp;d=26y-ed0pe5IdZvHt3I&amp;pg=3&amp;r=1&amp;p=3&amp;rc=10&amp;e1=16e7LwQHlmD09KU6jS&amp;e3=0&amp;ext=dsid=26y-ed0pe5IdZvHt3I", event);' target="_blank">다음뉴스</a>
<a "="" class="f_nb" href="http://v.media.daum.net/v/20200120112506896" onclick='smartLog(this, "dc=NNS&amp;d=26GX17Phdw2QfuuohY&amp;pg=3&amp;r=3&amp;p=3&amp;rc=10&amp;e1=16yGc-mR1Rz5JT4-UZ&amp;e3=0&amp;ext=dsid=26GX17Phdw2QfuuohY", event);' target="_blank">다음뉴스</a>
<a class="f_nb" href="http://v.media.daum.net/v/20200120092524380" onclick='smartLog(this, "dc=NNS&amp;d=26dUDmfbSGI76rewGa&amp;pg=3&amp;r=3&amp;p=10&amp;rc=10&amp;e1=16AffY_mb9wFLBN77-&amp;e3=0&amp;ext=dsid=26dUDmfbSGI76rewGa&amp;rtt=parent&amp;rtid=26GX17Phdw2QfuuohY&amp;rtsc=", event);' target="_blank">다음뉴스</a>
<a class="f_nb" href="http://v.media.daum.net/v/20200120091501880" onclick='smartLog(this, "dc=NNS&amp;d=266SCDdVPNIkOzaWgo&amp;pg=3&amp;r=3&amp;p=10&

IndexError: list index out of range

In [56]:
#cSub > div > em > a > img
soup.select('div > em > a')

[<a class="link_cp #util #cp_logo" href="http://www.fomos.co.kr" target="_blank"> 포모스 </a>]

In [53]:
soup.select('div > em > a > img')

[<img alt="뉴스1" class="thumb_g" src="https://t1.daumcdn.net/media/news/news2016/cp/cp_news1.gif"/>]