# 라이브러리 세팅

In [87]:
# 라이브러리 불러오기
import pandas as pd
import torch
from tqdm import tqdm
import chardet
from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration, pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification
from label import id2label # Label 정보 매핑용 사전, label.py의 내용을 불러 옴
import re

# 모델 로딩

In [88]:
# 요약 모델 (digit82/kobart-summarization)
summarizer_model = 'digit82/kobart-summarization'
tokenizer = PreTrainedTokenizerFast.from_pretrained(summarizer_model)
model = BartForConditionalGeneration.from_pretrained(summarizer_model)

summarizer = pipeline(
    'summarization',
    model=model,
    tokenizer=tokenizer,
    framework='pt',
    device=0 if torch.cuda.is_available() else -1
)

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels will be overwritten to 2.
Device set to use cpu


In [89]:
# NER 모델 (KPF BERT)
ner_model_name = 'KPF/KPF-BERT-NER'
ner_pipeline = pipeline(
    task='ner',
    model=ner_model_name,
    tokenizer=ner_model_name,
    aggregation_strategy='simple',
    framework='pt'
)

Device set to use mps:0


In [90]:
# 종목명 사전
company_df = pd.read_csv('/Users/han-yeeun/final/eda/yeeun/KRX_전체_기업명_20250530.csv', encoding='cp949')
company_df.head()

Unnamed: 0,표준코드,단축코드,한글 종목명,한글 종목약명,영문 종목명,상장일,시장구분,증권구분,소속부,주식종류,액면가,상장주식수
0,KR7098120009,98120,(주)마이크로컨텍솔루션,마이크로컨텍솔,"Micro Contact Solution Co.,Ltd.",2008/09/23,KOSDAQ,주권,중견기업부,보통주,500,8312766
1,KR7009520008,9520,(주)포스코엠텍,포스코엠텍,"POSCO M-TECH CO.,LTD.",1997/11/10,KOSDAQ GLOBAL,주권,우량기업부,보통주,500,41642703
2,KR7095570008,95570,AJ네트웍스보통주,AJ네트웍스,"AJ Networks Co.,Ltd.",2015/08/21,KOSPI,주권,,보통주,1000,45252759
3,KR7006840003,6840,AK홀딩스보통주,AK홀딩스,"AK Holdings, Inc.",1999/08/11,KOSPI,주권,,보통주,5000,13247561
4,KR7282330000,282330,BGF리테일보통주,BGF리테일,BGF Retail,2017/12/08,KOSPI,주권,,보통주,1000,17283906


# 데이터 로딩 및 전처리

In [108]:
# 뉴스 데이터 (1000개만 사용)
news_df = pd.read_csv('/Users/han-yeeun/final/db/news_2023_2025_with_NER.csv')
news_df = news_df.head(1000)

In [109]:
news_df.head(3)

Unnamed: 0,news_id,wdate,title,article,press,url,image,text_combined,labels_by_id
0,20250523_0001,2025-05-23 19:11,[마켓인]모태펀드 존속 불확실성 해소될까…이재명 공약에 업계 주목,"2035년 종료 앞둬, 존속 공약에 기대감\n창업 초기자금 공백 완화 가능성에 업계...",이데일리,https://n.news.naver.com/mnews/article/018/000...,https://imgnews.pstatic.net/image/018/2025/05/...,[마켓인]모태펀드 존속 불확실성 해소될까…이재명 공약에 업계 주목 2035년 종료 ...,[]
1,20250523_0002,2025-05-23 18:52,"[단독] 카카오페이, 2500만 회원 쓱·스마일페이 품나…간편결제 시장 빅3 경쟁 후끈",매각가 5000억 안팎 달할듯\n결제시장 내 입지강화 포석\n카카오페이 [사진 = ...,매일경제,https://n.news.naver.com/mnews/article/009/000...,https://imgnews.pstatic.net/image/009/2025/05/...,"[단독] 카카오페이, 2500만 회원 쓱·스마일페이 품나…간편결제 시장 빅3 경쟁 ...",['카카오페이']
2,20250523_0003,2025-05-23 18:38,"키스트론, 일반 청약 흥행…증거금 6조원 모았다","5월22~23일 청약 진행, 경쟁률 총2166대1\n상장 후 예상 시가총개 643억...",머니투데이,https://n.news.naver.com/mnews/article/008/000...,https://imgnews.pstatic.net/image/008/2025/05/...,"키스트론, 일반 청약 흥행…증거금 6조원 모았다 5월22~23일 청약 진행, 경쟁률...",[]


In [110]:
# 제목 + 본문 결합
news_df['text_combined'] = (
    news_df['title'].fillna('') + ' ' + news_df['article'].fillna('')
).str.replace(r'\s+', ' ', regex=True).str.strip()

# 함수 정의

In [115]:
# 종목명 정규화 사전 생성
company_dict = {}

for _, row in company_df.iterrows():
    full_name = str(row['한글 종목명']).strip()
    short_name = str(row['한글 종목약명']).strip()

    if full_name:
        company_dict[full_name] = full_name
    if short_name and short_name != full_name:
        company_dict[short_name] = full_name

In [117]:
def extract_stock_labels(text, company_dict, is_long=False):
    if not isinstance(text, str) or len(text.strip()) < 10:
        return []

    matched_names = set()
    try:
        if is_long:
            # 길면 문장 단위로 자르기
            sentences = re.split(r'(?<=[.!?])\s+', text)
            chunks = []
            current_chunk = ''

            for sent in sentences:
                if len(current_chunk) + len(sent) <= 512:
                    current_chunk += ' ' + sent
                else:
                    chunks.append(current_chunk.strip())
                    current_chunk = sent
            if current_chunk:
                chunks.append(current_chunk.strip())
        else:
            chunks = [text]

        # chunk마다 NER 수행
        for chunk in chunks:
            entities = ner_pipeline(chunk)
            merged_entities = []
            current_word = ''
            current_score = []
            current_label = ''

            for ent in entities:
                word = ent['word']
                score = ent['score']
                label_id = ent['entity_group']
                label_num = int(label_id.split('_')[1])
                label_name = id2label.get(label_num, "")
                key = label_name[2:] if label_name.startswith(('B-', 'I-')) else label_name

                if key.startswith('OGG_ECONOMY'):
                    if word.startswith('##'):
                        current_word += word[2:]
                        current_score.append(score)
                    else:
                        if current_word:
                            merged_entities.append((current_word, current_label, sum(current_score)/len(current_score)))
                        current_word = word
                        current_score = [score]
                        current_label = label_name
            if current_word:
                merged_entities.append((current_word, current_label, sum(current_score)/len(current_score)))

            for word, label, score in merged_entities:
                if word in company_dict:
                    matched_names.add(company_dict[word])
    except Exception as e:
        print(f'NER 오류: {e}')
    
    return list(matched_names)

# 요약 및 종목명 추출

In [118]:
# 요약 및 종목명 추출
results = []

for i, row in tqdm(news_df.iterrows(), total=len(news_df), desc='요약 + 종목 추출 중'):
    title = row['title']
    url = row['url']
    article = row['article']
    
    # 요약
    try:
        summary = summarizer(
            article,
            max_length=240,
            min_length=120,
            do_sample=False
        )[0]['summary_text']
    except Exception as e:
        print(f'요약 오류 (index {i}): {e}')
        summary = ''

    # 종목명 추출
    try:
        stocks_from_summary = extract_stock_labels(summary, company_dict, is_long=False)
        stocks_from_article = extract_stock_labels(article, company_dict, is_long=True)
        combined_stocks = list(set(stocks_from_summary) | set(stocks_from_article))
    except Exception as e:
        print(f'NER 오류 (index {i}): {e}')
        combined_stocks = []

    results.append({
        'index': i,
        '제목': title,
        '요약': summary,
        '종목명': combined_stocks,
        'url': url,
        '본문 일부': article[:300]
    })

요약 + 종목 추출 중:   0%|          | 1/1000 [00:02<47:30,  2.85s/it]Your max_length is set to 240, but your input_length is only 226. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=113)
요약 + 종목 추출 중:   1%|          | 7/1000 [00:13<29:39,  1.79s/it]

요약 오류 (index 7): index out of range in self


요약 + 종목 추출 중:   1%|          | 8/1000 [00:14<23:13,  1.40s/it]Your max_length is set to 240, but your input_length is only 223. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=111)
요약 + 종목 추출 중:   1%|          | 9/1000 [00:16<27:21,  1.66s/it]Your max_length is set to 240, but your input_length is only 171. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=85)
요약 + 종목 추출 중:   2%|▏         | 15/1000 [00:29<32:27,  1.98s/it]

요약 오류 (index 15): index out of range in self


요약 + 종목 추출 중:   2%|▏         | 17/1000 [00:32<30:08,  1.84s/it]Your max_length is set to 240, but your input_length is only 225. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=112)
요약 + 종목 추출 중:   3%|▎         | 29/1000 [00:59<36:39,  2.27s/it]Your max_length is set to 240, but your input_length is only 171. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=85)
요약 + 종목 추출 중:   4%|▍         | 41/1000 [01:24<34:44,  2.17s/it]Your max_length is set to 240, but your input_length is only 227. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=113)
요약 + 종목 추출 중:   5%|▍         | 48/1000 [01:39<36:39,  2.31s

요약 오류 (index 74): index out of range in self


요약 + 종목 추출 중:   8%|▊         | 85/1000 [02:57<35:11,  2.31s/it]Your max_length is set to 240, but your input_length is only 218. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=109)
요약 + 종목 추출 중:   9%|▊         | 87/1000 [03:01<34:01,  2.24s/it]

요약 오류 (index 87): index out of range in self


요약 + 종목 추출 중:  10%|█         | 104/1000 [03:35<32:03,  2.15s/it]

요약 오류 (index 104): index out of range in self


요약 + 종목 추출 중:  11%|█         | 107/1000 [03:39<26:15,  1.76s/it]Your max_length is set to 240, but your input_length is only 231. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=115)
요약 + 종목 추출 중:  13%|█▎        | 133/1000 [04:43<35:16,  2.44s/it]Your max_length is set to 240, but your input_length is only 182. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=91)
요약 + 종목 추출 중:  14%|█▍        | 142/1000 [05:03<31:19,  2.19s/it]Your max_length is set to 240, but your input_length is only 186. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=93)
요약 + 종목 추출 중:  17%|█▋        | 171/1000 [06:09<32:20,  2.

요약 오류 (index 171): index out of range in self


요약 + 종목 추출 중:  18%|█▊        | 177/1000 [06:20<27:28,  2.00s/it]Your max_length is set to 240, but your input_length is only 206. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=103)
요약 + 종목 추출 중:  20%|██        | 200/1000 [07:14<27:41,  2.08s/it]Your max_length is set to 240, but your input_length is only 130. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=65)
요약 + 종목 추출 중:  21%|██        | 211/1000 [07:37<24:03,  1.83s/it]Your max_length is set to 240, but your input_length is only 210. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=105)
요약 + 종목 추출 중:  22%|██▏       | 215/1000 [07:46<28:47,  2

요약 오류 (index 215): index out of range in self


요약 + 종목 추출 중:  22%|██▏       | 216/1000 [07:47<21:50,  1.67s/it]Your max_length is set to 240, but your input_length is only 182. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=91)
요약 + 종목 추출 중:  22%|██▏       | 222/1000 [08:00<28:55,  2.23s/it]Your max_length is set to 240, but your input_length is only 163. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=81)
요약 + 종목 추출 중:  23%|██▎       | 229/1000 [08:15<27:21,  2.13s/it]Your max_length is set to 240, but your input_length is only 170. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=85)
요약 + 종목 추출 중:  23%|██▎       | 232/1000 [08:22<28:36,  2.2

요약 오류 (index 269): index out of range in self


요약 + 종목 추출 중:  27%|██▋       | 270/1000 [09:41<17:49,  1.46s/it]

요약 오류 (index 270): index out of range in self


요약 + 종목 추출 중:  29%|██▉       | 294/1000 [10:34<23:01,  1.96s/it]Your max_length is set to 240, but your input_length is only 234. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=117)
요약 + 종목 추출 중:  30%|██▉       | 295/1000 [10:36<24:01,  2.04s/it]Your max_length is set to 240, but your input_length is only 197. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=98)
요약 + 종목 추출 중:  30%|██▉       | 297/1000 [10:40<22:29,  1.92s/it]Your max_length is set to 240, but your input_length is only 101. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)
요약 + 종목 추출 중:  30%|██▉       | 299/1000 [10:45<24:05,  2.

요약 오류 (index 415): index out of range in self


요약 + 종목 추출 중:  42%|████▏     | 418/1000 [15:00<18:54,  1.95s/it]

요약 오류 (index 418): index out of range in self


요약 + 종목 추출 중:  42%|████▏     | 419/1000 [15:00<14:12,  1.47s/it]Your max_length is set to 240, but your input_length is only 239. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=119)
요약 + 종목 추출 중:  46%|████▌     | 456/1000 [16:20<22:10,  2.45s/it]

요약 오류 (index 456): index out of range in self


요약 + 종목 추출 중:  46%|████▌     | 461/1000 [16:30<20:11,  2.25s/it]Your max_length is set to 240, but your input_length is only 118. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=59)
요약 + 종목 추출 중:  47%|████▋     | 469/1000 [16:48<19:38,  2.22s/it]Your max_length is set to 240, but your input_length is only 218. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=109)
요약 + 종목 추출 중:  47%|████▋     | 474/1000 [16:58<19:31,  2.23s/it]Your max_length is set to 240, but your input_length is only 124. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=62)
요약 + 종목 추출 중:  48%|████▊     | 477/1000 [17:05<20:04,  2.

요약 오류 (index 492): index out of range in self


요약 + 종목 추출 중:  51%|█████     | 509/1000 [18:13<17:22,  2.12s/it]Your max_length is set to 240, but your input_length is only 205. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=102)
요약 + 종목 추출 중:  52%|█████▏    | 519/1000 [18:33<14:25,  1.80s/it]

요약 오류 (index 519): index out of range in self


요약 + 종목 추출 중:  52%|█████▏    | 520/1000 [18:33<10:39,  1.33s/it]

요약 오류 (index 520): index out of range in self


요약 + 종목 추출 중:  52%|█████▏    | 522/1000 [18:36<11:33,  1.45s/it]Your max_length is set to 240, but your input_length is only 217. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=108)
요약 + 종목 추출 중:  53%|█████▎    | 530/1000 [18:53<16:58,  2.17s/it]Your max_length is set to 240, but your input_length is only 238. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=119)
요약 + 종목 추출 중:  53%|█████▎    | 531/1000 [18:54<14:33,  1.86s/it]Your max_length is set to 240, but your input_length is only 151. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=75)
요약 + 종목 추출 중:  54%|█████▎    | 536/1000 [19:05<17:30,  2

요약 오류 (index 594): index out of range in self


요약 + 종목 추출 중:  60%|█████▉    | 595/1000 [21:23<12:03,  1.79s/it]Your max_length is set to 240, but your input_length is only 222. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=111)
요약 + 종목 추출 중:  60%|█████▉    | 596/1000 [21:25<13:00,  1.93s/it]Your max_length is set to 240, but your input_length is only 150. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=75)
요약 + 종목 추출 중:  60%|██████    | 600/1000 [21:33<13:24,  2.01s/it]Your max_length is set to 240, but your input_length is only 205. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=102)
요약 + 종목 추출 중:  61%|██████    | 611/1000 [21:57<12:41,  1

요약 오류 (index 614): index out of range in self


요약 + 종목 추출 중:  64%|██████▍   | 642/1000 [23:03<14:48,  2.48s/it]Your max_length is set to 240, but your input_length is only 161. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=80)
요약 + 종목 추출 중:  65%|██████▍   | 647/1000 [23:14<13:19,  2.26s/it]Your max_length is set to 240, but your input_length is only 115. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)
요약 + 종목 추출 중:  65%|██████▍   | 648/1000 [23:16<12:54,  2.20s/it]Your max_length is set to 240, but your input_length is only 188. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=94)
요약 + 종목 추출 중:  65%|██████▍   | 649/1000 [23:18<12:43,  2.1

요약 오류 (index 786): index out of range in self


요약 + 종목 추출 중:  79%|███████▉  | 794/1000 [28:20<07:30,  2.19s/it]

요약 오류 (index 794): index out of range in self


요약 + 종목 추출 중:  84%|████████▎ | 837/1000 [29:52<06:36,  2.43s/it]

요약 오류 (index 837): index out of range in self


요약 + 종목 추출 중:  84%|████████▍ | 839/1000 [29:55<05:29,  2.04s/it]Your max_length is set to 240, but your input_length is only 229. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=114)
요약 + 종목 추출 중:  85%|████████▍ | 847/1000 [30:14<06:07,  2.40s/it]

요약 오류 (index 847): index out of range in self


요약 + 종목 추출 중:  85%|████████▌ | 851/1000 [30:20<04:51,  1.95s/it]

요약 오류 (index 851): index out of range in self


요약 + 종목 추출 중:  85%|████████▌ | 852/1000 [30:20<03:34,  1.45s/it]

요약 오류 (index 852): index out of range in self


요약 + 종목 추출 중:  85%|████████▌ | 854/1000 [30:23<03:42,  1.52s/it]

요약 오류 (index 854): index out of range in self


요약 + 종목 추출 중:  86%|████████▌ | 856/1000 [30:26<03:34,  1.49s/it]

요약 오류 (index 856): index out of range in self


요약 + 종목 추출 중:  86%|████████▋ | 863/1000 [30:40<04:39,  2.04s/it]

요약 오류 (index 863): index out of range in self


요약 + 종목 추출 중:  87%|████████▋ | 872/1000 [31:00<04:55,  2.31s/it]Your max_length is set to 240, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)
요약 + 종목 추출 중:  88%|████████▊ | 876/1000 [31:07<04:08,  2.00s/it]

요약 오류 (index 876): index out of range in self


요약 + 종목 추출 중:  88%|████████▊ | 879/1000 [31:12<03:42,  1.84s/it]Your max_length is set to 240, but your input_length is only 234. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=117)
요약 + 종목 추출 중:  88%|████████▊ | 883/1000 [31:21<04:26,  2.28s/it]Your max_length is set to 240, but your input_length is only 216. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=108)
요약 + 종목 추출 중:  89%|████████▊ | 886/1000 [31:28<04:23,  2.31s/it]Your max_length is set to 240, but your input_length is only 82. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=41)
요약 + 종목 추출 중:  89%|████████▉ | 892/1000 [31:42<04:14,  2.

요약 오류 (index 893): index out of range in self


요약 + 종목 추출 중:  90%|█████████ | 901/1000 [32:00<03:55,  2.38s/it]Your max_length is set to 240, but your input_length is only 108. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)
요약 + 종목 추출 중:  90%|█████████ | 903/1000 [32:04<03:36,  2.23s/it]Your max_length is set to 240, but your input_length is only 128. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)
요약 + 종목 추출 중:  91%|█████████ | 911/1000 [48:50<1:48:15, 72.99s/it] Your max_length is set to 240, but your input_length is only 236. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=118)
요약 + 종목 추출 중:  92%|█████████▏| 918/1000 [49:06<11:09, 

In [119]:
# results → DataFrame으로 변환
result_df = pd.DataFrame(results)

# 요약 및 종목명 확인용 출력
for i, row in result_df.iterrows():
    print(f'\n=== 뉴스 {i+1} ===')
    print('제목:', row['제목'])
    print('요약:', row['요약'])
    print('종목명:', row['종목명'])
    print('url:', row['url'])
    print('본문 일부:', row['본문 일부'], '...')


=== 뉴스 1 ===
제목: [마켓인]모태펀드 존속 불확실성 해소될까…이재명 공약에 업계 주목
요약: 정부 출자사업의 대표격인 모태펀드의 존속을 둘러싼 논란 속에서 이재명 더불어민주당 대선 후보가 예산 확대와 존속기간 연장을 골자로 한 공약을 내놓으며 업계의 시선이 쏠리고 있다.                                                                                                                                                                                                       
종목명: []
url: https://n.news.naver.com/mnews/article/018/0006022208
본문 일부: 2035년 종료 앞둬, 존속 공약에 기대감
창업 초기자금 공백 완화 가능성에 업계 안도
VC, "정책 연속성 중요…불확실성 줄어야"
퇴직연금도 벤처로…BDC 등 활성화 방안 포함
이 기사는 2025년05월23일 17시10분에
마켓인 프리미엄 콘텐츠
로 선공개 되었습니다.
[이데일리 마켓in 송재민 기자] 정부 출자사업의 대표격인 모태펀드의 존속을 둘러싼 논란 속에서 이재명 더불어민주당 대선 후보가 예산 확대와 존속기간 연장을 골자로 한 공약을 내놓으며 업계의 시선이 쏠리고 있다. 모태펀드의 조기 종료 우려가 제기되던 가운데, 이번  ...

=== 뉴스 2 ===
제목: [단독] 카카오페이, 2500만 회원 쓱·스마일페이 품나…간편결제 시장 빅3 경쟁 후끈
요약: 카카오페이가 네이버페이·토스페이에 대항해 시장 점유율을 늘리려는 포석으로 네이버페이·토스페이에 대항해 시장 점유율을 늘리려는 포석으로 네이버페이·토스페이에 대항해 시장 점유율을 늘리려는 포석으로 네이버페이·토스페이에 대항해 시장 점유율을 늘리려는 포석으로 네이버페이·토스페이에 대항해 시장 점유율을 늘리려는 포석으로 네이버페이·토스페이에 대항해

In [120]:
# 종목명이 1개 이상 매칭된 뉴스의 수
matched_count = sum(1 for r in results if r['종목명'])

# 전체 뉴스 수
total_count = len(results)

# 매칭률 계산
matched_ratio = matched_count / total_count * 100

# 출력
print(f'종목명이 매칭된 뉴스 수: {matched_count}개 / {total_count}개')
print(f'매칭률: {matched_ratio:.2f}%')

종목명이 매칭된 뉴스 수: 796개 / 1000개
매칭률: 79.60%
