In [None]:
import os 
from langchain_ollama import OllamaLLM
# Handle to the gemma3:12b model via Ollama; later cells re-create `llm` with their own settings.
llm = OllamaLLM(model="gemma3:12b")

# Provide the SerpAPI key through the environment for the search wrapper used below.
# setdefault keeps a real key that is already exported in the shell instead of
# overwriting it with this placeholder; replace the placeholder if no key is exported.
os.environ.setdefault('SERPAPI_API_KEY', 'serpapi key')

In [None]:
from langchain_community.utilities import SerpAPIWrapper

# Search Google News through SerpAPI, with country set to KR and language to ko.
params = dict(engine="google_news", gl="KR", hl="ko")
search = SerpAPIWrapper(params=params)

# Trial query; as the last expression of the cell its result is displayed.
search.run("이차전지 산업")

In [None]:
# Run the same query again, this time keeping the response for the cells below.
results = search.run("이차전지 산업")
# Size of the response (number of hits, assuming `results` is a list — TODO confirm).
len(results)

In [None]:
# Peek at the first search hit.
results[0]

In [None]:
# List the fields available on a hit (the following cells rely on 'link').
results[0].keys()

In [None]:
# Address of the first hit; the next cell loads this page.
url = results[0]['link']
url

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Load the page at `url`; `docs` holds the loaded documents.
loader = WebBaseLoader(url)
docs = loader.load()

# Show the first loaded document.
docs[0]

In [None]:
def extract_longest_text(text):
    """Return the longest newline-delimited segment of ``text``.

    The text is split on ``'\\n'`` only and the segment is returned as-is
    (no stripping). When several segments share the maximum length the
    first one wins; an empty string yields an empty string.
    """
    return max(text.split('\n'), key=len)

# Raw text of the first loaded document.
text = docs[0].page_content

# Keep only its longest line and print it for inspection.
longest_text = extract_longest_text(text)
print("가장 긴 텍스트:\n", longest_text)

In [None]:
from tqdm import tqdm
import time 

# Load every search hit's page and attach its longest text segment as 'content'.
# Hits whose page cannot be loaded are skipped (best-effort crawl).
new_results = []
for result in tqdm(results):
    try:
        url = result['link']
        loader = WebBaseLoader(url)
        docs = loader.load()
        text = docs[0].page_content
        # Store the text extracted from THIS page. Assigning to a differently
        # cased name here would leave every row with the stale `longest_text`
        # computed in an earlier cell.
        result['content'] = extract_longest_text(text)
    except Exception as exc:
        # Report the failure instead of hiding it; catching Exception (not a
        # bare except) also lets Ctrl-C interrupt a long crawl.
        tqdm.write(f"skipped {result!r:.80}: {exc!r}")
        continue
    new_results.append(result)

print("새로운 결과 개수 : ", len(new_results))
# Show the first enriched hit, or nothing when every page failed to load.
new_results[0] if new_results else None


In [None]:
import pandas as pd
# Tabulate the enriched hits (one row per entry of new_results) and preview them.
data=pd.DataFrame(new_results)
data.head()

## 뉴스 요약 

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaLLM

from langchain_core.output_parsers import StrOutputParser

# Prompt: asks (in Korean) for a concise Korean summary of {text}, with rules
# to output only the summary, no English sentences, explanations or greetings.
prompt_template = """다음 내용을 한국어(한글)로 간결하게 요약하라.

규칙:
- 요약 내용만 출력한다
- 영어 문장, 설명, 인사말을 포함하지 않는다
- 불필요한 문구 없이 바로 요약문만 작성한다

내용:
{text}
"""
prompt = PromptTemplate.from_template(prompt_template)

# LLM: re-created here, rebinding the notebook-level `llm`.
llm = OllamaLLM(model="gemma3:12b")

# Output parser applied to the model output.
output_parser = StrOutputParser()

# Chain: prompt -> model -> parser. `llm_chain` is rebound by the later
# keyword and category cells.
llm_chain = prompt | llm | output_parser

# Try the chain on the first article body.
response = llm_chain.invoke({"text": data['content'][0]})

response

In [None]:
def summarize_news(content):
    """Run ``content`` through the module-level ``llm_chain`` and return its output.

    ``llm_chain`` is looked up at call time, so the result depends on which
    chain is bound to that name when this function runs; other cells in this
    notebook rebind it.
    """
    return llm_chain.invoke({"text": content})

# Work on an independent copy of the first three rows: adding a column to the
# bare `data.head(3)` slice triggers pandas' SettingWithCopyWarning and leaves
# it ambiguous whether `data` is affected.
df_test = data.head(3).copy()
df_test['summary'] = df_test['content'].apply(summarize_news)

df_test

In [None]:
# Show each article body next to its generated summary.
df_test[['content', 'summary']]

## 키워드 추출

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaLLM
from langchain_core.output_parsers import StrOutputParser

# Prompt: asks for three comma-separated Korean keywords for {text}.
# (Fixed the "form" -> "from" typo in the instruction sent to the model.)
prompt_template = """Please extract 3 key words from the following content in Korean Hangul(한글) and separate them with commas (,) 
"{text}" 
Key words :"""

prompt = PromptTemplate.from_template(prompt_template)

# LLM: temperature 0 so keyword output is as repeatable as the model allows.
llm = OllamaLLM(temperature = 0, model="gemma3:12b")

# Output parser applied to the model output.
output_parser = StrOutputParser()

# Chain: prompt -> model -> parser. This rebinds the notebook-level
# `llm_chain`, which the helper functions look up at call time.
llm_chain = prompt | llm | output_parser

# Try the chain on the first article body.
response = llm_chain.invoke({"text": data['content'][0]})

response

In [None]:
def extract_keywords(content):
    """Extract key words from a news body using the module-level ``llm_chain``.

    ``content`` is passed as the ``text`` input of the chain and the chain's
    output is returned unchanged. ``llm_chain`` is looked up at call time, so
    the keyword chain must be the one currently bound to that name.
    """
    return llm_chain.invoke({"text": content})

# Check the result: extract keywords only for the rows of the small test frame df_test.
df_test['keywords'] = df_test['content'].apply(extract_keywords)

# Show body, summary and keywords side by side.
df_test[['content', 'summary' , 'keywords']]

## 뉴스 카테고리 분류

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaLLM
from langchain_core.output_parsers import StrOutputParser 

# Prompt: asks for a single Korean category name for {text}, without description.
# (Fixed the "approproiate" -> "appropriate" typo in the instruction sent to the model.)
prompt_template = """Based on the following content, please classify the news into the appropriate category and provide the category name in Korean without description:
"{text}"
News Category:"""

prompt = PromptTemplate.from_template(prompt_template)

# LLM: temperature 0 so the category output is as repeatable as the model allows.
llm = OllamaLLM(temperature = 0, model="gemma3:12b")

# Output parser applied to the model output.
output_parser = StrOutputParser()

# Chain: prompt -> model -> parser. This rebinds the notebook-level
# `llm_chain`, which the helper functions look up at call time.
llm_chain = prompt | llm | output_parser

# Try the chain on the first article body.
response = llm_chain.invoke({"text": data['content'][0]})

response

In [None]:
def classify_news_category(content):
    """Classify a news body into a category using the module-level ``llm_chain``.

    ``content`` is passed as the ``text`` input of the chain and the chain's
    output is returned unchanged. ``llm_chain`` is looked up at call time, so
    the category chain must be the one currently bound to that name.
    """
    return llm_chain.invoke({"text": content})

# Check the result: classify only the rows of the small test frame df_test.
df_test['category'] = df_test['content'].apply(classify_news_category)

# Show body, summary, keywords and category side by side.
df_test[['content', 'summary', 'keywords', 'category']]