In [None]:
from openai import OpenAI

In [None]:
from typing import List
from pydantic import BaseModel, Field, ValidationError
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai.chat_models.base import BaseChatOpenAI

def relevance_extractor(headlines: List[str], sector: str) -> List[bool]:
    """Flag which headlines are relevant to stock movement in a sector.

    The headlines are joined with ``||`` and sent in a single prompt to the
    DeepSeek chat model, which is asked for a JSON object holding one boolean
    per headline.

    Args:
        headlines: Headlines to classify.
        sector: Sector (e.g. ``"tech"``) the headlines are judged against.

    Returns:
        One boolean per headline, in input order. An empty input returns an
        empty list without calling the model. If the reply cannot be parsed,
        fails schema validation, or has the wrong length, the error is
        printed and every headline is reported as not relevant (``False``).

    Raises:
        RuntimeError: If the ``DEEPSEEK_API_KEY`` environment variable is
            not set.
    """
    import os

    length = len(headlines)
    if length == 0:
        # Nothing to classify: skip the (billable) API round trip.
        return []

    # Credentials come from the environment, never from source control.
    api_key = os.environ.get("DEEPSEEK_API_KEY")
    if not api_key:
        raise RuntimeError("DEEPSEEK_API_KEY environment variable is not set")

    llm = BaseChatOpenAI(
        model='deepseek-chat',
        openai_api_key=api_key,
        openai_api_base='https://api.deepseek.com',
        max_tokens=1024
    )

    class Query(BaseModel):
        query: List[bool] = Field(
            ...,
            description="""List of booleans indicating whether each headline is relevant to determining 
                        if a stock in the specified sector will rise or decrease. Treat || as delimiter"""
        )

    parser = JsonOutputParser(pydantic_object=Query)

    prompt = PromptTemplate(
        template="""Assess whether each headline is relevant to determining if a stock in {sector} will rise or decrease. 
        Return a JSON list of booleans, one for each headline, indicating its relevance. Output list should be of length {length}
        {format_instructions}

        Headlines:
        {headlines}
        """,
        input_variables=["sector", "headlines", "length"],
        partial_variables={"format_instructions": parser.get_format_instructions(), },
    )

    chain = prompt | llm | parser
    try:
        # The parser raises OutputParserException (a ValueError subclass) on
        # malformed JSON, so the call itself must sit inside the try.
        llm_response = chain.invoke(
            {"sector": sector, "headlines": "||".join(headlines), "length": length}
        )
        if not isinstance(llm_response, dict):
            # The prompt asks for "a JSON list", so the model may return the
            # bare list instead of the {"query": [...]} wrapper.
            llm_response = {"query": llm_response}
        # Instantiating the model validates the payload; this is what makes
        # the ValidationError handler below reachable.
        flags = Query(**llm_response).query
        if len(flags) != length:
            raise ValueError("output list size does not match the number of headlines")
    except (ValidationError, ValueError) as e:
        print(f"Error: {e}")
        return [False] * length
    return flags

# Smoke test: classify ten hand-written sample headlines against the tech
# sector and print the resulting list of relevance flags.
sector = "tech"
headlines = [
    "Global Markets Surge Amid Positive Economic Forecast",
    "Tech Companies Report Record Profits in Q4",
    "New Study Reveals Climate Change Impact on Global Agriculture",
    "Breakthrough in AI Technology Promises Faster Data Processing",
    "Government Announces New Tax Reforms for Small Businesses",
    "Scientists Discover Potential Cure for Rare Genetic Disorder",
    "Electric Vehicles Gain Popularity as Charging Infrastructure Expands",
    "Space Agency Plans Mission to Explore Distant Exoplanet",
    "Cybersecurity Experts Warn of Rising Phishing Attacks",
    "Healthcare Innovations to Address Aging Population Challenges",
]
output = relevance_extractor(headlines, sector)
print(output)


[True, True, False, True, False, False, False, False, True, False]


In [27]:
import pandas as pd
# Classify the 'headline' values sliced [20:40] from data.csv against the
# tech sector; the trailing bare expression displays the result in the notebook.
sector = "tech"
data = pd.read_csv("data.csv")
headlines = data["headline"][20:40].tolist()
output = relevance_extractor(headlines, sector)
output

[False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]

In [22]:
from typing import List
from pydantic import BaseModel, Field, ValidationError
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai.chat_models.base import BaseChatOpenAI
import pandas as pd
def relevance_extractor(headlines: List[str], stock: str,) -> int:
    """Score the combined next-day price impact of headlines on a stock.

    The headlines are joined with ``||`` and sent in a single prompt to the
    DeepSeek chat model, which is asked for one integer from -5 to 5.
    Positive means the price is expected to rise, negative to fall, and a
    larger absolute value means higher confidence.

    Args:
        headlines: Headlines to evaluate together.
        stock: Name of the stock the headlines are judged against.

    Returns:
        An integer in ``[-5, 5]``. Returns ``0`` (neutral) for an empty
        input, without calling the model. Also returns ``0``, after printing
        the error, when the reply cannot be parsed, fails schema validation,
        or falls outside the allowed range.

    Raises:
        RuntimeError: If the ``DEEPSEEK_API_KEY`` environment variable is
            not set.
    """
    import os

    if len(headlines) == 0:
        # No evidence either way: report neutral and skip the API call.
        return 0

    # Credentials come from the environment, never from source control.
    api_key = os.environ.get("DEEPSEEK_API_KEY")
    if not api_key:
        raise RuntimeError("DEEPSEEK_API_KEY environment variable is not set")

    llm = BaseChatOpenAI(
        model='deepseek-chat',
        openai_api_key=api_key,
        openai_api_base='https://api.deepseek.com',
        max_tokens=1024,
        temperature = 1
    )

    class importance_feature(BaseModel):
        query: int = Field(
            description="""Integer from -5 to 5 indicating the importance of the news headline to the stock price"""
        )

    parser = JsonOutputParser(pydantic_object=importance_feature)

    prompt = PromptTemplate(
        template="""You are assessing the likelihood of {stock} price increasing or decreasing based on given news headlines. Output a number from -5 to 5 inclusive.
        If a news headline is likely to cause an increase in {stock} price withing the next day then the output should be positive, and if a news headline is reporting on
        something that will likely cause {stock} to decrease in price within the next day then return a negative integer. A higher absolute value indicates higher confidence.
        Do not treat each headline as equally important, if some headlines have more impact on the stock price than others, indicate that in the output.
        {format_instructions}

        Headlines:
        {headlines}
        """,
        input_variables=["stock", "headlines"],
        partial_variables={"format_instructions": parser.get_format_instructions(), },
    )

    chain = prompt | llm | parser
    try:
        # The parser raises OutputParserException (a ValueError subclass) on
        # malformed JSON, so the call itself must sit inside the try.
        llm_response = chain.invoke({"stock": stock, "headlines": "||".join(headlines), })
        if not isinstance(llm_response, dict):
            # Tolerate a bare number instead of the {"query": n} wrapper.
            llm_response = {"query": llm_response}
        # Instantiating the model validates the payload; this is what makes
        # the ValidationError handler below reachable.
        score = importance_feature(**llm_response).query
        print(score)
        if abs(score) > 5:
            raise ValueError("score is outside the expected range of -5 to 5")
    except (ValidationError, ValueError) as e:
        print(f"Error: {e}")
        return 0
    return score

# Score the 'headline' values sliced [20:40] from data.csv for their expected
# impact on Google's stock price.
stock = "google"
headlines = pd.read_csv("data.csv")["headline"][20:40].tolist()
output = relevance_extractor(headlines, stock)


-2


In [15]:
# Display the 'headline' values sliced [20:40] from data.csv as a plain list.
(
    pd.read_csv("data.csv")["headline"][20:40]
    .tolist()
)

['Wednesday Briefing: Trump Opponents Push Back',
 'How Could Trump’s Tariffs Affect the U.S., Canada and Mexico?',
 'Rubio Oversees Halt to Foreign Aid and Meets With Asian Diplomats on Day 1',
 'Fire Kills Dozens of People at Turkish Ski Resort',
 'U.S. Wind Power Faces Huge Challenges After Trump Orders a Crackdown',
 'Bimla Bissell, Ambassadors’ Aide and a Social Hub in India, Dies at 92',
 'As Trump Takes Office, Zelensky Urges Europe to Stand Firm Against Russia',
 'Trump’s Order to End E.V. Subsidies Draws Pushback and Doubt',
 '2 Families, in Business for 50 Years, Wage War in a South Korean Boardroom',
 'Trump Pardons Creator of Silk Road Drug Marketplace',
 'Trump Says He Intends to Impose 10% Tariffs on Chinese Imports on Feb. 1',
 'Taking Trump’s Tariffs Threats Seriously and Literally',
 'Sustainable Travel: Maximizing Visitors’ Positive Impact on Locals',
 'Cruises: ‘Neighborhoods,’ Private Islands and More Megaships',
 'Luxury Resorts: Far-Flung Settings, Fair Prices and