In [None]:
!python -m pip install langchain langchain-core langchain-community langchain-experimental --quiet
!python -m pip install -U langchain-google-genai duckduckgo-search --quiet
!python -m pip install -U ddgs
!python -m pip install mlflow --quiet
!python -m pip install spacy
!python -m pip install textstat
!python -m pip install python-dotenv --quiet
!python -m  pip install "langchain-experimental==0.0.29"

In [None]:
import mlflow

# Track runs in the local ./mlruns directory (replace with a remote MLflow URI
# if you have one) and group them under a single named experiment.
mlflow.set_tracking_uri(uri="file:./mlruns")
mlflow.set_experiment(experiment_name="Company Sentiment Pipeline")

In [None]:
from langchain_community.callbacks.mlflow_callback import MlflowCallbackHandler
import mlflow

# One definition of the MLflow destination, shared by the global MLflow
# configuration and the LangChain callback so the two cannot drift apart.
MLFLOW_TRACKING_URI = "file:./mlruns"
MLFLOW_EXPERIMENT_NAME = "Company Sentiment Pipeline"

# Configure MLflow
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# `name` is the handler's *run* name; the experiment is a separate `experiment`
# argument that defaults to "langchain". Pass it explicitly so LangChain traces
# are logged under the same experiment that is configured above.
mlflow_callback = MlflowCallbackHandler(
    name="Company Sentiment Pipeline",
    experiment=MLFLOW_EXPERIMENT_NAME,
    tracking_uri=MLFLOW_TRACKING_URI,
)

# Callback list to hand to LangChain invocations that should be traced.
callbacks = [mlflow_callback]


In [None]:
#!python -m spacy download en_core_web_sm

## Step 1 — Model & Parser Setup

In [None]:
from langchain.chat_models import init_chat_model
#from langchain_core.output_parsers import StructuredOutputParser
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
#from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel ,Field
from langchain_community.tools import DuckDuckGoSearchRun


from dotenv import load_dotenv
import os

# Read variables from a .env file into the process environment.
load_dotenv()

# Google API key for the chat model; None when GOOGLE_API_KEY is not set.
google_api_key = os.environ.get("GOOGLE_API_KEY")

# Chat model used by the chains defined in this notebook.
model_name = "gemini-2.0-flash"
model = init_chat_model(
    model=model_name,
    model_provider="google_genai",
    google_api_key=google_api_key,
)

# DuckDuckGo search tool used to fetch news text.
news_tool = DuckDuckGoSearchRun()


## Step 2 — Get Stock Code

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Prompt that asks for nothing but the ticker symbol of the given company
# ('N/A' when the company is not public).
symbol_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a financial assistant. Given a company name, return only its stock ticker symbol (like AAPL for Apple Inc). "
            "If not public, return 'N/A'.",
        ),
        ("user", "{company_name}"),
    ]
)

# prompt -> chat model -> plain string
symbol_chain = symbol_prompt.pipe(model, StrOutputParser())


## Step 3 — Fetch Recent Company News

In [None]:
# Prompt that turns a company name and ticker into a short news search query.
search_query_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Generate a short search query to get recent news about the company {company_name} ({stock_code}).",
        ),
        ("user", "Make the query concise and specific to company news only."),
    ]
)

# prompt -> chat model -> plain string
search_query_chain = search_query_prompt.pipe(model, StrOutputParser())


## Step 4 — Sentiment + Entity Analysis

In [None]:
class NewsSentiment(BaseModel):
    company_name: str = Field(..., description="Company full name")
    stock_code: str = Field(..., description="Ticker symbol of the company")
    newsdesc: str = Field(..., description="Short summary of the news headlines/articles")
    sentiment: str = Field(..., description="Overall sentiment: Positive, Negative, or Neutral")
    people_names: list[str] = Field(default_factory=list, description="List of people mentioned")
    places_names: list[str] = Field(default_factory=list, description="List of places mentioned")
    other_companies_referred: list[str] = Field(default_factory=list, description="Other companies mentioned")
    related_industries: list[str] = Field(default_factory=list, description="Industries related to the news")
    market_implications: str = Field(..., description="Implications for the market/stock price")
    confidence_score: float = Field(..., description="Confidence score between 0.0 and 1.0")

output_parser = StrOutputParser(pydantic_object=NewsSentiment)


In [None]:
analysis_prompt = ChatPromptTemplate([
    ("system",
     "Analyze the given news text and produce a structured sentiment profile strictly as JSON in the requested schema."),
    ("user",
     "Company: {company_name}\n"
     "Stock: {stock_code}\n"
     "News:\n{news_text}")
])

analysis_chain = analysis_prompt | model | output_parser


## Step 5 — Combine Pipeline

In [None]:
def run_company_sentiment(company_name: str):
    """Run the end-to-end news sentiment pipeline for one company.

    The pipeline resolves the ticker with ``symbol_chain``, builds a search
    query with ``search_query_chain``, fetches news text with ``news_tool`` and
    finally analyses that text with ``analysis_chain``.

    Args:
        company_name: Name of the company to analyse. Surrounding whitespace
            is ignored.

    Returns:
        Whatever ``analysis_chain`` produces for the fetched news.

    Raises:
        ValueError: If ``company_name`` is not a string or is blank.
    """
    if not isinstance(company_name, str) or not company_name.strip():
        raise ValueError("company_name must be a non-empty string")
    company_name = company_name.strip()

    # Step 1: get stock symbol. LLM replies can carry stray whitespace or a
    # trailing newline, so trim before feeding the value into later prompts.
    stock_code = symbol_chain.invoke({"company_name": company_name}).strip()

    # Step 2: create search query (trimmed for the same reason)
    query = search_query_chain.invoke(
        {"company_name": company_name, "stock_code": stock_code}
    ).strip()

    # Step 3: get news
    news_results = news_tool.invoke(query)

    # Step 4: analyze
    return analysis_chain.invoke({
        "company_name": company_name,
        "stock_code": stock_code,
        "news_text": news_results,
    })


## Test the Pipeline

In [None]:
# Company to analyse in the test run.
company_name = "Apple Inc"

In [None]:
# with mlflow.start_run(run_name=company_name):
#     stock_code = symbol_chain.invoke({"company_name": company_name}, callbacks=callbacks)
#     query = search_query_chain.invoke({"company_name": company_name, "stock_code": stock_code}, callbacks=callbacks)
#     news_results = news_tool.invoke(query)
#     result = analysis_chain.invoke({
#         "company_name": company_name,
#         "stock_code": stock_code,
#         "news_text": news_results
#     }, callbacks=callbacks)

In [None]:
# Run the whole pipeline for the selected company and show what it returned.
result = run_company_sentiment(company_name=company_name)
print(result)