In [11]:
from db.fetch import fetch_table


fetch_table("news_articles")

[(1,
  1,
  'Heavy rains expected to boost umbrella sales in Mumbai',
  'Analysts suggest seasonal demand will drive short-term stock gains for umbrella companies.',
  'https://dummynews.com/umbrella-sales',
  None,
  None,
  None,
  datetime.datetime(2025, 10, 3, 10, 26, 37, 277915)),
 (101,
  1,
  'Heavy rains expected to boost umbrella sales in Mumbai',
  'Analysts suggest seasonal demand will drive short-term stock gains for umbrella companies.',
  'https://dummynews.com/umbrella-sales',
  None,
  3,
  0.23,
  datetime.datetime(2025, 10, 3, 10, 30, 47, 356111)),
 (401,
  1,
  'Heavy rains expected to boost umbrella sales in Mumbai',
  'Analysts suggest seasonal demand will drive short-term stock gains for umbrella companies.',
  'https://dummynews.com/umbrella-sales',
  None,
  101,
  0.0,
  datetime.datetime(2025, 10, 3, 11, 16, 25, 73088)),
 (301,
  1,
  'Heavy rains expected to boost umbrella sales in Mumbai',
  'Analysts suggest seasonal demand will drive short-term stock gains

In [None]:
from langgraph.graph import StateGraph, START, END, MessagesState
from db.insertion import insert_dummy_article
from agents.categorizer import categorize_node


class State(MessagesState):
    article_id: int
    title: str
    content: str
    summary: str = ""  # optional field


if __name__ == "__main__":

    # Step 1: insert dummy article
    # article_id = insert_dummy_article()
    article_id = 401

    # Step 2: build LangGraph workflow
    graph = StateGraph(State)
    graph.add_node("categorize", categorize_node)
    graph.add_edge(START, "categorize")
    graph.add_edge("categorize", END)

    graph = graph.compile()

    # Step 3: run categorization
    state = State(
        article_id=article_id,
        title="Heavy rains expected to boost umbrella sales in Mumbai",
        content="Analysts suggest seasonal demand will drive short-term stock gains for umbrella companies."
    )

    result = graph.invoke(state, start="categorize")
    print("✅ Categorization Result:", result)


✅ Dummy article inserted with ID 401
✅ Categorization Result: {'messages': [], 'article_id': 401, 'title': 'Heavy rains expected to boost umbrella sales in Mumbai', 'content': 'Analysts suggest seasonal demand will drive short-term stock gains for umbrella companies.'}


In [10]:
from db.insertion import save_category

save_category(101,"Seasonal", 0.23)

In [1]:
from agents.categorizer import categorize_node
categorize_node()

2025-10-03 17:12:39,320 - INFO - Using direct username/password for MLflow authentication
2025-10-03 17:12:41,014 - INFO - Successfully retrieved auth token


----------------LLM RESPONSE----------:  {
  "category": "Seasonal",
  "confidence": 0.95
}
----------------article_id----------:  101
----------------LLM category----------:  Seasonal
----------------LLM confidence----------:  0.95
(3,)
3
Seasonal
0.95


{'category': 'Seasonal', 'confidence': 0.95}

In [None]:
from llm_node import llm


resp = llm.invoke("Explain why umbrellas are seasonal stocks in India.")
print("Gemini response:", resp.content)



Gemini response: Umbrellas are indeed seasonal stocks in India due to several reasons:

1. **Monsoon Season**: India experiences a heavy monsoon season from June to September, which is the primary reason umbrellas are in high demand during this period. The monsoon rains can be intense and unpredictable, making umbrellas a necessity for daily commuters, students, and anyone stepping out of their homes.

2. **Variation in Demand**: The demand for umbrellas is significantly lower during the other months of the year. From October to May, India experiences a mix of winter, summer, and post-monsoon seasons, but the rainfall is minimal compared to the monsoon months. Therefore, the need for umbrellas decreases, making them seasonal stocks.

3. **Storage and Inventory Management**: Due to the variation in demand, it's not cost-effective for retailers to maintain a large inventory of umbrellas throughout the year. Instead, they stock up during the monsoon season to meet the high demand and clea

In [None]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from mlflow_client import mlflow_client

mlflow_client()
# Generate toy dataset
X, y = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)


with mlflow.start_run():
    # Log parameters
    mlflow.log_param("fit_intercept", model.fit_intercept)
    
    # Log metric
    mlflow.log_metric("mse", mse)
    
    # Log the model
    mlflow.sklearn.log_model(model, artifact_path="model")

    print("Run ID:", mlflow.active_run().info.run_id)

mlflow.end_run()


KeyboardInterrupt: 

In [10]:

from dotenv import load_dotenv  
import yaml
import os

from langchain_google_genai import ChatGoogleGenerativeAI

# Load env + config
load_dotenv()
with open("config.yaml") as f:
    config = yaml.safe_load(f)

# Use Gemini API key from env
gemini_api_key = "AIzaSyBO-UXBTE0FM7n_Vc1asu9T7Vf_pLw-Yks"

# Initialize Gemini LLM

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",  # or "gemini-1.5-flash-latest"
    convert_system_message_to_human=True,
    api_key=gemini_api_key
)

E0000 00:00:1759511489.628297 10502339 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [None]:
# from langchain_core.prompts import ChatPromptTemplate
import json
# from db.insertion import save_category
# from mlflow_client import mlflow_client
# from llm_node import llm

# mlflow_client()

def json_formatter(llm_response):
    import json
    import re
    # Regex to extract the first {...} block
    match = re.search(r"\{.*\}", llm_response, re.DOTALL)
    if match:
        json_str = match.group()
        data = json.loads(json_str)  # Convert to Python dict
        print(data)                  # {'category': 'Seasonal', 'confidence': 0.95}
    else:
        print("No JSON found")

def categorize_node():
    """
    LangGraph node: classify article and store in DB
    state: {article_id, title, content}
    """


    # article_id = state["article_id"]
    # title = state["title"]
    # content = state.get("content", "")

    article_id = 101
    title = "Heavy rains expected to boost umbrella sales in Mumbai"
    content = "Analysts suggest seasonal demand will drive short-term stock gains for umbrella companies."
    

    prompt_template = """
        You are a financial news classifier.
        Task: Given a headline and article body, return the most relevant category.

        Categories: Finance, Economy, Seasonal, Sports, Politics, Global, Other

        Respond in JSON:
        {{
        "category": "<one of the categories>",
        "confidence": <0.0 - 1.0>
        }}

        Title: {title}
        Body: {body}
    """

    prompt = prompt_template.format(title=title, body=content)

    response = llm.invoke(prompt)  # single step call
    print("----------------LLM RESPONSE----------: ", response.content)
    try:
        
        result = json.loads(response.content)
        category = result["category"]
        confidence = float(result["confidence"])
    except Exception:
        category, confidence = "Other", 0.0

    # Save in Postgres
    print("----------------article_id----------: ", article_id)
    print("----------------LLM category----------: ", category)
    print("----------------LLM confidence----------: ", confidence)
    # save_category(article_id, category, confidence)

    return {"category": category, "confidence": confidence}


if __name__ == "__main__":
    categorize_node()



----------------LLM RESPONSE----------:  ```json
{
"category": "Seasonal",
"confidence": 0.95
}
```
----------------article_id----------:  101
----------------LLM category----------:  Other
----------------LLM confidence----------:  0.0


In [9]:
import google.generativeai as genai

genai.configure(api_key=gemini_api_key)
for m in genai.list_models():
    print(m.name)


E0000 00:00:1759511453.152347 10502339 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


models/embedding-gecko-001
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-05-20
models/gemini-2.5-flash
models/gemini-2.5-flash-lite-preview-06-17
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-preview-image-generation
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/learnlm-2.0-flash-experimental
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
mo