In [None]:
!pip -q install -U chromadb yfinance pandas feedparser groq
!pip -q install scipy sentence-transformers rfc3987



In [None]:
import getpass
import os
import re
import urllib.parse
from datetime import datetime, timedelta

import chromadb
import feedparser
import pandas as pd
import yfinance as yf
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer



In [None]:
# Groq client setup
from groq import Groq

# Reuse a key that is already present in the environment so that re-running
# the cell (or "Restart & Run All") does not prompt again; only ask when the
# variable is missing or blank.
_groq_api_key = os.environ.get("GROQ_API_KEY", "").strip()
if not _groq_api_key:
    _groq_api_key = getpass.getpass("Enter Groq API key (gsk-...): ").strip()
if not _groq_api_key:
    # Fail here with a clear message instead of at the first chat request.
    raise ValueError("A Groq API key is required to run the chatbot.")
os.environ["GROQ_API_KEY"] = _groq_api_key

groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
GROQ_MODEL = "llama-3.3-70b-versatile"

print("‚úÖ Groq ready:", GROQ_MODEL)

In [None]:
# Sentence encoder shared by ingestion (documents) and retrieval (queries).
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("‚úÖ Embedding model loaded")

# Chroma client with anonymized telemetry switched off, plus the collection
# that holds the price documents (created on first use, reused afterwards).
_chroma_settings = Settings(anonymized_telemetry=False)
chroma_client = chromadb.Client(_chroma_settings)
collection = chroma_client.get_or_create_collection(name="market_rag_groq_news")
print("‚úÖ ChromaDB collection ready")


In [None]:
# Lowercase display name -> symbol handed to yfinance when downloading prices.
# Insertion order matters: it is the order in which names are scanned when
# looking for a stock mention in a user query.
TICKERS = dict([
    # Indices
    ("nifty", "^NSEI"),
    ("banknifty", "^NSEBANK"),
    # Individual stocks
    ("reliance", "RELIANCE.NS"),
    ("tcs", "TCS.NS"),
    ("infosys", "INFY.NS"),
    ("hdfcbank", "HDFCBANK.NS"),
    ("icicibank", "ICICIBANK.NS"),
    ("itc", "ITC.NS"),
    ("sbin", "SBIN.NS"),
    ("bhartiartl", "BHARTIARTL.NS"),
])
print("‚úÖ Stocks:", list(TICKERS))


In [None]:
def fetch_price_df(ticker, days=250):
    """Download recent daily price history for one ticker.

    Args:
        ticker: Symbol passed to ``yf.download`` (e.g. ``"TCS.NS"``).
        days: Size of the look-back window in calendar days, ending today.

    Returns:
        A DataFrame with a string ``Date`` column followed by the price
        columns under flat, single-level names, or ``None`` when the download
        produced no rows.
    """
    end = datetime.today()
    start = end - timedelta(days=days)

    df = yf.download(ticker, start=start, end=end, progress=False)
    if df is None or df.empty:
        return None

    # yfinance can return two-level columns of the form (field, ticker) even
    # for a single symbol. Keep only the field level so that row["Open"],
    # row["Date"], ... are plain scalars for every consumer downstream.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df = df.reset_index()
    df["Date"] = df["Date"].astype(str)
    return df


In [None]:
def _first_scalar(value):
    """Return ``value.iloc[0]`` for pandas containers, else ``value`` itself."""
    return value.iloc[0] if hasattr(value, "iloc") else value


def row_to_doc(name, ticker, row):
    """Render one daily price row as a single-line text document.

    Args:
        name: Display name of the instrument; upper-cased in the output.
        ticker: Ticker symbol shown in parentheses after the name.
        row: Mapping-like row (typically a pandas Series) exposing ``Date``,
            ``Open``, ``Close``, ``High``, ``Low`` and ``Volume``. Each field
            may be a scalar or a one-element pandas container, which is what
            a row indexed by two-level column labels yields.

    Returns:
        A string with the date, OHLC prices, volume and the open-to-close
        percentage change (reported as 0 when the open price is 0).
    """
    # Unwrap the date the same way as the numeric fields; otherwise a
    # one-element Series would be stringified into the document.
    date = str(_first_scalar(row["Date"]))

    open_p = float(_first_scalar(row["Open"]))
    close_p = float(_first_scalar(row["Close"]))
    high = float(_first_scalar(row["High"]))
    low = float(_first_scalar(row["Low"]))
    vol = float(_first_scalar(row["Volume"]))

    # Guard against division by zero for rows with a zero open price.
    change = ((close_p - open_p) / open_p) * 100 if open_p != 0 else 0

    return (
        f"{name.upper()} ({ticker}) on {date}: "
        f"Open={open_p:.2f}, Close={close_p:.2f}, High={high:.2f}, Low={low:.2f}, "
        f"Volume={vol:.0f}, Change={change:.2f}%"
    )


In [None]:
def ingest_prices(days=250, keep_last_rows=160):
    """Fetch prices for every entry in ``TICKERS`` and store them in ChromaDB.

    One text document is produced per ticker per day, embedded with
    ``embed_model`` and written to ``collection`` under the id
    ``"<ticker>_<date>"``, so running the function again overwrites existing
    entries instead of failing on duplicate ids.

    Args:
        days: Look-back window (calendar days) passed to ``fetch_price_df``.
        keep_last_rows: Number of most recent rows kept per ticker.
    """
    docs, ids, metas = [], [], []

    for name, ticker in TICKERS.items():
        df = fetch_price_df(ticker, days=days)
        if df is None or df.empty:
            print(f"‚ö†Ô∏è No data for {name} ({ticker})")
            continue

        for _, row in df.tail(keep_last_rows).iterrows():
            # The date may arrive as a one-element pandas container when the
            # frame has two-level column labels; unwrap it so ids and
            # metadata hold a clean date string.
            date = row["Date"]
            if hasattr(date, "iloc"):
                date = date.iloc[0]
            date = str(date)

            docs.append(row_to_doc(name, ticker, row))
            ids.append(f"{ticker}_{date}")
            metas.append({
                "name": str(name),
                "ticker": str(ticker),
                "date": date,
            })

    if not docs:
        # Writing empty lists to the collection is an error; report and stop.
        print("No price data fetched; nothing was ingested.")
        return

    # Encode all documents in one batched call rather than one call per row.
    embs = embed_model.encode(docs).tolist()

    # upsert inserts new ids and overwrites existing ones, replacing the
    # previous delete-then-add sequence and its silently swallowed errors.
    collection.upsert(
        documents=docs,
        ids=ids,
        embeddings=embs,
        metadatas=metas
    )

    print(f"‚úÖ Ingested {len(docs)} price docs into ChromaDB")


ingest_prices(days=250, keep_last_rows=160)


In [None]:
def retrieve_price_context(query, top_k=6):
    """Look up the stored price documents closest to ``query``.

    Args:
        query: Free-text question; embedded with ``embed_model``.
        top_k: Number of results requested from the collection.

    Returns:
        A ``(context, docs, metas)`` tuple: the matched documents joined into
        a ``- ``-prefixed bullet list, the raw document strings, and their
        metadata entries.
    """
    query_vector = embed_model.encode(query).tolist()
    hits = collection.query(query_embeddings=[query_vector], n_results=top_k)

    # The result holds one list per query embedding; exactly one was sent.
    matched_docs = hits["documents"][0]
    matched_metas = hits["metadatas"][0]

    bullet_lines = [f"- {d}" for d in matched_docs]
    return "\n".join(bullet_lines), matched_docs, matched_metas


In [None]:
def fetch_news_rss(search_query, max_items=7):
    """Fetch Google News RSS headlines for a search phrase.

    Args:
        search_query: Search text; URL-quoted before being placed in the URL.
        max_items: Maximum number of feed entries to include.

    Returns:
        One ``- title (published)`` line per entry joined by newlines, with
        ``"unknown time"`` used when an entry has no ``published`` attribute,
        or ``"No news headlines found."`` when the feed has no entries.
    """
    q = urllib.parse.quote(search_query)
    url = f"https://news.google.com/rss/search?q={q}&hl=en-IN&gl=IN&ceid=IN:en"
    entries = feedparser.parse(url).entries

    # Nothing came back: return a fixed sentence rather than an empty string.
    if not entries:
        return "No news headlines found."

    def headline(e):
        title = e.title
        published = getattr(e, "published", "unknown time")
        return f"- {title} ({published})"

    return "\n".join(headline(e) for e in entries[:max_items])


In [None]:
def detect_stock_name(query: str):
    """Return the first ``TICKERS`` name mentioned in ``query`` as a whole word.

    Matching is case-insensitive and anchored on word boundaries, so a name
    embedded in a longer word does not count: "banknifty" is not reported as
    "nifty", and "switch" is not reported as "itc".

    Args:
        query: Free-text user question.

    Returns:
        The matching key of ``TICKERS`` (in ``TICKERS`` iteration order), or
        ``None`` when the query is empty or mentions no known name.
    """
    if not query:
        return None

    q = query.lower()
    for name in TICKERS:
        if re.search(rf"\b{re.escape(name)}\b", q):
            return name
    return None


In [None]:
def market_chatbot(query, top_k=6):
    """Answer a market question using retrieved prices plus news headlines.

    Args:
        query: The user's question.
        top_k: Number of price documents to retrieve as evidence.

    Returns:
        The text content of the first choice in the Groq chat completion.
    """
    # 1) Price evidence: only the formatted context string is needed here.
    price_context = retrieve_price_context(query, top_k=top_k)[0]

    # 2) Headlines: search by stock name when one is recognised in the
    #    question, otherwise fall back to the question text itself.
    stock_name = detect_stock_name(query)
    news_query = (
        f"{stock_name} share price fall reason" if stock_name else query + " reason"
    )
    news_context = fetch_news_rss(news_query, max_items=7)

    # 3) Generation: both contexts are embedded in a single user prompt.
    prompt = f"""
You are a finance analytical chatbot.

You have 2 sources:
A) PRICE CONTEXT (from ChromaDB)
B) NEWS CONTEXT (Google News headlines)

QUESTION:
{query}

PRICE CONTEXT:
{price_context}

NEWS CONTEXT:
{news_context}

Rules:
- Use NEWS CONTEXT to explain the reason.
- Use PRICE CONTEXT to provide evidence (dates, %change).
- If news is unclear, say "No clear reason from headlines."
- Do NOT hallucinate.

Answer format:
1) Reason (news-based)
2) Price evidence (dates/%)
3) Conclusion
"""

    chat_messages = [
        {"role": "system", "content": "You are a helpful financial analyst chatbot."},
        {"role": "user", "content": prompt},
    ]
    completion = groq_client.chat.completions.create(
        model=GROQ_MODEL,
        messages=chat_messages,
        temperature=0.2,
        max_tokens=600,
    )
    return completion.choices[0].message.content


In [None]:
# Interactive chat loop: read a question, answer it, repeat until "exit".
print("‚úÖ Market Chatbot Ready (type 'exit' to stop)\n")

while True:
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt at the prompt ends the session cleanly
        # instead of surfacing a traceback.
        print("\nBye üëã")
        break

    if query.lower() == "exit":
        print("Bye üëã")
        break

    # Ignore blank input rather than sending an empty question to the model.
    if not query:
        continue

    try:
        ans = market_chatbot(query)
    except Exception as exc:
        # A failed request (network, API limits, ...) should not end the chat;
        # report it and let the user try again.
        print(f"\nBot error: {exc}\n")
        continue

    print("\nBot:\n", ans, "\n")
