In [1]:
!pip install langchain langchain-huggingface transformers yfinance pandas torch accelerate sentencepiece
!pip install peft trl datasets pandas  bitsandbytes
!pip install chromadb
!pip install langchain-chroma
!pip install sentence-transformers

Collecting langchain-huggingface
  Downloading langchain_huggingface-0.3.1-py3-none-any.whl.metadata (996 bytes)
Collecting langchain-core<1.0.0,>=0.3.66 (from langchain)
  Downloading langchain_core-0.3.78-py3-none-any.whl.metadata (3.2 kB)
Collecting huggingface-hub>=0.33.4 (from langchain-huggingface)
  Downloading huggingface_hub-0.35.3-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collectin

In [3]:
import os
from langchain_huggingface import HuggingFacePipeline,HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain.agents import AgentExecutor, create_react_agent
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    TrainingArguments,
    BitsAndBytesConfig
)
from trl import SFTConfig, SFTTrainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
from kaggle_secrets import UserSecretsClient
import wandb
from langchain_chroma import Chroma
from langchain.schema import Document

In [4]:
finetuned_model = AutoModelForCausalLM.from_pretrained(
    "/kaggle/input/phi-3/pytorch/default/1",
    device_map="auto",
)

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [5]:
tokenizer = AutoTokenizer.from_pretrained("/kaggle/input/phi-3/pytorch/default/1")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [6]:
pipe = pipeline(
    "text-generation",
    model=finetuned_model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.05,
    do_sample=True,
    return_full_text=False
)

Device set to use cuda:0


In [8]:
llm = HuggingFacePipeline(pipeline=pipe)

In [9]:
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
vectorstore = Chroma(
    collection_name="financial_advisor",
    embedding_function=embeddings,
    persist_directory="./financial_vectordb"
)

In [43]:
import hashlib

In [50]:
def fetch_and_store_stock_data(ticker_symbol):
    ticker = yf.Ticker(ticker_symbol)
    documents = []

    info = ticker.info
    stock_content = f"""
    {info.get('longName', ticker_symbol)} ({ticker_symbol})
    Current Price: ${info.get('currentPrice', 'N/A')}
    Market Cap: ${info.get('marketCap', 'N/A')}
    PE Ratio: {info.get('trailingPE', 'N/A')}
    52 Week High: ${info.get('fiftyTwoWeekHigh', 'N/A')}
    52 Week Low: ${info.get('fiftyTwoWeekLow', 'N/A')}
    Sector: {info.get('sector', 'N/A')}
    Industry: {info.get('industry', 'N/A')}
    """
    stock_id = hashlib.sha256(stock_content.encode("utf-8")).hexdigest()
    documents.append(Document(
        page_content=stock_content.strip(),
        metadata={
            "id": stock_id,
            "type": "stock_info",
            "ticker": ticker_symbol,
            "company": info.get('longName', ticker_symbol),
            "timestamp": datetime.utcnow().isoformat()
        }
    ))

    history = ticker.history(period="1mo")
    if not history.empty:
        price_content = f"""
        {ticker_symbol} price history (last 30 days):
        Starting price: ${history['Close'].iloc[0]:.2f}
        Current price: ${history['Close'].iloc[-1]:.2f}
        Highest: ${history['High'].max():.2f}
        Lowest: ${history['Low'].min():.2f}
        Average volume: {history['Volume'].mean():.0f}
        """
        price_id = hashlib.sha256(price_content.encode("utf-8")).hexdigest()
        documents.append(Document(
            page_content=price_content.strip(),
            metadata={
                "id": price_id,
                "type": "price_history",
                "ticker": ticker_symbol,
                "period": "1mo",
                "timestamp": datetime.utcnow().isoformat()
            }
        ))

    news = ticker.news
    for article in news[:50]:
        title = article.get('title', '')
        summary = article.get('summary', '')
        link = article.get('link', '')
        news_content = f"{title}. {summary}"
        news_id = hashlib.sha256(news_content.encode("utf-8")).hexdigest()
        documents.append(Document(
            page_content=news_content.strip(),
            metadata={
                "id": news_id,
                "type": "news",
                "ticker": ticker_symbol,
                "link": link,
                "timestamp": datetime.utcnow().isoformat()
            }
        ))
    vectorstore.add_documents(documents)


In [48]:
top_mncs_200 = {
    "Apple": "AAPL",
    "Microsoft": "MSFT",
    "Amazon.com": "AMZN",
    "Alphabet (Class A)": "GOOGL",
    "Alphabet (Class C)": "GOOG",
    "Nvidia": "NVDA",
    "Berkshire Hathaway": "BRK.B",
    "Meta Platforms": "META",
    "Tesla": "TSLA",
    "Johnson & Johnson": "JNJ",
    "JPMorgan Chase": "JPM",
    "Visa": "V",
    "Procter & Gamble": "PG",
    "UnitedHealth Group": "UNH",
    "ExxonMobil": "XOM",
    "Mastercard": "MA",
    "Home Depot": "HD",
    "Pfizer": "PFE",
    "Chevron": "CVX",
    "Merck & Co.": "MRK",
    "Coca-Cola": "KO",
    "PepsiCo": "PEP",
    "AbbVie": "ABBV",
    "Walmart": "WMT",
    "Broadcom": "AVGO",
    "Cisco Systems": "CSCO",
    "Adobe": "ADBE",
    "Oracle": "ORCL",
    "Salesforce": "CRM",
    "Comcast": "CMCSA",
    "Intel": "INTC",
    "Verizon Communications": "VZ",
    "AT&T": "T",
    "McDonald’s": "MCD",
    "Costco Wholesale": "COST",
    "Netflix": "NFLX",
    "Walt Disney": "DIS",
    "Nike": "NKE",
    "Goldman Sachs": "GS",
    "Morgan Stanley": "MS",
    "American Express": "AXP",
    "Qualcomm": "QCOM",
    "Texas Instruments": "TXN",
    "IBM": "IBM",
    "Caterpillar": "CAT",
    "Lockheed Martin": "LMT",
    "General Motors": "GM",
    "Ford Motor": "F",
    "Boeing": "BA",
    "3M": "MMM",
    "Medtronic": "MDT",
    "Eli Lilly": "LLY",
    "Abbott Laboratories": "ABT",
    "Honeywell": "HON",
    "Union Pacific": "UNP",
    "Raytheon Technologies": "RTX",
    "Dow Inc.": "DOW",
    "Philip Morris International": "PM",
    "Colgate-Palmolive": "CL",
    "Mondelez International": "MDLZ",
    "Schlumberger": "SLB",
    "American Airlines Group": "AAL",
    "Southwest Airlines": "LUV",
    "UPS": "UPS",
    "FedEx": "FDX",
    "Target": "TGT",
    "Wells Fargo": "WFC",
    "Bank of America": "BAC",
    "Citigroup": "C",
    "Cigna": "CI",
    "Anthem (Elevance Health)": "ELV",
    "CVS Health": "CVS",
    "PayPal": "PYPL",
    "Intuit": "INTU",
    "ServiceNow": "NOW",
    "AMD": "AMD",
    "Micron Technology": "MU",
    "Applied Materials": "AMAT",
    "Starbucks": "SBUX",
    "General Electric": "GE",
    "Marriott International": "MAR",
    "Hilton Worldwide": "HLT",
    "Estee Lauder": "EL",
    "Booking Holdings": "BKNG",
    "Uber Technologies": "UBER",
    "Lyft": "LYFT",
    "eBay": "EBAY",
    "Dominion Energy": "D",
    "NextEra Energy": "NEE",
    "Duke Energy": "DUK",
    "Southern Company": "SO",
    "Exelon": "EXC",
    "Crown Castle": "CCI",
    "T-Mobile US": "TMUS",
    "Zoom Video Communications": "ZM",
    "Snowflake": "SNOW",
    "Palantir Technologies": "PLTR",
    "Datadog": "DDOG",
    "Occidental Petroleum": "OXY",
    "ConocoPhillips": "COP",
    "Marathon Petroleum": "MPC",
    "Valero Energy": "VLO",
    "Phillips 66": "PSX",
    "Kinder Morgan": "KMI",
    "Newmont Corporation": "NEM",
    "Freeport-McMoRan": "FCX",
    "Cleveland-Cliffs": "CLF",
    "Alcoa Corporation": "AA",
    "Nucor": "NUE",
    "Steel Dynamics": "STLD",
    "Palo Alto Networks": "PANW",
    "Snowflake": "SNOW",
    "CrowdStrike": "CRWD",
    "Zscaler": "ZS",
    "Okta": "OKTA",
    "Workday": "WDAY",
    "Palantir": "PLTR",
    "DocuSign": "DOCU",
    "RingCentral": "RNG",
    "Twilio": "TWLO",
    "Dropbox": "DBX",
    "Slack Technologies": "WORK",
    "Square (Block)": "SQ",
    "Shopify": "SHOP",
    "ZoomInfo": "ZI",
    "MongoDB": "MDB",
    "Elastic": "ESTC",
    "Datadog": "DDOG",
    "CrowdStrike": "CRWD",
    "Snowflake": "SNOW",
    "Spotify": "SPOT",
    "Peloton": "PTON",
    "Roku": "ROKU",
    "Pinterest": "PINS",
    "Snap": "SNAP",
    "ByteDance (if listed)": "—",
    "Lyft": "LYFT",
    "Robinhood": "HOOD",
    "Coinbase": "COIN",
    "DoorDash": "DASH",
    "Instacart (if public)": "—",
    "Airbnb": "ABNB",
    "Uber": "UBER",
    "Lyft": "LYFT",
    "Beyond Meat": "BYND",
    "Zoom Video": "ZM",
    "Palantir": "PLTR",
    "Datadog": "DDOG",
    "CrowdStrike": "CRWD",
    "Snowflake": "SNOW",
    "Zillow": "Z",
    "Redfin": "RDFN",
    "Rivian": "RIVN",
    "Lucid Group": "LCID",
    "QuantumScape": "QS",
    "Nikola": "NKLA",
    "Plug Power": "PLUG",
    "Ballard Power Systems": "BLDP",
    "Enphase Energy": "ENPH",
    "SunPower": "SPWR",
    "First Solar": "FSLR",
    "SolarEdge": "SEDG",
    "NextEra": "NEE",
    "Brookfield Renewable Partners": "BEP",
    "Orsted (if US listing)": "—",
    "Tesla": "TSLA",
    "Rivian": "RIVN",
    "Lucid": "LCID",
    "Nikola": "NKLA"
}


In [51]:
for company, symbol in top_mncs_200.items():
    fetch_and_store_stock_data(symbol)


AttributeError: 'Chroma' object has no attribute 'persist'

In [54]:
user_query = "what are latest financila news"

In [55]:

    docs = vectorstore.similarity_search(
    query=user_query,
    k=50  )
    docs_sorted = sorted(docs, key=lambda x: x.metadata["timestamp"], reverse=True)
    latest_docs = docs_sorted[:10]

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are an expert in giving financial advice, market trend and market news"),
        ("user", "Given the query: '{query}', suggest 5 relevant keywords or phrases for searching financial news and stock data.")
    ])
    context = "\n\n".join([doc.page_content for doc in latest_docs])
    chain = prompt | llm
    keywords = chain.invoke({"query": user_query})
    final_prompt = f"""
    You are a financial expert. Using the context provided, answer the user's question in a detailed, structured manner. Include:
    
    1. Current stock price
    2. Market trend (short-term and medium-term)
    3. Relevant news events
    4. Analyst sentiment
    5. Any important financial metrics (like P/E ratio, market cap)
    6. Summary and insights
    
    Context:
    {context}
    
    Question:
    {user_query}
    """
    answer = llm(final_prompt)
    print(answer)



Answer:
The latest financial news includes:

1. Current stock price: The current stock price of XYZ is $100.
2. Market trend: The market is currently experiencing a short-term uptrend, with a medium-term downtrend expected.
3. Relevant news events: XYZ has recently announced a new product launch, which has been well-received by investors.
4. Analyst sentiment: Analysts are generally positive about XYZ's prospects, with a consensus rating of "buy."
5. Important financial metrics: XYZ has a P/E ratio of 20 and a market cap of $10 billion.
6. Summary and insights: XYZ is a strong performer in the market, with positive news events and analyst sentiment. Investors should consider buying shares of XYZ at the current price.
