In [None]:
!pip install -q transformers torch-geometric yfinance pyngrok streamlit plotly

import os
import time
import random
import pickle
import warnings
import requests
import numpy as np
import pandas as pd
import yfinance as yf
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from tqdm.auto import tqdm
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, mean_absolute_error

from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data

from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
from datasets import Dataset

# Silence every Python warning for the rest of the session. This also hides
# library usage/deprecation warnings, so comment it out when debugging.
warnings.filterwarnings('ignore')
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Seed value handed to set_seed() below.
SEED = 42

def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (plus all CUDA devices
    when CUDA is available) and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to each generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

# Seed all RNGs once at start-up, then report which device was selected.
set_seed(SEED)
print(f"Environment Ready. Processing on: {DEVICE}")

In [None]:
# Polygon.io API key. Read from the POLYGON_API_KEY environment variable so the
# secret never has to be saved inside the notebook. The placeholder is kept as
# the fallback; fetch_news_data() skips the download while it is in place.
API_KEY = os.environ.get("POLYGON_API_KEY") or "PASTE_YOUR_POLYGON_KEY_HERE"
TICKER = "NVDA"
# (start, end) date windows; the news endpoint is queried once per window.
DATE_RANGES = [("2022-09-01", "2023-09-01"), ("2023-09-02", "2024-09-01"), ("2024-09-02", "2025-09-01")]
# CSV cache for the raw downloaded articles.
NEWS_FILENAME = f"{TICKER}_News_Raw.csv"

def fetch_news_data(max_retries=5, timeout=30):
    """Download Polygon news for TICKER over DATE_RANGES and cache it as CSV.

    Pages through the ``/v2/reference/news`` endpoint for every window in
    ``DATE_RANGES``, keeps at most 30 articles per calendar day (in download
    order) and writes the ``Date``/``Headline``/``Link``/``Summary`` columns to
    ``NEWS_FILENAME``. Missing values are stored as the string ``"No Data"``.

    Nothing is downloaded while ``API_KEY`` still contains the placeholder, and
    no file is written when no article was retrieved.

    Args:
        max_retries: Maximum number of consecutive 60-second waits on HTTP 429
            before the current date window is abandoned.
        timeout: Per-request timeout in seconds.
    """
    if "PASTE" in API_KEY:
        print("Skipping download: No API Key provided.")
        return

    all_articles = []
    base_url = "https://api.polygon.io/v2/reference/news"

    for start, end in DATE_RANGES:
        print(f"Fetching news: {start} to {end}...")
        current_url = base_url
        params = {
            "ticker": TICKER, "published_utc.gte": start, "published_utc.lte": end,
            "limit": 1000, "sort": "published_utc", "order": "desc", "apiKey": API_KEY
        }

        rate_limit_waits = 0
        while True:
            try:
                resp = requests.get(current_url, params=params, timeout=timeout)
            except requests.RequestException as exc:
                # Keep what was already collected instead of losing it to a
                # transient network error.
                print(f"Request failed ({exc}); stopping this date range.")
                break

            if resp.status_code == 429:
                rate_limit_waits += 1
                if rate_limit_waits > max_retries:
                    print("Still rate-limited after repeated waits; stopping this date range.")
                    break
                time.sleep(60)
                continue
            if resp.status_code != 200:
                print(f"Polygon returned HTTP {resp.status_code}; stopping this date range.")
                break
            rate_limit_waits = 0

            data = resp.json()
            all_articles.extend(data.get('results', []))

            next_url = data.get('next_url')
            if not next_url:
                break
            # Follow-up pages reuse the URL supplied by the API; only the key
            # has to be re-attached, and requests picks the right separator.
            current_url, params = next_url, {"apiKey": API_KEY}

    if not all_articles:
        print("No articles retrieved; nothing saved.")
        return

    df = pd.DataFrame(all_articles)
    df['Date'] = pd.to_datetime(df['published_utc']).dt.date
    df = df.groupby('Date').head(30)

    # reindex (rather than plain column selection) tolerates a field that is
    # absent from every returned article; it is then filled with "No Data".
    df_clean = (
        df.reindex(columns=['Date', 'title', 'article_url', 'description'])
        .rename(columns={'title': 'Headline', 'article_url': 'Link', 'description': 'Summary'})
        .fillna("No Data")
    )
    df_clean.to_csv(NEWS_FILENAME, index=False)
    print(f"Saved {len(df_clean)} articles to {NEWS_FILENAME}")

# Only hit the API when no cached news file is present.
if os.path.exists(NEWS_FILENAME):
    print(f"Data file {NEWS_FILENAME} already exists.")
else:
    fetch_news_data()

In [None]:
# Cache for the per-article NLP features. Derived from TICKER (like
# NEWS_FILENAME) so switching ticker never reuses another ticker's cache.
PROCESSED_FILE = f"{TICKER}_Processed_Features.csv"

def process_nlp():
    """Score every cached news article for sentiment and relation type.

    Reads ``NEWS_FILENAME``, drops rows with missing values, and builds one
    text per article from headline and summary. Two Hugging Face pipelines
    are run over the texts:

    * ``ProsusAI/finbert`` -> signed score (``+score`` for ``positive``,
      ``-score`` for ``negative``, ``0.0`` otherwise);
    * ``valhalla/distilbart-mnli-12-3`` zero-shot -> top-ranked relation label.

    The ``Date``/``Headline``/``Sentiment_Score``/``Relation_Type`` columns are
    written to ``PROCESSED_FILE``. Returns early when the raw file is missing.
    """
    if not os.path.exists(NEWS_FILENAME):
        print("Raw news file not found.")
        return

    print("Initializing NLP Models...")
    articles = pd.read_csv(NEWS_FILENAME).dropna()
    # NOTE(review): summaries stored as the "No Data" placeholder end up in the
    # model input verbatim - confirm that is intended.
    articles['AI_Text'] = articles['Headline'] + ". " + articles['Summary']

    texts = KeyDataset(Dataset.from_pandas(articles[['AI_Text']]), "AI_Text")
    n_articles = len(articles)

    device_id = 0 if torch.cuda.is_available() else -1
    sent_pipe = pipeline("text-classification", model="ProsusAI/finbert", device=device_id, truncation=True, max_length=512)
    rel_pipe = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-3", device=device_id)
    RELATION_LABELS = ["Competitor", "Supplier", "Customer", "Partner", "Regulatory", "Neutral"]

    print(f"Processing {n_articles} articles...")

    def signed_score(result):
        """Map a sentiment result to a signed confidence."""
        label = result['label']
        if label == 'positive':
            return result['score']
        if label == 'negative':
            return -result['score']
        return 0.0

    sentiment_stream = tqdm(sent_pipe(texts, batch_size=64), total=n_articles, desc="Sentiment")
    sent_scores = [signed_score(out) for out in sentiment_stream]

    relation_stream = tqdm(
        rel_pipe(texts, candidate_labels=RELATION_LABELS, batch_size=64),
        total=n_articles, desc="Relations",
    )
    # The first entry of 'labels' is the one kept as the article's relation.
    rel_types = [out['labels'][0] for out in relation_stream]

    articles['Sentiment_Score'] = sent_scores
    articles['Relation_Type'] = rel_types

    output_cols = ['Date', 'Headline', 'Sentiment_Score', 'Relation_Type']
    articles[output_cols].to_csv(PROCESSED_FILE, index=False)
    print(f"NLP Processing Complete. Saved to {PROCESSED_FILE}")

# Run the NLP step only when raw news exists and no processed file is cached yet.
if not os.path.exists(PROCESSED_FILE) and os.path.exists(NEWS_FILENAME):
    process_nlp()

In [None]:
# Pickle cache for the daily graph snapshots. Derived from TICKER (like
# NEWS_FILENAME) so switching ticker never reuses another ticker's graphs.
GRAPH_FILE = f"{TICKER}_Dynamic_Graph.pkl"
# Index of each node category inside the one-hot part of a node feature vector.
RELATION_MAP = {r: i for i, r in enumerate(['Ego', 'Competitor', 'Regulatory', 'Partner', 'Supplier', 'Customer', 'Neutral'])}

def build_graphs():
    """Turn the processed news table into one star-shaped graph per date.

    For every date in ``PROCESSED_FILE`` a ``Data`` object is built in which
    node 0 is the ego node and each article is one further node. Node features
    are a one-hot relation category (labels missing from ``RELATION_MAP`` fall
    back to ``'Neutral'``) followed by the sentiment score in the last column;
    the ego node's last column stays 0. Every article node gets one directed
    edge pointing at node 0. The list of snapshots is pickled to
    ``GRAPH_FILE``. Does nothing when ``PROCESSED_FILE`` is missing.
    """
    if not os.path.exists(PROCESSED_FILE):
        return

    print("Constructing Dynamic Knowledge Graphs...")
    news = pd.read_csv(PROCESSED_FILE)
    feat_dim = len(RELATION_MAP) + 1  # one-hot slots + sentiment column
    ego_slot = RELATION_MAP['Ego']
    fallback_slot = RELATION_MAP['Neutral']
    snapshots = []

    for date, day_news in tqdm(news.groupby('Date'), desc="Building Daily Snapshots"):
        n_articles = len(day_news)
        feats = torch.zeros((n_articles + 1, feat_dim), dtype=torch.float)
        feats[0, ego_slot] = 1.0

        relations = day_news['Relation_Type'].tolist()
        sentiments = day_news['Sentiment_Score'].tolist()
        for node_idx, (relation, sentiment) in enumerate(zip(relations, sentiments), start=1):
            feats[node_idx, RELATION_MAP.get(relation, fallback_slot)] = 1.0
            feats[node_idx, -1] = sentiment

        # Row 0 holds the article nodes (sources), row 1 the ego node (target).
        article_nodes = torch.arange(1, n_articles + 1, dtype=torch.long)
        ego_targets = torch.zeros(n_articles, dtype=torch.long)

        snapshot = Data(x=feats, edge_index=torch.stack([article_nodes, ego_targets], dim=0))
        snapshot.date = date
        snapshots.append(snapshot)

    with open(GRAPH_FILE, 'wb') as f:
        pickle.dump(snapshots, f)
    print(f"Graph serialization complete: {len(snapshots)} snapshots.")

# Build the graph snapshots only when no pickled graph file is cached yet.
if not os.path.exists(GRAPH_FILE):
    build_graphs()

In [None]:
class IntegratedTrader(nn.Module):
    """GCN + LSTM regressor over a sequence of daily graphs and KPI vectors.

    Each daily graph is encoded by two GCN layers and mean-pooled into a single
    vector. That vector is concatenated with the same day's KPI features, the
    resulting sequence is fed to an LSTM, and a linear head maps the final time
    step to one value.

    Args:
        node_feat_dim: Width of each graph node feature vector.
        kpi_dim: Number of KPI features per time step.
        gnn_out: Width of the pooled graph embedding.
        lstm_hidden: LSTM hidden size.
        dropout: Drop probability applied to the final LSTM state (active in
            training mode only).
    """

    def __init__(self, node_feat_dim, kpi_dim, gnn_out=32, lstm_hidden=128, dropout=0.2):
        super().__init__()
        self.gnn1 = GCNConv(node_feat_dim, 64)
        self.gnn2 = GCNConv(64, gnn_out)
        # nn.LSTM's own ``dropout`` argument only acts between stacked layers,
        # so on this single-layer LSTM it did nothing (PyTorch warns about it).
        # An explicit Dropout layer provides the regularisation instead; it has
        # no parameters, so previously saved state_dicts still load.
        self.lstm = nn.LSTM(gnn_out + kpi_dim, lstm_hidden, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.head = nn.Linear(lstm_hidden, 1)

    def forward(self, graph_list, kpi_tensor):
        """Predict one value from a sequence of graphs and KPI rows.

        Args:
            graph_list: One graph per time step, each exposing ``x`` and
                ``edge_index``.
            kpi_tensor: KPI features of shape ``(1, len(graph_list), kpi_dim)``;
                the batch size must be 1 because each pooled graph embedding
                has a leading dimension of 1.

        Returns:
            Tensor of shape ``(1, 1)``.
        """
        # Follow the module's own device rather than a notebook-level global.
        device = self.head.weight.device
        day_vecs = []
        for g in graph_list:
            x, edge_index = g.x.to(device), g.edge_index.to(device)
            x = F.relu(self.gnn1(x, edge_index))
            x = self.gnn2(x, edge_index)
            # All nodes belong to graph 0, so pooling yields a (1, gnn_out) row.
            batch = torch.zeros(x.size(0), dtype=torch.long, device=device)
            day_vecs.append(global_mean_pool(x, batch))

        sentiment_seq = torch.stack(day_vecs, dim=1)
        fusion = torch.cat((sentiment_seq, kpi_tensor), dim=2)
        lstm_out, _ = self.lstm(fusion)
        return self.head(self.dropout(lstm_out[:, -1, :]))

class DirectionalLoss(nn.Module):
    """MSE plus an extra penalty on predictions that move the wrong way.

    The loss is ``MSE(pred, target) + penalty * mean(mismatch * |target - pred|)``
    where ``mismatch`` is 1 for elements whose predicted direction relative to
    ``prev_price`` (the sign of the difference) differs from the true
    direction, and 0 otherwise.

    Args:
        penalty: Weight of the directional term.
    """

    def __init__(self, penalty=2.0):
        super().__init__()
        self.mse = nn.MSELoss()
        self.penalty = penalty

    def forward(self, pred, target, prev_price):
        """Return the scalar loss for one batch of predictions."""
        actual_move = torch.sign(target - prev_price)
        predicted_move = torch.sign(pred - prev_price)
        wrong_way = torch.ne(actual_move, predicted_move).float()

        abs_error = torch.abs(target - pred)
        directional_term = torch.mean(wrong_way * abs_error)
        return self.mse(pred, target) + (self.penalty * directional_term)

In [None]:
# Price-history window requested from Yahoo Finance (passed to yf.download).
START_DATE = "2022-09-01"
END_DATE = "2025-09-02"
# Rows dated on or before this day are used for training; later rows for testing.
TRAIN_CUTOFF = "2025-06-15"
# Number of dates in each model input sequence.
SEQ_LEN = 5
# Full passes over the training sequences.
EPOCHS = 50

def train_and_evaluate():
    """Train IntegratedTrader on graph + KPI sequences and report test metrics.

    Steps:
      1. Load the pickled graph snapshots from ``GRAPH_FILE`` and download the
         price history for ``TICKER``.
      2. Derive SMA_14, MACD, Return, RSI and ``Target`` (the next row's close).
      3. Split at ``TRAIN_CUTOFF`` and fit a MinMaxScaler on the training rows
         only.
      4. Build ``SEQ_LEN``-long windows over the dates that have both a graph
         and KPI values; each window's label is the ``Target`` of its last date.
      5. Train one window at a time with ``DirectionalLoss`` for ``EPOCHS``
         epochs and save the weights to ``{TICKER}_Sniper_Model.pth``.
      6. Print MAE and directional accuracy on the test windows and plot
         predictions against actual values.

    Returns:
        None. Prints a message and returns early when the graph file, price
        data, or train/test samples are missing.
    """
    if not os.path.exists(GRAPH_FILE):
        print(f"Graph file {GRAPH_FILE} not found; build the graph snapshots before training.")
        return

    print("Fetching Market Data and Aligning Sequences...")

    # NOTE(security): pickle.load can execute arbitrary code. Only open graph
    # files produced by this notebook.
    with open(GRAPH_FILE, 'rb') as f:
        raw_graphs = pickle.load(f)

    graph_map = {
        pd.to_datetime(g.date).strftime('%Y-%m-%d'): Data(x=g.x, edge_index=g.edge_index)
        for g in raw_graphs
    }

    df = yf.download(TICKER, start=START_DATE, end=END_DATE, progress=False)
    if df is None or df.empty:
        print(f"No price data returned for {TICKER}; aborting.")
        return
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [c[0] for c in df.columns]
    df = df.reset_index()
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

    close = df['Close']
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(14).mean()
    avg_loss = -delta.clip(upper=0).rolling(14).mean()
    df['SMA_14'] = close.rolling(14).mean()
    df['MACD'] = close.ewm(span=12).mean() - close.ewm(span=26).mean()
    df['Return'] = close.pct_change()
    df['RSI'] = 100 - (100 / (1 + avg_gain / avg_loss))
    df['Target'] = close.shift(-1)
    # Drops the indicator warm-up rows and the final row (which has no Target).
    df = df.dropna()

    train_df = df[df['Date'] <= TRAIN_CUTOFF].copy()
    test_df = df[df['Date'] > TRAIN_CUTOFF].copy()
    if train_df.empty or test_df.empty:
        print("Price history does not cover both sides of TRAIN_CUTOFF; aborting.")
        return

    KPI_COLS = ['Close', 'RSI', 'MACD', 'SMA_14', 'Return', 'Target']
    feature_cols = KPI_COLS[:-1]
    scaler = MinMaxScaler()
    # Fit on the training rows only so test statistics never leak into scaling.
    train_df[KPI_COLS] = scaler.fit_transform(train_df[KPI_COLS])
    test_df[KPI_COLS] = scaler.transform(test_df[KPI_COLS])

    full_df = pd.concat([train_df, test_df])
    date_to_kpi = dict(zip(full_df['Date'], full_df[feature_cols].to_numpy(dtype=float)))
    date_to_target = dict(zip(full_df['Date'], full_df['Target']))

    sequences = []
    targets = []
    dates = []

    common_dates = sorted(set(date_to_kpi) & set(graph_map))
    # "+ 1" keeps the final window, the one ending on the last common date.
    for i in range(len(common_dates) - SEQ_LEN + 1):
        seq_dates = common_dates[i:i + SEQ_LEN]
        tgt_date = seq_dates[-1]
        sequences.append(([graph_map[d] for d in seq_dates], [date_to_kpi[d] for d in seq_dates]))
        targets.append(date_to_target[tgt_date])
        dates.append(tgt_date)

    split_idx = sum(1 for d in dates if d <= TRAIN_CUTOFF)
    train_seqs, test_seqs = sequences[:split_idx], sequences[split_idx:]
    train_y, test_y = targets[:split_idx], targets[split_idx:]
    if not train_seqs:
        print("No training sequences could be built (no dates with both news graphs and prices).")
        return

    sample_g = train_seqs[0][0][0]
    model = IntegratedTrader(node_feat_dim=sample_g.x.shape[1], kpi_dim=len(feature_cols)).to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), lr=0.001)
    crit = DirectionalLoss()

    # MinMaxScaler maps each column with its own X * scale_ + min_. The loss
    # compares the previous close with values in the scaled Target column, so
    # the scaled Close is re-expressed in the Target column's scale first.
    close_scale, close_offset = float(scaler.scale_[0]), float(scaler.min_[0])
    target_scale, target_offset = float(scaler.scale_[-1]), float(scaler.min_[-1])

    # Tensors do not change between epochs, so build them once.
    train_samples = []
    for (gs, kpis), y in zip(train_seqs, train_y):
        kpi_in = torch.tensor(np.array(kpis), dtype=torch.float).unsqueeze(0).to(DEVICE)
        y_true = torch.tensor([[y]], dtype=torch.float).to(DEVICE)
        prev_close_scaled = kpi_in[:, -1, 0].unsqueeze(1)
        prev_price = (prev_close_scaled - close_offset) / close_scale * target_scale + target_offset
        train_samples.append((gs, kpi_in, y_true, prev_price))

    print(f"Starting Training ({len(train_seqs)} samples)...")
    model.train()
    for ep in tqdm(range(EPOCHS), desc="Epochs"):
        for gs, kpi_in, y_true, prev_price in train_samples:
            opt.zero_grad()
            pred = model(gs, kpi_in)
            loss = crit(pred, y_true, prev_price)
            loss.backward()
            opt.step()

    torch.save(model.state_dict(), f"{TICKER}_Sniper_Model.pth")

    print("Evaluating Performance...")
    if not test_seqs:
        print("No test sequences after TRAIN_CUTOFF; skipping evaluation.")
        return

    model.eval()
    preds = []
    with torch.no_grad():
        for gs, kpis in test_seqs:
            kpi_in = torch.tensor(np.array(kpis), dtype=torch.float).unsqueeze(0).to(DEVICE)
            preds.append(model(gs, kpi_in).item())
    acts = list(test_y)

    def unscale(values, col):
        """Invert the scaling of one KPI column (columns scale independently)."""
        buf = np.zeros((len(values), len(KPI_COLS)))
        buf[:, col] = values
        return scaler.inverse_transform(buf)[:, col]

    final_preds = unscale(preds, -1)
    final_acts = unscale(acts, -1)
    # Scaled close of the last day of each test window, back in price units.
    prev_prices_real = unscale([kpis[-1][0] for _, kpis in test_seqs], 0)

    mae = mean_absolute_error(final_acts, final_preds)

    true_dir = (final_acts > prev_prices_real).astype(int)
    pred_dir = (final_preds > prev_prices_real).astype(int)
    acc = accuracy_score(true_dir, pred_dir)

    print("Results:")
    print(f"MAE: ${mae:.2f}")
    print(f"Directional Accuracy: {acc:.2%}")

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(final_acts, label='Actual', color='black', alpha=0.7)
    ax.plot(final_preds, label='AI Prediction', color='blue', linestyle='--')
    ax.set_title(f"{TICKER} Prediction vs Reality | Acc: {acc:.2%}")
    ax.set_xlabel("Test sample")
    ax.set_ylabel("Next close (USD)")
    ax.legend()
    plt.show()

# Runs the full training + evaluation pipeline every time this cell executes.
train_and_evaluate()

In [None]:
%%writefile app.py
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import yfinance as yf
import streamlit as st
import plotly.graph_objects as go
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data

# The dashboard always runs inference on the CPU.
DEVICE = torch.device('cpu')
TICKER = "NVDA"
# Weights file read by the prediction step below.
MODEL_FILE = "NVDA_Sniper_Model.pth"
# Number of most recent rows fed to the model as one sequence.
SEQ_LEN = 5

class IntegratedTrader(nn.Module):
    """GCN + LSTM regressor over a sequence of daily graphs and KPI vectors.

    Each daily graph is encoded by two GCN layers and mean-pooled into a single
    vector, concatenated with that day's KPI features, passed through an LSTM,
    and the final time step is mapped to one value by a linear head.

    Args:
        node_feat_dim: Width of each graph node feature vector.
        kpi_dim: Number of KPI features per time step.
        gnn_out: Width of the pooled graph embedding.
        lstm_hidden: LSTM hidden size.
        dropout: Drop probability applied to the final LSTM state (active in
            training mode only; a no-op after ``model.eval()``).
    """

    def __init__(self, node_feat_dim, kpi_dim, gnn_out=32, lstm_hidden=128, dropout=0.2):
        super().__init__()
        self.gnn1 = GCNConv(node_feat_dim, 64)
        self.gnn2 = GCNConv(64, gnn_out)
        # nn.LSTM's own ``dropout`` argument only acts between stacked layers,
        # so on a single-layer LSTM it did nothing except trigger a warning.
        # Dropout has no parameters, so existing state_dicts load unchanged.
        self.lstm = nn.LSTM(gnn_out + kpi_dim, lstm_hidden, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.head = nn.Linear(lstm_hidden, 1)

    def forward(self, graph_list, kpi_tensor):
        """Return a ``(1, 1)`` prediction.

        Args:
            graph_list: One graph per time step, each exposing ``x`` and
                ``edge_index``.
            kpi_tensor: KPI features of shape ``(1, len(graph_list), kpi_dim)``.
        """
        # Follow the module's own device rather than a file-level global.
        device = self.head.weight.device
        day_vecs = []
        for g in graph_list:
            x, edge_index = g.x.to(device), g.edge_index.to(device)
            x = F.relu(self.gnn1(x, edge_index))
            x = self.gnn2(x, edge_index)
            # All nodes belong to graph 0, so pooling yields a (1, gnn_out) row.
            batch = torch.zeros(x.size(0), dtype=torch.long, device=device)
            day_vecs.append(global_mean_pool(x, batch))

        fusion = torch.cat((torch.stack(day_vecs, dim=1), kpi_tensor), dim=2)
        lstm_out, _ = self.lstm(fusion)
        return self.head(self.dropout(lstm_out[:, -1, :]))

def get_data(end_date, lookback_days=90):
    """Download recent prices for TICKER and compute the model's KPI columns.

    Args:
        end_date: Last date to include, formatted ``YYYY-MM-DD``.
        lookback_days: Calendar days of history requested before ``end_date``.

    Returns:
        DataFrame of the rows dated on or before ``end_date`` with a string
        ``Date`` column, the downloaded price columns and ``SMA_14``, ``MACD``,
        ``Return`` and ``RSI``. Rows containing missing values (the indicator
        warm-up rows) are dropped, so every returned row is usable as model
        input. An empty DataFrame is returned when the download yields no rows.
    """
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    start_dt = end_dt - timedelta(days=lookback_days)
    df = yf.download(TICKER, start=start_dt, end=end_dt + timedelta(days=1), progress=False)
    if df is None or df.empty:
        # Nothing to compute on; the caller's length check reports this.
        return pd.DataFrame()
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [c[0] for c in df.columns]
    df = df.reset_index()
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

    close = df['Close']
    delta = close.diff()
    # clip() keeps the undefined first difference as NaN instead of counting
    # it as a zero move inside the first rolling window.
    avg_gain = delta.clip(lower=0).rolling(14).mean()
    avg_loss = -delta.clip(upper=0).rolling(14).mean()
    df['SMA_14'] = close.rolling(14).mean()
    df['MACD'] = close.ewm(span=12).mean() - close.ewm(span=26).mean()
    df['Return'] = close.pct_change()
    df['RSI'] = 100 - (100 / (1 + avg_gain / avg_loss))

    return df[df['Date'] <= end_date].dropna().copy()

# ---- Streamlit page: pick a date, run the model, show prediction and chart ----
st.set_page_config(page_title="NVDA AI Agent", layout="wide")
st.title("NVDA Graph-AI Trader")

date_in = st.sidebar.date_input("Target Date", datetime.today())
if st.sidebar.button("Run Prediction"):
    target_str = date_in.strftime('%Y-%m-%d')

    # Without trained weights the network would run on its random
    # initialisation and the output would still be displayed as a prediction.
    if not os.path.exists(MODEL_FILE):
        st.error(f"Model file {MODEL_FILE} not found. Train the model before running predictions.")
        st.stop()

    with st.spinner("Analyzing Market Structure..."):
        df = get_data(target_str)
        if len(df) < SEQ_LEN:
            st.error("Insufficient Data.")
            st.stop()

        KPI_COLS = ['Close', 'RSI', 'MACD', 'SMA_14', 'Return']
        # NOTE(review): this scaler is fitted on the downloaded window only.
        # If the saved weights were trained with a scaler fitted on other data,
        # inputs and the un-scaled prediction are on a different scale and the
        # dollar value below is only approximate. Persist and reuse the
        # training-time scaler to fix this.
        scaler = MinMaxScaler()
        df_scaled = df.copy()
        df_scaled[KPI_COLS] = scaler.fit_transform(df[KPI_COLS])

        kpi_input = torch.tensor(df_scaled[KPI_COLS].tail(SEQ_LEN).values, dtype=torch.float).unsqueeze(0).to(DEVICE)

        # NOTE(review): placeholder graph input, identical for every day. It has
        # 10 nodes with only feature column 0 set and a single edge pair
        # between nodes 0 and 1, so no news data reaches the model here.
        NODE_DIM = 8
        x = torch.zeros((10, NODE_DIM))
        x[:, 0] = 1.0
        edge_index = torch.tensor([[0, 1], [1, 0]], dtype=torch.long)
        graph_seq = [Data(x=x, edge_index=edge_index) for _ in range(SEQ_LEN)]

        try:
            model = IntegratedTrader(node_feat_dim=NODE_DIM, kpi_dim=len(KPI_COLS)).to(DEVICE)
            model.load_state_dict(torch.load(MODEL_FILE, map_location=DEVICE))
            model.eval()
            with torch.no_grad():
                pred = model(graph_seq, kpi_input).item()
            if not np.isfinite(pred):
                raise ValueError("model produced a non-finite prediction")

            # Undo the scaling through the Close column (index 0).
            dummy = np.zeros((1, len(KPI_COLS)))
            dummy[0, 0] = pred
            price = scaler.inverse_transform(dummy)[0, 0]

            last_close = df['Close'].iloc[-1]
            pct_change = (price - last_close) / last_close * 100

            col1, col2 = st.columns(2)
            col1.metric("Previous Close", f"${last_close:.2f}")
            col2.metric("AI Prediction", f"${price:.2f}", f"{pct_change:.2f}%")

            fig = go.Figure()
            fig.add_trace(go.Scatter(x=df['Date'].tail(30), y=df['Close'].tail(30), name="History"))
            fig.add_trace(go.Scatter(x=[target_str], y=[price], mode='markers', marker=dict(size=12, color='red'), name="AI Target"))
            st.plotly_chart(fig)

        except Exception as e:
            st.error(f"Prediction Error: {e}")

In [None]:

# pyngrok is also installed by the notebook's first cell; presumably repeated
# here so this cell can be run on its own.
!pip install -q pyngrok
from pyngrok import ngrok

# Stop any ngrok process that may still be running (e.g. from an earlier run
# of this cell) before opening a new tunnel.
ngrok.kill()

# Start the Streamlit app written by the previous cell in the background
# (nohup ... &) on port 8501.
!nohup streamlit run app.py --server.port 8501 &


# Open an ngrok tunnel to the same port and print its public URL.
# NOTE(review): the tunnel is requested immediately after launching Streamlit,
# so the URL may be printed before the app is ready to serve - confirm.
public_url = ngrok.connect(8501).public_url
print(f" Dashboard Live at: {public_url}")