<a href="https://colab.research.google.com/github/Veena24-hub/Intern-Phase-1/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# ===========================================
# IMPORTS
# ===========================================
import requests
import re
import pandas as pd
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch







In [8]:
# ===========================================
# STEP 1: FETCH DATA (with mock fallback)
# ===========================================
def fetch_stocktwits(symbol="AAPL"):
    """
    Try fetching real StockTwits data;
    if blocked (403) or bad JSON, use mock dataset.
    """
    url = f"https://api.stocktwits.com/api/2/streams/symbol/{symbol}.json"
    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code != 200:
            print(f" Live fetch failed ({resp.status_code}); using mock data.")
            raise Exception("bad_status")
        data = resp.json()
        return data.get("messages", [])
    except Exception:
        mock_data = {
            "messages": [
                {
                    "id": 11111,
                    "body": " $AAPL just CRUSHED earnings!!! Revenue beat by 8%!",
                    "created_at": "2025-10-24T14:00:00Z",
                    "user": {"username": "trader123"},
                },
                {
                    "id": 11112,
                    "body": "Disappointed with $AAPL today... down -3% on weak guidance ",
                    "created_at": "2025-10-24T14:05:00Z",
                    "user": {"username": "investorJane"},
                },
                {
                    "id": 11113,
                    "body": "$AAPL holding steady. Nothing new. #stocks #trading",
                    "created_at": "2025-10-24T14:10:00Z",
                    "user": {"username": "marketWatcher"},
                },
            ]
        }
        return mock_data["messages"]

In [9]:
# ===========================================
# STEP 2: CLEAN TEXT
# ===========================================
def clean_text(text):
    text = re.sub(r"http\S+", "", text)  # remove URLs
    text = re.sub(r"[^A-Za-z0-9$ ]+", " ", text)  # remove emojis/punct
    text = re.sub(r"\s+", " ", text).strip()  # trim spaces
    return text


In [16]:
# ===========================================
# STEP 3: EXTRACT METADATA
# ===========================================
def extract_metadata(msg):
    symbol_matches = re.findall(r"\$[A-Za-z]+", msg.get("body", ""))
    symbol = symbol_matches[0][1:] if symbol_matches else "AAPL"  # default to AAPL
    return {
        "post_id": msg.get("id"),
        "user": msg.get("user", {}).get("username"),
        "text": msg.get("body"),
        "cleaned_text": clean_text(msg.get("body", "")),
        "timestamp": msg.get("created_at"),
        "symbol": symbol
    }






In [11]:
# ===========================================
# STEP 4: LOAD FINBERT MODEL
# ===========================================
print("Loading FinBERT model...")
model_name = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
print("FinBERT loaded successfully!")



Loading FinBERT model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

FinBERT loaded successfully!


In [12]:
# ===========================================
# STEP 5: SENTIMENT INFERENCE
# ===========================================
def get_finbert_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        labels = ["Negative", "Neutral", "Positive"]
        sentiment = labels[torch.argmax(probs)]
        confidence = probs[0][torch.argmax(probs)].item()
    return sentiment, round(confidence, 3)

In [17]:
# ===========================================
# STEP 6: ALERT LOGIC
# ===========================================
def generate_alerts(df, threshold=0.85):
    alerts = []
    for _, row in df.iterrows():
        if row["sentiment"] == "Negative" and row["confidence"] > threshold:
            alerts.append(f"🚨 Alert: {row['symbol']} sentiment NEGATIVE — '{row['text']}'")
        elif row["sentiment"] == "Positive" and row["confidence"] > threshold:
            alerts.append(f"📈 Positive buzz: {row['symbol']} — '{row['text']}'")
    return alerts

In [15]:
# ===========================================
# STEP 7: RUN ONE FULL CYCLE
# ===========================================
def run_one_cycle(symbol="AAPL"):
    print(f"\nFetching latest Stocktwits messages for {symbol}...")

In [18]:
def run_one_cycle(symbol="AAPL"):
    print(f"\n🔍 Fetching latest posts for ${symbol}...")
    messages = fetch_stocktwits(symbol)
    print(f"Fetched {len(messages)} posts.\n")

    # Extract and clean
    processed = [extract_metadata(msg) for msg in messages]

    # Sentiment
    for item in processed:
        sent, conf = get_finbert_sentiment(item["cleaned_text"])
        item["sentiment"] = sent
        item["confidence"] = conf

    df = pd.DataFrame(processed)

    print("🧾 Sentiment Summary:")
    display(df[["symbol", "cleaned_text", "sentiment", "confidence"]])

    alerts = generate_alerts(df)
    print("\n🔔 Generated Alerts:")
    if alerts:
        for a in alerts:
            print(a)
    else:
        print("No alerts triggered. (Check threshold or text)")

In [20]:
run_one_cycle("AAPL")



🔍 Fetching latest posts for $AAPL...
 Live fetch failed (403); using mock data.
Fetched 3 posts.

🧾 Sentiment Summary:


Unnamed: 0,symbol,cleaned_text,sentiment,confidence
0,AAPL,$AAPL just CRUSHED earnings Revenue beat by 8,Neutral,1.0
1,AAPL,Disappointed with $AAPL today down 3 on weak g...,Positive,1.0
2,AAPL,$AAPL holding steady Nothing new stocks trading,Negative,0.982



🔔 Generated Alerts:
📈 Positive buzz: AAPL — 'Disappointed with $AAPL today... down -3% on weak guidance '
🚨 Alert: AAPL sentiment NEGATIVE — '$AAPL holding steady. Nothing new. #stocks #trading'
