In [1]:
import yfinance as yf
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
from transformers import pipeline



In [2]:
!pip install GoogleNews


Collecting GoogleNews
  Downloading GoogleNews-1.6.15-py3-none-any.whl.metadata (4.5 kB)
Collecting dateparser (from GoogleNews)
  Downloading dateparser-1.2.2-py3-none-any.whl.metadata (29 kB)
Downloading GoogleNews-1.6.15-py3-none-any.whl (8.8 kB)
Downloading dateparser-1.2.2-py3-none-any.whl (315 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dateparser, GoogleNews
Successfully installed GoogleNews-1.6.15 dateparser-1.2.2


In [3]:
from GoogleNews import GoogleNews


In [4]:
from collections import Counter
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", tokenizer="ProsusAI/finbert")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


In [5]:
def compute_rsi(series, period=14):
    delta = series.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

def compute_macd(series, fast=12, slow=26, signal=9):
    ema_fast = series.ewm(span=fast, adjust=False).mean()
    ema_slow = series.ewm(span=slow, adjust=False).mean()
    macd = ema_fast - ema_slow
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    hist = macd - signal_line
    return macd, signal_line, hist

def compute_bollinger(series, period=20, stddev=2):
    sma = series.rolling(window=period).mean()
    std = series.rolling(window=period).std()
    upper = sma + stddev * std
    lower = sma - stddev * std
    return lower, sma, upper


In [6]:
def analyze_earnings_event(ticker, company_name, earnings_date_str):
    import re
    from collections import Counter
    from GoogleNews import GoogleNews
    import yfinance as yf
    import pandas as pd

    earnings_date = pd.to_datetime(earnings_date_str)

    # --- 1. Earnings Call Tone (FinBERT) ---
    with open(f"{ticker}_transcript.txt", "r", encoding="utf-8") as f:
        transcript = f.read()

    sentences = re.split(r'(?<=[.!?])\s+', transcript)
    tone = finbert(sentences[:100])  # run in chunks if larger
    call_summary = Counter([r['label'] for r in tone])

    # --- 2. News Sentiment (GoogleNews + FinBERT) ---
    gn = GoogleNews(lang='en')
    gn.set_time_range(
        (earnings_date - pd.Timedelta(days=3)).strftime("%m/%d/%Y"),
        (earnings_date + pd.Timedelta(days=3)).strftime("%m/%d/%Y")
    )
    gn.search(f"{company_name} earnings")
    headlines = [r['title'] for r in gn.results()]
    news_tone = finbert(headlines)
    news_summary = Counter([r['label'] for r in news_tone])

    # --- 3. Chart Trends (Indicators + Price Movement) ---
    data = yf.Ticker(ticker).history(
        start=(earnings_date - pd.Timedelta(days=10)).strftime("%Y-%m-%d"),
        end=(earnings_date + pd.Timedelta(days=5)).strftime("%Y-%m-%d")
    ).reset_index()

    # Remove timezone to avoid comparison errors
    data['Date'] = pd.to_datetime(data['Date']).dt.tz_localize(None)

    delta = data['Close'].diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / avg_loss
    data['RSI'] = 100 - (100 / (1 + rs))
    # SMA
    data['SMA_20'] = data['Close'].rolling(window=20).mean()
    data['EMA12'] = data['Close'].ewm(span=12, adjust=False).mean()
    data['EMA26'] = data['Close'].ewm(span=26, adjust=False).mean()
    data['MACD'] = data['EMA12'] - data['EMA26']
    data['MACD_Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()
    data['MACD_Crossover'] = (
        (data['MACD'] > data['MACD_Signal']) &
        (data['MACD'].shift(1) <= data['MACD_Signal'].shift(1))
    )

    # Bollinger Bands
   # Bollinger Bands
    rolling_mean = data['Close'].rolling(window=20).mean()
    rolling_std = data['Close'].rolling(window=20).std()
    data['BB_Lower'] = rolling_mean - 2 * rolling_std
    data['BB_Mid'] = rolling_mean
    data['BB_Upper'] = rolling_mean + 2 * rolling_std

    # --- Focus on Earnings Date Row ---
    focus_df = data[data['Date'] == earnings_date]
    if not focus_df.empty:
        focus = focus_df.iloc[0]
    else:
        fallback_df = data[data['Date'] < earnings_date]
        focus = fallback_df.iloc[-1] if not fallback_df.empty else data.iloc[-1]

    # --- Price Movement ---
    before_data = data[data['Date'] < earnings_date]
    after_data = data[data['Date'] > earnings_date]

    before = before_data.iloc[-1]['Close'] if not before_data.empty else None
    after_1 = after_data.iloc[0]['Close'] if len(after_data) > 0 else None
    after_3 = after_data.iloc[2]['Close'] if len(after_data) > 2 else None

    if None in [before, after_1, after_3]:
        change_1 = change_3 = None
    else:
        change_1 = round((after_1 - before) / before * 100, 2)
        change_3 = round((after_3 - before) / before * 100, 2)

    # --- Chart Sentiment ---
    chart_signal = {
        'RSI': 'Overbought' if focus['RSI'] > 70 else 'Oversold' if focus['RSI'] < 30 else 'Neutral',
        'Trend': 'Bullish' if focus['Close'] > focus['SMA_20'] else 'Bearish',
        'MACD_Crossover': bool(focus['MACD_Crossover']),
        'Price_Change_1D': change_1,
        'Price_Change_3D': change_3
    }

    return {
        'Ticker': ticker,
        'Company': company_name,
        'Earnings_Date': earnings_date_str,
        'Call_Tone': dict(call_summary),
        'News_Tone': dict(news_summary),
        'Chart_Signal': chart_signal
    }



In [8]:
from pprint import pprint

events = [
    ("AAPL", "Apple", "2025-05-01"),
    ("MSFT", "Microsoft", "2025-04-30"),
    ("TSLA", "Tesla", "2025-04-25")
]

results = [analyze_earnings_event(*e) for e in events]
for r in results:
    pprint(r)


{'Call_Tone': {'negative': 3, 'neutral': 54, 'positive': 43},
 'Chart_Signal': {'MACD_Crossover': False,
                  'Price_Change_1D': None,
                  'Price_Change_3D': None,
                  'RSI': 'Neutral',
                  'Trend': 'Bearish'},
 'Company': 'Apple',
 'Earnings_Date': '2025-05-01',
 'News_Tone': {'negative': 6, 'neutral': 2, 'positive': 2},
 'Ticker': 'AAPL'}
{'Call_Tone': {'neutral': 65, 'positive': 35},
 'Chart_Signal': {'MACD_Crossover': False,
                  'Price_Change_1D': None,
                  'Price_Change_3D': None,
                  'RSI': 'Neutral',
                  'Trend': 'Bearish'},
 'Company': 'Microsoft',
 'Earnings_Date': '2025-04-30',
 'News_Tone': {'negative': 2, 'neutral': 8},
 'Ticker': 'MSFT'}
{'Call_Tone': {'negative': 4, 'neutral': 82, 'positive': 14},
 'Chart_Signal': {'MACD_Crossover': False,
                  'Price_Change_1D': None,
                  'Price_Change_3D': None,
                  'RSI': 'Neutral',
   

In [10]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Example: Your JSON-style analysis results
results = [
    {
        'Ticker': 'AAPL',
        'Company': 'Apple',
        'Earnings_Date': '2025-05-01',
        'Call_Tone': {'negative': 3, 'neutral': 54, 'positive': 43},
        'News_Tone': {'negative': 6, 'neutral': 2, 'positive': 2},
        'Chart_Signal': {
            'MACD_Crossover': False,
            'Price_Change_1D': 1.5,     # <— You need to FILL this in manually for now
            'Price_Change_3D': -2.3,    # <— This is your target for prediction
            'RSI': 'Neutral',
            'Trend': 'Bearish'
        }
    },
    {
        'Ticker': 'MSFT',
        'Company': 'Microsoft',
        'Earnings_Date': '2025-04-30',
        'Call_Tone': {'neutral': 65, 'positive': 35},
        'News_Tone': {'negative': 2, 'neutral': 8},
        'Chart_Signal': {
            'MACD_Crossover': False,
            'Price_Change_1D': 0.9,
            'Price_Change_3D': 2.1,
            'RSI': 'Neutral',
            'Trend': 'Bearish'
        }
    },
    {
        'Ticker': 'TSLA',
        'Company': 'Tesla',
        'Earnings_Date': '2025-04-25',
        'Call_Tone': {'negative': 4, 'neutral': 82, 'positive': 14},
        'News_Tone': {'negative': 9, 'neutral': 1},
        'Chart_Signal': {
            'MACD_Crossover': False,
            'Price_Change_1D': 1.5,     # <— You need to FILL this in manually for now
            'Price_Change_3D': -2.3,    # <— This is your target for prediction
            'RSI': 'Neutral',
            'Trend': 'Bearish'
        }
    }# ... add Tesla and others the same way

]  # replace with your actual results list

# Convert to DataFrame
rows = []
for r in results:
    if r['Chart_Signal']['Price_Change_3D'] is None:
        continue  # skip incomplete records

    row = {
        'Ticker': r['Ticker'],
        'Earnings_Date': r['Earnings_Date'],

        # Call Tone
        'Call_Pos': r['Call_Tone'].get('positive', 0),
        'Call_Neut': r['Call_Tone'].get('neutral', 0),
        'Call_Neg': r['Call_Tone'].get('negative', 0),

        # News Tone
        'News_Pos': r['News_Tone'].get('positive', 0),
        'News_Neut': r['News_Tone'].get('neutral', 0),
        'News_Neg': r['News_Tone'].get('negative', 0),

        # Chart signals
        'RSI': 50 if r['Chart_Signal']['RSI'] == 'Neutral' else (80 if r['Chart_Signal']['RSI'] == 'Overbought' else 20),
        'Trend': 1 if r['Chart_Signal']['Trend'] == 'Bullish' else 0,
        'MACD_Crossover': int(r['Chart_Signal']['MACD_Crossover']),

        # Target
        'Target': 1 if r['Chart_Signal']['Price_Change_3D'] > 0 else 0
    }
    rows.append(row)

df = pd.DataFrame(rows)

# Features & Target
X = df.drop(['Ticker', 'Earnings_Date', 'Target'], axis=1)
y = df['Target']

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

