In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Load Pre-Trained Financial Model (FinBERT)

# Seed PyTorch's RNGs up front so stochastic steps that draw from them
# (e.g. DataLoader shuffling, dropout) are repeatable after Restart & Run All.
torch.manual_seed(42)

# Pre-trained on hugging face for general financial news
model_name = "yiyanghkust/finbert-tone"
tokenizer = BertTokenizer.from_pretrained(model_name)
# Three output classes (positive / neutral / negative sentiment)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=3)

# Do not forget to turn on T4 on Colab; falls back to CPU when no GPU is visible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Load Tesla Earnings Call Dataset
# Assume we have a CSV with Tesla earnings call text & stock movement
# Sample dataset in the structure (Text, Label)
df = pd.read_csv("tesla_earnings_calls.csv")

# Fail fast on a malformed file instead of hitting a KeyError several cells later.
missing_columns = {'text', 'label'} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"tesla_earnings_calls.csv is missing required column(s): {sorted(missing_columns)}"
    )

# Rows without text or label cannot be tokenized / encoded, so drop them here.
df = df.dropna(subset=['text', 'label']).reset_index(drop=True)
if df.empty:
    raise ValueError("tesla_earnings_calls.csv contains no usable (text, label) rows")


In [None]:
# Encode Labels: Positive (2), Neutral (1), Negative (0)
# NOTE(review): the finbert-tone model card appears to list the checkpoint's own
# head order as 0=neutral, 1=positive, 2=negative. If so, this encoding makes
# fine-tuning relearn a permuted head — confirm which order is intended.
label_to_id = {'positive': 2, 'neutral': 1, 'negative': 0}

# Only encode while the column still holds strings: re-running this cell on an
# already-encoded column would otherwise map every value to NaN.
if not pd.api.types.is_numeric_dtype(df['label']):
    # Tolerate case / surrounding-whitespace differences such as "Positive " in the CSV.
    encoded_labels = df['label'].astype(str).str.strip().str.lower().map(label_to_id)

    # .map() yields NaN for anything outside label_to_id; surface that immediately
    # instead of letting NaN labels reach the training loop.
    unknown_labels = df.loc[encoded_labels.isna(), 'label'].unique().tolist()
    if unknown_labels:
        raise ValueError(f"Unrecognized value(s) in 'label' column: {unknown_labels}")

    df['label'] = encoded_labels.astype('int64')

# Split Data (80% Train, 20% Validation), fixed seed for a repeatable split
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42
)



In [None]:
#  Create Custom Dataset Class with text, labels
class TeslaEarningsDataset(Dataset):
    """Map-style dataset that tokenizes one text per item and pairs it with its label.

    Args:
        texts: Sequence of input strings (pandas Series, numpy array, list, ...).
        labels: Sequence of integer class ids, same length as ``texts``.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=True, padding="max_length",
            max_length=max_len, return_tensors="pt")`` that returns a mapping
            with ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_len: Length every example is truncated / padded to.

    Raises:
        ValueError: If ``texts`` and ``labels`` have different lengths.
    """

    def __init__(self, texts, labels, tokenizer, max_len=512):
        # Accept pandas / numpy containers (via .tolist()) as well as plain sequences.
        self.texts = texts.tolist() if hasattr(texts, "tolist") else list(texts)
        self.labels = labels.tolist() if hasattr(labels, "tolist") else list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(self.texts)} and {len(self.labels)})"
            )
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of (text, label) examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return its model inputs.

        Returns:
            dict with ``'input_ids'`` and ``'attention_mask'`` (leading dimension
            removed) and ``'labels'`` as a ``torch.long`` scalar tensor.
        """
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        return {
            # squeeze(0) drops the leading dimension of the single-example encoding
            # (assumed to be a size-1 batch axis) so the DataLoader can stack
            # items into (batch, max_len).
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

In [None]:
# Wrap each split in the tokenizing dataset
train_dataset = TeslaEarningsDataset(texts=train_texts, labels=train_labels, tokenizer=tokenizer)
val_dataset = TeslaEarningsDataset(texts=val_texts, labels=val_labels, tokenizer=tokenizer)

# Batch both splits in groups of 16; only the training loader shuffles,
# the validation loader keeps the dataset order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=16)


In [None]:
#  Define Optimizer & Scheduler
# AdamW over every model parameter: base learning rate 2e-5, weight decay 0.01.
optimizer = optim.AdamW(params=model.parameters(), weight_decay=0.01, lr=2e-5)

# Hold-then-step-decay multiplier for the learning rate
def lr_lambda(epoch):
    """Return the factor applied to the base learning rate at ``epoch``.

    The factor is 1 for epochs 0-9 and is halved once per further block of
    5 epochs (0.5 for epochs 10-14, 0.25 for 15-19, ...). There is no ramp-up
    phase.

    NOTE(review): a run of 10 epochs or fewer never reaches the first decay,
    so the learning rate stays constant for such runs — confirm this is the
    intended schedule.
    """
    if epoch < 5:
        # Guard: without it (epoch - 5) // 5 is negative here and the factor would exceed 1.
        return 1
    completed_blocks = (epoch - 5) // 5
    return 0.5 ** completed_blocks

# LambdaLR sets the learning rate to the base LR times lr_lambda(<number of scheduler.step() calls so far>).
scheduler = optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lr_lambda)

In [None]:
# Fine-Tuning Loop settings
gradient_accumulation_steps = 4    # mini-batches accumulated before each optimizer step
num_epochs = 10                    # full passes over the training loader
criterion = nn.CrossEntropyLoss()  # classification loss applied to the model's logits


In [None]:
# Fine-tune with gradient accumulation: gradients from up to
# `gradient_accumulation_steps` mini-batches are summed before each optimizer step.
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0  # sum of the *unscaled* per-batch losses for this epoch
    num_batches = len(train_loader)

    # Start every epoch from clean gradients so nothing carries over between epochs.
    optimizer.zero_grad()

    for step, batch in enumerate(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = criterion(outputs.logits, batch['labels'])

        # Size of the accumulation window this batch belongs to. The last window
        # of an epoch is shorter when num_batches is not a multiple of
        # gradient_accumulation_steps, so scale by its real size.
        window_start = (step // gradient_accumulation_steps) * gradient_accumulation_steps
        window_size = min(gradient_accumulation_steps, num_batches - window_start)
        (loss / window_size).backward()
        total_loss += loss.item()

        # Step at the end of every full window AND on the final batch, so a
        # trailing partial window is applied instead of leaking into the next epoch.
        is_window_end = (step + 1) % gradient_accumulation_steps == 0
        is_last_batch = (step + 1) == num_batches
        if is_window_end or is_last_batch:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping
            optimizer.step()
            optimizer.zero_grad()

    # Read the LR that was actually used this epoch before the scheduler advances it.
    epoch_lr = optimizer.param_groups[0]['lr']
    scheduler.step()

    # Report the mean per-batch loss so the value is comparable across dataset sizes;
    # max(..., 1) avoids dividing by zero on an empty loader.
    mean_loss = total_loss / max(num_batches, 1)
    print(f"Epoch {epoch+1} | LR: {epoch_lr:.6e} | Loss: {mean_loss:.4f}")

In [None]:
# Validation accuracy on the held-out Tesla earnings-call split
model.eval()
correct = 0
total = 0

# Gradient tracking is disabled for the whole pass.
with torch.no_grad():
    for batch in val_loader:
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        labels = batch['labels']
        logits = model(**batch).logits
        # Predicted class = index of the largest logit for each example.
        preds = logits.argmax(dim=1)
        correct += preds.eq(labels).sum().item()
        total += labels.size(0)

print(f"📈 Accuracy on Tesla Earnings Calls: {correct / total:.2%}")