In [1]:
from transformers import AutoTokenizer, RobertaForSequenceClassification
import pandas as pd
import numpy as np
import torch
from datasets import Dataset
from datasets import load_from_disk
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


# Combined news MUST be processed before running this notebook
### First execute `python preprocess-combined-news.py`

In [2]:
# Location of the dataset saved to disk by the preprocessing step
# (presumably written by preprocess-combined-news.py -- verify against that script).
PROCESSED_DATA_DIR = "data/processed/"

# Load the saved dataset; later cells index it by row and read "merged_embeddings".
data = load_from_disk(PROCESSED_DATA_DIR)

In [3]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig

# 1. Setup and Initialization
# Checkpoint fine-tuned for financial-news sentiment classification.
MODEL_NAME = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

# Load the tokenizer and the full sequence-classification model (encoder + head),
# so the pre-trained classification-head weights are available for reuse.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
roberta_model = RobertaForSequenceClassification.from_pretrained(MODEL_NAME)

# Reuse the config that is already attached to the loaded model instead of
# loading the checkpoint config a second time; every derived constant below
# then comes from one single source.
config = roberta_model.config

# Class index -> label name mapping shipped with the checkpoint.
SENTIMENT_MAPPING = config.id2label

# Width of the vectors the classification head consumes (768 for this checkpoint).
EMBEDDING_DIM = config.hidden_size
# Number of output classes of the head (3 for this checkpoint).
NUM_SENTIMENT_CLASSES = config.num_labels

print(f"Loaded Full Pre-trained Model with Head (Input Dim: {EMBEDDING_DIM}, Output Classes: {NUM_SENTIMENT_CLASSES})")
print(f"Classification Head Structure (Weights used for prediction): {roberta_model.classifier}")



Loaded Full Pre-trained Model with Head (Input Dim: 768, Output Classes: 3)
Classification Head Structure (Weights used for prediction): RobertaClassificationHead(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (out_proj): Linear(in_features=768, out_features=3, bias=True)
)


In [4]:
def apply_pretrained_head(daily_vector, model):
    """
    Pass a pre-computed daily vector (V_D) through the model's pre-trained
    classification head and return the raw logits.

    The head's sub-modules are applied in this order:
    dense -> tanh -> dropout -> out_proj.

    Args:
        daily_vector: The vector(s) to classify. May be a ``torch.Tensor`` or
            anything ``torch.as_tensor`` accepts (nested list, NumPy array).
            The last dimension must match the input width of
            ``model.classifier.dense``. The input is cast to the dtype and
            device of the head's weights, so e.g. float64 or list input does
            not fail inside the linear layers.
        model: Any object exposing a ``classifier`` attribute with ``dense``,
            ``dropout`` and ``out_proj`` sub-modules.

    Returns:
        torch.Tensor: Logits with the same leading dimensions as the input and
        a last dimension equal to the output width of ``out_proj``.

    Note:
        Gradient tracking and train/eval mode are left to the caller; wrap the
        call in ``torch.no_grad()`` and put the model in eval mode for inference.
    """
    classifier = model.classifier

    # Align the input with the head's parameters. For a tensor that already
    # has the right dtype/device this returns the tensor unchanged.
    head_weight = classifier.dense.weight
    features = torch.as_tensor(
        daily_vector, dtype=head_weight.dtype, device=head_weight.device
    )

    # 1. Dense layer followed by 2. tanh activation
    hidden = torch.tanh(classifier.dense(features))
    # 3. Dropout (inactive when the module is in eval mode; kept for structure)
    hidden = classifier.dropout(hidden)
    # 4. Final projection to one logit per class
    return classifier.out_proj(hidden)

In [8]:
# Index of the dataset record to score.
row = 0

# Fetch that record, then pull out its pre-computed merged embedding (V_D).
selected_record = data[row]
vd = selected_record["merged_embeddings"]


In [9]:

# --- Run the Pipeline ---
# Exceptions are deliberately not caught here: a failure should surface with
# its full traceback and stop "Run All" instead of being reduced to a one-line
# message while later cells keep running on stale state.

# Set the model to evaluation mode
roberta_model.eval()

# 1. Get the final Daily News Vector (V_D)
# V_D is the pre-computed merged embedding selected from the loaded dataset.
daily_vector_Vd = vd

# 2. Apply the existing pre-trained head to V_D
# This step uses the weights from the loaded checkpoint.
with torch.no_grad():
    logits = apply_pretrained_head(daily_vector_Vd, roberta_model)

# The code below indexes logits as (batch, classes); add the batch axis if
# V_D was supplied as a single 1-D vector.
if logits.dim() == 1:
    logits = logits.unsqueeze(0)

print(logits)

# Convert logits to class probabilities along the class axis.
probabilities = F.softmax(logits, dim=1)

# Find the predicted class index (0, 1, or 2) and map it to its label
predicted_index = torch.argmax(probabilities, dim=1).item()
predicted_sentiment = SENTIMENT_MAPPING[predicted_index]

print("\n--- Sentiment Prediction Result ---")
print(f"Input V_D shape to pre-trained head: {daily_vector_Vd.shape}")
print("Probabilities (Labels from Checkpoint Config):")

# Print one probability per class, labelled via the checkpoint's mapping
for i in range(NUM_SENTIMENT_CLASSES):
    label = SENTIMENT_MAPPING[i]
    prob = probabilities[0, i].item()
    print(f"  {label}: {prob:.4f}")

print(f"\nFinal Predicted Daily Sentiment: {predicted_sentiment}")


tensor([[-1.3175,  4.3451, -2.7907]])

--- Sentiment Prediction Result ---
Input V_D shape to pre-trained head: torch.Size([1, 768])
Probabilities (Labels from Checkpoint Config):
  negative: 0.0035
  neutral: 0.9957
  positive: 0.0008

Final Predicted Daily Sentiment: neutral
