In [5]:
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
from preprocess import FinanceNews, TwitterNews, merge_df
from bert.model import Finbert
# Assuming merge_df, FinanceNews, TwitterNews, and Finbert are defined as in your example.

# Evaluate the FinBERT sentiment model on the merged finance-news and
# Twitter training data, then print the accuracy and a per-class report.

# Dataset locations, relative to the notebook's working directory.
finance = "../dataset/sentiment/all-data.csv"
twitter_train = "../dataset/twitter/sent_train.csv"
twitter_test = "../dataset/twitter/sent_valid.csv"

# Step 1: Load both corpora through the project's preprocessing wrappers.
finance_news = FinanceNews(finance)
twitter_news = TwitterNews(twitter_train, twitter_test)

# Only the training splits are merged and scored here.
merged_data = merge_df(finance_news.train, twitter_news.train)

# Step 2: Initialize the FinBERT model.
model = Finbert()

# Step 3: Run inference on the merged data.
texts = merged_data['text'].tolist()
true_labels = merged_data['label'].tolist()

# NOTE(review): `infernece` looks like a misspelling of `inference`, but the
# method is defined in bert.model, so it has to be renamed there first.
# The second return value is not used by this evaluation.
predictions, _ = model.infernece(texts)

# Each prediction is indexed by 'label' to get the sentiment name.
# Presumably these are dicts such as {'label': 'positive', ...} -- TODO confirm
# against Finbert's return type.
predicted_labels = [prediction['label'] for prediction in predictions]

# Map sentiment names onto numeric class ids so they can be compared with
# the dataset labels.
# NOTE(review): assumes merged_data['label'] uses the same encoding
# (0 = negative, 1 = neutral, 2 = positive) -- verify against preprocess.
sentiment_mapping = {
    'positive': 2.0,
    'neutral': 1.0,
    'negative': 0.0,
}

# A label missing from the mapping raises KeyError here, which surfaces an
# unexpected model output instead of silently mis-scoring it.
predicted_numerical_labels = [sentiment_mapping[label] for label in predicted_labels]

# Step 4: Calculate accuracy.
accuracy = accuracy_score(true_labels, predicted_numerical_labels)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Optional: per-class precision / recall / F1.
print("Classification Report:")
print(classification_report(true_labels, predicted_numerical_labels))


Device set to use mps:0
