### Compare Sentiment Analysis Model Accuracy on the Financial PhraseBank Dataset

In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score
import re

# Download the NLTK VADER lexicon (only needed once)
# try:
#     nltk.data.find('sentiment/vader_lexicon.zip')
# except LookupError:
#     nltk.download('vader_lexicon')


In [None]:

def load_and_preprocess_data(file_path):
    """
    Read a '<sentence>@<sentiment>' text file into two parallel lists.

    Each non-blank line is split at its last '@': the text before it becomes
    the sentence and the text after it becomes the lower-cased sentiment label
    (both stripped of surrounding whitespace). Lines that contain no '@' are
    skipped. The file is decoded as latin-1.

    Returns a (sentences, labels) tuple of lists with matching indices.
    """
    sentences, labels = [], []
    with open(file_path, 'r', encoding='latin-1') as handle:
        for raw_line in handle:
            record = raw_line.strip()
            if not record:
                continue
            # Split on the final '@' only, so an '@' inside the sentence survives.
            text, separator, tag = record.rpartition('@')
            if not separator:
                continue
            sentences.append(text.strip())
            labels.append(tag.strip().lower())
    return sentences, labels

def convert_vader_to_sentiment(compound_score):
    """
    Map a VADER compound score onto a three-way sentiment label.

    Scores of 0.05 or above give 'positive', scores of -0.05 or below give
    'negative', and every other value gives 'neutral'.
    """
    cutoff = 0.05
    if compound_score >= cutoff:
        return 'positive'
    if compound_score <= -cutoff:
        return 'negative'
    return 'neutral'

def evaluate_vader(sentences, true_labels):
    """
    Score VADER's three-way predictions against the reference labels.

    Each sentence is passed to a SentimentIntensityAnalyzer, its 'compound'
    score is mapped to a label by convert_vader_to_sentiment, and the value of
    accuracy_score(true_labels, predictions) is returned.
    """
    analyzer = SentimentIntensityAnalyzer()
    predictions = [
        convert_vader_to_sentiment(analyzer.polarity_scores(text)['compound'])
        for text in sentences
    ]
    return accuracy_score(true_labels, predictions)

def evaluate_bert_model(model_name, sentences, true_labels):
    """
    Score a Hugging Face sentiment-analysis model against the reference labels.

    A "sentiment-analysis" pipeline is built with model_name as both model and
    tokenizer (truncation and padding enabled, max_length 512), run over all
    sentences at once, and each returned 'label' is lower-cased before being
    compared with true_labels via accuracy_score.
    """
    classifier = pipeline(
        "sentiment-analysis",
        model=model_name,
        tokenizer=model_name,
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Lower-case the model's label strings so they are compared in the same
    # case as the lower-cased dataset labels.
    predictions = [output['label'].lower() for output in classifier(sentences)]
    return accuracy_score(true_labels, predictions)

import os

# Directory holding the FinancialPhraseBank-v1.0 text files (machine-specific path).
base_path = "/Users/hwang-yejin/Desktop/Summer1/Proposal/coding/FinancialPhraseBank-v1.0/"

# Load all four agreement-level subsets. os.path.join keeps the paths valid
# even if base_path is later edited to drop its trailing separator.
try:
    sentences_50agree, labels_50agree = load_and_preprocess_data(os.path.join(base_path, 'Sentences_50Agree.txt'))
    sentences_66agree, labels_66agree = load_and_preprocess_data(os.path.join(base_path, 'Sentences_66Agree.txt'))
    sentences_75agree, labels_75agree = load_and_preprocess_data(os.path.join(base_path, 'Sentences_75Agree.txt'))
    sentences_allagree, labels_allagree = load_and_preprocess_data(os.path.join(base_path, 'Sentences_AllAgree.txt'))
except FileNotFoundError as e:
    print(f"Error: file cannot be found - {e}. check the file path")
    # Stop with a non-zero status. The interactive exit() helper is not meant
    # for programs: it is injected by `site` (or IPython) and signals success.
    raise SystemExit(1)

print("--- Sentiment Analysis Model Accuracy Evaluation ---")

# VADER evaluation (lexicon-based baseline)
print("\n--- VADER Model Evaluation (Sentences_AllAgree.txt) ---")
vader_accuracy_allagree = evaluate_vader(sentences_allagree, labels_allagree)
print(f"VADER Accuracy (100% Agree): {vader_accuracy_allagree:.4f}")

# FinBERT evaluation. The broad except is a deliberate best-effort guard:
# a failure is reported and the remaining evaluations still run.
print("\n--- FinBERT Model Evaluation (Sentences_AllAgree.txt) ---")
try:
    finbert_accuracy_allagree = evaluate_bert_model("ProsusAI/finbert", sentences_allagree, labels_allagree)
    print(f"FinBERT Accuracy (100% Agree): {finbert_accuracy_allagree:.4f}")
except Exception as e:
    print(f"Error during FinBERT evaluation: {e}")

# DistilBERT evaluation (same best-effort guard as above).
print("\n--- DistilBERT Model Evaluation (Sentences_AllAgree.txt) ---")
try:
    distilbert_accuracy_allagree = evaluate_bert_model("distilbert-base-uncased-finetuned-sst-2-english", sentences_allagree, labels_allagree)
    print(f"DistilBERT Accuracy (100% Agree) - Note: This is a binary classification model, so there might be issues with handling 'neutral' labels: {distilbert_accuracy_allagree:.4f}")
except Exception as e:
    print(f"Error during DistilBERT evaluation: {e}")


--- Sentiment Analysis Model Accuracy Evaluation ---

--- VADER Model Evaluation (Sentences_AllAgree.txt) ---
VADER Accuracy (100% Agree): 0.5707

--- FinBERT Model Evaluation (Sentences_AllAgree.txt) ---


Device set to use mps:0


FinBERT Accuracy (100% Agree): 0.9717

--- DistilBERT Model Evaluation (Sentences_AllAgree.txt) ---


Device set to use mps:0


DistilBERT Accuracy (100% Agree) - Note: This is a binary classification model, so there might be issues with handling 'neutral' labels: 0.2584


In [None]:
print("\n--- Evaluating with Additional Datasets (Optional) ---")

# Lexicon-based baseline on the 75%-agreement subset.
print("\n--- VADER Model Evaluation (Sentences_75Agree.txt) ---")
vader_accuracy_75agree = evaluate_vader(sentences_75agree, labels_75agree)
print(f"VADER Accuracy (75% Agree): {vader_accuracy_75agree:.4f}")

# FinBERT on the 75%-agreement subset; a failure is reported and the cell
# continues with the next model.
print("\n--- FinBERT Model Evaluation (Sentences_75Agree.txt) ---")
try:
    finbert_accuracy_75agree = evaluate_bert_model(
        "ProsusAI/finbert", sentences_75agree, labels_75agree
    )
except Exception as err:
    print(f"Error during FinBERT evaluation: {err}")
else:
    print(f"FinBERT Accuracy (75% Agree): {finbert_accuracy_75agree:.4f}")

# DistilBERT (SST-2) on the 75%-agreement subset, with the same error handling.
print("\n--- DistilBERT Model Evaluation (Sentences_75Agree.txt) ---")
try:
    distilbert_accuracy_75agree = evaluate_bert_model(
        "distilbert-base-uncased-finetuned-sst-2-english",
        sentences_75agree,
        labels_75agree,
    )
except Exception as err:
    print(f"Error during DistilBERT evaluation: {err}")
else:
    print(f"DistilBERT Accuracy (75% Agree) - Note: This is a binary classification model, so there might be issues with handling 'neutral' labels: {distilbert_accuracy_75agree:.4f}")


--- Evaluating with Additional Datasets (Optional) ---

--- VADER Model Evaluation (Sentences_75Agree.txt) ---
VADER Accuracy (75% Agree): 0.5627

--- FinBERT Model Evaluation (Sentences_75Agree.txt) ---


Device set to use mps:0


FinBERT Accuracy (75% Agree): 0.9473

--- DistilBERT Model Evaluation (Sentences_75Agree.txt) ---


Device set to use mps:0


DistilBERT Accuracy (75% Agree) - Note: This is a binary classification model, so there might be issues with handling 'neutral' labels: 0.2667


### Test with one sentence (previous)

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
from nltk.sentiment import SentimentIntensityAnalyzer
import torch
import nltk
# Fetch the "vader_lexicon" package through NLTK's downloader. The recorded
# output of this cell shows it reporting "already up-to-date" when the package
# is present, so re-running the cell is harmless.
nltk.download("vader_lexicon")

[nltk_data] Downloading package vader_lexicon to /Users/hwang-
[nltk_data]     yejin/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [None]:
# Sample sentence scored by all three models below.
text = "TSLA is going to explode next week. I'm all in."

# ① VADER/NLTK (lexicon-based)
vader = SentimentIntensityAnalyzer()
print("VADER:", vader.polarity_scores(text))

# ② Hugging Face DistilBERT (deep learning, general domain)
hf = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", framework="pt")
print("Hugging Face (distilBERT):", hf(text)[0])

# ③ FinBERT (deep learning, finance domain-specialized)
finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
inputs = finbert_tokenizer(text, return_tensors="pt")
# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = finbert_model(**inputs)
probs = softmax(outputs.logits.detach().numpy()[0])
# Read the class names from the checkpoint's own config instead of hard-coding
# an order: a hand-written list silently mislabels the probabilities whenever
# it disagrees with the model's id2label mapping.
labels = [finbert_model.config.id2label[i] for i in range(len(probs))]
print("FinBERT:", dict(zip(labels, probs)))


VADER: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


Device set to use mps:0


Hugging Face (distilBERT): {'label': 'POSITIVE', 'score': 0.9973575472831726}
FinBERT: {'negative': 0.056402043, 'neutral': 0.04639243, 'positive': 0.89720553}


In [None]:
# import pandas as pd

# file_path = "/Users/hwang-yejin/Desktop/Summer1/Proposal/coding/FinancialPhraseBank-v1.0/Sentences_AllAgree.txt"

# with open(file_path, "r", encoding="latin-1") as f:
#     lines = f.readlines()

# data = []
# for line in lines:
#     if "@" in line:
#         sentence, label = line.strip().rsplit("@", 1)
#         data.append({"text": sentence.strip(), "label": label.strip().lower()})
# df = pd.DataFrame(data)
# print(df.head())

                                                text     label
0  According to Gran , the company has no plans t...   neutral
1  For the last quarter of 2010 , Componenta 's n...  positive
2  In the third quarter of 2010 , net sales incre...  positive
3  Operating profit rose to EUR 13.1 mn from EUR ...  positive
4  Operating profit totalled EUR 21.1 mn , up fro...  positive
