In [4]:
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load the pretrained FinBERT tone checkpoint: the tokenizer turns raw text
# into model inputs, and the classification head produces one logit per
# sentiment class. Both are fetched by the same checkpoint name.
model_name = "yiyanghkust/finbert-tone"
tokenizer, model = (
    BertTokenizer.from_pretrained(model_name),
    BertForSequenceClassification.from_pretrained(model_name),
)

#opinion mining function
def get_sentiment(text):
    """Classify the tone of ``text`` with the module-level FinBERT model.

    Args:
        text: The string to classify. Input longer than 512 tokens is
            truncated by the tokenizer.

    Returns:
        A ``(sentiment, confidence)`` tuple: the name of the class with the
        highest softmax probability (``'neutral'``, ``'positive'`` or
        ``'negative'``) and that probability as a Python float.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only: skip autograd bookkeeping so no graph is built per call.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)[0]
    # Class order published for the finbert-tone checkpoint:
    # 0 -> neutral, 1 -> positive, 2 -> negative. Listing the names in any
    # other order silently mislabels every prediction.
    labels = ['neutral', 'positive', 'negative']
    best = int(torch.argmax(probabilities))
    sentiment = labels[best]
    confidence = probabilities[best].item()
    return sentiment, confidence

# Input CSVs to score, keyed by the source they were collected from.
file_paths = {
    "reddit_data": r"C:\Users\merci\Documents\SNU\capstone\dataset_opinion_mining\reddit\preprocessed_reddit_amazon.csv",
    "newsapi_general": r"C:\Users\merci\Documents\SNU\capstone\dataset_opinion_mining\news_api_general\preprocessed_amazon_news.csv",
    "newsapi_finance": r"C:\Users\merci\Documents\SNU\capstone\dataset_opinion_mining\news_api_finance\preprocessed_amazon_news_fin.csv"
}

# Per-file results are collected here and concatenated once at the end, so
# earlier rows are not re-copied on every iteration and no empty seed frame
# has to take part in the concatenation.
scored_frames = []

for file_path in file_paths.values():
    data = pd.read_csv(file_path)

    # Blank text cells come back from read_csv as NaN and purely numeric text
    # as numbers; the tokenizer accepts neither. Rows without text have
    # nothing to score, so drop them and coerce the remainder to str.
    texts = data['processed_text'].dropna().astype(str)

    # Build the result columns from a list of (sentiment, confidence) pairs;
    # unlike zip(*...) unpacking, this also works when a file has no rows.
    scored = pd.DataFrame(
        [get_sentiment(text) for text in texts],
        columns=['sentiment', 'confidence'],
        index=texts.index,
    )
    scored.insert(0, 'processed_text', texts)

    scored_frames.append(scored)

# Combined results from every input file, in file_paths order.
all_data = pd.concat(scored_frames, ignore_index=True)

output_path = r"C:\Users\merci\Documents\SNU\capstone\sentiment_analysis_results_amazon.csv"
all_data.to_csv(output_path, index=False)
print(f"Sentiment analysis complete. Results saved to '{output_path}'")


Sentiment analysis complete. Results saved to 'C:\Users\merci\Documents\SNU\capstone\sentiment_analysis_results_amazon.csv'
