## Step 2: Performing sentiment analysis on news headlines

In [None]:
!pip install torch




[notice] A new release of pip is available: 24.1.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import json
import torch
from sklearn.metrics import precision_score, recall_score, f1_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


## USING BERT MODEL

### 2.1. Importing a sentiment model from Huggingface

In [2]:
# Hugging Face Hub checkpoint used for the BERT-based sentiment scores.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"

# Load the tokenizer first, then the classifier, from the same checkpoint.
# force_download=True is passed to both loaders.
tokenizer, model = (
    loader.from_pretrained(model_name, force_download=True)
    for loader in (AutoTokenizer, AutoModelForSequenceClassification)
)


### 2.2. Writing functions to calculate average sentiment for each day

In [3]:
def calculateDailySentiment(headlines):
    """Average the BERT class probabilities over one day's headlines.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        headlines: Iterable of mappings, each with a 'headline' key holding
            the headline text.

    Returns:
        A list of floats: the per-class softmax probabilities averaged over
        all headlines of the day, or an empty list when there are no
        headlines to score.
    """
    texts = [headline['headline'] for headline in headlines]
    if not texts:
        # Nothing to score; do not send an empty batch to the tokenizer.
        return []
    #endif

    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512, return_attention_mask=True)

    # Inference only: disable autograd so no computation graph is recorded
    # for the forward pass (the scores themselves are unchanged).
    with torch.no_grad():
        logits = model(**inputs).logits
    #endwith

    # Per-headline class probabilities, then the mean over the headlines.
    scores = logits.softmax(dim=1)
    averageScore = scores.mean(dim=0).tolist()
    return averageScore
#enddef

def analyzeAndSaveSentiment(inputFile, outputFile):
    """Score every day in a headlines JSON file and save the scores as JSON.

    Args:
        inputFile: Path to a JSON object mapping each date to the list of
            headline records for that date.
        outputFile: Path of the JSON file to write. It maps each date to the
            value returned by calculateDailySentiment for that date.

    Returns:
        None. One "<date> > <scores>" line is printed per date.
    """
    # Name the encoding explicitly: without it open() falls back to the
    # platform locale, which is not UTF-8 on every system.
    with open(inputFile, 'r', encoding='utf-8') as file:
        data = json.load(file)
    #endwith

    result = {}

    for date, headlines in data.items():
        averageScore = calculateDailySentiment(headlines)
        print(f"{date} > {averageScore}")
        result[date] = averageScore
    #endfor

    # Use a separate name for the handle so the outputFile path is not rebound.
    with open(outputFile, 'w', encoding='utf-8') as outFile:
        json.dump(result, outFile, indent=2)
    #endwith
#enddef


### 2.3. Reading the news headlines and writing the sentiment scores in JSON format
- The score for each day is saved in the file [bert_daily_scores.json](./data/news/bert_daily_scores.json)

In [4]:
# Input: JSON file of headlines keyed by date.
# Output: JSON file that receives the BERT score list for each date.
bertinputJsonFile = './data/news/Headlines.json'
bertoutputJsonFile = './data/news/bert_daily_scores.json'

# Scores every date, prints one "<date> > <scores>" line per date and writes the output file.
analyzeAndSaveSentiment(bertinputJsonFile, bertoutputJsonFile)

2015-01-01 > [0.08557845652103424, 0.05136130377650261, 0.07145218551158905, 0.17430974543094635, 0.6172983050346375]
2015-01-02 > [0.03513030707836151, 0.04333778843283653, 0.07190804928541183, 0.2649349272251129, 0.5846889019012451]
2015-01-03 > [0.13386961817741394, 0.08151617646217346, 0.08502551913261414, 0.21400311589241028, 0.48558560013771057]
2015-01-04 > [0.014707638882100582, 0.01659909263253212, 0.061063289642333984, 0.3243460953235626, 0.5832838416099548]
2015-01-05 > [0.009732341393828392, 0.010922754183411598, 0.03575123846530914, 0.2549060583114624, 0.6886875629425049]
2015-01-06 > [0.02793833240866661, 0.024906989187002182, 0.062337517738342285, 0.2682053744792938, 0.6166117787361145]
2015-01-07 > [0.208579421043396, 0.09835243225097656, 0.08405030518770218, 0.22933156788349152, 0.37968623638153076]
2015-01-08 > [0.046814270317554474, 0.06283639371395111, 0.09586086124181747, 0.2833739221096039, 0.5111145377159119]
2015-01-09 > [0.010802625678479671, 0.0134716378524899

## USING FINBERT

### 2.4. Importing a sentiment model from Huggingface

In [4]:
# Hugging Face Hub checkpoint used for the FinBERT-based sentiment scores.
finbert_model_name = "yiyanghkust/finbert-tone"

# Force download to avoid cache corruption; the tokenizer is loaded first,
# then the classifier, both from the same checkpoint.
finbert_tokenizer, finbert_model = (
    loader.from_pretrained(finbert_model_name, force_download=True)
    for loader in (AutoTokenizer, AutoModelForSequenceClassification)
)

print("FinBERT model and tokenizer loaded successfully!")

FinBERT model and tokenizer loaded successfully!


### 2.5. Writing functions to calculate average sentiment for each day

In [None]:
def calculateDailySentiment(headlines):
    """Average the FinBERT class probabilities over one day's headlines.

    Uses the module-level ``finbert_tokenizer`` and ``finbert_model``.

    NOTE: this definition reuses the name of the BERT-based function from
    section 2.2, so once this cell has run the earlier version is replaced.

    Args:
        headlines: Iterable of mappings, each with a 'headline' key holding
            the headline text.

    Returns:
        A list of floats: the per-class softmax probabilities averaged over
        all headlines of the day, or an empty list when there are no
        headlines to score.
    """
    texts = [headline['headline'] for headline in headlines]
    if not texts:
        # Nothing to score; do not send an empty batch to the tokenizer.
        return []

    inputs = finbert_tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512, return_attention_mask=True)

    # Inference only: disable autograd so no computation graph is recorded
    # for the forward pass (the scores themselves are unchanged).
    with torch.no_grad():
        logits = finbert_model(**inputs).logits

    # Per-headline class probabilities, then the mean over the headlines.
    scores = logits.softmax(dim=1)
    average_score = scores.mean(dim=0).tolist()

    return average_score

def analyzeAndSaveSentiment(inputFile, outputFile):
    """Score every day in a headlines JSON file and save the scores as JSON.

    The scoring function is looked up by name when this runs, so whichever
    calculateDailySentiment is currently defined in the notebook is used.

    Args:
        inputFile: Path to a JSON object mapping each date to the list of
            headline records for that date.
        outputFile: Path of the JSON file to write. It maps each date to the
            value returned by calculateDailySentiment for that date.

    Returns:
        None. One "<date> > <scores>" line is printed per date.
    """
    # Name the encoding explicitly: without it open() falls back to the
    # platform locale, which is not UTF-8 on every system.
    with open(inputFile, 'r', encoding='utf-8') as file:
        data = json.load(file)

    result = {}

    for date, headlines in data.items():
        average_score = calculateDailySentiment(headlines)
        print(f"{date} > {average_score}")
        result[date] = average_score

    # Use a separate name for the handle so the outputFile path is not rebound.
    with open(outputFile, 'w', encoding='utf-8') as out_handle:
        json.dump(result, out_handle, indent=2)


In [None]:
# Input: JSON file of headlines keyed by date (the same file as the BERT run).
# Output: JSON file that receives the FinBERT score list for each date.
fininputJsonFile = './data/news/Headlines.json'
finoutputJsonFile = './data/news/finbert_daily_scores.json'

# Scores every date, prints one "<date> > <scores>" line per date and writes the output file.
analyzeAndSaveSentiment(fininputJsonFile, finoutputJsonFile)

2015-01-01 > [0.0001529758592369035, 0.9995682835578918, 0.00027867170865647495]
2015-01-02 > [0.02550957165658474, 0.974485456943512, 4.939814516546903e-06]
2015-01-03 > [8.384435204789042e-06, 0.9999915361404419, 1.5136204467580683e-07]
2015-01-04 > [0.3102041482925415, 0.6897690892219543, 2.6795607482199557e-05]
2015-01-05 > [3.043868768148883e-10, 1.0, 8.75061356708784e-09]
2015-01-06 > [6.979316094657406e-05, 0.9999294877052307, 7.201252856248175e-07]
2015-01-07 > [0.33314192295074463, 0.33345088362693787, 0.3334071636199951]
2015-01-08 > [0.02803081087768078, 0.9719678163528442, 1.4086575674809865e-06]
2015-01-09 > [0.0001222361606778577, 0.9998772740364075, 4.943545377500413e-07]
2015-01-10 > [5.358609111993928e-09, 1.0, 3.805414650059902e-09]
2015-01-11 > [7.833857671357691e-05, 0.9999209046363831, 7.71650547903846e-07]
2015-01-12 > [0.06210513785481453, 0.937862753868103, 3.205702523700893e-05]
2015-01-13 > [4.638524387701182e-06, 0.999995231628418, 1.7174987476664683e-07]
201