## Sentiment analysis

In [25]:
import mysql.connector
from transformers import pipeline
import numpy as np
from credentials import ipCred, usernameCred, passwordCred, databaseCred

# Use GPU if available (set device=0) or CPU with device=-1.
# Adjust this parameter based on your environment.
classifier = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=-1)

ticker = 'INTC'
db_config = {
    'host': ipCred,
    'user': usernameCred,
    'password': passwordCred,
    'database': databaseCred
}

conn = mysql.connector.connect(**db_config)

# --- Cursor A: Fetch rows that need sentiment scores ---
cursor_fetch = conn.cursor()
fetch_query = f"""
    SELECT news_id, summary
    FROM {ticker}_news
    WHERE sentiment IS NULL
    LIMIT 10000;
"""
cursor_fetch.execute(fetch_query)
rows = cursor_fetch.fetchall()
cursor_fetch.close()  # Close fetch cursor

# If no rows, exit early
if not rows:
    print("No rows to update.")
    conn.close()
    exit()

# Separate out the IDs and summaries (filter out any empty summaries)
id_summary_pairs = [(news_id, summary) for news_id, summary in rows if summary]
if not id_summary_pairs:
    print("No valid summaries found.")
    conn.close()
    exit()

# Unzip the list into separate lists
news_ids, summaries = zip(*id_summary_pairs)

# Process summaries in batch for efficiency.
# You might adjust the batch size if you have a lot of data.
batch_size = 32
results = []
for i in range(0, len(summaries), batch_size):
    batch = list(summaries[i:i+batch_size])
    # The pipeline can process a list of strings.
    batch_results = classifier(batch, truncation=True)
    results.extend(batch_results)

# --- Cursor B: Update rows with sentiment scores ---
cursor_update = conn.cursor()
update_query = f"""
    UPDATE {ticker}_news
    SET sentiment = %s
    WHERE news_id = %s
"""

# FinBERT returns a dict per input.
# For each result, compute bipolar sentiment: positive minus negative.
for news_id, result in zip(news_ids, results):
    # The classifier might return a list of results for each input;
    # FinBERT usually returns one result per input, so we take the first.
    r = result[0] if isinstance(result, list) else result
    label = r['label'].upper()
    score = r['score']
    # Compute bipolar sentiment: if positive, score is positive; if negative, negative.
    # For neutral, we assume a score of 0.
    if label == 'POSITIVE':
        sentiment_score = score
    elif label == 'NEGATIVE':
        sentiment_score = -score
    else:
        sentiment_score = 0.0

    cursor_update.execute(update_query, (sentiment_score, news_id))

conn.commit()
cursor_update.close()
conn.close()

print("Sentiment scores updated successfully.")

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Sentiment scores updated successfully.
