In [None]:
# Import libraries

import requests
import json
import os 
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, pipeline
import torch

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [3]:
# Collect and parse data from the NYT API

load_dotenv()

api_key = os.getenv("NYT_API_KEY")
section = "opinion" 

base_url = "https://api.nytimes.com/svc/topstories/v2"
endpoint = f"/{section}.json"
api_url = base_url + endpoint

params = {"api-key": api_key}

# Bound before the request so later cells can always iterate over it; it simply
# stays empty when the request fails or returns no usable results.
article_texts = []

try:
    # The timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(api_url, params=params, timeout=30)
    response.raise_for_status()

    data = response.json()

    if data and data.get('status') == 'OK' and data.get('results'):
        articles = data['results']
        for article in articles:
            title = article.get('title')
            abstract = article.get('abstract')
            # Skip entries missing either field so every text is "title: abstract".
            if title and abstract:
                article_texts.append(f"{title}: {abstract}")

    else:
        print("Failed to retrieve top stories data.")
        # Only look for an API fault message when the payload is a dict.
        fault = data.get('fault') if isinstance(data, dict) else None
        if isinstance(fault, dict) and fault.get('faultstring'):
            print(f"Error Message: {fault['faultstring']}")

# The JSON handler must come before RequestException: recent `requests`
# releases raise a decode error that subclasses both, so the original order
# made this branch unreachable.
except json.JSONDecodeError:
    print("Error decoding the JSON response.")
except requests.exceptions.RequestException as e:
    print(f"Error fetching the API: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

In [4]:
# Call model and predict sentiment

# Identifier handed to from_pretrained for both the tokenizer and the model.
model_name = "tabularisai/multilingual-sentiment-analysis"
# Module-level tokenizer and sequence-classification model; predict_sentiment
# below reads both of these names.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def predict_sentiment(texts):
    """Classify each text into one of five sentiment labels.

    Args:
        texts: A single string or a sequence of strings to classify.

    Returns:
        A list of label strings ("Very Negative" ... "Very Positive"), one per
        input text and in the same order. An empty sequence returns ``[]``
        without calling the tokenizer or the model.
    """
    # Normalise to a list so the emptiness check below is well defined.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if len(texts) == 0:
        # Nothing to classify (e.g. the API fetch returned no articles).
        return []

    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    # Softmax is monotonic, so the argmax of the logits is already the
    # most-probable class; no need to materialise probabilities.
    predicted_ids = torch.argmax(outputs.logits, dim=-1).tolist()
    sentiment_map = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}
    return [sentiment_map[p] for p in predicted_ids]

# Print only the articles whose predicted label is on the positive side.
for text, sentiment in zip(article_texts, predict_sentiment(article_texts)):
    if sentiment not in {"Positive", "Very Positive"}:
        continue
    print(f"Text: {text}\nSentiment: {sentiment}\n")



Text: Pope Francis Was a Champion, if an Imperfect One, for L.G.B.T.Q. People: He helped L.G.B.T.Q. Catholics feel more at home in their church. And that also meant that their families and friends also felt more at home.
Sentiment: Very Positive

Text: Francis and the End of the Imperial Papacy: Papal weakness has also opened up other possibilities for Christian and Catholic witness.
Sentiment: Very Positive

Text: How Francis Changed the Symbols of a Pope’s Funeral: In life and death, Francis wanted the symbols of his papacy to be humbler.
Sentiment: Very Positive

Text: Another Blackout Forces the Question of Puerto Rico’s Political Future: For over half a century, the island’s commonwealth status was justified by promises of security, stability and the material comforts of modern life.
Sentiment: Very Positive

Text: The New Science of Aging Can Predict Your Future: A new era of medical care is upon us.
Sentiment: Very Positive



In progress below: figuring out the code for sending the automated email messages.

In [None]:
import smtplib
from email.mime.text import MIMEText

SMTP_SERVER = "sandbox.smtp.mailtrap.io"
# The connection below starts in plain text and is upgraded with STARTTLS, so
# use the standard STARTTLS submission port. Port 465 conventionally expects
# implicit TLS (smtplib.SMTP_SSL), which does not match the starttls() call.
SMTP_PORT = 587

# NOTE(review): USERNAME is also a standard OS variable on Windows, and
# load_dotenv() does not override existing variables by default. Confirm the
# SMTP credentials are what is actually read here.
USERNAME = os.getenv("USERNAME")
PASSWORD = os.getenv("PASSWORD")

sender_email = os.getenv("EMAIL")
receiver_email = os.getenv("EMAIL")
subject = "The Good News Project"
body = "Hello, this is a test email!"

# Fail with a readable message instead of passing None into smtplib.
missing = [
    name
    for name, value in (("USERNAME", USERNAME), ("PASSWORD", PASSWORD), ("EMAIL", sender_email))
    if not value
]
if missing:
    raise RuntimeError(f"Missing environment variable(s): {', '.join(missing)}")

message = MIMEText(body, "plain")
message["Subject"] = subject
message["From"] = sender_email
message["To"] = receiver_email

# Open the connection, upgrade it to TLS, authenticate and send the email.
# The timeout keeps the cell from hanging if the server is unreachable.
with smtplib.SMTP(SMTP_SERVER, SMTP_PORT, timeout=30) as server:
    server.starttls()
    server.login(USERNAME, PASSWORD)
    server.sendmail(sender_email, receiver_email, message.as_string())

print("Email sent successfully!")

In progress below: prepping data to try transfer learning on the pre-trained model used above.

In [25]:
import pandas as pd 
# Load the labelled news CSV into a DataFrame.
# NOTE: this rebinds `data`, which the API cell earlier used for the parsed
# JSON response; from here on `data` is a DataFrame.
data = pd.read_csv("data/news_sentiment_analysis.csv")
# Data from: https://www.kaggle.com/datasets/clovisdalmolinvieira/news-sentiment-analysis/data

In [26]:
# Keep only the text and label columns. .copy() makes the result an independent
# frame, so adding columns to it later does not raise SettingWithCopyWarning.
data = data[["Title","Description","Sentiment"]].copy()

In [27]:
# Preview the first rows of the three selected columns.
data.head()

Unnamed: 0,Title,Description,Sentiment
0,Pine View High teacher wins Best in State awar...,"ST. GEORGE — Kaitlyn Larson, a first-year teac...",positive
1,Businesses Face Financial Strain Amid Liquidit...,"Harare, Zimbabwe – Local businesses are grappl...",neutral
2,Musk donates to super pac working to elect Tru...,(marketscreener.com) Billionaire Elon Musk has...,positive
3,US FTC issues warning to franchisors over unfa...,(marketscreener.com) A U.S. trade regulator on...,negative
4,Rooftop solar's dark side,4.5 million households in the U.S. have solar ...,positive


In [28]:
# Change labels from strings to integers
# NOTE(review): the pretrained model's head uses five classes (see
# sentiment_map in predict_sentiment), where index 2 means "Neutral". Confirm
# these 3-class ids are the intended targets before fine-tuning that model.
mapping = {
    'positive': 2,
    'negative': 0,
    'neutral': 1,
}

# .assign returns a new frame instead of writing columns into a slice, which
# avoids SettingWithCopyWarning. "News" is "Title: Description"; "Label" is the
# integer id for each sentiment string.
data = data.assign(
    News=data["Title"] + ": " + data["Description"],
    Label=data["Sentiment"].map(mapping),
)

# Series.map yields NaN for any value missing from `mapping`; fail here with
# the offending values rather than passing NaN labels on to training.
unmapped = data.loc[data["Label"].isna(), "Sentiment"].unique()
assert len(unmapped) == 0, f"Unmapped sentiment values: {unmapped.tolist()}"

data.head()


Unnamed: 0,Title,Description,Sentiment,News,Label
0,Pine View High teacher wins Best in State awar...,"ST. GEORGE — Kaitlyn Larson, a first-year teac...",positive,Pine View High teacher wins Best in State awar...,2
1,Businesses Face Financial Strain Amid Liquidit...,"Harare, Zimbabwe – Local businesses are grappl...",neutral,Businesses Face Financial Strain Amid Liquidit...,1
2,Musk donates to super pac working to elect Tru...,(marketscreener.com) Billionaire Elon Musk has...,positive,Musk donates to super pac working to elect Tru...,2
3,US FTC issues warning to franchisors over unfa...,(marketscreener.com) A U.S. trade regulator on...,negative,US FTC issues warning to franchisors over unfa...,0
4,Rooftop solar's dark side,4.5 million households in the U.S. have solar ...,positive,Rooftop solar's dark side: 4.5 million househo...,2


In [32]:
# Drop the raw columns now that "News" and "Label" exist. Reassigning (rather
# than inplace=True) with errors="ignore" keeps the cell safe to re-run: columns
# that are already gone are skipped instead of raising KeyError.
data = data.drop(columns=["Sentiment","Title","Description"], errors="ignore")

In [33]:
# Display the prepared frame (the "News" and "Label" columns remain).
data

Unnamed: 0,News,Label
0,Pine View High teacher wins Best in State awar...,2
1,Businesses Face Financial Strain Amid Liquidit...,1
2,Musk donates to super pac working to elect Tru...,2
3,US FTC issues warning to franchisors over unfa...,0
4,Rooftop solar's dark side: 4.5 million househo...,2
...,...,...
3495,"Arrow Electronics, Inc. (NYSE:ARW) Shares Purc...",2
3496,"3,120 Shares in NICE Ltd. (NASDAQ:NICE) Bought...",2
3497,"QRG Capital Management Inc. Has $857,000 Stock...",2
3498,Biotechnology Market: Surging Investments and ...,1


In [30]:
# Imbalanced dataset: count how many rows carry each integer label.
# The output below shows label 2 far outnumbering labels 1 and 0.
data['Label'].value_counts()

2    2134
1     789
0     577
Name: Label, dtype: int64

TODO: clean the dataset here before using it for transfer learning.

In [None]:
# Hyperparameters for the fine-tuning run, passed to Trainer below.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    report_to=[]  # empty list — presumably disables external experiment trackers; verify
)

def compute_metrics(pred):
    """Compute accuracy, weighted F1 and (binary-only) ROC AUC for an evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and ``predictions``
            (per-class scores, one row per example), as read below.

    Returns:
        dict with keys ``'accuracy'``, ``'f1'`` and ``'roc_auc'``. ``'f1'`` is
        the per-class F1 averaged with weights equal to each class's number of
        true examples. ``'roc_auc'`` is a float only when the labels contain
        exactly two classes; otherwise it is ``None``.
    """
    # Imported locally because numpy is not imported in the notebook's import cell.
    import numpy as np

    labels = np.asarray(pred.label_ids)
    scores = np.asarray(pred.predictions)
    preds = scores.argmax(-1)

    accuracy = float(np.mean(preds == labels))

    # Per-class F1 = 2*TP / (#predicted as class + #actually class). Every class
    # taken from `labels` has at least one true example, so the denominator is > 0.
    classes, support = np.unique(labels, return_counts=True)
    per_class_f1 = np.array([
        2.0 * np.sum((preds == cls) & (labels == cls))
        / (np.sum(preds == cls) + np.sum(labels == cls))
        for cls in classes
    ])
    f1 = float(np.sum(per_class_f1 * support) / np.sum(support))

    # ROC AUC is only defined here for exactly two classes. The original `else`
    # branch also ran with a single class present, where it cannot be computed.
    roc_auc = None
    if classes.size == 2:
        positive_scores = scores[:, 1]
        is_positive = labels == classes[1]
        # Rank-based (Mann-Whitney U) AUC; tied scores share their average rank.
        _, inverse, counts = np.unique(positive_scores, return_inverse=True, return_counts=True)
        average_rank = np.cumsum(counts) - (counts - 1) / 2.0
        ranks = average_rank[inverse.ravel()]
        n_pos = int(is_positive.sum())
        n_neg = int(labels.size - n_pos)
        roc_auc = float((ranks[is_positive].sum() - n_pos * (n_pos + 1) / 2.0) / (n_pos * n_neg))

    return {
        'accuracy': accuracy,
        'f1': f1,
        'roc_auc': roc_auc
    }


# Bundle the model, training arguments, datasets and metric function into a Trainer.
# NOTE(review): `train_dataset` and `val_dataset` are not defined in any cell
# shown above — they must be created before this cell can run.
# NOTE(review): `model` is the same object loaded earlier for inference;
# confirm that fine-tuning it directly is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics = compute_metrics
)