In [1]:
# Import libraries

import requests
import json
import os 
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [13]:
# Collect and parse data from the NYT Top Stories API.
# Result: `article_texts`, a list of "title: abstract" strings (empty on failure).

load_dotenv()

api_key = os.getenv("NYT_API_KEY")
section = "world"

base_url = "https://api.nytimes.com/svc/topstories/v2"
endpoint = f"/{section}.json"
api_url = base_url + endpoint

params = {"api-key": api_key}

# Defined up front so later cells always find `article_texts` (as an empty list)
# when the request fails or returns no usable stories, instead of hitting a
# NameError or silently reusing results from a previous run of this cell.
article_texts = []

if not api_key:
    print("Warning: NYT_API_KEY is not set; check your .env file.")

try:
    # A timeout keeps the cell from hanging indefinitely on a stalled connection;
    # a timeout error is a RequestException and is reported below.
    response = requests.get(api_url, params=params, timeout=30)
    response.raise_for_status()

    data = response.json()

    if isinstance(data, dict) and data.get('status') == 'OK' and data.get('results'):
        for article in data['results']:
            title = article.get('title')
            abstract = article.get('abstract')
            # Skip stories missing either field so every entry is "title: abstract".
            if title and abstract:
                article_texts.append(f"{title}: {abstract}")
    else:
        print("Failed to retrieve top stories data.")
        # The payload may be empty or not shaped as expected here, so look the
        # fault message up defensively rather than indexing into it.
        fault = data.get('fault') if isinstance(data, dict) else None
        if isinstance(fault, dict) and fault.get('faultstring'):
            print(f"Error Message: {fault['faultstring']}")

except requests.exceptions.RequestException as e:
    print(f"Error fetching the API: {e}")
except json.JSONDecodeError:
    print("Error decoding the JSON response.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

In [15]:
# Call model and predict sentiment

# Load the pre-trained sequence-classification model and its matching tokenizer
# from the same checkpoint name so the two stay in sync.
model_name = "tabularisai/multilingual-sentiment-analysis"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def predict_sentiment(texts):
    """Label the sentiment of one or more texts with the module-level model.

    Uses the `tokenizer` and `model` objects defined in this notebook.

    Args:
        texts: A single string, or an iterable of strings.

    Returns:
        A list with one label per input text, in input order. Each label is one
        of "Very Negative", "Negative", "Neutral", "Positive", "Very Positive".
        Empty input returns an empty list without calling the model.
    """
    # Normalise to a concrete list so emptiness can be checked and generators work.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)
    if not texts:
        return []

    # Inputs longer than 512 tokens are truncated; shorter ones are padded to the batch max.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    sentiment_map = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}
    # Softmax is monotonic, so the argmax of the raw logits selects the same class.
    predicted_ids = torch.argmax(outputs.logits, dim=-1).tolist()
    return [sentiment_map[class_id] for class_id in predicted_ids]

# Print only the articles whose predicted label is on the positive side.
positive_labels = ("Positive", "Very Positive")
predicted_labels = predict_sentiment(article_texts)
for text, sentiment in zip(article_texts, predicted_labels):
    if sentiment not in positive_labels:
        continue
    print(f"Text: {text}\nSentiment: {sentiment}\n")



Text: Trump Showed His Pain Point in His Standoff With China: Xi Jinping, who rules with absolute authority, has shown he is willing to let the Chinese people endure hardship. President Trump revealed he has limits.
Sentiment: Very Positive

Text: World’s Friendliest Superpower? As Trump Upends Globe, Europe Positions Itself.: Ursula von der Leyen is trying to ensure that if the international trading system is remade, the E.U. is at the center of what comes next.
Sentiment: Very Positive

Text: African Breakaway State Offers U.S. a Chance to Stick It to China: After years of building relationships with congressional Republicans and conservative think tanks, officials in Somaliland believe President Trump will grant their ultimate wish: statehood.
Sentiment: Very Positive

Text: Pope Makes a Surprise Visit — and Style Statement — in St. Peter’s: Appearing publicly in street clothes for the first time as pontiff, Pope Francis went into St. Peter’s Basilica to pray and greet the faithful.

Work in progress below: preparing data to fine-tune the pre-trained model used above (transfer learning).

In [52]:
import pandas as pd

# Labelled news dataset used for the transfer-learning experiment.
# Data from: https://www.kaggle.com/datasets/clovisdalmolinvieira/news-sentiment-analysis/data
csv_path = "data/news_sentiment_analysis.csv"
data = pd.read_csv(csv_path)

In [53]:
# Keep only the text and label columns. The explicit .copy() makes the result an
# independent frame, so adding columns to it later does not raise pandas'
# SettingWithCopyWarning (emitted when copy-on-write is not enabled).
data = data[["Title", "Description", "Sentiment"]].copy()

In [None]:
# Preview the first five rows of the selected columns.
data.head(n=5)

Unnamed: 0,Title,Description,Sentiment
0,Pine View High teacher wins Best in State awar...,"ST. GEORGE — Kaitlyn Larson, a first-year teac...",positive
1,Businesses Face Financial Strain Amid Liquidit...,"Harare, Zimbabwe – Local businesses are grappl...",neutral
2,Musk donates to super pac working to elect Tru...,(marketscreener.com) Billionaire Elon Musk has...,positive
3,US FTC issues warning to franchisors over unfa...,(marketscreener.com) A U.S. trade regulator on...,negative
4,Rooftop solar's dark side,4.5 million households in the U.S. have solar ...,positive


In [None]:
# Change labels from strings to integers

mapping = {
    'negative': 0,
    'neutral': 1,
    'positive': 2,
}

# assign() builds a new frame instead of writing into the existing one, so this
# cell can be re-run safely and does not trigger SettingWithCopyWarning.
data = data.assign(Sentiment_num=data["Sentiment"].map(mapping))

# Series.map turns any label missing from `mapping` into NaN; surface those
# here rather than letting them flow silently into training.
unmapped = data.loc[data["Sentiment_num"].isna(), "Sentiment"].unique()
if len(unmapped) > 0:
    print(f"Warning: sentiment labels without an integer mapping: {list(unmapped)}")

# NOTE(review): this yields 3 classes, while the sentiment_map used with the
# pre-trained model above has 5 - confirm how the classifier head will be
# adapted before fine-tuning.
print("\nDataFrame after mapping:")
data.head()



DataFrame after mapping:


Unnamed: 0,Title,Description,Sentiment,Sentiment_num
0,Pine View High teacher wins Best in State awar...,"ST. GEORGE — Kaitlyn Larson, a first-year teac...",positive,2
1,Businesses Face Financial Strain Amid Liquidit...,"Harare, Zimbabwe – Local businesses are grappl...",neutral,1
2,Musk donates to super pac working to elect Tru...,(marketscreener.com) Billionaire Elon Musk has...,positive,2
3,US FTC issues warning to franchisors over unfa...,(marketscreener.com) A U.S. trade regulator on...,negative,0
4,Rooftop solar's dark side,4.5 million households in the U.S. have solar ...,positive,2
