In [None]:
import os
import json
import time
import requests
import datetime
import dateutil
import pandas as pd
from dateutil.relativedelta import relativedelta
from nltk.sentiment import SentimentIntensityAnalyzer


# One-year date window that ends today.
end = datetime.date.today()
start = end + relativedelta(years=-1)

# Article Search endpoint for the query "election"; `api` and `url` hold the same URL.
api = "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=election&api-key=YOUR_API_KEY_HERE"
url = api


In [None]:
# Download NYT Archive API metadata month by month, score every headline with
# VADER, and collect the results into the `df` DataFrame used by later cells.

# The key is read from the NYT_API_KEY environment variable when it is set, so
# it does not have to be saved inside the notebook; otherwise the placeholder
# below is used and must be replaced with your NYT Archive API key.
api_key = os.environ.get("NYT_API_KEY", "YOUR_API_KEY_HERE")
base_url = "https://api.nytimes.com/svc/archive/v1/{}/{}.json?api-key={}"

analyzer = SentimentIntensityAnalyzer() # initialize VADER

start_year = 2013 # first year fetched (inclusive)
end_year = 2023 # the loop stops BEFORE this year: range() below excludes it

request_delay_seconds = 6 # pause before each request to avoid hitting the API rate limit
# NOTE(review): confirm the delay against the current NYT rate limits.
request_timeout_seconds = 60 # fail instead of hanging forever on a stalled connection

# Column order of the resulting DataFrame; also guarantees the columns exist
# when no article was collected, so later cells do not fail with a KeyError.
article_columns = [
    "abstract",
    "headline",
    "document_type",
    "section_name",
    "pub_date",
    "sentiment_score",
    "sentiment_category",
]


def _sentiment_category(score):
    """Map a VADER compound score to "Positive", "Negative" or "Moderate".

    Scores >= 0.25 are "Positive", scores <= -0.25 are "Negative", and
    everything in between is "Moderate".
    """
    if score >= 0.25:
        return "Positive"
    if score <= -0.25:
        return "Negative"
    return "Moderate"


articles = []

for year in range(start_year, end_year):
    for month in range(1, 13):
        time.sleep(request_delay_seconds)
        url = base_url.format(year, month, api_key)
        response = requests.get(url, timeout=request_timeout_seconds)
        # Surface HTTP errors (bad key, rate limit, ...) here instead of as a
        # confusing KeyError / JSON decoding error further down.
        response.raise_for_status()
        data = response.json()
        for article in data["response"]["docs"]:
            # "headline" may be absent or null; `or {}` covers both cases.
            headline = (article.get("headline") or {}).get("main")
            pub_date = article.get("pub_date")
            # Without a headline there is nothing to score, and without a
            # publication date the article cannot be placed on the time axis.
            if headline is None or pub_date is None:
                continue
            sentiment_score = analyzer.polarity_scores(headline)["compound"]
            articles.append({
                "abstract": article.get("abstract"),
                "headline": headline,
                "document_type": article.get("document_type"),
                "section_name": article.get("section_name"),
                # Keep only year and month: later cells aggregate per month.
                "pub_date": pd.to_datetime(pub_date).strftime("%Y-%m"),
                "sentiment_score": sentiment_score,
                "sentiment_category": _sentiment_category(sentiment_score),
            })
df = pd.DataFrame(articles, columns=article_columns)

In [None]:
# Preview the first five collected articles.
df.head(n=5)

In [None]:
#get_data(months_in_range)


In [None]:
# Slower, transformer-based sentiment scoring (disabled; kept for reference):

#from transformers import pipeline
#sentiment_pipeline = pipeline('sentiment-analysis')

#def get_sentiment_score(text):
   # result = sentiment_pipeline(text)[0]
    #score = result['score']
   # label = result['label']
   # return score if label == 'POSITIVE' else -score  # normalize score to [-1, 1]

#df['sentiment_score'] = df['headline'].apply(get_sentiment_score)



In [None]:
# Show the first five rows of the article table again.
df.head(n=5)

In [None]:
import matplotlib.pyplot as plt

# Keep only the columns needed for the counts. pub_date holds "YYYY-MM"
# strings, so every aggregation below is per month, not per day.
df_subset = df[['pub_date', 'sentiment_score','sentiment_category']]

# Count the articles in each (month, sentiment category) pair
df_counts = df_subset.groupby(['pub_date', 'sentiment_category']).size().reset_index(name='count')

# Pivot so that every sentiment category becomes its own column. A category
# with no articles in a month is a count of 0, not a missing value.
df_pivot = df_counts.pivot(index='pub_date', columns='sentiment_category', values='count').fillna(0)

# Create the plot: one line per sentiment category
fig, ax = plt.subplots()
df_pivot.plot(ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Count')
ax.set_title('Number of Articles per Month by Sentiment')
ax.legend(title="Sentiment")

plt.show()


In [None]:
import plotly.express as px

# Scatter of every headline's sentiment score over time, coloured by section.
# Work on a copy so that converting pub_date does not modify `df`.
df2 = df[['headline', 'pub_date', 'section_name', 'sentiment_score']].copy()

# pub_date is a "YYYY-MM" string in `df`; parse it so the X axis is a real date axis.
df2['pub_date'] = pd.to_datetime(df2['pub_date'])

# The points are coloured by section_name, so the title names the section
# (it previously said "Material Type", which is not what is plotted).
fig = px.scatter(df2, x='pub_date', y='sentiment_score', color='section_name',
                 hover_name='headline', title='Sentiment Analysis by Section')
fig.update_layout(xaxis_range=['2013-01', '2023-03'])
fig.show()

In [None]:
# Number of headlines per pub_date and sentiment category (0 where a pair never occurs).
df_pivot = pd.pivot_table(
    df,
    values="headline",
    index="pub_date",
    columns="sentiment_category",
    aggfunc="count",
    fill_value=0,
)
# Row-wise sum across the category columns gives the total per pub_date.
df_pivot["Total Articles"] = df_pivot.sum(axis=1)

# One line per sentiment category plus the overall total.
fig = px.line(
    df_pivot,
    x=df_pivot.index,
    y=["Moderate", "Positive", "Negative", "Total Articles"],
    title="Sentiment Categories and Total Articles Over Time",
)
fig.show()

In [None]:
# Mean sentiment score for every (pub_date, section_name) pair.
df_pivot = pd.pivot_table(
    df,
    values="sentiment_score",
    index="pub_date",
    columns="section_name",
    aggfunc="mean",
)

# One line per section.
fig = px.line(
    df_pivot,
    x=df_pivot.index,
    y=df_pivot.columns,
    title="Average Monthly Sentiment by Section",
)
# Pin the visible X axis window to 2013-01 .. 2023-03.
fig.update_layout(xaxis={"range": ['2013-01', '2023-03']})
fig.show()