In [1]:
import requests as r
import pandas as pd
from io import StringIO
from datetime import datetime
import json
import matplotlib.pyplot as plt
import seaborn as sns

from config import API_KEY

In [93]:
#function to get article search NYT API
def get_article_search(section, start_date, end_date):
    #establishing URL for Filter/Search using Article Search API
    url = (f'https://api.nytimes.com/svc/search/v2/articlesearch.json?q={section}'
           f'&begin_date={start_date}&end_date={end_date}&api-key={API_KEY}')

    response = r.get(url).content.decode()

    #converting response to DF
    articles = pd.read_json(StringIO(response))
    articles_search = pd.json_normalize(articles['response']['docs'])    #pub_date located in under response > docs

    return articles_search


#function to create DF of Key Words
def keywords_review(articles_search):
    #function to identify keyword
    keywords = articles_search['keywords'].explode().value_counts()
    #Resetting index to create new data frame of keywords
    keywords = keywords.reset_index()

    #creating a new column only containing the keyword values
    keywords['keyword_value'] = keywords['keywords'].apply(lambda x: x['value'])

    #dropping origincal keywords columns
    keywords.drop('keywords',axis=1,inplace=True)
    return keywords

#function to use datetime library to create (periodicity) frequency buckets
def nyt_search_date(articles_search, start_date, end_date, frequency):
    #ensuring the dataframe is filtered for dates input by user
    date_range_results = articles_search[
    (articles_search['pub_date'] >= pd.to_datetime(start_date)) &
    (articles_search['pub_date'] <= pd.to_datetime(end_date))
    ]

    #ensuring corect grouping based on frequency input
    if frequency == 'daily':
        return date_range_results.groupby(articles_search['pub_date'].dt.to_period('D')).size()
    elif frequency == 'weekly':
        return date_range_results.groupby(articles_search['pub_date'].dt.to_period('W')).size()
    elif frequency == 'monthly':
        return date_range_results.groupby(articles_search['pub_date'].dt.to_period('M')).size()
    else:
        return ValueError('Invalid frequency. Please choose between daily, weekly, or monthly')
        

#function to set style for Seaborn Plots
def set_plot_style():
    sns.set_style('whitegrid')
    sns.set_context('paper')

#function to create bar graph of key words
def bar_graph_keywords(keywords):
    set_plot_style()
    
    plt.figure(figsize=(12,6))
    
    
    sns.barplot(data = keywords.head(10), y='keyword_value', x='count', palette='pastel',hue='count',legend=False)

    plt.title('Count of Keywords')
    plt.ylabel('Keywords')
    plt.xlabel('Count')
    plt.show()

#function to create line graph to illustrate # of articles published per selected period
def line_graph_headline_time(pub_date_new, frequency):
    set_plot_style()
    
    plt.figure(figsize=(12,6))

    #resetting index to get dataframe
    pub_date_new = date_range_results.reset_index()

    #creating line plot
    sns.lineplot(data = pub_date_new, x='pub_date',y=0, marker='o',color='purple')

    plt.title(f'Number of Articles Published Over Time ({frequency}')
    plt.xlabel('Date')
    plt.ylabel('Number of Articles')
    plt.xticks(rotation=45)
    plt.show()

def main_nyt():
    #user input for NYT section (strip/lower to ensure it's entered properly into URL
    section = input('Please input NYT section (eg, arts, world,fashion):').strip().lower()
    #user input for date & frequency
    start_date_user = input('Please input Start Date (YYYY-MM-DD):')
    end_date_user = input('Please input End Date (YYYY-MM-DD):')
    frequency = input('Select Frequency (daily,weekly, monthly):').strip().lower()

    #Parsing user input into a datetime object given a corresponding format
    try:
        start_date = datetime.strptime(start_date_user,'%Y%m%d')
        end_date = datetime.strptime(end_date_user,'%Y%m%d')
        
    except ValueError:
        print('Invalid date format. Correct format: YYYY-MM-DD')
        
        
        #create df results for section
        article_search = get_article_search(section, start_date, end_date)

        if article_search.empty:
            print('No headlines found')
        return

        #analyze keywords function
        keywords = keywords_review(article_search)

        #plotting bar graph of keywords
        bar_graph_keywords(keywords)

        #analyze headlines
        pub_date_new = nyt_search_date(article_search,start_date,end_date,frequency)
        #plot line graph
        line_graph_headline_time(pub_date_new, frequency)

#Run the analysis

if __name__ == '__main__':
    main_nyt()

Please input NYT section (eg, arts, world,fashion): arts
Please input Start Date (YYYY-MM-DD): 2024-01-01
Please input End Date (YYYY-MM-DD): 2024-03-13
Select Frequency (daily,weekly, monthly): monthly


Invalid date format. Correct format: YYYY-MM-DD


UnboundLocalError: cannot access local variable 'start_date' where it is not associated with a value