# Accessing Data via Web APIs: Solutions

In [10]:
# Import required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
from pynytimes import NYTAPI

In [15]:
import configparser
import os
from getpass import getpass

def get_api_key(api_name):
    """Return the API key for ``api_name``, prompting for it when needed.

    Keys are cached in the ``API_KEYS`` section of ``~/.notebook-api-keys``.
    If a key is already stored, the user is asked whether to replace it;
    answering "n" or "no" (any case, surrounding whitespace ignored) returns
    the stored key. Any other answer, or the absence of a stored key, leads
    to a ``getpass`` prompt (so the key is not echoed); the entered key is
    saved and returned.

    Args:
        api_name: Name the key is stored under, e.g. ``"NYT"``.

    Returns:
        The API key as a string, without surrounding whitespace.

    Raises:
        ValueError: If a blank key is entered and there is no stored key to
            fall back on.
    """
    config_file_path = os.path.expanduser("~/.notebook-api-keys")
    # Interpolation is disabled so keys containing "%" are stored verbatim.
    config = configparser.ConfigParser(interpolation=None)

    # ConfigParser.read() silently skips files that do not exist,
    # so no separate existence check is required.
    config.read(config_file_path)

    # fallback=None covers both a missing section and a missing option.
    existing_key = config.get("API_KEYS", api_name, fallback=None)
    if existing_key is not None:
        answer = input(
            f"An API key for {api_name} already exists. Do you want to update it? (y/n): "
        )
        # Accept "n"/"no" with stray whitespace or capitals; a sloppy "no"
        # must never end up overwriting a working key.
        if answer.strip().lower() in ("n", "no"):
            return existing_key

    # Strip so the value returned now matches what configparser hands back on
    # later reads (it strips values), and so pasted whitespace is not stored.
    api_key = getpass(f"Enter your {api_name} API key: ").strip()
    if not api_key:
        # Never persist a blank key: keep the stored one if there is one.
        if existing_key is not None:
            return existing_key
        raise ValueError(f"No API key entered for {api_name}.")

    if not config.has_section("API_KEYS"):
        config.add_section("API_KEYS")
    config.set("API_KEYS", api_name, api_key)

    # The file holds secrets: create it owner-only, and tighten the mode of a
    # pre-existing file before any key material is written to it.
    fd = os.open(config_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        os.chmod(config_file_path, 0o600)
        config.write(f)

    return api_key

# Look up the stored NYT key, or prompt for one; later cells use `api_key`
api_key = get_api_key(api_name="NYT")

print("NYT API key retrieved successfully.")


An API key for NYT already exists. Do you want to update it? (y/n):  n


NYT API key retrieved successfully.


In [19]:
# Initialize the NYT API client object using your API key.
# NOTE(review): parse_dates=True presumably makes the client return date
# fields as datetime objects rather than strings — confirm in the pynytimes docs.
nyt = NYTAPI(api_key, parse_dates=True)

## 🥊 Challenge: Find the top stories for a section

- Choose a section. Grab the top stories and store them in a list.
- How many stories are in the section?
- What is the title of the first story?

In [20]:
# Section chosen for this solution: sports
section = "sports"
# Request the top stories for that section from the API client
top_sports_stories = nyt.top_stories(section=section)
# The length of the returned collection answers "how many stories are in the section?"
print(f"There are {len(top_sports_stories)} {section} stories.")

There are 9 sports stories.


In [21]:
# Grab the first story in the list and read its "title" field
top_sport_story = top_sports_stories[0]
top_sport_story_title = top_sport_story["title"]
# A bare last expression makes the notebook display the title
top_sport_story_title

'The N.B.A. Season Begins Tonight. Here Are Some Bold Predictions.'

| Attribute      | Data Type | Definition      |
| ----------- | ----------- | ----------- |
| url      | string       | Article's URL.       |
| adx_keywords   | string        | Semicolon-separated list of keywords.        |
| subsection   | string        | Article's subsection (e.g. Politics). Can be empty. |
| column   | string        | Deprecated. Set to null.        |
| eta_id   | integer        | Deprecated. Set to 0.|
| section   | string        | Article's section (e.g. Sports).        |
| id   | integer        | Asset ID number (e.g. 100000007772696).        |
| asset_id   | integer        | Asset ID number (e.g. 100000007772696).        |
| nytdsection   | string        | Article's section|
| byline   | string        | Article's byline (e.g. By Thomas L. Friedman).        |
| type   | string        | Asset type (e.g. Article, Interactive, ...).        |
| title   | string        | Article's headline (e.g. When the Cellos Play, the Cows Come Home).        |
| abstract   | string        | Brief summary of the article.|
| published_date   | string        | When the article was published on the web (e.g. 2021-04-19).        |
| source   | string        | Publisher (e.g. New York Times).        |
| updated   | string        | When the article was last updated (e.g. 2021-05-12 06:32:03).|
| des_facet   | array        | Array of description facets (e.g. Quarantine (Life and Culture)).        |
| org_facet   | array        | Array of organization facets (e.g. Sullivan Street Bakery).        |
| per_facet   | array        | Array of person facets (e.g. Bittman, Mark).        |
| geo_facet   | array        | Array of geographic facets (e.g. Canada).        |
| media   | array        | Array of images.        |
| media.type   | string        | Asset type (e.g. image).        |
| media.subtype   | string        | Asset subtype (e.g. photo).        |
| media.caption   | string        | Media caption        |
| media.copyright   | string        | Media credit        |
| media.approved_for_syndication   | boolean        | Whether media is approved for syndication.        |
| media.media-metadata   | array        | Media metadata (url, width, height, ...).        |
| media.media-metadata.url   | string        | Image's URL.        |
| media.media-metadata.format   | string        | Image's crop name     |
| media.media-metadata.height   | integer        | Image's height |
| media.media-metadata.width   | integer        | Image's width      |
| uri   | string        | An article's globally unique identifier.      |

In [None]:
# Most shared stories over a 30-day window, one result per sharing method
email = nyt.most_shared(method="email", days=30)
facebook = nyt.most_shared(method="facebook", days=30)

In [None]:
# Collect each story's "uri" field, used here as its unique identifier
email_ids = [shared["uri"] for shared in email]
facebook_ids = [shared["uri"] for shared in facebook]

In [None]:
# Count the stories whose IDs appear in both the email and Facebook lists
len(set(email_ids) & set(facebook_ids))

## 🥊 Challenge: Article Searching

- Retrieve a set of articles for a query of your choice.
- Use a relevant time interval in constructing your `dates` dictionary
- Use `type_of_material` and `section_name` as keys in your `options` dictionary.
    - For `type_of_material` values refer to this [list](https://github.com/michadenheijer/pynytimes/blob/main/VALID_SEARCH_OPTIONS.md#type-of-material-values).
    - For `section_name` values refer to this [list](https://github.com/michadenheijer/pynytimes/blob/main/VALID_SEARCH_OPTIONS.md#section-name-values).

In [22]:
# Example: articles about COVID-19 between 15 Feb 2020 and 15 Aug 2020
# (roughly the first six months of the pandemic)
query = "COVID-19"
begin, end = datetime(2020, 2, 15), datetime(2020, 8, 15)
date_dict = dict(begin=begin, end=end)

# Search options: sort order, accepted sources, and accepted material types
options_dict = dict(
    sort="oldest",
    sources=["New York Times", "AP"],
    type_of_material=[
        "News Analysis",
        "News",
        "Article",
        "Column",
        "Editorial",
        "Front Page",
    ],
)

# Ask for 100 results matching the query, date window, and options
articles = nyt.article_search(
    query=query, results=100, dates=date_dict, options=options_dict
)



In [24]:
# Report how many articles the search returned for the query
print(f"{len(articles)} articles about {query}.")
# Print first article title (stored under the 'headline' -> 'main' keys)
print(articles[0]['headline']['main'])

100 articles about COVID-19.
Couple Tests Positive for Coronavirus After Returning From Vacation in Hawaii


## 🥊 Challenge: Most Positive, Most Negative

What are the top 3 most positive and negative texts? Tip: try using the `sort_values()` method on the "sentiment" column in your df!

In [4]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np 

# Load the election-article dataset
df = pd.read_csv("../data/election2020_articles.csv")
# Initialize analyzer object
analyzer = SentimentIntensityAnalyzer()
# Store the compound polarity score of each lead paragraph in df["sentiment"];
# entries that are not strings get NaN instead of a score
df["sentiment"] = df["lead_paragraph"].apply(
    lambda text: analyzer.polarity_scores(text)["compound"]
    if isinstance(text, str)
    else np.nan
)

In [5]:
# Headlines of the three rows with the highest sentiment score
(
    df.sort_values(by="sentiment", ascending=False)
    .head(3)["headline.main"]
    .tolist()
)

['How the Black Vote Became a Monolith',
 'Podcasts to Inform Your Vote',
 'Having Made Peace Abroad, Ethiopia’s Leader Goes to War at Home']

In [6]:
# Headlines of the three rows with the lowest sentiment score
(
    df.sort_values(by="sentiment", ascending=True)
    .head(3)["headline.main"]
    .tolist()
)

['How Trump’s ‘Voter Fraud’ Lie Is Disenfranchising Americans',
 'At the End of a Chaotic Campaign, an Election Day That Defied the Worst Fears',
 'Days From Election, Police Killing of Black Man Roils Philadelphia']