In [1]:
#test run for api

import requests 
import pandas as pd 

#URL for Oscars category/edition nominees (edition 45, category 1342)
url = "https://theawards.vercel.app/api/oscars/editions/45/categories/1342/nominees"
headers={"accept": "application/json"}

#Start from a known value so a failed request cannot leave my_data undefined
#(or silently reuse a value left over from an earlier run of this cell).
my_data = None

try:
    #requests has no default timeout; set one so the cell cannot hang forever
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code == 200:
        print("API request successful! Status code:", response.status_code)
        #extract JSON data from the response
        my_data = response.json()
    else:
        #print error info for debugging
        print("API request failed. Status code:", response.status_code, "Response:", response.text)
except (requests.RequestException, ValueError) as e:
    #RequestException covers network/HTTP failures; ValueError covers a body that is not valid JSON
    print("Error during API request:", e)

#convert JSON to DataFrame (an empty frame when the request did not succeed)
df = pd.DataFrame(my_data if my_data is not None else [])
print("Number of nominees retrieved:", len(df))
df.head()


API request successful! Status code: 200
Number of nominees retrieved: 5


Unnamed: 0,id,name,more,note,winner
0,2901,Eddie Albert,"The Heartbreak Kid {""Mr. Corcoran""}",,False
1,4295,James Caan,"The Godfather {""Sonny Corleone""}",,False
2,4296,Robert Duvall,"The Godfather {""Tom Hagen""}",,False
3,4297,Joel Grey,"Cabaret {""The Master of Ceremonies""}",,True
4,4298,Al Pacino,"The Godfather {""Michael Corleone""}",,False


In [1]:

import requests 
import pandas as pd 

#Use the API to request Oscar edition data (id, name, edition number, and corresponding year).
#An edition stands for a specific year's Oscar ceremony.
#The API uses "id" as an identifier for each item, and the number is different for editions, categories, and nominees.

url = "https://theawards.vercel.app/api/oscars/editions"
headers={"accept": "application/json"}

#requests has no default timeout; set one so the cell cannot hang forever
response = requests.get(url, headers=headers, timeout=30)
#Stop here on an HTTP error instead of parsing an error payload into editions_df
response.raise_for_status()
editions = response.json()

#convert to dataFrame
editions_df = pd.DataFrame(editions)
print(editions_df.head(10)) #show sampling of output
print()
print(editions_df.tail(2))


   id                 name  edition  year
0   1   1st Academy Awards        1  1927
1   2   2nd Academy Awards        2  1928
2   3   3rd Academy Awards        3  1929
3   4   4th Academy Awards        4  1930
4   5   5th Academy Awards        5  1931
5   6   6th Academy Awards        6  1932
6   7   7th Academy Awards        7  1934
7   8   8th Academy Awards        8  1935
8   9   9th Academy Awards        9  1936
9  10  10th Academy Awards       10  1937

    id                 name  edition  year
92  93  93rd Academy Awards       93  2020
93  94  94th Academy Awards       94  2021


In [3]:
#Pick out the edition numbers for the two time periods of interest (1991-1997 and 2015-2021).

#All ceremony years to keep, both ranges inclusive
years_needed = [*range(1991, 1998), *range(2015, 2022)]

#Walk the edition and year columns side by side, keeping one record per wanted year
editions_wanted = [
    {"edition": edition_number, "year": ceremony_year}
    for edition_number, ceremony_year in zip(editions_df["edition"], editions_df["year"])
    if ceremony_year in years_needed
]

#Edition/year pairs as a data frame for the later cells
editions_wanted_df = pd.DataFrame(editions_wanted)
print(editions_wanted_df)


    edition  year
0        64  1991
1        65  1992
2        66  1993
3        67  1994
4        68  1995
5        69  1996
6        70  1997
7        88  2015
8        89  2016
9        90  2017
10       91  2018
11       92  2019
12       93  2020
13       94  2021


In [7]:
import requests
import pandas as pd
import time

#Headers sent with every request in this cell: ask for JSON and present a browser User-Agent string
headers = {
    "accept": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
}

#Categories whose name is the same wherever they are requested
_core_categories = [
    "Best Picture",
    "Directing",
    "Actor In A Leading Role",
    "Actress In A Leading Role",
]

#Writing awards are listed under different names depending on the edition, so both spellings are kept
_writing_categories = [
    "Writing (Original Screenplay)",
    "Writing (Adapted Screenplay)",
    "Writing (Screenplay Written Directly For The Screen)",
    "Writing (Screenplay Based On Material Previously Produced Or Published)",
]

#Full list of category names to extract
categories_needed = _core_categories + _writing_categories

#Master list that the scraping loop fills with one dict per nominee
all_nominees = []

#Seconds to wait on each API call; requests has no default timeout and would otherwise block indefinitely.
REQUEST_TIMEOUT = 30


def _nested_value(record, outer_key, inner_key):
    """Return record[outer_key][inner_key], or None if the outer value is missing, null, or not a dict."""
    inner = record.get(outer_key)
    if isinstance(inner, dict):
        return inner.get(inner_key)
    return None


#Loop through each edition in the editions dataframe.
for _, row in editions_wanted_df.iterrows():
    id_edition = row["edition"]
    year = row["year"]

    #Use API to pull all categories available for each relevant Oscar edition
    cat_url = f"https://theawards.vercel.app/api/oscars/editions/{id_edition}/categories"
    try:
        cat_response = requests.get(cat_url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        #A network failure for one edition should not abort the whole scrape
        print(f"Failed to get categories for edition {id_edition}, {year}: {err}")
        continue
    if cat_response.status_code != 200:
        print(f"Failed to get categories for edition {id_edition}, {year}")
        continue
    categories = cat_response.json()

    #Loop through every category in the edition
    for each in categories:
        cat_name = each["name"]
        #only keep the necessary categories
        if cat_name not in categories_needed:
            continue
        #Pull the id corresponding to that category for use in the nominees url.
        id_cat = each["id"]
        nominees_url = f"https://theawards.vercel.app/api/oscars/editions/{id_edition}/categories/{id_cat}/nominees"
        print(f"Trying nominees URL for {cat_name} in {year}: {nominees_url}")

        # Add delay between nominee requests to avoid crashing
        time.sleep(1)

        try:
            nominees_response = requests.get(nominees_url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print(f"Failed to get nominees for {cat_name} for {year}: {err}")
            continue
        if nominees_response.status_code != 200:
            print(f"Failed to get nominees for {cat_name} for {year}, status: {nominees_response.status_code}")
            continue

        #API returns a dictionary or list of nominee information including id, movie title, nominee name, etc.
        nominees_info = nominees_response.json()

        #Normalise the payload to a plain list of nominee records.
        if isinstance(nominees_info, dict):
            #Some responses wrap the list under "nominees", others under "items"
            if "nominees" in nominees_info:
                nominees_info_list = nominees_info["nominees"]
            elif "items" in nominees_info:
                nominees_info_list = nominees_info["items"]
            else:
                print(f"Unexpected nominee key for {cat_name} in {year}")
                continue
        elif isinstance(nominees_info, list):
            nominees_info_list = nominees_info
        else:
            print(f"Unexpected nominee format for {cat_name} in {year}")
            continue

        #Acting categories put the person in "name"; the other categories put the film there.
        is_acting_category = (
            "Actor In A Leading Role" in cat_name or "Actress In A Leading Role" in cat_name
        )

        #Loop through list of nominee information to pull movie title and nominee name.
        for nominee_obj in nominees_info_list:
            #Guard against entries that are not records at all (e.g. null or a bare string)
            if not isinstance(nominee_obj, dict):
                print(f"Skipping unexpected nominee format: {nominee_obj}")
                continue

            #Null-safe lookup: a present-but-null "film" value must not raise AttributeError
            film_title = _nested_value(nominee_obj, "film", "title")
            if is_acting_category:
                nominee_name = nominee_obj.get("name")  # the actor's name
                movie_title = film_title or nominee_obj.get("more")
            else:
                # default extraction for all other categories
                movie_title = nominee_obj.get("name") or film_title
                nominee_name = nominee_obj.get("more") or _nested_value(nominee_obj, "primaryNominee", "name")
            winner_flag = nominee_obj.get("winner", False)
            if movie_title is None and nominee_name is None:
                print(f"Skipping unexpected nominee format: {nominee_obj}")
                continue

            #Append cleaned info into master list
            all_nominees.append({
                "year": year,
                "category": cat_name,
                "nominee": nominee_name,
                "movie": movie_title,
                "winner": winner_flag
            })

#Convert to Data Frame. Naming the columns keeps the schema stable even when no nominees
#were collected, so the 'movie' clean-up below cannot fail with a KeyError on an empty frame.
nominees_df = pd.DataFrame(all_nominees, columns=["year", "category", "nominee", "movie", "winner"])

# Clean movie titles: strip any {...} segment, e.g. 'The Godfather {"Sonny Corleone"}' -> 'The Godfather'.
# (The original regex was generated by ChatGPT.)
import re
brace_segment = re.compile(r"\{.*?\}")
nominees_df['movie'] = nominees_df['movie'].apply(
    #Non-string values (e.g. None) are passed through unchanged
    lambda x: brace_segment.sub("", x).strip() if isinstance(x, str) else x
)

# check the cleaned DataFrame (printed once; the original repeated both lines)
print("Number of nominees:", len(nominees_df))
print(nominees_df)


Trying nominees URL for Actor In A Leading Role in 1991: https://theawards.vercel.app/api/oscars/editions/64/categories/4576/nominees
Trying nominees URL for Actress In A Leading Role in 1991: https://theawards.vercel.app/api/oscars/editions/64/categories/4582/nominees
Trying nominees URL for Directing in 1991: https://theawards.vercel.app/api/oscars/editions/64/categories/1065/nominees
Trying nominees URL for Best Picture in 1991: https://theawards.vercel.app/api/oscars/editions/64/categories/3636/nominees
Trying nominees URL for Writing (Screenplay Based On Material Previously Produced Or Published) in 1991: https://theawards.vercel.app/api/oscars/editions/64/categories/5749/nominees
Trying nominees URL for Writing (Screenplay Written Directly For The Screen) in 1991: https://theawards.vercel.app/api/oscars/editions/64/categories/4635/nominees
Trying nominees URL for Actor In A Leading Role in 1992: https://theawards.vercel.app/api/oscars/editions/65/categories/4576/nominees
Trying n

In [6]:
#Write the nominee table to a CSV file, leaving the row index out of the file
output_csv = "Oscar_information.csv"
nominees_df.to_csv(output_csv, index=False)