## API Request to retrieve trigger information from doesthedogdie's open API

Movie IDs used:
- Fight Club: 9593
- Howl's Moving Castle: 10016
- Wall-E: 12533
- Midsommar: 20449

In [None]:
import pandas as pd
import json
import os
import requests
from dotenv import load_dotenv

# Load environment variables from the project-level .env file
load_dotenv(dotenv_path='../.env')

# Retrieve the doesthedogdie API key from the environment
api_key = os.getenv('API_KEY_DOESTHEDOGDIE')

# Fail fast when the key is missing or empty, before any request is sent
if not api_key:
    raise ValueError('API_KEY_DOESTHEDOGDIE not found in environment variables')

# Media item to fetch; 10016 is Howl's Moving Castle in the ID list at the
# top of this notebook. Change this value to query another movie.
media_id = 10016

# API request
url = f'https://www.doesthedogdie.com/media/{media_id}'
headers = {
    'Accept': 'application/json',
    'X-API-KEY': api_key,
}

# requests applies no timeout by default, so a stalled connection would hang
# this cell indefinitely; bound the wait and let requests raise instead.
response = requests.get(url, headers=headers, timeout=30)

# Report failures first; only a 200 response is parsed and summarised.
if response.status_code != 200:
    print(f'Error: {response.status_code} - {response.text}')

else:
    data = response.json()
    item = data['item']

    # Movie-level fields copied as-is, falling back to 'Unknown' when a key is absent
    result = {
        field: item.get(field, 'Unknown')
        for field in ('id', 'name', 'cleanName', 'genre', 'releaseYear')
    }

    # One entry per topicItemStats record that carries a non-empty 'topic'
    result['triggers'] = [
        {
            'doesName': stat['topic'].get('doesName', 'Unknown'),
            'yesSum': stat.get('yesSum', 0),
            'noSum': stat.get('noSum', 0),
        }
        for stat in data.get('topicItemStats', [])
        if stat.get('topic', {})
    ]

    # Pretty-print the structured result
    print(json.dumps(result, indent=4))

Flatten the data, extract the relevant fields, and generate a .csv file

In [None]:
# Flatten the API response into one row per trigger and save it as a CSV file.
if response.status_code == 200:
    data = response.json()

    # Column names of the output, in the same order each row is assembled below
    column_names = [
        'movie_id', 'movie_name', 'clean_movie_name', 'genre',
        'release_year', 'event_description', 'yes_count', 'no_count',
    ]

    item = data['item']

    # Extract the main item data (id, name, cleanName, genre, releaseYear).
    # `or 'Unknown'` also covers a cleanName that is present but null/empty,
    # which would otherwise make .lower() raise AttributeError.
    item_data = {
        'id': item.get('id', 'Unknown'),
        'name': item.get('name', 'Unknown'),
        # cleanName normalised to lowercase with underscores
        'cleanName': (item.get('cleanName') or 'Unknown').lower().replace(' ', '_'),
        'genre': item.get('genre', 'Unknown'),
        'releaseYear': item.get('releaseYear', 'Unknown'),
    }

    # The movie-level values are identical for every row, so build them once
    movie_values = [
        item_data['id'],
        item_data['name'],
        item_data['cleanName'],
        item_data['genre'],
        item_data['releaseYear'],
    ]

    # Iterate over the topicItemStats to collect triggers and add rows
    rows = []
    for topic_item in data.get('topicItemStats', []):
        topic = topic_item.get('topic', {})
        if not topic:
            continue  # records without topic details are skipped

        rows.append(movie_values + [
            topic.get('doesName', 'Unknown'),
            topic_item.get('yesSum', 0),
            topic_item.get('noSum', 0),
        ])

    # Create the DataFrame from the rows with the specified column names
    df = pd.DataFrame(rows, columns=column_names)

    # Create the CSV file name based on the cleanName; path separators are
    # replaced so a title containing '/' or '\' cannot escape the output folder
    safe_name = item_data['cleanName'].replace('/', '_').replace('\\', '_')
    file_name = f"{safe_name}_df.csv"

    # to_csv does not create missing folders, so make sure the target exists
    output_dir = '../data/local/raw'
    os.makedirs(output_dir, exist_ok=True)

    # Save the DataFrame to a CSV file
    df.to_csv(f'{output_dir}/{file_name}', index=False)

    # Print confirmation message
    print(f'Data saved to {file_name}')

else:
    print(f'Error: {response.status_code} - {response.text}')