In [13]:
import os
import time
from datetime import datetime

import numpy as np
import pandas as pd
import requests

In [14]:
# TMDB API configuration.
# The key is read from the TMDB_API_KEY environment variable so a real
# credential never has to be saved inside the notebook. The placeholder stays
# as the fallback, so replacing it by hand keeps working as before.
API_KEY = os.environ.get('TMDB_API_KEY', 'YOUR_KEY_HERE')
# Root of the REST endpoints used by the fetch helpers.
BASE_URL = 'https://api.themoviedb.org/3'
# Prefix for image URLs; callers append a size segment (e.g. "w500") and a file path.
IMAGE_BASE_URL = 'https://image.tmdb.org/t/p/'

In [15]:
# Function to fetch movies from a list
def fetch_movies_from_list(list_id, timeout=10):
    """Fetch every item of a list, following pagination page by page.

    Args:
        list_id: Identifier of the list, inserted into the ``/list/{id}`` URL.
        timeout: Seconds to wait for each HTTP response. Without a timeout
            ``requests`` can block forever on an unresponsive server.

    Returns:
        A list containing the ``items`` entries of every page, in page order.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        requests.Timeout: If a page does not answer within ``timeout`` seconds.
    """
    url = f"{BASE_URL}/list/{list_id}"
    params = {
        'api_key': API_KEY,
        'page': 1  # Start with the first page
    }
    movies = []

    while True:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        # Add movies from the current page
        page_items = data.get('items') or []
        movies.extend(page_items)

        # A response without pagination fields is treated as a single page
        # instead of raising KeyError.
        current_page = data.get('page', params['page'])
        total_pages = data.get('total_pages', current_page)

        # Stop on the last page; an empty page also stops the loop so a
        # misreported page count cannot keep it requesting forever.
        if not page_items or current_page >= total_pages:
            break

        # Increment the page number for the next request
        params['page'] += 1

    return movies

# Function to fetch movie details
def fetch_movie_details(movie_id, timeout=10):
    """Fetch the detail record of a single movie.

    Args:
        movie_id: Identifier of the movie, inserted into the ``/movie/{id}`` URL.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` can block forever on an unresponsive server.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the request returns an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(
        f"{BASE_URL}/movie/{movie_id}",
        params={'api_key': API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

def fetch_movie_backdrop(movie_id, timeout=10):
    """Return the URL of a language-neutral backdrop image for a movie.

    Args:
        movie_id: Identifier of the movie, inserted into the
            ``/movie/{id}/images`` URL.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` can block forever on an unresponsive server.

    Returns:
        The ``w1280`` image URL of the first backdrop whose ``iso_639_1`` is
        ``None`` and that has a file path, or ``None`` when the response
        status is not 200 or no such backdrop exists.
    """
    images_url = f"{BASE_URL}/movie/{movie_id}/images"
    response = requests.get(images_url, params={'api_key': API_KEY}, timeout=timeout)
    if response.status_code != 200:
        return None

    # A payload without a "backdrops" entry is handled as "no backdrops"
    # rather than raising KeyError.
    backdrops = response.json().get('backdrops') or []
    for backdrop in backdrops:
        # Neutral (no language) images only; entries lacking a file path are skipped.
        file_path = backdrop.get('file_path')
        if backdrop.get('iso_639_1') is None and file_path:
            return f"{IMAGE_BASE_URL}w1280{file_path}"
    return None

# Function to calculate the performance coefficient
def calculate_performance(profit, roi_percentage):
    """Blend profit and ROI into one coefficient between 0 and 1.

    Args:
        profit: Profit figure; it is divided by 1000 before clamping (the
            caller in this notebook passes profit in millions of dollars).
        roi_percentage: Return on investment as a percentage; it is divided
            by 100 before clamping.

    Returns:
        The mean of the two scores, each clamped to the range 0..1.
    """
    def clamp_to_unit(value):
        # Keep the value inside [0, 1].
        if value < 0:
            return 0
        if value > 1:
            return 1
        return value

    profit_score = clamp_to_unit(profit / 1000)
    roi_score = clamp_to_unit(roi_percentage / 100)

    # Equal-weight average of the two clamped scores.
    return (profit_score + roi_score) / 2

# ID of the list whose movies are analysed below
# (a custom "Marvel Cinematic Universe" list).
LIST_ID = 8501919  # Replace with another list ID to analyse a different list

# Download every item of the list; this performs one HTTP request per page
# and raises if a request fails.
movies = fetch_movies_from_list(LIST_ID)

In [16]:
# Column order of the final table. Passing it to the DataFrame constructor
# keeps the schema even when no movie could be processed, so later cells can
# still select columns such as "profit".
RECORD_COLUMNS = [
    "release_date",
    "title",
    "production_budget",
    "box_office",
    "profit",
    "roi_percentage",
    "performance",
    "poster_url",
    "backdrop_url",
]

# Build one record per movie. A movie that fails to process (request error,
# missing field, unparsable date) is reported and skipped so one bad entry
# does not abort the whole run.
movie_data_list = []
for movie in movies:
    movie_id = movie['id']
    try:
        movie_data = fetch_movie_details(movie_id)

        # Extract relevant fields
        release_date = datetime.strptime(movie_data['release_date'], '%Y-%m-%d').strftime('%b %d, %Y')
        title = movie_data['title']
        # "or 0" also covers a key that is present but None, which would
        # otherwise break the arithmetic below.
        budget_usd = movie_data.get('budget') or 0
        revenue_usd = movie_data.get('revenue') or 0
        poster_path = movie_data.get('poster_path')

        profit_usd = revenue_usd - budget_usd
        profit = profit_usd / 1_000_000  # Profit in millions
        # ROI is undefined without a budget; 0 is used in that case.
        roi_percentage = (profit_usd / budget_usd * 100) if budget_usd > 0 else 0
        performance = calculate_performance(profit, roi_percentage)

        # Budget and revenue are stored in millions, like profit.
        production_budget = budget_usd / 1_000_000
        box_office = revenue_usd / 1_000_000

        # Generate full URLs for the poster and backdrop images
        poster_url = f"{IMAGE_BASE_URL}w500{poster_path}" if poster_path else None
        backdrop_url = fetch_movie_backdrop(movie_id)

        movie_data_list.append({
            "release_date": release_date,
            "title": title,
            "production_budget": production_budget,
            "box_office": box_office,
            "profit": profit,
            "roi_percentage": roi_percentage,
            "performance": performance,
            "poster_url": poster_url,
            "backdrop_url": backdrop_url,
        })
    except Exception as e:
        print(f"Failed to process movie ID {movie_id}: {e}")

# Create a DataFrame
df = pd.DataFrame(movie_data_list, columns=RECORD_COLUMNS)


In [17]:
# NOTE: this cell replaces the numeric columns of `df` with formatted strings
# at the end, so it cannot be re-run on the same `df`; rebuild `df` first.

# Calculate mean and standard deviation for profit and ROI
# (cast to float so the statistics below always run on a numeric dtype).
profits = df["profit"].astype(float)
rois = df["roi_percentage"].astype(float)
mean_profit, std_profit = profits.mean(), profits.std()
mean_roi, std_roi = rois.mean(), rois.std()


def _zscore(values, mean, std):
    """Standardize `values`; return zeros when the spread is zero or undefined.

    A zero standard deviation (all values equal) or a NaN one (fewer than two
    rows) would otherwise turn every score into NaN or inf.
    """
    centered = values - mean
    if pd.isna(std) or std == 0:
        return centered * 0.0
    return centered / std


# Calculate z-scores and performance
z_profits = _zscore(profits, mean_profit, std_profit)
z_rois = _zscore(rois, mean_roi, std_roi)
weighted_scores = 0.5 * z_profits + 0.5 * z_rois
# Sigmoid function for 0-1 range. The rounding is applied to the sigmoid
# result itself; rounding only the denominator distorts the score.
performance = (1 / (1 + np.exp(-weighted_scores))).round(2)

df["performance"] = performance

# Format numeric columns
df["production_budget"] = df["production_budget"].apply(lambda x: f"${x:.1f}M")
df["box_office"] = df["box_office"].apply(lambda x: f"${x:.1f}M")
df["profit"] = df["profit"].apply(lambda x: f"${x:.1f}M")
df["roi_percentage"] = df["roi_percentage"].apply(lambda x: f"{x:.2f}%")
df["performance"] = df["performance"].apply(lambda x: f"{x:.2f}")

In [18]:
# Preview the first rows of the table as the cell output.
df.head()

Unnamed: 0,release_date,title,production_budget,box_office,profit,roi_percentage,performance,poster_url,backdrop_url
0,"Jul 24, 2024",Deadpool & Wolverine,$200.0M,$1338.1M,$1138.1M,569.04%,0.72,https://image.tmdb.org/t/p/w500/8cdWjvZQUExUUT...,https://image.tmdb.org/t/p/w1280/6amNYUYvoKsZb...
1,"Nov 08, 2023",The Marvels,$274.8M,$207.1M,$-67.7M,-24.64%,0.16,https://image.tmdb.org/t/p/w500/9GBhzXMFjgcZ3F...,https://image.tmdb.org/t/p/w1280/57oimlHZzrmct...
2,"May 03, 2023",Guardians of the Galaxy Vol. 3,$250.0M,$845.6M,$595.6M,238.24%,0.41,https://image.tmdb.org/t/p/w500/r2J02Z2OpNTctf...,https://image.tmdb.org/t/p/w1280/bY6R8Cx0u98pT...
3,"Feb 15, 2023",Ant-Man and the Wasp: Quantumania,$388.4M,$476.1M,$87.7M,22.58%,0.2,https://image.tmdb.org/t/p/w500/qnqGbB22YJ7dSs...,https://image.tmdb.org/t/p/w1280/jJiAJ05XSa81O...
4,"Nov 09, 2022",Black Panther: Wakanda Forever,$250.0M,$859.1M,$609.1M,243.64%,0.42,https://image.tmdb.org/t/p/w500/sv1xJUazXeYqAL...,https://image.tmdb.org/t/p/w1280/uu4On1xXiiDGe...


In [19]:
# Save the table to marvel_movies.csv in the working directory, without the index column.
df.to_csv('marvel_movies.csv', index=False)