In [1]:
import pandas as pd
import requests
import json
import time
from datetime import datetime
from pprint import pprint
from config import api_key

In [2]:
# Base address of The Movie Database (TMDB) v3 API; request paths such as
# "movie/<id>/credits" and "person/<id>" are appended to it further down.
url = "https://api.themoviedb.org/3/"

In [3]:
# Load the list of popular movies; later cells read its
# "movie id", "title" and "year" columns.
popular_100_df = pd.read_csv(
    filepath_or_buffer="Resources/Most popular 100 movies.csv"
)

# Casting info in these movies
### Most prominent actor

In [40]:
# For every popular movie in the list, record the top-billed cast member
# (the first entry of the "cast" list in the credits response).

# Gender codes handled by this notebook; any other code is stored as "Unknown".
gender_labels = {1: "Female", 2: "Male"}

pop_100_cast = []

# Counts requests so the loop can pause periodically between bursts of calls.
timeout_count = 1

for movie_id, title, release_year in zip(
    popular_100_df["movie id"], popular_100_df["title"], popular_100_df["year"]
):
    credits_url = url + "movie/" + str(movie_id) + "/credits"

    # None means "the request itself failed"; an empty list means "no cast listed".
    cast_list = None
    try:
        # A timeout stops one stalled connection from hanging the whole cell.
        response = requests.get(credits_url, params={"api_key": api_key}, timeout=30)
        # Error responses carry no "cast" key, so surface them here instead of
        # failing later with a KeyError.
        response.raise_for_status()
        cast_list = response.json().get("cast") or []
    except (requests.RequestException, ValueError) as error:
        # Report only the exception type: the full message contains the request
        # URL, which includes the API key.
        print(f"Credits request failed for movie: {title} ({type(error).__name__})")

    if cast_list:
        lead = cast_list[0]
        pop_100_cast.append({"movie id": movie_id,
                             "title": title,
                             "character": lead["character"],
                             "actor id": lead["id"],
                             "actor name": lead["name"],
                             "gender": gender_labels.get(lead.get("gender"), "Unknown"),
                             "movie release year": release_year
                             })
    elif cast_list is not None:
        print("Cast info not available for movie: " + str(title))

    # Pause for 10 seconds whenever the request counter reaches a multiple of 500.
    timeout_count += 1
    if (timeout_count % 500) == 0:
        time.sleep(10)

Cast info not available for movie:The Seventh Brother
Cast info not available for movie:Urotsukidoji III: Return of the Overfiend
Cast info not available for movie:One Man Band


In [41]:
# Turn the collected cast records into a table and keep only the rows
# whose gender is something other than "Unknown".
pop_100_cast_df = (
    pd.DataFrame(pop_100_cast)
    .loc[lambda frame: frame["gender"].ne('Unknown')]
)
pop_100_cast_df.head()

Unnamed: 0,actor id,actor name,character,gender,movie id,movie release year,title
0,1109,Kevin Peter Hall,The Predator,Male,169,1990,Predator 2
1,85,Johnny Depp,Edward Scissorhands,Male,162,1990,Edward Scissorhands
2,62,Bruce Willis,John McClane,Male,1573,1990,Die Hard 2
3,1204,Julia Roberts,Vivian Ward,Female,114,1990,Pretty Woman
5,1158,Al Pacino,Don Michael Corleone,Male,242,1990,The Godfather: Part III


In [42]:
# Persist the filtered lead-cast table (the row index is written as the first column).
pop_100_cast_df.to_csv(
    path_or_buf="Resources/Cast_100_popular_movies.csv"
)

### Age
We need to know how old the actors were when they were cast in their latest movies, and where they come from.

---
Function for calculating age

In [43]:
def calculate_age(birth_date, release_year):
    """Return an actor's approximate age in the year a movie was released.

    Only calendar years are compared, so the result can be one higher than
    the actor's exact age on the actual release date.

    Parameters
    ----------
    birth_date : str or None
        Date of birth formatted as "YYYY-MM-DD". ``None`` or an empty string
        means the date is unknown.
    release_year : int
        Year in which the movie was released.

    Returns
    -------
    int
        ``release_year`` minus the birth year, or 0 when the birth date is
        unknown.

    Raises
    ------
    ValueError
        If ``birth_date`` is a non-empty string that is not "YYYY-MM-DD".
    """
    # Treat an empty string like None; strptime would raise on it otherwise.
    if not birth_date:
        return 0

    dob = datetime.strptime(birth_date, "%Y-%m-%d")
    return release_year - dob.year

* Pulling info on cast

In [44]:
# Keep one row per actor: the row for their most recent movie in the list.
# Sorting by release year first (stable, so rows within the same year keep
# their order) means keep='last' picks the latest movie even when the input
# rows are not already in chronological order.
cast_max_age = (
    pop_100_cast_df
    .sort_values("movie release year", kind="stable", na_position="first")
    .drop_duplicates(subset="actor id", keep='last')
    .reset_index()
)
# reset_index() keeps the old row labels in an "index" column; drop it here.
cast_max_age_df = cast_max_age.drop(columns="index")
cast_max_age_df.head()

Unnamed: 0,actor id,actor name,character,gender,movie id,movie release year,title
0,1109,Kevin Peter Hall,The Predator,Male,169,1990,Predator 2
1,65683,Alex Vincent,Andy Barclay,Male,11186,1990,Child's Play 2
2,16170,Zach Galligan,Billy Peltzer,Male,928,1990,Gremlins 2: The New Batch
3,45041,Judith Hoag,April O'Neal,Female,1498,1990,Teenage Mutant Ninja Turtles
4,3085,James Caan,Paul Sheldon,Male,1700,1990,Misery


In [45]:
# Calculate the age at which the actor was cast most recently, and look up
# the actor's place of birth. Exactly one value is appended to each list per
# row, so both lists stay aligned with cast_max_age_df.
age = []
birth_place = []

# Counts requests so the loop can pause periodically between bursts of calls.
timeout_count = 1

for actor_id, release_year in zip(
    cast_max_age_df['actor id'], cast_max_age_df['movie release year']
):
    person_url = url + "person/" + str(actor_id)

    try:
        # A timeout stops one stalled connection from hanging the whole cell.
        response = requests.get(
            person_url,
            params={"api_key": api_key, "language": "en-US"},
            timeout=30,
        )
        # Error responses carry no "birthday" key, so surface them here
        # instead of failing later with a KeyError.
        response.raise_for_status()
        person_data = response.json()
    except (requests.RequestException, ValueError) as error:
        # Report only the exception type: the full message contains the request
        # URL, which includes the API key.
        print(f"Person request failed for actor id: {actor_id} ({type(error).__name__})")
        person_data = {}

    # A missing or empty birthday is passed on as None (age is then recorded as 0).
    age.append(calculate_age(person_data.get("birthday") or None, release_year))
    birth_place.append(person_data.get("place_of_birth"))

    # Pause for 10 seconds whenever the request counter reaches a multiple of 1000.
    timeout_count += 1
    if (timeout_count % 1000) == 0:
        time.sleep(10)

In [46]:
# Attach the values gathered per actor as two new columns, then preview the table.
for new_column, column_values in (("age", age), ("place of birth", birth_place)):
    cast_max_age_df[new_column] = column_values
cast_max_age_df.head()

Unnamed: 0,actor id,actor name,character,gender,movie id,movie release year,title,age,place of birth
0,1109,Kevin Peter Hall,The Predator,Male,169,1990,Predator 2,35,"Pittsburgh, Pennsylvania, U.S."
1,65683,Alex Vincent,Andy Barclay,Male,11186,1990,Child's Play 2,9,"Newark, New Jersey, USA"
2,16170,Zach Galligan,Billy Peltzer,Male,928,1990,Gremlins 2: The New Batch,26,"New York City, New York, USA"
3,45041,Judith Hoag,April O'Neal,Female,1498,1990,Teenage Mutant Ninja Turtles,22,"Newburyport, Massachusetts, USA"
4,3085,James Caan,Paul Sheldon,Male,1700,1990,Misery,50,"The Bronx, New York, USA"


In [47]:
# Persist the per-actor table with the added age and place-of-birth columns
# (the row index is written as the first column).
cast_max_age_df.to_csv(
    path_or_buf="Resources/Lead actors info.csv"
)