In [154]:
# import libraries and modules
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import gmaps
import time
import scipy.stats as scistats
import random
import requests
import json
from datetime import datetime
from config import api_key
from config import gkey

In [155]:
# Kristina: Module to generate list of movie IDs
def highest_revenue(x=1960,y=2010):
    """Return TMDb movie IDs of the highest-revenue films, one column per decade.

    For every year in ``range(x, y)`` the TMDb ``discover/movie`` endpoint is
    queried for that release year, sorted by descending revenue, and the
    ``id`` of every entry in the response's ``results`` list is kept.  Years
    are then grouped into consecutive 10-year blocks starting at ``x``; years
    left over after the last complete block are fetched but not returned.

    Parameters
    ----------
    x : int
        First release year (inclusive).
    y : int
        Last release year (exclusive).

    Returns
    -------
    pandas.DataFrame
        One column per complete decade, labelled with the decade's first year
        as a string (e.g. ``'1960'``).  Each column holds that decade's IDs
        in year order.  If decades yield different numbers of IDs, shorter
        columns are padded with NaN at the end.

    Raises
    ------
    requests.HTTPError
        If TMDb answers with an error status.
    """
    discover_url = 'https://api.themoviedb.org/3/discover/movie'

    ids_by_year = {}
    for year in range(x, y):
        # pause between calls so the API is not hammered
        time.sleep(0.25)
        response = requests.get(
            discover_url,
            # let requests build and URL-encode the query string
            params={
                'api_key': api_key,
                'primary_release_year': year,
                'sort_by': 'revenue.desc',
                'language': 'en-US',
            },
            timeout=30,
        )
        # fail loudly on HTTP errors instead of with an opaque KeyError later
        response.raise_for_status()

        films = response.json().get('results', [])
        # a year with no results simply contributes no IDs
        ids_by_year[year] = [film['id'] for film in films]

    decade_columns = {}
    for z in range((y - x) // 10):
        decade_start = x + z * 10
        decade_ids = [
            movie_id
            for year in range(decade_start, decade_start + 10)
            for movie_id in ids_by_year[year]
        ]
        # wrapping in Series lets columns of unequal length coexist (NaN-padded)
        decade_columns[str(decade_start)] = pd.Series(decade_ids)

    return pd.DataFrame(decade_columns)

In [156]:
# Evan: Module to get list of people from cast
def top2people(movies):
    """Collect, per column of ``movies``, the unique person IDs of each film's top two cast members.

    For every movie ID the TMDb credits endpoint is queried.  The cast list
    is ordered by ascending ``cast_id`` and the ``id`` (person ID) of the
    first two entries is recorded, skipping people already recorded for the
    same column.

    NOTE(review): "top 2" means the two lowest ``cast_id`` values.  TMDb
    credits presumably also carry a billing ``order`` field that may be the
    better ranking key -- confirm before changing.

    Parameters
    ----------
    movies : pandas.DataFrame
        One column per decade holding TMDb movie IDs; NaN entries are skipped.

    Returns
    -------
    pandas.DataFrame
        Same column labels as ``movies``; each column lists the unique person
        IDs in first-seen order, NaN-padded to the longest column.
    """
    peopleDict = {}

    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0
    for decade, movie_ids in movies.items():
        decade_people = []
        peopleDict[decade] = decade_people

        for movie in movie_ids:
            # NaN padding from unequal-length columns is not a real movie
            if pd.isna(movie):
                continue

            # int() keeps float-typed IDs (e.g. 967.0) from leaking into the URL
            cast_url = f'https://api.themoviedb.org/3/movie/{int(movie)}/credits?api_key={api_key}'
            data = requests.get(cast_url, timeout=30).json()

            time.sleep(0.1)

            # error payloads carry no 'cast' key; treat them like an empty cast
            cast = data.get('cast')
            if not cast:
                continue

            # two lowest cast_id entries (only one if the cast has a single member)
            top_two = sorted(cast, key=lambda member: member['cast_id'])[:2]
            for member in top_two:
                person = member['id']
                # check for uniqueness within this decade
                if person not in decade_people:
                    decade_people.append(person)

    # convert dictionary of people IDs to DataFrame (Series allow ragged columns)
    peopleDF = pd.DataFrame({k: pd.Series(v) for k, v in peopleDict.items()})
    return peopleDF

In [157]:
# Aruna: Module to generate DataFrame of people with birth state and places
def birthplaces(people):
    """Look up the place of birth of every person ID, column by column.

    Each non-missing ID in ``people`` is requested from the TMDb person
    endpoint and the ``place_of_birth`` value of the response is collected
    when it is present and not null.

    Parameters
    ----------
    people : pandas.DataFrame
        One column per decade holding TMDb person IDs; NaN entries are skipped.

    Returns
    -------
    pandas.DataFrame
        Same column labels as ``people``; each column lists the birthplace
        strings found, NaN-padded to the longest column.
    """
    birthplace = {}

    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0
    for decade, person_ids in people.items():
        decade_places = []
        birthplace[decade] = decade_places

        # dropna() skips missing IDs wherever they sit in the column
        for ids in person_ids.dropna():
            time.sleep(0.25)
            # int() keeps float-typed IDs (NaN-padded columns) out of the URL
            url = f'https://api.themoviedb.org/3/person/{int(ids)}?api_key={api_key}'
            response = requests.get(url, timeout=30).json()

            # JSON null is decoded to None (not the string 'null'); .get() also
            # covers error payloads that have no 'place_of_birth' key at all
            place = response.get('place_of_birth')
            if place is not None:
                decade_places.append(place)

    # convert dictionary of birthplaces to DataFrame (Series allow ragged columns)
    birthDF = pd.DataFrame({k: pd.Series(v) for k, v in birthplace.items()})
    return birthDF

In [158]:
# function to retrieve lat/longs for each birthplace
def placesList(places,column):
    """Geocode the birthplaces in one column and return their coordinates.

    Every non-missing entry of ``places[column]`` is sent to the Google
    Places "find place from text" endpoint.  The latitude/longitude of the
    first candidate is recorded; places with no usable candidate are
    reported with a printed message and left out of the result.

    As a side effect ``gmaps`` is configured with ``gkey``.

    Parameters
    ----------
    places : pandas.DataFrame
        Birthplace strings, one column per decade.
    column : str
        Label of the column to geocode.

    Returns
    -------
    pandas.DataFrame
        Columns ``lat`` and ``lng``, one row per successfully geocoded place.
    """
    # location dictionary
    placesDict = {'lat':[], 'lng':[]}

    # maps url
    mapUrl = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json'
    # select fields with lat/long data
    fields = 'id,geometry'

    # dropna() skips missing entries wherever they sit, so trailing valid
    # places are no longer cut off when a None appears mid-column
    for place in places[column].dropna():
        # let requests URL-encode the place text (spaces, '&', brackets, ...)
        params = {
            'inputtype': 'textquery',
            'input': place,
            'fields': fields,
            'key': gkey,
        }
        # generate json data
        data = requests.get(mapUrl, params=params, timeout=30).json()

        # grab lat and long data from json (some places, e.g. countries that
        # no longer exist, return no candidate)
        try:
            location = data['candidates'][0]['geometry']['location']
            lat = location['lat']
            lng = location['lng']
        except (KeyError, IndexError, TypeError):
            print(f'{place} not plotable')
            # skip it: falling through would re-append the previous place's
            # coordinates (or fail if this was the first lookup)
            continue

        # add data to dictionary
        placesDict['lat'].append(lat)
        placesDict['lng'].append(lng)

    # location dataframe
    locations = pd.DataFrame(placesDict, columns=['lat', 'lng'], dtype=float)

    # Configure gmaps with API key
    gmaps.configure(api_key=gkey)

    return locations


# Plotting function
def placesMap(locations):
    """Return a gmaps figure (hybrid map) carrying a heatmap of ``locations``.

    ``locations`` is handed unchanged to ``gmaps.heatmap_layer``.
    """
    heat_figure = gmaps.figure(map_type='HYBRID')
    # one dissipating heatmap layer built from the supplied coordinates
    heat_figure.add_layer(gmaps.heatmap_layer(locations, dissipating=True))
    return heat_figure

In [159]:
# Fetch highest-revenue movie IDs for release years 1960-2009 (the end year is
# exclusive), grouped into one column per decade. One throttled request per year.
movies = highest_revenue(1960,2010)

In [160]:
# For each movie, request its credits and collect the unique person IDs of the
# two cast members with the lowest cast_id, keeping one column per decade.
people = top2people(movies)

In [161]:
# Look up each person's place of birth, keeping one column per decade.
birthDF = birthplaces(people)

In [162]:
# Lat/Longs for 1960: geocode the birthplaces in the '1960' column
locations1960 = placesList(birthDF,'1960')

Aumale, French Algeria not plotable
Vienna-Penzing, Austria-Hungary [now Austria] not plotable
Rózsahegy (now Ružomberok), Austria-Hungary (now Slovakia) not plotable
Chirravuru, Guntur district, Madras Presidency, British India not plotable
Vrútky, Czechoslovakia [now Slovakia] not plotable
Nová Včelnice, Czechoslovakia [now Czech Republic] not plotable
 Tunis, French Protectorate Tunisia [now Tunisia] not plotable


In [163]:
# 1960 map: heatmap of the geocoded birthplaces from the '1960' column
placesMap(locations1960)

Figure(layout=FigureLayout(height='420px'))

In [164]:
# Lat/Longs for 1970: geocode the birthplaces in the '1970' column
locations1970 = placesList(birthDF,'1970')

In [165]:
# 1970 map: heatmap of the geocoded birthplaces from the '1970' column
placesMap(locations1970)

Figure(layout=FigureLayout(height='420px'))

In [166]:
# Lat/Longs for 1980: geocode the birthplaces in the '1980' column
locations1980 = placesList(birthDF,'1980')

Idar-Oberstein, Allemagne de l'Ouest not plotable


In [167]:
# 1980 map: heatmap of the geocoded birthplaces from the '1980' column
placesMap(locations1980)

Figure(layout=FigureLayout(height='420px'))

In [168]:
# Lat/Longs for 1990: geocode the birthplaces in the '1990' column
locations1990 = placesList(birthDF,'1990')

Idar-Oberstein, Allemagne de l'Ouest not plotable
Manciano La Misericordia, Castiglion Fiorentino, Arezzo, Italia not plotable


In [169]:
# 1990 map: heatmap of the geocoded birthplaces from the '1990' column
placesMap(locations1990)

Figure(layout=FigureLayout(height='420px'))

In [170]:
# Lat/Longs for 2000: geocode the birthplaces in the '2000' column
locations2000 = placesList(birthDF,'2000')

Idar-Oberstein, Allemagne de l'Ouest not plotable


In [171]:
# 2000 map: heatmap of the geocoded birthplaces from the '2000' column
placesMap(locations2000)

Figure(layout=FigureLayout(height='420px'))

In [172]:
# Preview the first five rows of the movie-ID table (one column per decade)
movies.head(5)

Unnamed: 0,1960,1970,1980,1990,2000
0,967,9062,1891,251,955
1,539,10671,8536,771,98
2,284,11202,525,114,8358
3,1941,651,21629,581,3981
4,439,10112,813,861,10567
