In [1]:
# import libraries and modules
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import gmaps
import time
import scipy.stats as scistats
import random
import requests
import json
from datetime import datetime
from config import api_key
from config import gkey

In [2]:
# Kristina: Module to generate list of movie IDs
def highest_revenue(start_year=1960, end_year=2010, pause=0.25, timeout=30):
    """Collect TMDB movie IDs for the highest-revenue films, grouped by decade.

    For every year in ``range(start_year, end_year)`` one request is sent to
    TMDB's ``/discover/movie`` endpoint, filtered by ``primary_release_year``
    and sorted by ``revenue.desc``. The IDs in the response's ``results`` list
    are appended to the bucket of the decade that year belongs to. No ``page``
    parameter is sent, so this is presumably the first page only (confirm
    against the TMDB docs).

    Parameters
    ----------
    start_year : int, default 1960
        First release year to query (inclusive).
    end_year : int, default 2010
        Release year to stop at (exclusive).
    pause : float, default 0.25
        Seconds to sleep before each request; ``0`` disables the pause.
    timeout : float, default 30
        Per-request timeout in seconds, handed to ``requests.get``.

    Returns
    -------
    pandas.DataFrame
        One column per decade, labelled with the decade's first year as a
        string (``'1960'``, ``'1970'``, ...). Each column lists the movie IDs
        year by year, in the order the API returned them. If decades end up
        with different numbers of IDs, shorter columns are padded with NaN.

    Raises
    ------
    requests.HTTPError
        If TMDB answers with an error status code.
    KeyError
        If a successful response has no ``results`` entry.
    """
    discover_url = 'https://api.themoviedb.org/3/discover/movie'

    ids_by_decade = {}
    for year in range(start_year, end_year):
        if pause:
            time.sleep(pause)

        # Let requests build and escape the query string.
        response = requests.get(
            discover_url,
            params={
                'api_key': api_key,
                'primary_release_year': year,
                'sort_by': 'revenue.desc',
                'language': 'en-US',
            },
            timeout=timeout,
        )
        # Fail loudly on HTTP errors instead of a later KeyError on 'results'.
        response.raise_for_status()
        films = response.json()['results']

        # Bucket by decade directly; 1967 -> '1960'.
        decade = str(year - year % 10)
        ids_by_decade.setdefault(decade, []).extend(film['id'] for film in films)

    # Wrapping each list in a Series lets pandas pad unequal decades with NaN
    # instead of raising on a length mismatch.
    return pd.DataFrame(
        {decade: pd.Series(ids) for decade, ids in ids_by_decade.items()}
    )

In [27]:
# Evan: Module to get list of people from cast
def top2people(movies, pause=0.25, timeout=30, verbose=True):
    """Collect, per decade, the unique person IDs of each movie's top two cast entries.

    For every movie ID in every column of ``movies`` the TMDB
    ``/movie/{id}/credits`` endpoint is requested. The two cast entries with
    the smallest ``cast_id`` are selected, and their person ``id`` values are
    added to that column's list if not already present.

    NOTE(review): "top two" means smallest ``cast_id``, exactly as before;
    whether that matches billing order has not been verified here.

    Parameters
    ----------
    movies : pandas.DataFrame
        One column per decade holding TMDB movie IDs (the shape returned by
        ``highest_revenue``). Missing values (NaN/NA) are skipped, and float
        IDs such as ``967.0`` are converted to ``int`` before use.
    pause : float, default 0.25
        Seconds to sleep after each request; ``0`` disables the pause.
    timeout : float, default 30
        Per-request timeout in seconds, handed to ``requests.get``.
    verbose : bool, default True
        Print each movie ID as it is processed.

    Returns
    -------
    pandas.DataFrame
        One column per column of ``movies`` (same labels, same order) holding
        the unique person IDs in first-seen order. Shorter columns are padded
        with NaN.

    Raises
    ------
    requests.HTTPError
        If TMDB answers with an error status code.
    """
    people_by_decade = {}

    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for decade, movie_ids in movies.items():
        # A dict serves as an insertion-ordered set of person IDs.
        decade_people = people_by_decade.setdefault(decade, {})

        for movie in movie_ids:
            # Padded columns carry NaN; there is nothing to look up for those.
            if pd.isna(movie):
                continue
            # NaN padding turns a column into floats; '967.0' is not a valid ID.
            movie = int(movie)
            if verbose:
                print(movie)

            response = requests.get(
                f'https://api.themoviedb.org/3/movie/{movie}/credits',
                params={'api_key': api_key},
                timeout=timeout,
            )
            response.raise_for_status()
            cast = response.json().get('cast') or []

            if pause:
                time.sleep(pause)

            # Slicing handles casts with zero or one entry.
            top_two = sorted(cast, key=lambda member: member['cast_id'])[:2]
            for member in top_two:
                decade_people.setdefault(member['id'], None)

    # Wrapping each list in a Series lets pandas pad unequal columns with NaN.
    return pd.DataFrame(
        {decade: pd.Series(list(ids)) for decade, ids in people_by_decade.items()}
    )

In [5]:
# Query TMDB for the highest-revenue movie IDs, grouped into one column per decade.
# This sends one API request per release year, so the cell takes a while to run.
movies = highest_revenue()

In [None]:
# Display the movie-ID table built by highest_revenue().
movies

In [28]:
# Look up the credits of every movie and keep the unique top-two cast person IDs per decade.
# This sends one API request per movie; the movie IDs printed below are progress output.
people = top2people(movies)


967
539
284
1941
439
22013
15788
43039
11209
18973
25767
966
23439
11656
11167
29259
2216
59020
86297
116385
12230
1725
10911
36362
821
164
11536
990
2160
248
18647
28284
430
28501
31522
84249
40894
101778
63680
195576
982
947
646
9289
11897
11085
595
802
11697
13671
41521
898
1162
29235
122845
1626
31287
29873
79857
66433
657
8095
11576
5769
9078
4808
5925
571
936
24748
27759
15875
28172
29056
30137
266
26535
4324
71986
56749
658
433
11113
391
1594
704
14703
935
14433
506
3482
17277
2094
95358
8073
36245
187703
136446
123281
130332
15121
660
907
10338
11575
11694
2428
886
938
33364
45256
36815
39220
3481
539411
19120
29715
42736
81594
158867
14689
396
874
5923
21779
5780
2161
26268
429
6644
46041
23832
17295
17102
289179
20108
10772
33937
38793
180586
9325
667
37247
1879
475
3055
1654
12208
44695
10633
903
18900
27945
11206
16081
27277
86059
26039
2984
45335
62
16085
11356
916
6003
17917
805
871
10331
11046
52959
41857
22377
4929
5721
335
26170
42633
39209
8069
642
668
3116
14030
2039

In [29]:
# Aruna: Module to generate DataFrame of people with birth state and places
# NOTE(review): no such module is defined in this cell; it only displays the
# `people` DataFrame of person IDs per decade.
people

Unnamed: 0,1960,1970,1980,1990,2000
0,2090.0,46597,2.0,723.0,500.0
1,3359.0,31070,3.0,3416.0,15336.0
2,7301.0,13784,20006.0,11510.0,934.0
3,7302.0,4299,20011.0,4517.0,73421.0
4,3151.0,862,707.0,1204.0,31.0
5,4090.0,9857,7171.0,1205.0,9994.0
6,3636.0,55636,3460.0,1269.0,2461.0
7,2639.0,827,9309.0,1581.0,61962.0
8,5676.0,57329,14414.0,1100.0,1981.0
9,5961.0,34759,2177.0,4430.0,206.0


In [16]:
# Small hand-written sample of place names for trying out placesMap.
mockPlaces = pd.Series(
    [
        'Seattle, WA, USA',
        'Portland, OR, USA',
        'Los Angeles, CA, USA',
        'Kansas City, MO, USA',
        'Toronto, CAN',
    ],
    name='place',
).to_frame()
def placesMap(places, timeout=30):
    """Geocode place names with Google's Find Place API and plot them on a gmaps figure.

    Each entry of ``places['place']`` is sent as a text query to the
    ``findplacefromtext`` endpoint. The latitude and longitude of the first
    candidate are collected, and all collected points are drawn as one symbol
    layer.

    Parameters
    ----------
    places : pandas.DataFrame
        Must contain a ``'place'`` column of free-text place names. Missing
        values are skipped.
    timeout : float, default 30
        Per-request timeout in seconds, handed to ``requests.get``.

    Returns
    -------
    gmaps figure
        The figure created by ``gmaps.figure()``. It carries a symbol layer
        with one marker per resolved place, or no layer at all when nothing
        could be resolved.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    RuntimeError
        If a response has no candidates and reports a status other than
        ``OK`` or ``ZERO_RESULTS`` (for example a rejected key).
    """
    find_place_url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json'

    coordinates = []
    for place in places['place']:
        if pd.isna(place):
            continue

        # Passing the raw text via `params` lets requests escape spaces,
        # commas and any other reserved characters.
        response = requests.get(
            find_place_url,
            params={
                'inputtype': 'textquery',
                'input': place,
                # Only the geometry is read below, so it is all we ask for.
                'fields': 'geometry',
                'key': gkey,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()

        candidates = data.get('candidates') or []
        if not candidates:
            status = data.get('status')
            # No match is tolerated; an API-level failure is reported.
            if status not in (None, 'OK', 'ZERO_RESULTS'):
                raise RuntimeError(
                    f'Find Place request for {place!r} failed with status {status}'
                )
            continue

        location = candidates[0]['geometry']['location']
        coordinates.append((location['lat'], location['lng']))

    locations = pd.DataFrame(coordinates, columns=['lat', 'lng'])

    # Configure gmaps with the API key before creating any figure.
    gmaps.configure(api_key=gkey)
    fig = gmaps.figure()

    # Only add the layer when at least one place was resolved.
    if not locations.empty:
        place_layer = gmaps.symbol_layer(
            locations, fill_color='rgba(0, 10, 0, 0.4)',
            stroke_color='rgba(0, 10, 100, 0.4)', scale=2
        )
        fig.add_layer(place_layer)

    return fig

In [17]:
# Try the map out on the mock places; the returned figure renders as the cell output.
placesMap(mockPlaces)

Figure(layout=FigureLayout(height='420px'))

Unnamed: 0,1970,1980,1990,2000,2010,comb
0,13784.0,55636.0,10017,10017,10017,69420.0
1,4299.0,827.0,18643,18643,18643,5126.0
2,862.0,1936.0,239289,239289,239289,2798.0
3,9857.0,68411.0,9768,9768,9768,78268.0
4,57329.0,1903.0,57329,57329,57329,59232.0
5,34759.0,,34759,34759,34759,
6,,,46597,46597,46597,
7,,,31070,31070,31070,
8,,,1936,1936,2880,
9,,,68411,68411,49824,


full
