In [65]:
import ast

import numpy as np
import pandas as pd
import ujson

def summary(movie):
    """Build a flat summary dict for a single movie record.

    Parameters
    ----------
    movie : mapping
        Must provide the keys "Title", "budget", "director", "distributor",
        "writers", "domestic_BO", "foreign_BO", "genre", "rating" and
        "runtime"; a missing key raises ``KeyError``.

    Returns
    -------
    dict
        The ten keys above (with "director" and "writers" flattened to a
        comma-separated string), plus:

        * ``rating_coef``  - 1/2/3 for "PG"/"PG-13"/"R", ``None`` otherwise.
        * ``runtime_coef`` - 1..5 for the runtime bucket, ``None`` when the
          runtime falls in no bucket (NaN, non-integral, negative, >= 500).
        * ``international_BO`` - ``domestic_BO + foreign_BO``.
        * ``weighted_international_BO_avg`` - ``np.average`` of
          ``[domestic_BO, international_BO]`` with weights ``[0.6, 0.4]``.
    """
    keys = ["Title", "budget", "director", "distributor", 'writers',
            "domestic_BO", "foreign_BO", "genre", "rating", "runtime"]
    result = {key: movie[key] for key in keys}

    # ``str`` on Python 3, ``str``/``unicode`` on Python 2.
    text_types = (str, type(u''))

    def clean_names(raw):
        """Turn a stringified list such as "[u'A', u'B']" into "A, B"."""
        text = str(raw)
        # A missing value (float NaN) stringifies to exactly 'nan'.
        if text == 'nan':
            return ''
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            parsed = None
        if isinstance(parsed, (list, tuple)) and all(
                isinstance(name, text_types) for name in parsed):
            return ', '.join(parsed)
        # Not a parseable list of names: strip the list/quote decoration only.
        # Removing every "nan" substring here would corrupt real names
        # (e.g. "Brennan"), so that is deliberately not done.
        return (text.replace('[', '').replace(']', '')
                    .replace("u'", '').replace("'", ''))

    result['director'] = clean_names(result['director'])
    result['writers'] = clean_names(result['writers'])

    ratings_map = {"PG": 1, "PG-13": 2, "R": 3}
    # Ordered, non-overlapping buckets: the last one starts at 181 so that
    # runtimes of 151-180 minutes always land in bucket 4.
    runtime_buckets = [
        (1, range(91)),
        (2, range(91, 121)),
        (3, range(121, 151)),
        (4, range(151, 181)),
        (5, range(181, 500)),
    ]

    def runtime_coef(runtime):
        """Return the bucket number containing ``runtime``, else ``None``."""
        for coef, minutes in runtime_buckets:
            if runtime in minutes:
                return coef
        return None

    result['rating_coef'] = ratings_map.get(result['rating'])
    result['runtime_coef'] = runtime_coef(result['runtime'])

    result['international_BO'] = result['domestic_BO'] + result['foreign_BO']
    result['weighted_international_BO_avg'] = np.average(
        [result['domestic_BO'], result['international_BO']],
        weights=[0.6, 0.4])
    return result


def weekly_avgs(record):
    """Collect the per-week averages of a record and derive a weighted score.

    Every key containing ``'_avg'`` whose value is not NaN is kept. Each kept
    value is multiplied by its week number (the integer between the first two
    underscores of the key, e.g. 10 for ``'week_10_avg'``), the products are
    summed, and the sum is divided by the number of kept weeks.

    Parameters
    ----------
    record : mapping
        Movie record; keys containing ``'_avg'`` must look like
        ``week_<n>_avg`` (otherwise ``int()`` raises ``ValueError``) and hold
        numeric values.

    Returns
    -------
    dict
        The kept ``week_<n>_avg`` entries plus ``'weighted_weekly_avg'``
        (rounded to a whole number) and ``'weighted_weekly_avg_score'``
        (the former divided by 10000). Both are NaN when the record has no
        usable weekly average.
    """
    averages = {key: value for key, value in record.items()
                if '_avg' in key and not np.isnan(value)}

    if averages:
        # Weight by the parsed week number; a substring test such as
        # ``str(n) in key`` would make 'week_10_avg' count for 0, 1 and 10.
        weighted_sum = sum(value * int(key.split('_')[1])
                           for key, value in averages.items())
        # float() keeps true division on Python 2 as well.
        weighted_avg = np.round(weighted_sum / float(len(averages)))
    else:
        # No weekly data at all: report "missing" rather than dividing by zero.
        weighted_avg = float('nan')

    averages['weighted_weekly_avg'] = weighted_avg
    averages['weighted_weekly_avg_score'] = weighted_avg / 10000
    return averages


def weekly_ranks(record):
    """Collect the per-week ranks of a record and derive a rank score.

    Every key containing ``'_rank'`` whose value is not NaN is kept. Each kept
    week contributes ``week_number / rank`` points, where the week number is
    the text between the first two underscores of the key. The score is the
    mean of those points rounded to three decimals.

    Parameters
    ----------
    record : mapping
        Movie record; keys containing ``'_rank'`` must look like
        ``week_<n>_rank`` (otherwise ``float()`` raises ``ValueError``) and
        hold numeric values.

    Returns
    -------
    dict
        The kept ``week_<n>_rank`` entries plus ``'weekly_rank_score'``,
        which is NaN when the record has no usable rank.
    """
    ranks = {key: value for key, value in record.items()
             if '_rank' in key and not np.isnan(value)}

    if ranks:
        points = sum(float(key.split('_')[1]) / rank
                     for key, rank in ranks.items())
        score = round(points / len(ranks), 3)
    else:
        # No rank data at all: report "missing" rather than dividing by zero.
        score = float('nan')

    ranks['weekly_rank_score'] = score
    return ranks

In [66]:
# Load the movie dataset; the path is relative to the current working directory.
data = pd.read_csv("../dc_bom.csv")

In [67]:
# Show every column name of the loaded frame as an array.
np.array(data.columns)

array(['Title', 'budget', 'director', 'distributor', 'domestic_BO',
       'foreign_BO', 'genre', 'rating', 'runtime', 'week_10_avg',
       'week_10_change', 'week_10_rank', 'week_11_avg', 'week_11_change',
       'week_11_rank', 'week_12_avg', 'week_12_change', 'week_12_rank',
       'week_13_avg', 'week_13_change', 'week_13_rank', 'week_14_avg',
       'week_14_change', 'week_14_rank', 'week_15_avg', 'week_15_change',
       'week_15_rank', 'week_1_avg', 'week_1_gross', 'week_1_rank',
       'week_2_avg', 'week_2_change', 'week_2_rank', 'week_3_avg',
       'week_3_change', 'week_3_rank', 'week_4_avg', 'week_4_change',
       'week_4_rank', 'week_5_avg', 'week_5_change', 'week_5_rank',
       'week_6_avg', 'week_6_change', 'week_6_rank', 'week_7_avg',
       'week_7_change', 'week_7_rank', 'week_8_avg', 'week_8_change',
       'week_8_rank', 'week_9_avg', 'week_9_change', 'week_9_rank',
       'writers'], dtype=object)

In [105]:
def weekly_gross_percent(record):
    """Turn week-over-week gross changes into cumulative shares and a score.

    Every key containing ``'_change'`` whose value is not NaN is kept. Each
    change ``c`` is converted to the multiplier ``1 + c`` (values are
    presumably fractional changes, e.g. -0.586 for a 58.6% drop - confirm
    against the data source). Week 1 is pinned to a multiplier of 1, and the
    multipliers are cumulatively multiplied in week order.

    Parameters
    ----------
    record : mapping
        Movie record; keys containing ``'_change'`` must look like
        ``week_<n>_change`` (otherwise ``int()`` raises ``ValueError``) and
        hold numeric values.

    Returns
    -------
    dict
        ``week_<n>_change`` -> cumulative product up to that week, for week 1
        and every week that had data, plus ``'weekly_change_score'``: the
        average of the cumulative values weighted by position (1, 2, ...),
        divided by the number of weeks and rounded to three decimals.
    """
    # The multiplier is 1 + change for any sign: a 0% change must leave the
    # running product untouched and a positive change must increase it.
    multipliers = {key: 1 + value for key, value in record.items()
                   if '_change' in key and not np.isnan(value)}
    # The opening week is the baseline of the cumulative series.
    multipliers['week_1_change'] = 1

    # Order by the parsed week number and write results back to the same
    # keys, so records with missing weeks neither crash nor get relabelled.
    ordered_keys = sorted(multipliers, key=lambda key: int(key.split('_')[1]))
    cumulative = np.cumprod([multipliers[key] for key in ordered_keys])

    result = dict(zip(ordered_keys, cumulative))
    n_weeks = len(ordered_keys)
    result['weekly_change_score'] = round(
        np.average(cumulative, weights=np.arange(1, n_weeks + 1)) / n_weeks, 3)
    return result

    
# One plain dict per DataFrame row (column name -> value).
jsondata = data.to_dict(orient= 'records')
# Try the function on the second record.
weekly_gross_percent(jsondata[1])


{'week_1_change': 1.0,
 'week_2_change': 0.621,
 'week_3_change': 0.21921299999999999,
 'week_4_change': 0.053926398,
 'week_5_change': 0.014290495470000001,
 'week_6_change': 0.011475267862410002,
 'week_7_change': 0.0055540296454064404,
 'week_8_change': 0.0023493545400069238,
 'week_9_change': 0.00083871957078247177,
 'weekly_change_score': 0.008}

In [90]:
 # Scratch check of str.format numbering for weeks 1-15. The call form of
 # print works on both Python 2 and Python 3.
 # NOTE: the dataset's columns are named "week_<n>_change", not "weekly_...".
 for k in range(1, 16):
     print("weekly_{}_change".format(k))

weekly_1_change
weekly_2_change
weekly_3_change
weekly_4_change
weekly_5_change
weekly_6_change
weekly_7_change
weekly_8_change
weekly_9_change
weekly_10_change
weekly_11_change
weekly_12_change
weekly_13_change
weekly_14_change
weekly_15_change


In [74]:
# Inspect the raw second record (all columns, including NaN-free weekly fields).
jsondata[1]

{'Title': 'The Dark Knight Rises',
 'budget': 250000000.0,
 'director': "[u'Christopher Nolan']",
 'distributor': 'Warner Bros.',
 'domestic_BO': 448139099.0,
 'foreign_BO': 636800000.0,
 'genre': 'Action Thriller',
 'rating': 'PG-13',
 'runtime': 165L,
 'week_10_avg': 1825.0,
 'week_10_change': -0.42100000000000004,
 'week_10_rank': 14.0,
 'week_11_avg': 1690.0,
 'week_11_change': -0.36,
 'week_11_rank': 16.0,
 'week_12_avg': 1728.0,
 'week_12_change': -0.10300000000000001,
 'week_12_rank': 15.0,
 'week_13_avg': 1540.0,
 'week_13_change': -0.27899999999999997,
 'week_13_rank': 16.0,
 'week_14_avg': 1420.0,
 'week_14_change': -0.316,
 'week_14_rank': 19.0,
 'week_15_avg': 1364.0,
 'week_15_change': -0.19,
 'week_15_rank': 19.0,
 'week_1_avg': 51092.0,
 'week_1_gross': 225011359.0,
 'week_1_rank': 1L,
 'week_2_avg': 21159.0,
 'week_2_change': -0.586,
 'week_2_rank': 1.0,
 'week_3_avg': 12355.0,
 'week_3_change': -0.43799999999999994,
 'week_3_rank': 1.0,
 'week_4_avg': 7633.0,
 'week_4_