# Importing libraries

In [128]:
import matplotlib as plt
from matplotlib import pyplot as py
import pandas as pd
import numpy as np
import seaborn as sns
import plotly as pl 
from plotly import express as ex
import json 
from datetime import datetime

# Reading csv file after parsing json strings

In [129]:
clean = pd.read_csv("../data/not_json.csv", index_col = 0)

In [130]:
clean.shape

(4803, 16)

In [131]:
clean.head()

Unnamed: 0,title,cast,budget,genres,original_language,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,vote_average,vote_count,NaN,director
0,Avatar,"['Sam Worthington', ' Zoe Saldana', ' Sigourne...",237000000.0,"['Action', 'Adventure', 'Fantasy', 'Science Fi...",en,150.437577,"['Ingenious Film Partners', 'Twentieth Century...","['United States of America', 'United Kingdom']",2009-12-10,2787965000.0,162.0,"['English', 'Español']",7.2,11800.0,False,James Cameron
1,Pirates of the Caribbean: At World's End,"['Johnny Depp', ' Orlando Bloom', ' Keira Knig...",300000000.0,"['Adventure', 'Fantasy', 'Action']",en,139.082615,"['Walt Disney Pictures', 'Jerry Bruckheimer Fi...",['United States of America'],2007-05-19,961000000.0,169.0,['English'],6.9,4500.0,False,Gore Verbinski
2,Spectre,"['Daniel Craig', ' Christoph Waltz', ' Léa Sey...",245000000.0,"['Action', 'Adventure', 'Crime']",en,107.376788,"['Columbia Pictures', 'Danjaq', 'B24']","['United Kingdom', 'United States of America']",2015-10-26,880674600.0,148.0,"['Français', 'English', 'Español', 'Italiano',...",6.3,4466.0,False,Sam Mendes
3,The Dark Knight Rises,"['Christian Bale', ' Michael Caine', ' Gary Ol...",250000000.0,"['Action', 'Crime', 'Drama', 'Thriller']",en,112.31295,"['Legendary Pictures', 'Warner Bros.', 'DC Ent...",['United States of America'],2012-07-16,1084939000.0,165.0,['English'],7.6,9106.0,False,Christopher Nolan
4,John Carter,"['Taylor Kitsch', ' Lynn Collins', ' Samantha ...",260000000.0,"['Action', 'Adventure', 'Science Fiction']",en,43.926995,['Walt Disney Pictures'],['United States of America'],2012-03-07,284139100.0,132.0,['English'],6.1,2124.0,False,Andrew Stanton


In [132]:
clean = clean.drop(columns = ["revenue", "budget"])

In [133]:
clean.shape

(4803, 14)

In [134]:
clean.isna().sum()

title                    0
cast                     0
genres                   0
original_language        0
popularity               0
production_companies     0
production_countries     0
release_date             1
runtime                  2
spoken_languages         0
vote_average            63
vote_count              62
NaN                      0
director                30
dtype: int64

In [135]:
clean.NaN = clean.isna().any(axis = 1)

In [136]:
clean.NaN.value_counts()

False    4722
True       81
Name: NaN, dtype: int64

In [137]:
clean = clean.query("NaN == False")

In [138]:
clean.shape

(4722, 14)

In [139]:
clean.drop(columns = "NaN", inplace = True)

In [140]:
clean.shape

(4722, 13)

In [141]:
clean.genres.unique() #string format

array(["['Action', 'Adventure', 'Fantasy', 'Science Fiction']",
       "['Adventure', 'Fantasy', 'Action']",
       "['Action', 'Adventure', 'Crime']", ...,
       "['Crime', 'Horror', 'Mystery', 'Thriller']",
       "['Foreign', 'Thriller']",
       "['Comedy', 'Drama', 'Romance', 'TV Movie']"], dtype=object)

In [142]:
clean.dtypes

title                    object
cast                     object
genres                   object
original_language        object
popularity              float64
production_companies     object
production_countries     object
release_date             object
runtime                 float64
spoken_languages         object
vote_average            float64
vote_count              float64
director                 object
dtype: object

# Defining functions to convert strings to list

In [143]:
def convert_list(x):
    return list(x.replace("[","").replace("]", "").replace("'","").split(", "))

#function to convert strings into list

In [144]:
def convert_list_space(x):
    return list(x.replace("[","").replace("]", "").replace("'","").split(",  "))

#function to convert strings into list with a different split

# Genres

In [145]:
clean.genres = clean.genres.apply(convert_list)

In [146]:
clean.genres[0]

['Action', 'Adventure', 'Fantasy', 'Science Fiction']

# Actors

In [147]:
clean.cast = clean.cast.apply(convert_list_space)

In [148]:
clean.cast[0]

['Sam Worthington', 'Zoe Saldana', 'Sigourney Weaver']

# Production companies

In [149]:
clean.production_companies = clean.production_companies.apply(convert_list)

In [150]:
clean.production_companies[0]

['Ingenious Film Partners',
 'Twentieth Century Fox Film Corporation',
 'Dune Entertainment',
 'Lightstorm Entertainment']

# Production countries

In [151]:
clean.production_countries = clean.production_countries.apply(convert_list)

In [152]:
clean.production_countries[0]

['United States of America', 'United Kingdom']

# Spoken languages

In [153]:
clean.spoken_languages = clean.spoken_languages.apply(convert_list)

In [154]:
clean.spoken_languages[0]

['English', 'Español']

# Getting dummies

Converting the categorical data to numeric by creating a column, named like each element on the list, and with values of 0 and 1 depending if that element appears in the row or not.

In [155]:
def get_dummies(lst, df):
    for i in lst:
        clean[i] = df.apply(lambda x: 1 if i in x else 0)

# Genre

In [156]:
genre_list = ["Action", "Adventure", "Animation", "Fantasy", "Science Fiction", "Romance", "TV Movie", "Crime", "Thriller", "Horror", "Drama", "Comedy", "Foreign", "Family", "Mystery", "War", "History", "Documentary",'Music',"Western"]

In [157]:
get_dummies(genre_list, clean.genres)

In [158]:
#clean.head()

# Top Actors

In [159]:
cast_list = list(clean.cast) 
#creating list to iterate over and get dummies

In [160]:
actors = pd.DataFrame(cast_list, columns=["Actor1", "Actor2", "Actor3"])
#creating dataframe with the top 3 actors per row

In [161]:
count1 = actors.Actor1.value_counts()

In [162]:
count1_df = pd.DataFrame(count1)

In [163]:
count1_df.reset_index()

Unnamed: 0,index,Actor1
0,Robert De Niro,30
1,Bruce Willis,30
2,Nicolas Cage,29
3,Johnny Depp,27
4,Denzel Washington,26
...,...,...
2045,Abbie Cornish,1
2046,Richard Tyson,1
2047,"""Scott Carrot Top Thompson""",1
2048,Marina Golbahari,1


In [164]:
top_actors = count1_df.query("Actor1 > 10") 
#actors that appear in more than 10 movies

In [165]:
top_actors.reset_index()

Unnamed: 0,index,Actor1
0,Robert De Niro,30
1,Bruce Willis,30
2,Nicolas Cage,29
3,Johnny Depp,27
4,Denzel Washington,26
...,...,...
64,Vin Diesel,11
65,Martin Lawrence,11
66,Seth Rogen,11
67,Cameron Diaz,11


In [166]:
actors_list = list(top_actors.index)
#just actors from 1st column because the ones in the other columns 
#made too few movies

In [167]:
get_dummies(actors_list, clean.cast)

In [168]:
clean.shape

(4722, 102)

# Top directors

In [169]:
fam_dir = clean.director.value_counts()

In [170]:
fam_dir_df = pd.DataFrame(fam_dir)

In [171]:
fam_dir_df.head() 
#counting movies done by each director

Unnamed: 0,director
Steven Spielberg,27
Woody Allen,21
Clint Eastwood,20
Martin Scorsese,20
Spike Lee,16


In [172]:
fam_dir_df.rename(columns = {"director":"count"}, inplace = True)

In [173]:
top_dir = fam_dir_df.query("count > 6") 
#directors with more than 6 movies

In [174]:
top_dir.shape

(97, 1)

In [175]:
top_dir.reset_index()

Unnamed: 0,index,count
0,Steven Spielberg,27
1,Woody Allen,21
2,Clint Eastwood,20
3,Martin Scorsese,20
4,Spike Lee,16
...,...,...
92,Gus Van Sant,7
93,Tim Story,7
94,Roger Donaldson,7
95,Malcolm D. Lee,7


In [176]:
director_list = list(top_dir.index)

In [177]:
#director_list

In [178]:
for i in director_list: 
    clean[i]=clean.director.apply(lambda x: 1 if i == x else 0)

In [179]:
clean.shape

(4722, 197)

In [180]:
clean.head()

Unnamed: 0,title,cast,genres,original_language,popularity,production_companies,production_countries,release_date,runtime,spoken_languages,...,James Cameron,Frank Coraci,Peter Segal,David Cronenberg,George Miller,Gus Van Sant,Tim Story,Roger Donaldson,Malcolm D. Lee,Terry Gilliam
0,Avatar,"[Sam Worthington, Zoe Saldana, Sigourney Weaver]","[Action, Adventure, Fantasy, Science Fiction]",en,150.437577,"[Ingenious Film Partners, Twentieth Century Fo...","[United States of America, United Kingdom]",2009-12-10,162.0,"[English, Español]",...,1,0,0,0,0,0,0,0,0,0
1,Pirates of the Caribbean: At World's End,"[Johnny Depp, Orlando Bloom, Keira Knightley]","[Adventure, Fantasy, Action]",en,139.082615,"[Walt Disney Pictures, Jerry Bruckheimer Films...",[United States of America],2007-05-19,169.0,[English],...,0,0,0,0,0,0,0,0,0,0
2,Spectre,"[Daniel Craig, Christoph Waltz, Léa Seydoux]","[Action, Adventure, Crime]",en,107.376788,"[Columbia Pictures, Danjaq, B24]","[United Kingdom, United States of America]",2015-10-26,148.0,"[Français, English, Español, Italiano, Deutsch]",...,0,0,0,0,0,0,0,0,0,0
3,The Dark Knight Rises,"[Christian Bale, Michael Caine, Gary Oldman]","[Action, Crime, Drama, Thriller]",en,112.31295,"[Legendary Pictures, Warner Bros., DC Entertai...",[United States of America],2012-07-16,165.0,[English],...,0,0,0,0,0,0,0,0,0,0
4,John Carter,"[Taylor Kitsch, Lynn Collins, Samantha Morton]","[Action, Adventure, Science Fiction]",en,43.926995,[Walt Disney Pictures],[United States of America],2012-03-07,132.0,[English],...,0,0,0,0,0,0,0,0,0,0


# Top production companies

In [181]:
clean.production_companies = clean.production_companies.apply(lambda x: x[:3])
clean.production_companies
#selecting top 3 companies

0       [Ingenious Film Partners, Twentieth Century Fo...
1       [Walt Disney Pictures, Jerry Bruckheimer Films...
2                        [Columbia Pictures, Danjaq, B24]
3       [Legendary Pictures, Warner Bros., DC Entertai...
4                                  [Walt Disney Pictures]
                              ...                        
4798                                  [Columbia Pictures]
4799                                                   []
4800    [Front Street Pictures, Muse Entertainment Ent...
4801                                                   []
4802         [rusty bear entertainment, lucky crow films]
Name: production_companies, Length: 4722, dtype: object

In [182]:
prod_list = list(clean.production_companies)

In [183]:
prod_comp = pd.DataFrame(prod_list, columns=["Prod1", "Prod2", "Prod3"])

In [184]:
prod1 = prod_comp.Prod1.value_counts()

In [185]:
prod1_df = pd.DataFrame(prod1)

In [186]:
count1 = prod1_df.query("Prod1 > 50") 
#production companies that did more than 50 movies

In [187]:
count1.reset_index()

Unnamed: 0,index,Prod1
0,,298
1,Paramount Pictures,281
2,Universal Pictures,260
3,Columbia Pictures,200
4,Twentieth Century Fox Film Corporation,177
5,New Line Cinema,157
6,Walt Disney Pictures,113
7,Miramax Films,87
8,United Artists,72
9,Village Roadshow Pictures,71


In [188]:
comp_list = list(count1.index)

In [189]:
#comp_list

In [190]:
get_dummies(comp_list, clean.production_companies)

In [191]:
clean.shape

(4722, 210)

In [192]:
clean.head()

Unnamed: 0,title,cast,genres,original_language,popularity,production_companies,production_countries,release_date,runtime,spoken_languages,...,Twentieth Century Fox Film Corporation,New Line Cinema,Walt Disney Pictures,Miramax Films,United Artists,Village Roadshow Pictures,Columbia Pictures Corporation,Warner Bros.,DreamWorks SKG,Fox Searchlight Pictures
0,Avatar,"[Sam Worthington, Zoe Saldana, Sigourney Weaver]","[Action, Adventure, Fantasy, Science Fiction]",en,150.437577,"[Ingenious Film Partners, Twentieth Century Fo...","[United States of America, United Kingdom]",2009-12-10,162.0,"[English, Español]",...,1,0,0,0,0,0,0,0,0,0
1,Pirates of the Caribbean: At World's End,"[Johnny Depp, Orlando Bloom, Keira Knightley]","[Adventure, Fantasy, Action]",en,139.082615,"[Walt Disney Pictures, Jerry Bruckheimer Films...",[United States of America],2007-05-19,169.0,[English],...,0,0,1,0,0,0,0,0,0,0
2,Spectre,"[Daniel Craig, Christoph Waltz, Léa Seydoux]","[Action, Adventure, Crime]",en,107.376788,"[Columbia Pictures, Danjaq, B24]","[United Kingdom, United States of America]",2015-10-26,148.0,"[Français, English, Español, Italiano, Deutsch]",...,0,0,0,0,0,0,0,0,0,0
3,The Dark Knight Rises,"[Christian Bale, Michael Caine, Gary Oldman]","[Action, Crime, Drama, Thriller]",en,112.31295,"[Legendary Pictures, Warner Bros., DC Entertai...",[United States of America],2012-07-16,165.0,[English],...,0,0,0,0,0,0,0,1,0,0
4,John Carter,"[Taylor Kitsch, Lynn Collins, Samantha Morton]","[Action, Adventure, Science Fiction]",en,43.926995,[Walt Disney Pictures],[United States of America],2012-03-07,132.0,[English],...,0,0,1,0,0,0,0,0,0,0


# Top production countries

In [193]:
#clean.production_countries = clean.production_countries.apply(lambda x: x[:3])
#clean.production_countries
#selecting top 3 countries

In [194]:
#count_list = list(clean.production_countries)

In [195]:
#countries = pd.DataFrame(count_list, columns=["Count1", "Count2", "Count3"])


In [196]:
#count1 = countries.Count1.value_counts()

In [197]:
#count1_df = pd.DataFrame(count1)

In [198]:
#top_countries = count1_df.query("Count1 > 20") #more than 10 movies

In [199]:
#top_countries.reset_index()

In [200]:
#count_list = list(top_countries.index)

In [201]:
#count_list

In [202]:
#get_dummies(count_list, clean.production_countries)

In [203]:
#clean.head()

In [204]:
#clean.shape

# Original language

In [205]:
#lang = list(clean.original_language.unique())

In [206]:
#good = ["en", "fr", "es"]
#others = []
#for i in lang:
#    if i not in good: 
#        others.append(i)
#print(others)

In [207]:
#for i in good: 
#    clean[i]=clean.original_language.apply(lambda x: 1 if i in x else 0)

In [208]:
#get_dummies(good, clean.original_language)

In [209]:
#clean.head()

# Correlation matrix: Revenue

In [210]:
clean_drop = clean.drop(columns = ["title", "cast", "genres", "original_language", "production_companies", "production_countries", "spoken_languages", "director"])

In [211]:
clean_drop.head()

Unnamed: 0,popularity,release_date,runtime,vote_average,vote_count,Action,Adventure,Animation,Fantasy,Science Fiction,...,Twentieth Century Fox Film Corporation,New Line Cinema,Walt Disney Pictures,Miramax Films,United Artists,Village Roadshow Pictures,Columbia Pictures Corporation,Warner Bros.,DreamWorks SKG,Fox Searchlight Pictures
0,150.437577,2009-12-10,162.0,7.2,11800.0,1,1,0,1,1,...,1,0,0,0,0,0,0,0,0,0
1,139.082615,2007-05-19,169.0,6.9,4500.0,1,1,0,1,0,...,0,0,1,0,0,0,0,0,0,0
2,107.376788,2015-10-26,148.0,6.3,4466.0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,112.31295,2012-07-16,165.0,7.6,9106.0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,43.926995,2012-03-07,132.0,6.1,2124.0,1,1,0,0,1,...,0,0,1,0,0,0,0,0,0,0


In [212]:
data_vote = clean_drop.to_csv("data_vote.csv")

In [79]:
clean_drop.dtypes

budget                           float64
popularity                       float64
revenue                          float64
runtime                          float64
vote_average                     float64
                                  ...   
Miramax Films                      int64
United Artists                     int64
Columbia Pictures Corporation      int64
DreamWorks SKG                     int64
Warner Bros.                       int64
Length: 158, dtype: object

In [80]:
corr_matrix = clean_drop.corr()

In [81]:
corr_matrix

Unnamed: 0,budget,popularity,revenue,runtime,vote_average,vote_count,Action,Adventure,Animation,Fantasy,...,Columbia Pictures,Twentieth Century Fox Film Corporation,New Line Cinema,Walt Disney Pictures,Village Roadshow Pictures,Miramax Films,United Artists,Columbia Pictures Corporation,DreamWorks SKG,Warner Bros.
budget,1.000000,0.431744,0.705306,0.229712,-0.035757,0.539997,0.308384,0.414029,0.224938,0.271160,...,0.118528,0.066358,0.000990,0.219556,0.118455,-0.052167,-0.081444,0.016747,0.079653,0.112448
popularity,0.431744,1.000000,0.602122,0.182388,0.288189,0.749005,0.142519,0.230412,0.118494,0.131937,...,0.042859,0.052914,0.011075,0.096234,0.052298,-0.017755,-0.013549,-0.023620,0.016515,0.075031
revenue,0.705306,0.602122,1.000000,0.233236,0.188014,0.756143,0.187823,0.344038,0.210396,0.216217,...,0.062068,0.101060,0.017613,0.165154,0.051348,-0.036833,-0.043478,-0.011613,0.072980,0.067651
runtime,0.229712,0.182388,0.233236,1.000000,0.386199,0.258101,0.064185,0.071798,-0.227501,-0.020815,...,0.015120,0.004643,-0.017824,-0.060564,0.000038,0.015518,0.079807,0.019639,0.050656,0.122046
vote_average,-0.035757,0.288189,0.188014,0.386199,1.000000,0.380825,-0.134989,-0.046052,0.038478,-0.058236,...,-0.023453,-0.020136,-0.034940,0.009391,-0.036282,0.053616,0.080786,-0.015979,0.049319,0.026640
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Miramax Films,-0.052167,-0.017755,-0.036833,0.015518,0.053616,-0.011541,-0.048459,-0.052364,-0.017660,-0.000711,...,-0.034017,-0.037329,-0.020644,-0.025497,-0.022153,1.000000,-0.020713,-0.024094,-0.021203,-0.039482
United Artists,-0.081444,-0.013549,-0.043478,0.079807,0.080786,-0.040163,0.018689,0.032441,-0.035380,-0.048976,...,-0.033231,-0.036466,-0.030518,-0.024908,-0.021641,-0.020713,1.000000,-0.023537,-0.020713,-0.029770
Columbia Pictures Corporation,0.016747,-0.023620,-0.011613,0.019639,-0.015979,-0.034823,-0.010512,0.006599,-0.032941,-0.000715,...,-0.029970,-0.034421,-0.026119,-0.028974,-0.012234,-0.024094,-0.023537,1.000000,-0.010600,-0.029631
DreamWorks SKG,0.079653,0.016515,0.072980,0.050656,0.049319,0.042748,-0.005105,0.028412,0.056568,-0.007772,...,-0.034017,-0.001190,-0.031240,-0.025497,-0.022153,-0.021203,-0.020713,-0.010600,1.000000,-0.022273


In [82]:
var = []
corr = []
for index,i in zip(corr_matrix.index,corr_matrix.revenue):
    if i > 0:
        var.append(index)
        corr.append(i)
        
#positive correlation with revenue
#print(var)
#print(corr)

In [83]:
var_df = pd.DataFrame(var)

In [84]:
var_df=var_df.reset_index()

In [85]:
corr_df = pd.DataFrame(corr)

In [86]:
corr_df=corr_df.reset_index()

In [87]:
positive_corr = pd.merge(var_df, corr_df, on= "index")

In [88]:
positive_corr.sort_values("0_y", ascending = False)[:40]

Unnamed: 0,index,0_x,0_y
2,2,revenue,1.0
5,5,vote_count,0.756143
0,0,budget,0.705306
1,1,popularity,0.602122
7,7,Adventure,0.344038
3,3,runtime,0.233236
9,9,Fantasy,0.216217
8,8,Animation,0.210396
11,11,Family,0.20033
4,4,vote_average,0.188014


In [89]:
neg = []
num = []
for index,i in zip(corr_matrix.index,corr_matrix.revenue):
    if i < 0:
        neg.append(index)
        num.append(i)
        
#negative correlation with revenue

In [90]:
neg_df = pd.DataFrame(neg)

In [91]:
neg_df=neg_df.reset_index()

In [92]:
num_df = pd.DataFrame(num)

In [93]:
num_df=num_df.reset_index()

In [94]:
negative = pd.merge(neg_df, num_df, on= "index")

In [95]:
negative.sort_values("0_y", ascending = True)[:40]

Unnamed: 0,index,0_x,0_y
4,4,Drama,-0.188931
3,3,Horror,-0.097411
1,1,Crime,-0.078212
0,0,Romance,-0.071793
10,10,Documentary,-0.055947
9,9,History,-0.050528
5,5,Comedy,-0.049899
49,49,United Artists,-0.043478
11,11,Music,-0.042806
12,12,Western,-0.039979


# Correlation matrix: Vote score

In [96]:
corr_matrix.vote_average

budget                          -0.035757
popularity                       0.288189
revenue                          0.188014
runtime                          0.386199
vote_average                     1.000000
                                   ...   
Miramax Films                    0.053616
United Artists                   0.080786
Columbia Pictures Corporation   -0.015979
DreamWorks SKG                   0.049319
Warner Bros.                     0.026640
Name: vote_average, Length: 158, dtype: float64

In [97]:
for index,i in zip(corr_matrix.index,corr_matrix.vote_average):
    print(index, i)

budget -0.03575690037883095
popularity 0.28818930054783004
revenue 0.18801390868089132
runtime 0.38619910497404547
vote_average 1.0
vote_count 0.380824854025463
Action -0.13498923343853766
Adventure -0.04605222873830193
Animation 0.03847848416348833
Fantasy -0.05823624262935562
Science Fiction -0.06856709241454051
Romance 0.02091136410567958
TV Movie nan
Crime 0.06793852068755642
Thriller -0.07905383553683552
Horror -0.16182303768061887
Drama 0.3020122268744479
Comedy -0.19592056582693645
Foreign -0.005189425708774689
Family -0.053075674706817746
Mystery 0.017703256735753795
War 0.109541555151602
History 0.12296613577227963
Documentary 0.06714277192414153
Music 0.0467274047412917
Western 0.05593807814863684
Bruce Willis -0.0037363067555374768
Nicolas Cage -0.03829897446708291
Johnny Depp 0.03284503336434744
Denzel Washington 0.04480230339880506
Robert De Niro 0.012147535465589446
Tom Cruise 0.031200685897540062
Tom Hanks 0.08405142216219605
Matt Damon 0.044287802532391875
Arnold Schwar

In [100]:
neg = []
num = []
for index,i in zip(corr_matrix.index,corr_matrix.vote_average):
    if i < 0:
        neg.append(index)
        num.append(i)

In [101]:
var = []
corr = []
for index,i in zip(corr_matrix.index,corr_matrix.vote_average):
    if i > 0:
        var.append(index)
        corr.append(i)

In [102]:
neg_df = pd.DataFrame(neg)
neg_df=neg_df.reset_index()
num_df = pd.DataFrame(num)
num_df=num_df.reset_index()
negative = pd.merge(neg_df, num_df, on= "index")
negative.sort_values("0_y", ascending = True)

Unnamed: 0,index,0_x,0_y
7,7,Comedy,-0.195921
6,6,Horror,-0.161823
1,1,Action,-0.134989
5,5,Thriller,-0.079054
4,4,Science Fiction,-0.068567
...,...,...,...
22,22,Liam Neeson,-0.003490
47,47,George Miller,-0.002269
37,37,Kevin Smith,-0.001521
34,34,Steven Soderbergh,-0.001018


In [104]:
var_df = pd.DataFrame(var)
var_df=var_df.reset_index()
corr_df = pd.DataFrame(corr)
corr_df=corr_df.reset_index()
positive = pd.merge(var_df, corr_df, on= "index")
positive.sort_values("0_y", ascending = False)

Unnamed: 0,index,0_x,0_y
3,3,vote_average,1.000000
2,2,runtime,0.386199
4,4,vote_count,0.380825
8,8,Drama,0.302012
0,0,popularity,0.288189
...,...,...,...
37,37,Michael Douglas,0.004245
21,21,George Clooney,0.003851
88,88,Universal Pictures,0.003340
38,38,Nicole Kidman,0.001078
