# Setup

In [None]:
# this mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


FOLDERNAME = 'cs229_proj/'


assert FOLDERNAME is not None, "[!] Enter the foldername."

# now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

%cd drive/My\ Drive/$FOLDERNAME/

Mounted at /content/drive
/content/drive/My Drive/cs229_proj


In [None]:
# Sanity check: print the current working directory and list its contents.
!pwd
!ls

In [None]:
from IPython.display import Image
import json
import random
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math

import torch
from torch import nn

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
from sklearn.impute import SimpleImputer

# Load Files

In [None]:
# Directory on the mounted Drive that holds every input CSV. Defined once so
# relocating the project only requires changing this line.
DATA_DIR = '/content/drive/My Drive/cs229_proj'

movies_full_path = '{}/movies_full.csv'.format(DATA_DIR)
user_ratings_full_path = '{}/user_ratings_full.csv'.format(DATA_DIR)
movies_embeddings_full_path = '{}/movies_embeddings_full.csv'.format(DATA_DIR)
movies_svd_path = '{}/movies_svd_full.csv'.format(DATA_DIR)

# Read each table into its own DataFrame.
# NOTE(review): movies_svd_df is rebuilt from movies_full_df in the
# "SVD of Embeddings" section further down, which replaces the frame loaded
# here when that section is executed.
movies_full_df = pd.read_csv(movies_full_path)
ratings_full_df = pd.read_csv(user_ratings_full_path)
movies_embeddings_df = pd.read_csv(movies_embeddings_full_path)
movies_svd_df = pd.read_csv(movies_svd_path)

In [None]:
# Report (rows, columns) for each loaded table, in the order they were read.
for loaded_frame in (movies_full_df, ratings_full_df, movies_embeddings_df, movies_svd_df):
    print(loaded_frame.shape)

In [None]:
# print(movies_full_df.columns)

# Column names, assembled from thematic groups. The ten columns named
# '1' through '10' are generated rather than typed out.
# NOTE(review): presumably copied from movies_full_df.columns — confirm it is
# still in sync with the CSV.
all_columns = (
    ['movie_id', 'poster_path', 'title', 'year']
    + [str(n) for n in range(1, 11)]
    + ['tmdb_budget', 'imdb_budget', 'tmdb_revenue', 'imdb_revenue']
    + ['tmdb_vote_average', 'tmdb_vote_count', 'imdb_vote_average', 'imdb_vote_count']
    + ['tmdb_popularity', 'tmdb_runtime', 'imdb_runtime']
    + ['main_genre', 'genres', 'director', 'writer', 'main_actor']
    + ['mpaa_rating', 'overview', 'tagline', 'keywords', 'release_date']
    + ['main_prod_company', 'production_companies', 'country', 'production_countries']
    + ['original_language', 'spoken_languages']
    + ['rt_info', 'critics_consensus', 'actors']
    + ['tm_status', 'tm_rating', 'tm_count']
    + ['audience_status', 'audience_rating', 'audience_count']
    + ['tm_top_critics_count', 'tm_fresh_critics_count', 'tm_rotten_critics_count']
    + ['ebert_rating', 'boxd_vote_average']
)
print(len(all_columns))

In [None]:
# Load the image embeddings from JSON. The cells below read the result as a
# dict keyed by movie id whose values hold a 'resnet' list and a 'clip' list.
embeddings_file_path = '/content/drive/My Drive/cs229_proj/embeddings.json'

# Decode explicitly as UTF-8 instead of relying on the platform's default
# text encoding, so the file parses the same way on every machine.
with open(embeddings_file_path, 'r', encoding='utf-8') as fh:
    embeddings_dict = json.load(fh)

# Data Preprocessing

## Load Embeddings into Dataframe

In [None]:
# Number of values per movie in the ResNet and CLIP embedding vectors.
resnet_ct, clip_ct = 1000, 512

In [None]:
# Build movies_embeddings_full_df: a deep copy of the movie table followed by
# one zero-filled float column per embedding value ('resnet-1'..'resnet-N',
# then 'clip-1'..'clip-M').
#
# The zero columns are created as a single block and attached with one
# concat. Inserting 1512 columns one at a time fragments the frame and makes
# pandas emit a PerformanceWarning.
embedding_col_names = (
    ['resnet-' + str(i) for i in range(1, resnet_ct + 1)]
    + ['clip-' + str(i) for i in range(1, clip_ct + 1)]
)

movies_embeddings_full_df = movies_full_df.copy(True)

# If the source table already has a column with one of these names, reset it
# in place (keeps its position) rather than adding a duplicate name.
preexisting_cols = [name for name in embedding_col_names
                    if name in movies_embeddings_full_df.columns]
if preexisting_cols:
    movies_embeddings_full_df[preexisting_cols] = 0.0

zero_block = pd.DataFrame(
    0.0,
    index=movies_embeddings_full_df.index,
    columns=[name for name in embedding_col_names if name not in preexisting_cols],
)
movies_embeddings_full_df = pd.concat([movies_embeddings_full_df, zero_block], axis=1)

In [None]:
# Copy every movie's ResNet and CLIP vectors from embeddings_dict into the
# 'resnet-*' / 'clip-*' columns of movies_embeddings_full_df.
#
# This is done with one vectorised lookup and one assignment. The previous
# per-value loop ran a boolean-mask `.loc` assignment for each of the
# resnet_ct + clip_ct values of each movie, i.e. millions of full-column scans.
# Rows whose movie_id has no entry in embeddings_dict are left unchanged.
feature_cols = (
    ['resnet-' + str(i) for i in range(1, resnet_ct + 1)]
    + ['clip-' + str(i) for i in range(1, clip_ct + 1)]
)

# Fail loudly on a truncated vector instead of silently padding with NaN.
for key, vectors in embeddings_dict.items():
    if len(vectors['resnet']) < resnet_ct or len(vectors['clip']) < clip_ct:
        raise IndexError(
            'embedding for {!r} is shorter than expected '
            '(need {} resnet / {} clip values)'.format(key, resnet_ct, clip_ct)
        )

filled_rows = 0
if embeddings_dict:
    # One row per embedded movie, indexed by the dictionary key.
    embedding_table = pd.DataFrame(
        np.array(
            [
                list(vectors['resnet'][:resnet_ct]) + list(vectors['clip'][:clip_ct])
                for vectors in embeddings_dict.values()
            ],
            dtype=float,
        ),
        index=list(embeddings_dict.keys()),
        columns=feature_cols,
    )

    movie_ids = movies_embeddings_full_df['movie_id']
    has_embedding = movie_ids.isin(embedding_table.index).to_numpy()
    filled_rows = int(has_embedding.sum())
    if filled_rows:
        # reindex() lines the vectors up with the frame's row order; a
        # movie_id appearing on several rows receives the same vector on each.
        movies_embeddings_full_df.loc[has_embedding, feature_cols] = (
            embedding_table.reindex(movie_ids[has_embedding]).to_numpy()
        )

# `count` keeps its previous meaning: number of dictionary entries processed.
count = len(embeddings_dict)
print('{} embeddings processed, {} of {} rows filled'.format(
    count, filled_rows, len(movies_embeddings_full_df)))

50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200


In [None]:
# Preview the frame after the embedding columns have been filled.
# NOTE(review): in the saved output only the first displayed row has non-zero
# clip-* values; confirm the fill cell had finished when this was captured.
movies_embeddings_full_df

Unnamed: 0,movie_id,poster_path,title,year,1,2,3,4,5,6,7,8,9,10,tmdb_budget,imdb_budget,tmdb_revenue,imdb_revenue,tmdb_vote_average,tmdb_vote_count,imdb_vote_average,imdb_vote_count,tmdb_popularity,tmdb_runtime,imdb_runtime,main_genre,genres,director,writer,main_actor,mpaa_rating,overview,tagline,keywords,release_date,main_prod_company,production_companies,country,production_countries,original_language,...,clip-473,clip-474,clip-475,clip-476,clip-477,clip-478,clip-479,clip-480,clip-481,clip-482,clip-483,clip-484,clip-485,clip-486,clip-487,clip-488,clip-489,clip-490,clip-491,clip-492,clip-493,clip-494,clip-495,clip-496,clip-497,clip-498,clip-499,clip-500,clip-501,clip-502,clip-503,clip-504,clip-505,clip-506,clip-507,clip-508,clip-509,clip-510,clip-511,clip-512
0,pulp-fiction,1,Pulp Fiction,1994,1660,3821,1626,10863,7688,51895,47635,192128,117437,313059,8000000.0,8000000.0,2.139288e+08,2.139288e+08,8.3,8428.0,8.9,1900000.0,121.463076,154.0,154.0,Crime,"[""Thriller"", ""Crime""]",Quentin Tarantino,Quentin Tarantino,John Travolta,R,"A burger-loving hit man, his philosophical par...",Just because you are a character doesn't mean ...,"[""transporter"", ""brothel"", ""drug dealer"", ""box...",1994-10-08,Miramax,"[""Miramax Films"", ""A Band Apart"", ""Jersey Films""]",United States,"[""United States of America""]",en,...,0.048368,-0.555454,-0.103602,0.129186,-0.028184,0.538109,-0.093995,-0.167798,-0.248025,-0.395872,0.239344,0.093756,-0.200428,0.361666,0.032376,-0.092,-0.291351,0.093222,0.229836,-0.179129,0.288823,0.416872,-0.228312,0.202375,0.659008,-0.239633,0.043003,-0.543179,-0.282646,0.09312,0.228052,-0.134189,0.144366,-0.082164,0.494361,0.502388,-0.065403,0.056428,-0.170295,0.265028
1,inception,1,Inception,2010,1250,3550,2063,13735,11410,71166,69034,232098,117643,245070,160000000.0,160000000.0,8.255328e+08,8.368370e+08,8.1,13752.0,8.8,2100000.0,167.583710,148.0,148.0,Action,"[""Action"", ""Thriller"", ""Science Fiction"", ""Mys...",Christopher Nolan,Christopher Nolan,Leonardo DiCaprio,PG-13,"Cobb, a skilled thief who commits corporate es...",Your mind is the scene of the crime.,"[""loss of lover"", ""dream"", ""kidnapping"", ""slee...",2010-07-14,Warner Bros.,"[""Legendary Pictures"", ""Warner Bros."", ""Syncopy""]",United States,"[""United Kingdom"", ""United States of America""]",en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,fight-club,1,Fight Club,1999,1069,2709,1501,9343,7963,50079,53178,191481,110863,240121,63000000.0,63000000.0,1.008538e+08,1.012097e+08,8.3,9413.0,8.8,1900000.0,146.757391,139.0,139.0,Drama,"[""Drama""]",David Fincher,Chuck Palahniuk,Brad Pitt,R,A ticking-time-bomb insomniac and a slippery s...,Mischief. Mayhem. Soap.,"[""support group"", ""dual identity"", ""nihilism"",...",1999-10-15,Fox 2000 Pictures,"[""Regency Enterprises"", ""Fox 2000 Pictures"", ""...",Germany,"[""Germany"", ""United States of America""]",en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,the-dark-knight,1,The Dark Knight,2008,757,1771,962,6094,5548,37917,40828,175350,125042,335098,185000000.0,185000000.0,1.004558e+09,1.005974e+09,8.2,12002.0,9.0,2400000.0,187.322927,152.0,152.0,Action,"[""Drama"", ""Action"", ""Crime"", ""Thriller""]",Christopher Nolan,Jonathan Nolan,Christian Bale,PG-13,Batman raises the stakes in his war on crime. ...,Why So Serious?,"[""dc comics"", ""crime fighter"", ""secret identit...",2008-07-16,Warner Bros.,"[""DC Comics"", ""Legendary Pictures"", ""Warner Br...",United States,"[""United Kingdom"", ""United States of America""]",en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,the-grand-budapest-hotel,1,The Grand Budapest Hotel,2014,624,2238,1081,8321,6965,47211,52375,182761,101769,173334,30000000.0,25000000.0,1.746003e+08,1.729458e+08,8.0,4519.0,8.1,734000.0,74.417456,99.0,99.0,Adventure,"[""Comedy"", ""Drama""]",Wes Anderson,Stefan Zweig,Ralph Fiennes,R,The Grand Budapest Hotel tells of a legendary ...,A perfect holiday without leaving home.,"[""hotel"", ""painting"", ""wartime"", ""gunfight"", ""...",2014-02-26,Fox Searchlight Pictures,"[""Fox Searchlight Pictures"", ""Scott Rudin Prod...",United States,"[""United Kingdom"", ""United States of America"",...",en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2215,school-daze,100,School Daze,1988,22,52,96,404,669,1555,1440,1126,231,167,0.0,6500000.0,0.000000e+00,1.454584e+07,6.2,26.0,6.0,6700.0,3.159574,121.0,121.0,Comedy,"[""Comedy"", ""Drama""]",Spike Lee,Spike Lee,Laurence Fishburne,R,In the South of the United States are taking p...,,"[""musical""]",1988-02-12,Columbia Pictures,"[""Columbia Pictures"", ""40 Acres & A Mule Filmw...",United States,"[""United States of America""]",en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2216,trees-lounge,100,Trees Lounge,1996,3,24,46,166,350,1403,1867,1791,381,316,0.0,1300000.0,0.000000e+00,7.497410e+05,6.7,51.0,7.1,14000.0,3.443916,95.0,95.0,Comedy,"[""Comedy"", ""Drama""]",Steve Buscemi,Steve Buscemi,Steve Buscemi,R,"Tommy has lost his job, his love and his life....",,"[""bar"", ""alcoholism"", ""independent film"", ""dri...",1996-10-11,Addis Wechsler Pictures,[],United States,[],en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2217,kill-the-messenger,100,Kill the Messenger,2014,12,43,84,480,895,2703,2354,1204,185,112,5000000.0,,2.450846e+06,5.218973e+06,6.6,355.0,6.9,45000.0,21.343093,112.0,112.0,Biography,"[""Thriller"", ""Crime"", ""Drama"", ""Mystery""]",Michael Cuesta,Peter Landesman,Jeremy Renner,R,A reporter becomes the target of a vicious sme...,Can you keep a national secret?,"[""biography""]",2014-10-09,Sierra / Affinity,"[""The Combine"", ""Bluegrass Films""]",United States,"[""United States of America""]",en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2218,my-name-is-khan,100,My Name Is Khan,2010,104,149,128,434,599,1885,1930,2827,847,1278,12000000.0,12000000.0,4.234536e+07,4.234536e+07,7.7,232.0,8.0,101000.0,14.719886,145.0,165.0,Drama,"[""Drama"", ""Romance""]",Karan Johar,Shibani Bathija,Shah Rukh Khan,PG-13,"Rizwan Khan, a Muslim from the Borivali sectio...",I am Muslim and i am not a terrorist,"[""bollywood""]",2010-02-12,Dharma Productions,"[""Fox Searchlight Pictures"", ""Red Chillies Ent...",India,"[""India"", ""United States of America""]",en,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


## SVD of Embeddings into Dataframe (Dimensionality Reduction)

In [None]:
# Embedding sizes, restated here so this section can run on its own.
resnet_ct, clip_ct = 1000, 512

# Names of the embedding columns: 'resnet-1'..'resnet-1000' and
# 'clip-1'..'clip-512'.
resnet_cols = list(map('resnet-{}'.format, range(1, resnet_ct + 1)))
clip_cols = list(map('clip-{}'.format, range(1, clip_ct + 1)))

In [None]:
# Pull the two groups of embedding columns out of the pre-computed table as
# plain NumPy arrays (one row per movie).
resnet_embeddings, clip_embeddings = (
    movies_embeddings_df[column_group].to_numpy()
    for column_group in (resnet_cols, clip_cols)
)

In [None]:
# Show (rows, columns) of each embedding matrix.
for embedding_matrix in (resnet_embeddings, clip_embeddings):
    print(embedding_matrix.shape)

(2220, 1000)
(2220, 512)


In [None]:
# Reduce the ResNet embeddings (1000 columns) to 25 SVD components.
# Target: the kept components should explain at least 60% of the variance.
from sklearn.decomposition import TruncatedSVD

resnet_svd_options = dict(n_components=25, n_iter=100, random_state=42)
svd = TruncatedSVD(**resnet_svd_options)
resnet_embeddings_transformed = svd.fit_transform(resnet_embeddings)
print(resnet_embeddings_transformed)

# Fraction of total variance retained by the kept components. Further
# diagnostics are available on the fitted model as
# svd.explained_variance_ratio_ and svd.singular_values_.
resnet_variance_kept = svd.explained_variance_ratio_.sum()
print(resnet_variance_kept)

[[ 52.28808902 -25.57044348   0.15399569 ...   4.08556302  12.36022076
    5.58902851]
 [ 43.04803563  35.18070113 -14.85086376 ...   0.21128375   5.58805193
   -4.04673102]
 [ 69.41883401 -37.20061374   3.60528657 ...   3.03088635   7.0809814
    1.43086712]
 ...
 [ 65.70805298 -12.96662221  -9.50243983 ...  -2.85175954  -8.43684706
  -13.91063351]
 [ 50.52344672   1.33055999   7.51365911 ...   3.91074325   6.10079248
   -1.47419657]
 [ 45.97288235  10.74540617  23.20759677 ...  -1.56207745   7.48384752
    1.74138037]]
0.6691681198326397


In [None]:
# Reduce the CLIP embeddings (512 columns) to 100 SVD components.
clip_svd_options = dict(n_components=100, n_iter=100, random_state=42)
svd2 = TruncatedSVD(**clip_svd_options)
clip_embeddings_transformed = svd2.fit_transform(clip_embeddings)
print(clip_embeddings_transformed)

# Fraction of total variance retained; at least 60% is the recommended
# minimum. Further diagnostics are available on the fitted model as
# svd2.explained_variance_ratio_ and svd2.singular_values_.
clip_variance_kept = svd2.explained_variance_ratio_.sum()
print(clip_variance_kept)

[[ 8.26362533  1.69392484  0.96080328 ... -0.2726456  -0.39446704
   0.05506076]
 [ 6.91250991 -0.46618331 -0.24693094 ...  0.42217368  0.21047207
   0.01120105]
 [ 8.07872654  1.12909548 -0.46852469 ... -0.52249784 -0.115079
  -0.4029122 ]
 ...
 [ 6.48693263 -0.21836686  0.04745073 ...  0.36355663  0.4676413
  -0.1576065 ]
 [ 6.45334838  1.03573776  0.78064658 ...  0.35844601 -0.22555962
   0.09843524]
 [ 4.51857376  0.52433771  0.45147635 ... -0.12204195  0.18556176
   0.34862041]]
0.6849507183922126


In [None]:
# Build movies_svd_df: a deep copy of the movie table followed by zero-filled
# float columns for the reduced embeddings ('resnet-svd-1'..'resnet-svd-25',
# then 'clip-svd-1'..'clip-svd-100'). This replaces any movies_svd_df that
# was defined earlier in the notebook.
#
# The zero columns are attached as one block with a single concat. Inserting
# 125 columns one at a time fragments the frame and makes pandas emit a
# PerformanceWarning.
svd_feature_names = (
    ['resnet-svd-' + str(i) for i in range(1, 26)]
    + ['clip-svd-' + str(i) for i in range(1, 101)]
)

movies_svd_df = movies_full_df.copy(True)

# If the source table already has a column with one of these names, reset it
# in place (keeps its position) rather than adding a duplicate name.
preexisting_svd_cols = [name for name in svd_feature_names
                        if name in movies_svd_df.columns]
if preexisting_svd_cols:
    movies_svd_df[preexisting_svd_cols] = 0.0

svd_zero_block = pd.DataFrame(
    0.0,
    index=movies_svd_df.index,
    columns=[name for name in svd_feature_names if name not in preexisting_svd_cols],
)
movies_svd_df = pd.concat([movies_svd_df, svd_zero_block], axis=1)

In [None]:
# Movie ids in the row order of movies_svd_df, as a plain Python list.
svd_keys = movies_svd_df.loc[:, 'movie_id'].tolist()

In [None]:
# Write the SVD-reduced embeddings into the 'resnet-svd-*' / 'clip-svd-*'
# columns of movies_svd_df.
#
# Row j of each transformed matrix goes to row j of movies_svd_df (the same
# positional pairing the per-row loop used), but as two block assignments
# instead of 125 boolean-mask `.loc` writes per movie. Unlike the loop, rows
# that share a movie_id each keep the vector at their own position.
import warnings

row_total = len(svd_keys)
if (len(resnet_embeddings_transformed) < row_total
        or len(clip_embeddings_transformed) < row_total):
    raise IndexError(
        'fewer SVD rows ({} resnet / {} clip) than movies ({})'.format(
            len(resnet_embeddings_transformed),
            len(clip_embeddings_transformed),
            row_total,
        )
    )

# The SVD matrices follow the row order of movies_embeddings_df, while the
# target frame follows movies_full_df. Positional pairing is only correct if
# both list the movies in the same order, so flag any mismatch.
# NOTE(review): assumes movies_embeddings_df carries a 'movie_id' column;
# the check is skipped when it does not.
if 'movie_id' in movies_embeddings_df.columns:
    if movies_embeddings_df['movie_id'].to_list()[:row_total] != svd_keys:
        warnings.warn(
            'movie_id order differs between movies_embeddings_df and '
            'movies_svd_df; SVD features may be attached to the wrong movies'
        )

resnet_svd_cols = ['resnet-svd-' + str(i + 1) for i in range(25)]
clip_svd_cols = ['clip-svd-' + str(i + 1) for i in range(100)]

movies_svd_df[resnet_svd_cols] = resnet_embeddings_transformed[:row_total, :25]
movies_svd_df[clip_svd_cols] = clip_embeddings_transformed[:row_total, :100]

# `count` keeps its previous meaning: number of movies processed.
count = row_total
print(count)

50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200


In [None]:
# Writing the result to disk is disabled.
# NOTE(review): presumably this produced the movies_svd_full.csv that the
# "Load Files" section reads — confirm before re-enabling.
# movies_svd_df.to_csv('movies_svd_full.csv', index=False)

In [None]:
# Preview the movie table with the SVD feature columns attached.
movies_svd_df

Unnamed: 0,movie_id,poster_path,title,year,1,2,3,4,5,6,7,8,9,10,tmdb_budget,imdb_budget,tmdb_revenue,imdb_revenue,tmdb_vote_average,tmdb_vote_count,imdb_vote_average,imdb_vote_count,tmdb_popularity,tmdb_runtime,imdb_runtime,main_genre,genres,director,writer,main_actor,mpaa_rating,overview,tagline,keywords,release_date,main_prod_company,production_companies,country,production_countries,original_language,...,clip-svd-61,clip-svd-62,clip-svd-63,clip-svd-64,clip-svd-65,clip-svd-66,clip-svd-67,clip-svd-68,clip-svd-69,clip-svd-70,clip-svd-71,clip-svd-72,clip-svd-73,clip-svd-74,clip-svd-75,clip-svd-76,clip-svd-77,clip-svd-78,clip-svd-79,clip-svd-80,clip-svd-81,clip-svd-82,clip-svd-83,clip-svd-84,clip-svd-85,clip-svd-86,clip-svd-87,clip-svd-88,clip-svd-89,clip-svd-90,clip-svd-91,clip-svd-92,clip-svd-93,clip-svd-94,clip-svd-95,clip-svd-96,clip-svd-97,clip-svd-98,clip-svd-99,clip-svd-100
0,pulp-fiction,1,Pulp Fiction,1994,1660,3821,1626,10863,7688,51895,47635,192128,117437,313059,8000000.0,8000000.0,2.139288e+08,2.139288e+08,8.3,8428.0,8.9,1900000.0,121.463076,154.0,154.0,Crime,"[""Thriller"", ""Crime""]",Quentin Tarantino,Quentin Tarantino,John Travolta,R,"A burger-loving hit man, his philosophical par...",Just because you are a character doesn't mean ...,"[""transporter"", ""brothel"", ""drug dealer"", ""box...",1994-10-08,Miramax,"[""Miramax Films"", ""A Band Apart"", ""Jersey Films""]",United States,"[""United States of America""]",en,...,0.106207,0.046117,0.415922,-0.073585,0.545229,0.118201,0.634495,0.142964,-0.115012,0.219434,0.125325,0.080485,-0.429419,0.027908,0.073935,-0.304804,-0.288261,-0.200714,0.208492,0.779725,-0.800836,0.034438,0.288412,0.229675,-0.335248,-0.288394,0.245033,0.511130,0.520081,-0.349292,0.365304,0.132131,-0.140956,-0.362418,-0.551428,-0.002636,-0.274965,-0.272646,-0.394467,0.055061
1,inception,1,Inception,2010,1250,3550,2063,13735,11410,71166,69034,232098,117643,245070,160000000.0,160000000.0,8.255328e+08,8.368370e+08,8.1,13752.0,8.8,2100000.0,167.583710,148.0,148.0,Action,"[""Action"", ""Thriller"", ""Science Fiction"", ""Mys...",Christopher Nolan,Christopher Nolan,Leonardo DiCaprio,PG-13,"Cobb, a skilled thief who commits corporate es...",Your mind is the scene of the crime.,"[""loss of lover"", ""dream"", ""kidnapping"", ""slee...",2010-07-14,Warner Bros.,"[""Legendary Pictures"", ""Warner Bros."", ""Syncopy""]",United States,"[""United Kingdom"", ""United States of America""]",en,...,-0.119137,0.262049,0.287175,0.012482,-0.082011,-0.160348,0.578488,0.387897,0.001924,-0.520276,-0.217770,0.289944,0.267479,-0.461354,0.341671,-0.234852,-1.557203,-0.340024,0.193612,-0.766906,-0.100503,-0.485344,0.012208,0.282387,-0.694765,0.697771,0.398297,-0.153925,0.667748,0.260768,0.498085,0.447739,-0.283164,-0.207072,0.168721,0.372364,-0.018718,0.422174,0.210472,0.011201
2,fight-club,1,Fight Club,1999,1069,2709,1501,9343,7963,50079,53178,191481,110863,240121,63000000.0,63000000.0,1.008538e+08,1.012097e+08,8.3,9413.0,8.8,1900000.0,146.757391,139.0,139.0,Drama,"[""Drama""]",David Fincher,Chuck Palahniuk,Brad Pitt,R,A ticking-time-bomb insomniac and a slippery s...,Mischief. Mayhem. Soap.,"[""support group"", ""dual identity"", ""nihilism"",...",1999-10-15,Fox 2000 Pictures,"[""Regency Enterprises"", ""Fox 2000 Pictures"", ""...",Germany,"[""Germany"", ""United States of America""]",en,...,0.178715,-0.074550,0.667895,-0.118228,0.573886,-0.159035,-0.000485,-0.154453,0.524943,-0.093771,0.257389,0.012262,-0.038924,-0.116921,-0.310949,-0.139076,0.089106,0.052424,-0.306406,-0.185601,-0.367458,0.308373,0.067917,0.131446,0.096693,0.273524,0.363829,0.421742,0.080194,-0.370569,0.086160,-0.030020,0.721685,0.222751,-0.269235,0.597388,0.142464,-0.522498,-0.115079,-0.402912
3,the-dark-knight,1,The Dark Knight,2008,757,1771,962,6094,5548,37917,40828,175350,125042,335098,185000000.0,185000000.0,1.004558e+09,1.005974e+09,8.2,12002.0,9.0,2400000.0,187.322927,152.0,152.0,Action,"[""Drama"", ""Action"", ""Crime"", ""Thriller""]",Christopher Nolan,Jonathan Nolan,Christian Bale,PG-13,Batman raises the stakes in his war on crime. ...,Why So Serious?,"[""dc comics"", ""crime fighter"", ""secret identit...",2008-07-16,Warner Bros.,"[""DC Comics"", ""Legendary Pictures"", ""Warner Br...",United States,"[""United Kingdom"", ""United States of America""]",en,...,0.131434,-0.313347,0.232748,0.064601,-0.004624,0.003423,0.204058,-0.509620,-0.255689,0.588832,-0.184298,0.010108,0.027666,-0.197460,-0.455834,-0.065865,0.051267,0.000701,-0.033666,-0.312280,0.322554,-0.301058,-0.153141,-0.039826,-0.378111,-0.119824,0.019732,-0.196616,-0.062102,0.216959,0.438930,0.545922,0.374288,0.604819,-0.065131,0.177041,0.411836,0.147754,-0.001075,-0.153692
4,the-grand-budapest-hotel,1,The Grand Budapest Hotel,2014,624,2238,1081,8321,6965,47211,52375,182761,101769,173334,30000000.0,25000000.0,1.746003e+08,1.729458e+08,8.0,4519.0,8.1,734000.0,74.417456,99.0,99.0,Adventure,"[""Comedy"", ""Drama""]",Wes Anderson,Stefan Zweig,Ralph Fiennes,R,The Grand Budapest Hotel tells of a legendary ...,A perfect holiday without leaving home.,"[""hotel"", ""painting"", ""wartime"", ""gunfight"", ""...",2014-02-26,Fox Searchlight Pictures,"[""Fox Searchlight Pictures"", ""Scott Rudin Prod...",United States,"[""United Kingdom"", ""United States of America"",...",en,...,0.088443,-0.140682,-0.185224,-0.088992,0.036646,0.088285,-0.479279,0.509677,-0.323339,0.000741,0.315331,0.253137,-0.277143,-0.571560,-0.026098,-0.040029,0.515814,-0.371983,0.567698,0.702346,0.156191,-0.266082,-0.048418,-0.618173,-0.008863,0.590116,-0.132343,-0.567959,0.940719,-0.809299,0.364001,0.244867,0.536509,-0.180361,0.300403,0.155534,-1.017678,-0.397475,0.009754,0.694310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2215,school-daze,100,School Daze,1988,22,52,96,404,669,1555,1440,1126,231,167,0.0,6500000.0,0.000000e+00,1.454584e+07,6.2,26.0,6.0,6700.0,3.159574,121.0,121.0,Comedy,"[""Comedy"", ""Drama""]",Spike Lee,Spike Lee,Laurence Fishburne,R,In the South of the United States are taking p...,,"[""musical""]",1988-02-12,Columbia Pictures,"[""Columbia Pictures"", ""40 Acres & A Mule Filmw...",United States,"[""United States of America""]",en,...,-0.282994,0.458820,-0.034171,-0.526553,0.493823,0.021989,-0.640333,-0.380053,-0.629586,0.511338,0.126358,0.348108,-0.300216,-0.390483,0.003360,-0.396817,0.537229,0.676412,-0.527100,0.186221,0.537350,0.286259,-0.242455,-0.232045,-0.686460,0.778026,-0.583061,0.828617,-0.070537,0.439831,0.048003,0.201090,-0.131657,-0.516819,-0.253814,-0.677629,-0.001575,-0.145851,0.814993,0.018891
2216,trees-lounge,100,Trees Lounge,1996,3,24,46,166,350,1403,1867,1791,381,316,0.0,1300000.0,0.000000e+00,7.497410e+05,6.7,51.0,7.1,14000.0,3.443916,95.0,95.0,Comedy,"[""Comedy"", ""Drama""]",Steve Buscemi,Steve Buscemi,Steve Buscemi,R,"Tommy has lost his job, his love and his life....",,"[""bar"", ""alcoholism"", ""independent film"", ""dri...",1996-10-11,Addis Wechsler Pictures,[],United States,[],en,...,-0.556028,0.040841,0.219007,0.131967,-0.045977,-0.204693,0.536678,0.153923,0.026530,-0.170860,0.142577,-0.402615,-1.457534,0.148657,-0.321875,0.303648,-0.327490,0.019046,-0.547699,0.442569,0.889082,-0.206024,0.659111,0.187016,-0.159576,-0.511253,-0.071060,0.732909,-0.493915,0.181119,-0.182171,0.219999,0.810385,-0.151117,0.308403,0.335515,0.290602,0.087276,0.262740,0.075948
2217,kill-the-messenger,100,Kill the Messenger,2014,12,43,84,480,895,2703,2354,1204,185,112,5000000.0,,2.450846e+06,5.218973e+06,6.6,355.0,6.9,45000.0,21.343093,112.0,112.0,Biography,"[""Thriller"", ""Crime"", ""Drama"", ""Mystery""]",Michael Cuesta,Peter Landesman,Jeremy Renner,R,A reporter becomes the target of a vicious sme...,Can you keep a national secret?,"[""biography""]",2014-10-09,Sierra / Affinity,"[""The Combine"", ""Bluegrass Films""]",United States,"[""United States of America""]",en,...,0.553700,-0.806222,-0.187884,0.669934,-0.164539,-0.103785,0.516265,-0.535839,0.081190,0.493245,-1.199746,-0.059159,-0.133984,0.027199,-0.115578,-0.065024,-0.131906,0.635331,-0.285237,-0.298702,-0.068700,0.493407,-0.212016,-0.396965,0.535082,0.586959,0.345196,0.298150,-0.573393,-0.762601,-0.307736,-0.227932,-0.079085,0.107209,0.292304,-0.241922,0.378515,0.363557,0.467641,-0.157606
2218,my-name-is-khan,100,My Name Is Khan,2010,104,149,128,434,599,1885,1930,2827,847,1278,12000000.0,12000000.0,4.234536e+07,4.234536e+07,7.7,232.0,8.0,101000.0,14.719886,145.0,165.0,Drama,"[""Drama"", ""Romance""]",Karan Johar,Shibani Bathija,Shah Rukh Khan,PG-13,"Rizwan Khan, a Muslim from the Borivali sectio...",I am Muslim and i am not a terrorist,"[""bollywood""]",2010-02-12,Dharma Productions,"[""Fox Searchlight Pictures"", ""Red Chillies Ent...",India,"[""India"", ""United States of America""]",en,...,0.670209,0.556839,-0.333246,-0.216289,-0.121223,-1.144446,0.513237,0.698719,0.358761,-0.313985,0.558233,0.013145,-0.767739,0.182163,-0.425216,-0.511711,0.299283,0.447937,0.369274,0.156356,-0.592719,-0.084202,0.429061,0.272175,-0.607780,0.015932,-0.099564,0.412872,-0.728951,-0.670670,-0.079187,0.697550,-0.263568,0.214034,-0.072620,-1.210892,-0.403599,0.358446,-0.225560,0.098435
