# Part 5 - Regression Model-Based Insights

## Loading the Data

In [2]:
# Load IPython's autoreload extension. Mode 2 re-imports all modules before each
# cell executes, so edits to imported helper code take effect without a kernel restart.
%load_ext autoreload
%autoreload 2
# `functions` is not a standard-library module — presumably this project's own
# helper module, aliased as `pf`; confirm against the repository.
import functions as pf

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_transformer, make_column_selector, ColumnTransformer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
# Seed NumPy's global random number generator so that anything drawing from
# np.random produces the same values on every run.
np.random.seed(321)

# Import statsmodels through its `api` namespace, aliased as `sm`.
import statsmodels.api as sm

In [4]:
# Display settings for the rest of the notebook.
# Let pandas render up to 100 columns before truncating wide frames.
pd.set_option('display.max_columns',100)
# Apply matplotlib's 'fivethirtyeight' style sheet to all subsequent figures.
plt.style.use('fivethirtyeight')

### Finding & Loading Batches of Files with `os.listdir`

In [5]:
# Folder that holds the compressed CSV extracts used by this notebook.
# (`os` is imported with the other dependencies in the import cell at the top.)
FOLDER = 'Data/'

# List every entry in the data folder in alphabetical order, then display the
# list so the available files can be checked before loading one.
file_list = sorted(os.listdir(FOLDER))
file_list

['combined_tmdb_api_data.csv.gz',
 'final_tmdb_data_2000.csv.gz',
 'final_tmdb_data_2001.csv.gz',
 'final_tmdb_data_2002.csv.gz',
 'final_tmdb_data_2003.csv.gz',
 'final_tmdb_data_2004.csv.gz',
 'final_tmdb_data_2005.csv.gz',
 'final_tmdb_data_2006.csv.gz',
 'final_tmdb_data_2007.csv.gz',
 'final_tmdb_data_2008.csv.gz',
 'final_tmdb_data_2009.csv.gz',
 'final_tmdb_data_2010.csv.gz',
 'final_tmdb_data_2011.csv.gz',
 'final_tmdb_data_2012.csv.gz',
 'final_tmdb_data_2013.csv.gz',
 'final_tmdb_data_2014.csv.gz',
 'final_tmdb_data_2015.csv.gz',
 'final_tmdb_data_2016.csv.gz',
 'final_tmdb_data_2017.csv.gz',
 'final_tmdb_data_2018.csv.gz',
 'final_tmdb_data_2019.csv.gz',
 'final_tmdb_data_2020.csv.gz',
 'final_tmdb_data_2021.csv.gz',
 'final_tmdb_data_2022.csv.gz',
 'title_akas_cleaned.csv.gz',
 'title_basics_cleaned.csv.gz',
 'title_ratings_cleaned.csv.gz']

In [6]:
# Build the path to the combined TMDB API data file inside FOLDER, read it into
# a DataFrame, and display it.
# NOTE(review): the rendered output shows an 'Unnamed: 0' column, which looks like
# a saved index re-read as data — consider index_col=0 if later cells do not rely on it.
combined_path = FOLDER + 'combined_tmdb_api_data.csv.gz'
df = pd.read_csv(combined_path)
df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification,Release Year
0,False,/vMFs7nw6P0bIV1jDsQpxAieAVnH.jpg,,10000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10402, '...",,62127,tt0113026,en,The Fantasticks,Two rural teens sing and dance their way throu...,2.289,/hfO64mXz3DgUxkBVU7no2UWRP7x.jpg,"[{'id': 60, 'logo_path': '/2eqFolQI0NLL7ExZts5...","[{'iso_3166_1': 'US', 'name': 'United States o...",2000-09-22,0,86,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Try to remember the first time magic happened,The Fantasticks,False,5.500,22,,2000.0
1,False,,,0,"[{'id': 878, 'name': 'Science Fiction'}]",,110977,tt0113092,en,For the Cause,Earth is in a state of constant war and two co...,3.133,/h9bWO13nWRGZJo4XVPiElXyrRMU.jpg,"[{'id': 925, 'logo_path': '/dIb9hjXNOkgxu4kBWd...","[{'iso_3166_1': 'US', 'name': 'United States o...",2000-11-15,0,100,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The ultimate showdown on a forbidden planet.,For the Cause,False,5.100,8,,2000.0
2,False,,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",,442869,tt0116391,hi,Gang,"After falling prey to underworld, four friends...",1.091,/yB5wRu4uyXXwZA3PEj8cITu0xt3.jpg,[],"[{'iso_3166_1': 'IN', 'name': 'India'}]",2000-04-14,0,152,"[{'english_name': 'Hindi', 'iso_639_1': 'hi', ...",Released,,Gang,False,0.000,0,,2000.0
3,False,/n4GJFGzsc7NinI1VeGDXIcQjtU2.jpg,,150000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",http://www.wkw-inthemoodforlove.com/,843,tt0118694,cn,花樣年華,"Hong Kong, 1962: Chow Mo-Wan and Su Li-Zhen mo...",22.892,/iYypPT4bhqXfq1b6EnmxvRt6b2Y.jpg,"[{'id': 539, 'logo_path': None, 'name': 'Block...","[{'iso_3166_1': 'HK', 'name': 'Hong Kong'}]",2000-09-29,12854953,99,"[{'english_name': 'Cantonese', 'iso_639_1': 'c...",Released,"Feel the heat, keep the feeling burning, let t...",In the Mood for Love,False,8.103,1948,PG,2000.0
4,False,,,0,"[{'id': 18, 'name': 'Drama'}]",,49511,tt0118852,en,Chinese Coffee,"When Harry Levine, an aging, unsuccessful Gree...",3.913,/nZGWnSuf1FIuzyEuMRZHHZWViAp.jpg,"[{'id': 1596, 'logo_path': None, 'name': 'Shoo...","[{'iso_3166_1': 'US', 'name': 'United States o...",2000-09-02,0,99,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,There's a fine line between friendship and bet...,Chinese Coffee,False,6.900,46,R,2000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62379,False,/8tyq1kXM3YQmu3obW6LxWm5TVRO.jpg,,0,"[{'id': 28, 'name': 'Action'}, {'id': 36, 'nam...",,605153,tt9851854,te,మేజర్,Based on the life of real-life Hero Major Sand...,19.029,/sJOfJuyQVZPwNQ8g21Qv0lojQhC.jpg,"[{'id': 69124, 'logo_path': None, 'name': 'G. ...","[{'iso_3166_1': 'IN', 'name': 'India'}]",2022-06-03,0,149,"[{'english_name': 'Telugu', 'iso_639_1': 'te',...",Released,Jaan Doonga Desh Nahi,Major,False,8.233,15,,2022.0
62380,False,,,0,"[{'id': 80, 'name': 'Crime'}]",,969840,tt9854058,en,Shadows,A young low-level drug dealer is reunited with...,0.600,/2HaAOGM1EmiSwsJrdq1RNhYehce.jpg,[],[],2022-05-13,0,101,[],Released,Family Is The Last Line Of Defense,Shadows,False,0.000,0,,2022.0
62381,False,,,0,"[{'id': 80, 'name': 'Crime'}, {'id': 10749, 'n...",,796955,tt9893158,en,Clowning,"With his girlfriend pregnant, Dante, a pacifis...",3.136,/xppIANX9DQoRYg3FlNCifDYuFwP.jpg,"[{'id': 109533, 'logo_path': '/xtQJYJg54jp5QVS...","[{'iso_3166_1': 'US', 'name': 'United States o...",2022-03-13,0,96,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Warm nights... Feels like death in the sand du...,Clowning,False,8.000,1,,2022.0
62382,False,/jX5XGqJUTzvpta2RjcX6pMZqxk5.jpg,,0,"[{'id': 53, 'name': 'Thriller'}, {'id': 80, 'n...",,606303,tt9893160,en,No Way Out,"Nick, a talented photographer who is new to Lo...",18.247,/df9pAqtYzM40llo9Joxy2ftqSrP.jpg,"[{'id': 13238, 'logo_path': '/kDNZz8imH866Mezx...","[{'iso_3166_1': 'US', 'name': 'United States o...",2022-08-12,0,89,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Find what you love and let it kill you.,No Way Out,False,3.000,3,,2022.0
