## Part 3

# movieStats


## Author:  Sheneka Allen

In [1]:
# Create a MySQL database for your customer.

# -Normalize the tables as best you can before adding them to your new database.
# -Keep all of the data from the TMDB API in 1 table together (even though it will not be perfectly normalized).  
# -Only keep the imdb_id, revenue, budget, and certification columns

In [2]:
# Transform Data

# -Normalize Genre:
# Convert the single string of genres from title basics into 2 new tables.

# *title_genres: with the columns:
# tconst
# genre_id

# *genres:
# genre_id
# genre_name

# -Discard unnecessary information:

# For the title basics table, drop the following columns:
# "originalTitle" (we will use the "primaryTitle" column instead)
# "isAdult" ("Adult" will show up in the genres so this is redundant information).
# "titleType" (every row will be a movie).
# "genres" and other variants of genre (genre is now represented in the 2 new tables described above).
# Do not include the title_akas table in your SQL database.
# You have already filtered out the desired movies using this table, and
# the remaining data is mostly nulls and not of interest to the stakeholder.

In [3]:
# MySQL Database Requirements

# -Use sqlalchemy with pandas to execute your SQL queries inside your notebook.

# -Create a new database on your MySQL server and call it "movies".

# -Make sure to have the following tables in your "movies" database:

# title_basics
# title_ratings
# title_genres
# genres
# tmdb_data

# -Make sure to set a Primary Key for each table that isn't a joiner table 
# (e.g. title_genres is a joiner table).

# After creating each table, show the first 5 rows of that table using a SQL query.

# Make sure to run the "SHOW TABLES" SQL query at the end of your notebook to show 
# that all required tables have been created.

In [4]:
# Import standard libraries and tools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pymysql
pymysql.install_as_MySQLdb()

#from urllib.parse import quote_plus as urlquote # must have for special char pwd
from sqlalchemy import create_engine
from sqlalchemy_utils import create_database, database_exists

import json

import tmdbsimple as tmdb
from tqdm.notebook import tqdm_notebook

In [5]:
# Load the combined TMDB API results from the .csv.gz file and preview the first rows.
# NOTE(review): the preview shows a first row with imdb_id 0 and no other values --
# presumably a placeholder record; confirm and handle it before loading into SQL.
tmdb_results_file = 'Data/tmdb_results_combined.csv.gz'
tmdb_results_combo = pd.read_csv(tmdb_results_file)
tmdb_results_combo.head(5)

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0015414,0.0,,,0.0,"[{'id': 18, 'name': 'Drama'}]",,607290.0,es,La tierra de los toros,...,0.0,53.0,"[{'english_name': 'No Language', 'iso_639_1': ...",Released,,The Land of the Bulls,0.0,0.0,0.0,
2,tt0113026,0.0,/vMFs7nw6P0bIV1jDsQpxAieAVnH.jpg,,10000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10402, '...",,62127.0,en,The Fantasticks,...,0.0,86.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Try to remember the first time magic happened,The Fantasticks,0.0,5.5,22.0,
3,tt0113086,0.0,,,0.0,[],,612666.0,en,Florentino y el diablo,...,0.0,90.0,[],Released,,Florentino y el diablo,0.0,0.0,0.0,
4,tt0113092,0.0,,,0.0,"[{'id': 878, 'name': 'Science Fiction'}]",,110977.0,en,For the Cause,...,0.0,100.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The ultimate showdown on a forbidden planet.,For the Cause,0.0,4.4,7.0,


In [6]:
# Load the title basics table from its gzip-compressed, tab-separated file
# and preview the first rows.
# low_memory=False has pandas parse the whole file at once rather than in
# internal chunks, which avoids mixed-type column inference.
# NOTE(review): the preview shows missing values stored as the literal string
# \N (e.g. endYear); they are not converted to NaN by this read.
title_basics_file = 'Data/title.basics.tsv.gz'
title_basics = pd.read_csv(
    title_basics_file,
    sep='\t',
    compression='gzip',
    low_memory=False,
)
title_basics.head(5)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short"
