# Export MySQL Database for Tableau

## 🎛️ Update These Variables

- You may update the following variables:
    - `DB_NAME`: The name of the database. (Most likely "movies").
    - `MYSQL_LOGIN`: the filepath to the json file with your mysql username and password. 
        - Note: if you have not saved your mysql credentials to a json file in your .secret folder yet, we strongly suggest you do so now. 
        - Change `USER_KEY` to be the correct key from your json file with your user name.
        - Change `PASSWORD_KEY` to be the correct key from your json file with your password.
        
        
- (Optional) You can change where the csv file will be saved by changing the `folder` variable.

In [1]:
## UPDATE THESE VARIABLES TO MATCH YOUR OWN PC/DATABASE
# Name of the MySQL database to export
DB_NAME = "movie-dashboard"

# Path to the JSON file holding the MySQL login credentials
MYSQL_LOGIN = "/Users/codingdojo/.secret/mysql.json"


## Load the credentials file so its keys can be inspected
import json
with open(MYSQL_LOGIN) as f:
    login = json.load(f)

# Show which keys the file contains; USER_KEY and PASSWORD_KEY must each name one of them
login.keys()

dict_keys(['user', 'password'])

In [2]:
## (Optional) - Change folder
# Directory the exported csv files are written to (created later if it does not exist)
folder = "Data-for-Tableau/"


# Keys of the credentials json that hold the MySQL user name and password
USER_KEY = "user"
PASSWORD_KEY = "password"

In [3]:
######## CODE TO TEST LOGIN CREDENTIALS
import os
import json

# Make sure the export folder exists before anything is written into it.
os.makedirs(folder, exist_ok=True)

# Re-read the credentials file so this cell can be run on its own.
with open(MYSQL_LOGIN) as f:
    login = json.load(f)

# Fail early, naming the first configured key that the json file lacks.
for _required_key in (USER_KEY, PASSWORD_KEY):
    if _required_key not in login:
        raise Exception(f"[!] The json file did not have a {_required_key} key.")
    

## Run All Below!

In [4]:
# !pip install pymysql

In [5]:
import pandas as pd
import os
import numpy as np

from sqlalchemy import create_engine
from sqlalchemy_utils import create_database, database_exists

import pymysql
# Register PyMySQL under the MySQLdb module name for any code that imports MySQLdb.
pymysql.install_as_MySQLdb()

In [6]:
# Build the SQLAlchemy URL for the MySQL server on localhost and open an engine.
# The user name and password are percent-encoded first: characters such as
# '@', ':', '/' or '%' in a password would otherwise be read as URL delimiters
# and the connection would target the wrong host or fail to parse.
from urllib.parse import quote

_user = quote(str(login[USER_KEY]), safe="")
_password = quote(str(login[PASSWORD_KEY]), safe="")
connection = f"mysql+pymysql://{_user}:{_password}@localhost/{DB_NAME}"
engine = create_engine(connection)

# Stop here with a clear message when the database is missing, instead of
# failing later in the middle of the export.
if database_exists(engine.url):
    print(f"[i] Database {DB_NAME} found.")
else:
    raise Exception(f'[!] Database {DB_NAME} does not exist.')

[i] Database movie-dashboard found.


In [7]:
# Ask the server for the list of tables in the connected database
q  = """SHOW TABLES;"""
tables = pd.read_sql(q, engine)
# Display the resulting DataFrame as the cell output
tables

Unnamed: 0,Tables_in_movie-dashboard
0,collections
1,genres
2,name_basics
3,name_characters
4,name_knownForTitles
5,name_professions
6,production_companies
7,title_basics
8,title_collection
9,title_crew


In [8]:
# The SHOW TABLES result has one column labelled "Tables_in_<database>";
# pull that column out as a plain Python list of table names.
tables_column = f'Tables_in_{DB_NAME}'
table_names = tables[tables_column].to_list()
table_names

['collections',
 'genres',
 'name_basics',
 'name_characters',
 'name_knownForTitles',
 'name_professions',
 'production_companies',
 'title_basics',
 'title_collection',
 'title_crew',
 'title_genres',
 'title_principals',
 'title_production_company',
 'title_ratings',
 'tmdb']

In [9]:
# Containers for per-table exceptions and for the paths of the files written
errors = {}
new_files = []

dashes = '---'*25
print(dashes, f"    EXPORTING DATABASE ({DB_NAME}) to '{folder}'",
      dashes, sep='\n')


# Export every table to its own csv file. A failure on one table is recorded
# and the loop carries on, so the remaining tables are still exported.
for table in table_names:

    try:
        # Backtick-quote the identifier (doubling any embedded backtick) so
        # table names containing '-' or spaces, or matching a reserved word,
        # still form a valid query.
        quoted_table = "`" + table.replace("`", "``") + "`"
        temp = pd.read_sql(f"SELECT * FROM {quoted_table}", engine)

        # os.path.join works whether or not `folder` ends with a slash
        fname = os.path.join(folder, f"{table}.csv")
        temp.to_csv(fname, index=False)

        # Save filename and print message
        new_files.append(fname)
        print(f"  - Exported {table} to '{fname}'")

    except Exception as e:
        # Keep the exception object, keyed by table name, for the error summary
        errors[table] = e
        print(f"  - [!] Error with '{table}' table")
        

---------------------------------------------------------------------------
    EXPORTING DATABASE (movie-dashboard) to 'Data-for-Tableau/'
---------------------------------------------------------------------------
  - Exported collections to 'Data-for-Tableau/collections.csv'
  - Exported genres to 'Data-for-Tableau/genres.csv'
  - Exported name_basics to 'Data-for-Tableau/name_basics.csv'
  - Exported name_characters to 'Data-for-Tableau/name_characters.csv'
  - Exported name_knownForTitles to 'Data-for-Tableau/name_knownForTitles.csv'
  - Exported name_professions to 'Data-for-Tableau/name_professions.csv'
  - Exported production_companies to 'Data-for-Tableau/production_companies.csv'
  - Exported title_basics to 'Data-for-Tableau/title_basics.csv'
  - Exported title_collection to 'Data-for-Tableau/title_collection.csv'
  - Exported title_crew to 'Data-for-Tableau/title_crew.csv'
  - Exported title_genres to 'Data-for-Tableau/title_genres.csv'
  - Exported title_principals to 'Dat

### Errors

In [10]:
## if errors, print out details
# `errors` maps each failed table name to the exception caught during export.
if errors:
    print('\n\n[!] ERRORS FOUND DURING EXPORT:')
    # Iterate over (table, exception) pairs. Iterating the keys alone would
    # try to unpack each table-name string, and the `e` bound by the export
    # loop's `except` clause no longer exists once that clause has finished.
    for k, v in errors.items():
        print(f"  - Error for table {k}:   {v}")

else:
    print('[i]  No errors. :-)')

[i]  No errors. :-)


### Final Preview

In [45]:
## Print preview of exported files.
for file in new_files:
    # Read the csv back from disk so the preview shows what was actually written
    temp_df = pd.read_csv(file)

    # One filler row holding '...' in every column, shown between head and tail
    filler_row = ['...'] * len(temp_df.columns)
    elipses = pd.DataFrame([filler_row], index=['...'], columns=temp_df.columns)

    # Banner naming the file, then head rows + filler row + tail rows
    print(dashes, f"[i] Preview of {file}:", dashes, sep='\n')
    preview_parts = [temp_df.head(), elipses, temp_df.tail()]
    display(pd.concat(preview_parts))

---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/collections.csv:
---------------------------------------------------------------------------


Unnamed: 0,collection_id,collection_name
0,0,#TemanTapiMenikah
1,1,... Cameras Collection
2,2,.hack Collection
3,3,10 giorni... - Collezione
4,4,100 Girls Collection
...,...,...
1938,1938,달마야 시리즈
1939,1939,독전 시리즈
1940,1940,동갑내기 과외하기 시리즈
1941,1941,반드시 크게 들을 것


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/genres.csv:
---------------------------------------------------------------------------


Unnamed: 0,Genre_Name,Genre_ID
0,Action,0
1,Adult,1
2,Adventure,2
3,Animation,3
4,Biography,4
...,...,...
22,Sport,22
23,Talk-Show,23
24,Thriller,24
25,War,25


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/name_basics.csv:
---------------------------------------------------------------------------


Unnamed: 0,nconst,primaryName,birthYear,deathYear
0,nm0000005,Ingmar Bergman,1918,2007
1,nm0000018,Kirk Douglas,1916,2020
2,nm0000041,Akira Kurosawa,1910,1998
3,nm0000054,Marilyn Monroe,1926,1962
4,nm0000080,Orson Welles,1915,1985
...,...,...,...,...
138270,nm9993103,Brianna Temple,\N,\N
138271,nm9993197,Juanjo Braulio,\N,\N
138272,nm9993311,Sadegh Khoshhal,\N,\N
138273,nm9993398,Oliviero Del Papa,\N,\N


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/name_characters.csv:
---------------------------------------------------------------------------


Unnamed: 0,nconst,characters
0,nm0000212,Kate McKay
1,nm0413168,Leopold
2,nm0000630,Stuart Besser
3,nm0005227,Charlie McKay
4,nm0815612,Viudo
...,...,...
420407,nm8383131,Restaurant Customer
420408,nm3766704,Ana
420409,nm0107165,Rostegui
420410,nm0266723,Consejero


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/name_knownForTitles.csv:
---------------------------------------------------------------------------


Unnamed: 0,nconst,knownForTitles
0,nm0000005,tt0050976
1,nm0000005,tt0050986
2,nm0000005,tt0083922
3,nm0000005,tt0060827
4,nm0000018,tt0050825
...,...,...
450428,nm9993398,tt10941386
450429,nm9993398,tt10270592
450430,nm9993398,tt15662942
450431,nm9993494,tt19813764


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/name_professions.csv:
---------------------------------------------------------------------------


Unnamed: 0,nconst,profession
0,nm0000005,writer
1,nm0000005,director
2,nm0000005,actor
3,nm0000018,actor
4,nm0000018,producer
...,...,...
359880,nm9993103,camera_department
359881,nm9993197,writer
359882,nm9993311,writer
359883,nm9993398,writer


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/production_companies.csv:
---------------------------------------------------------------------------


Unnamed: 0,company_id,company_name
0,0,# Andrea Sperling Productions
1,1,#Beardforce Films
2,2,#Sinning Works
3,3,#littlesecretfilm
4,4,(주)라인필름
...,...,...
39867,39867,타임스토리그룹
39868,39868,파파스필름
39869,39869,플랫폼픽쳐스
39870,39870,한맥문화


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_basics.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes
0,tt0035423,Kate & Leopold,2001.0,118
1,tt0062336,The Tango of the Widower and Its Distorting Mi...,2020.0,70
2,tt0069049,The Other Side of the Wind,2018.0,122
3,tt0088751,The Naked Monster,2005.0,100
4,tt0096056,Crime and Punishment,2002.0,126
...,...,...,...,...
118422,tt9915436,Vida em Movimento,2019.0,70
118423,tt9915872,The Last White Witch,2019.0,97
118424,tt9916170,The Rehearsal,2019.0,51
118425,tt9916190,Safeguard,2020.0,95


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_collection.csv:
---------------------------------------------------------------------------


Unnamed: 0,imdb_id,collection_id
0,tt0317219,274
1,tt0317919,963
2,tt0344854,131
3,tt0348150,1389
4,tt0362120,1256
...,...,...
3912,tt1133935,391
3913,tt2243192,1778
3914,tt3533690,1187
3915,tt3679304,1773


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_crew.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,role,nconst
0,tt0035423,director,nm0003506
1,tt0062336,director,nm0749914
2,tt0062336,director,nm0765384
3,tt0069049,director,nm0000080
4,tt0088751,director,nm0078540
...,...,...,...
330644,tt9916170,writer,nm6743460
330645,tt9916170,writer,nm3245789
330646,tt9916190,writer,nm7308376
330647,tt9916362,writer,nm1893148


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_genres.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,Genre_ID
0,tt0035423,5
1,tt0035423,10
2,tt0035423,19
3,tt0062336,8
4,tt0069049,8
...,...,...
210959,tt9916190,0
210960,tt9916190,2
210961,tt9916190,24
210962,tt9916362,8


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_principals.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,ordering,nconst,category,job
0,tt0035423,10,nm0107463,editor,\N
1,tt0035423,1,nm0000212,actress,\N
2,tt0035423,2,nm0413168,actor,\N
3,tt0035423,3,nm0000630,actor,\N
4,tt0035423,4,nm0005227,actor,\N
...,...,...,...,...,...
967317,tt9916362,5,nm1893148,director,\N
967318,tt9916362,6,nm3471432,writer,screenplay by
967319,tt9916362,7,nm2970042,producer,executive producer
967320,tt9916362,8,nm4065853,producer,producer


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_production_company.csv:
---------------------------------------------------------------------------


Unnamed: 0,imdb_id,company_id
0,tt0204250,14402
1,tt0206634,36827
2,tt0206634,33356
3,tt0206634,16040
4,tt0244521,11845
...,...,...
106548,tt7098636,6068
106549,tt7098636,26073
106550,tt7098636,2167
106551,tt7637550,961


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_ratings.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,averageRating,numVotes
0,tt0035423,6.4,85981
1,tt0062336,6.4,164
2,tt0069049,6.7,7553
3,tt0088751,5.3,328
4,tt0096056,5.6,831
...,...,...,...
89677,tt9915436,7.4,5
89678,tt9915872,7.3,8
89679,tt9916170,7.0,7
89680,tt9916190,3.7,241


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/tmdb.csv:
---------------------------------------------------------------------------


Unnamed: 0,imdb_id,budget,revenue,certification,original_language,release_date,popularity,vote_average,vote_count
0,tt0035423,48000000.0,76019000.0,PG-13,en,2001-12-25,9.183,6.3,1112
1,tt0062336,0.0,0.0,,es,2020-02-21,1.4,5.3,3
2,tt0069049,12000000.0,0.0,R,en,2018-11-02,5.155,6.7,155
3,tt0088751,350000.0,0.0,,en,2005-04-22,2.19,3.4,5
4,tt0093119,7500000.0,0.0,,en,2020-02-17,9.226,4.4,14
...,...,...,...,...,...,...,...,...,...
78392,tt9914644,0.0,0.0,,en,2018-09-11,1.821,7.0,2
78393,tt9914942,0.0,0.0,,ca,2019-07-12,2.047,7.8,6
78394,tt9915872,0.0,0.0,,en,,0.654,0.0,0
78395,tt9916190,0.0,0.0,,en,2020-09-07,3.252,6.5,2


In [46]:
# ## Making an elipses row for concat preview

# elipses = pd.DataFrame([['...']*len(temp_df.columns)],
#                        index=['...'], columns=temp_df.columns)
# elipses

In [47]:
# display(pd.concat([temp_df.head(), elipses, temp_df.tail()]))