# Project Overview
This project performs an Extract, Transform, and Load on a sample database using numerous Python libraries including pandas, sqlalchemy, and pymongo. I am using the Sakila SQL database, which I access by connecting to my SQL server and to an Atlas MongoDB cluster I created. I want to model the casting of an actor in a film for a dimensional data mart, organized in star schema. The fact_casting table I created lists the name of the actor and the details of the movies they were in. Therefore my dimension tables are dim_actor, dim_film, dim_film_actor, dim_category (used as one of the details of the film in fact_casting), and dim_film_category. The actors table is sourced from a .json file, and the film table is sourced from a .csv file, the rest come from the original sql file. I constructed fact_casting using both a SQL select statement as well as through Pandas. I also authored multiple test statements, including select-all statements for all dimension and fact tables, as well as a select statement which concatenates and lists all the movies each actor was in (one potential use case for my data mart).

## Import Statements

In [1]:
import os
import pymysql
import mysql.connector
import numpy
import pandas as pd
import json
import pymongo
from sqlalchemy import create_engine

## Connection variables for MySQL server + Mongo, source and destination databases

In [2]:
host_name = "localhost"
port = "3306"
user_id = "root"
pwd = "123Colby"

src_dbname = "sakila"
dst_dbname = "sakila_dw"

conn_str = {'local' : f"mongodb://localhost:27017/",
    'atlas' : 'mongodb+srv://ncc9kn:123Colby@cluster0.r70tagk.mongodb.net/?retryWrites=true&w=majority'
}

print(f"Local Connection String: {conn_str['local']}")
print(f"Atlas Connection String: {conn_str['atlas']}")

Local Connection String: mongodb://localhost:27017/
Atlas Connection String: mongodb+srv://ncc9kn:123Colby@cluster0.r70tagk.mongodb.net/?retryWrites=true&w=majority


## Get and set data into DBs

In [3]:
def get_dataframe(user_id, pwd, host_name, db_name, sql_query):
    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    connection = sqlEngine.connect()
    dframe = pd.read_sql(sql_query, connection);
    connection.close()
    
    return dframe

def get_mongo_dataframe(connect_str, db_name, collection, query):
    '''Create a connection to MongoDB'''
    client = pymongo.MongoClient(connect_str)
    
    '''Query MongoDB, and fill a python list with documents to create a DataFrame'''
    db = client[db_name]
    dframe = pd.DataFrame(list(db[collection].find(query)))
    dframe.drop(['_id'], axis=1, inplace=True)
    client.close()
    return dframe

def set_dataframe(user_id, pwd, host_name, db_name, df, table_name, pk_column, db_operation):
    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    connection = sqlEngine.connect()
    
    if db_operation == "insert":
        df.to_sql(table_name, con=connection, index=False, if_exists='replace')
        sqlEngine.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY ({pk_column});")
            
    elif db_operation == "update":
        df.to_sql(table_name, con=connection, index=False, if_exists='append')
    
    connection.close()

## Populate MongoDB with Source Data (JSON file)

In [4]:
client = pymongo.MongoClient(conn_str['atlas'])
db = client[src_dbname]

# Gets the path of the Current Working Directory for this Notebook, and then Appends the 'data' directory.
data_dir = os.path.join(os.getcwd(), 'data')

json_files = {'actor' : 'sakila_actor.json'}

for file in json_files:
    db.drop_collection(file)
    json_file = os.path.join(data_dir, json_files[file])
    with open(json_file, 'r') as openfile:
        json_object = json.load(openfile)
        file = db[file]
        result = file.insert_many(json_object)
        
client.close()       

## Create new destination data warehouse

In [5]:
pymysql_conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}"
sqlEngine = create_engine(pymysql_conn_str, pool_recycle=3600)

sqlEngine.execute(f"DROP DATABASE IF EXISTS `{dst_dbname}`;")
sqlEngine.execute(f"CREATE DATABASE `{dst_dbname}`;")
sqlEngine.execute(f"USE {dst_dbname};")

  sqlEngine.execute(f"DROP DATABASE IF EXISTS `{dst_dbname}`;")


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x11a027850>

## Creating dimension tables

In [6]:
query = {} #no where clause = select everything
collection = "actor"

df_actor = get_mongo_dataframe(conn_str['atlas'], src_dbname, collection, query)
df_actor.head(2)

Unnamed: 0,actor_id,first_name,last_name,last_update
0,1,PENELOPE,GUINESS,2006-02-15 04:34:33
1,2,NICK,WAHLBERG,2006-02-15 04:34:33


In [7]:
data_dir = os.path.join(os.getcwd(), 'data')
csv_file = os.path.join(data_dir, 'sakila_film.csv')


df_film = pd.read_csv(csv_file)
df_film.head(2)

Unnamed: 0,film_id,title,description,release_year,language_id,original_language_id,rental_duration,rental_rate,length,replacement_cost,rating,special_features,last_update
0,1,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,1,,6,0.99,86,20.99,PG,"Deleted Scenes,Behind the Scenes",2006-02-15 05:03:42
1,2,ACE GOLDFINGER,A Astounding Epistle of a Database Administrat...,2006,1,,3,4.99,48,12.99,G,"Trailers,Deleted Scenes",2006-02-15 05:03:42


In [8]:
sql_film_actor = "SELECT * FROM sakila.film_actor;"
df_film_actor = get_dataframe(user_id, pwd, host_name, src_dbname, sql_film_actor)
df_film_actor.head(2)

Unnamed: 0,actor_id,film_id,last_update
0,1,1,2006-02-15 05:05:03
1,1,23,2006-02-15 05:05:03


In [9]:
sql_category = "SELECT * FROM sakila.category;"
df_category = get_dataframe(user_id, pwd, host_name, src_dbname, sql_category)
df_category.head(20)

Unnamed: 0,category_id,name,last_update
0,1,Action,2006-02-15 04:46:27
1,2,Animation,2006-02-15 04:46:27
2,3,Children,2006-02-15 04:46:27
3,4,Classics,2006-02-15 04:46:27
4,5,Comedy,2006-02-15 04:46:27
5,6,Documentary,2006-02-15 04:46:27
6,7,Drama,2006-02-15 04:46:27
7,8,Family,2006-02-15 04:46:27
8,9,Foreign,2006-02-15 04:46:27
9,10,Games,2006-02-15 04:46:27


In [10]:
sql_film_category = "SELECT * FROM sakila.film_category;"
df_film_category = get_dataframe(user_id, pwd, host_name, src_dbname, sql_film_category)
df_film_category.head(20)

Unnamed: 0,film_id,category_id,last_update
0,1,6,2006-02-15 05:07:09
1,2,11,2006-02-15 05:07:09
2,3,6,2006-02-15 05:07:09
3,4,11,2006-02-15 05:07:09
4,5,8,2006-02-15 05:07:09
5,6,9,2006-02-15 05:07:09
6,7,5,2006-02-15 05:07:09
7,8,11,2006-02-15 05:07:09
8,9,11,2006-02-15 05:07:09
9,10,15,2006-02-15 05:07:09


## Transformations on dim tables, rename IDs to "key" for readability

In [11]:
drop_cols = ['last_update']
df_actor.drop(drop_cols, axis=1, inplace=True)
df_actor.rename(columns={"actor_id":"actor_key"}, inplace=True)

df_actor.head(2)

Unnamed: 0,actor_key,first_name,last_name
0,1,PENELOPE,GUINESS
1,2,NICK,WAHLBERG


In [12]:
drop_cols = ['language_id','original_language_id','rental_duration','rental_rate','replacement_cost','last_update']
df_film.drop(drop_cols, axis=1, inplace=True)
df_film.rename(columns={"film_id":"film_key"}, inplace=True)

df_film.head(2)

Unnamed: 0,film_key,title,description,release_year,length,rating,special_features
0,1,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,"Deleted Scenes,Behind the Scenes"
1,2,ACE GOLDFINGER,A Astounding Epistle of a Database Administrat...,2006,48,G,"Trailers,Deleted Scenes"


In [13]:
drop_cols = ['last_update']
df_film_actor.drop(drop_cols, axis=1, inplace=True)
df_film_actor.rename(columns={"actor_id":"actor_key", "film_id":"film_key"}, inplace=True)
df_film_actor.insert(0, 'casting_key', range(1, df_film_actor.shape[0]+1))

df_film_actor.head(20)

Unnamed: 0,casting_key,actor_key,film_key
0,1,1,1
1,2,1,23
2,3,1,25
3,4,1,106
4,5,1,140
5,6,1,166
6,7,1,277
7,8,1,361
8,9,1,438
9,10,1,499


In [14]:
drop_cols = ['last_update']
df_category.drop(drop_cols, axis=1, inplace=True)
df_category.rename(columns={"category_id":"category_key"}, inplace=True)

df_category.head(20)

Unnamed: 0,category_key,name
0,1,Action
1,2,Animation
2,3,Children
3,4,Classics
4,5,Comedy
5,6,Documentary
6,7,Drama
7,8,Family
8,9,Foreign
9,10,Games


In [15]:
drop_cols = ['last_update']
df_film_category.drop(drop_cols, axis=1, inplace=True)
df_film_category.rename(columns={"film_id":"film_key","category_id":"category_key"}, inplace=True)
df_film_category.insert(0, 'categorization_key', range(1, df_film_category.shape[0]+1))

df_film_category.head(20)

Unnamed: 0,categorization_key,film_key,category_key
0,1,1,6
1,2,2,11
2,3,3,6
3,4,4,11
4,5,5,8
5,6,6,9
6,7,7,5
7,8,8,11
8,9,9,11
9,10,10,15


## Load altered data frames into the new warehouse

In [16]:
db_operation = "insert"

tables = [('dim_actor', df_actor, 'actor_key'),
          ('dim_film', df_film, 'film_key'),
          ('dim_film_actor', df_film_actor, 'casting_key'),
          ('dim_category', df_category, 'category_key'),
          ('dim_film_category', df_film_category, 'categorization_key')]

In [17]:
for table_name, dataframe, primary_key in tables:
    set_dataframe(user_id, pwd, host_name, dst_dbname, dataframe, table_name, primary_key, db_operation)

## (1/2) SQL select statement to create fact_casting df

In [18]:
sql_query = """
SELECT
    fa.actor_id as actor_key,
    fa.film_id as film_key,
    a.first_name,
    a.last_name,
    f.title,
    f.description,
    c.name as category,
    f.release_year,
    f.length,
    f.rating,
    f.special_features as features
    FROM sakila.film_actor as fa
	INNER JOIN sakila.actor as a
	ON a.actor_id = fa.actor_id
    INNER JOIN sakila.film as f
    ON f.film_id = fa.film_id
    INNER JOIN sakila.film_category as fc
    ON f.film_id = fc.film_id
    INNER JOIN sakila.category as c
    ON fc.category_id = c.category_id;
"""
df_fact_casting = get_dataframe(user_id, pwd, host_name, src_dbname, sql_query)
df_fact_casting.head(1000)

Unnamed: 0,actor_key,film_key,first_name,last_name,title,description,category,release_year,length,rating,features
0,5,19,JOHNNY,LOLLOBRIGIDA,AMADEUS HOLY,A Emotional Display of a Pioneer And a Technic...,Action,2006,113,PG,"Commentaries,Deleted Scenes,Behind the Scenes"
1,27,19,JULIA,MCQUEEN,AMADEUS HOLY,A Emotional Display of a Pioneer And a Technic...,Action,2006,113,PG,"Commentaries,Deleted Scenes,Behind the Scenes"
2,37,19,VAL,BOLGER,AMADEUS HOLY,A Emotional Display of a Pioneer And a Technic...,Action,2006,113,PG,"Commentaries,Deleted Scenes,Behind the Scenes"
3,43,19,KIRK,JOVOVICH,AMADEUS HOLY,A Emotional Display of a Pioneer And a Technic...,Action,2006,113,PG,"Commentaries,Deleted Scenes,Behind the Scenes"
4,84,19,JAMES,PITT,AMADEUS HOLY,A Emotional Display of a Pioneer And a Technic...,Action,2006,113,PG,"Commentaries,Deleted Scenes,Behind the Scenes"
...,...,...,...,...,...,...,...,...,...,...,...
995,173,852,ALAN,DREYFUSS,STRANGELOVE DESIRE,A Awe-Inspiring Panorama of a Lumberjack And a...,Children,2006,103,NC-17,"Trailers,Commentaries,Deleted Scenes"
996,174,852,MICHAEL,BENING,STRANGELOVE DESIRE,A Awe-Inspiring Panorama of a Lumberjack And a...,Children,2006,103,NC-17,"Trailers,Commentaries,Deleted Scenes"
997,179,852,ED,GUINESS,STRANGELOVE DESIRE,A Awe-Inspiring Panorama of a Lumberjack And a...,Children,2006,103,NC-17,"Trailers,Commentaries,Deleted Scenes"
998,17,853,HELEN,VOIGHT,STRANGER STRANGERS,A Awe-Inspiring Yarn of a Womanizer And a Expl...,Children,2006,139,G,"Commentaries,Deleted Scenes"


## (2/2) Pandas method of creating fact_casting df. Merge df_actor into df_film_actor on actor_key, df_category into df_film_category on category_key, df_film_category into df_film_actor on film_key, and finally create df_fact_casting by merging df_film_actor and df_film on film key.

In [19]:
df_film_actor = pd.merge(df_film_actor, df_actor, on ='actor_key', how='inner')
df_film_actor.head(80)

Unnamed: 0,casting_key,actor_key,film_key,first_name,last_name
0,1,1,1,PENELOPE,GUINESS
1,2,1,23,PENELOPE,GUINESS
2,3,1,25,PENELOPE,GUINESS
3,4,1,106,PENELOPE,GUINESS
4,5,1,140,PENELOPE,GUINESS
...,...,...,...,...,...
75,76,4,463,JENNIFER,DAVIS
76,77,4,490,JENNIFER,DAVIS
77,78,4,616,JENNIFER,DAVIS
78,79,4,635,JENNIFER,DAVIS


In [20]:
df_film_category = pd.merge(df_film_category, df_category, on ='category_key', how='inner')
df_film_category.head(1000)

Unnamed: 0,categorization_key,film_key,category_key,name
0,1,1,6,Documentary
1,3,3,6,Documentary
2,40,40,6,Documentary
3,58,58,6,Documentary
4,62,62,6,Documentary
...,...,...,...,...
995,928,928,3,Children
996,955,955,3,Children
997,959,959,3,Children
998,993,993,3,Children


In [21]:
df_film_actor = pd.merge(df_film_actor, df_film_category, on ='film_key', how='inner')
df_film_actor.rename(columns = {"name":"genre"}, inplace=True)
df_film_actor.head(1000)

Unnamed: 0,casting_key,actor_key,film_key,first_name,last_name,categorization_key,category_key,genre
0,1,1,1,PENELOPE,GUINESS,1,6,Documentary
1,213,10,1,CHRISTIAN,GABLE,1,6,Documentary
2,490,20,1,LUCILLE,TRACY,1,6,Documentary
3,781,30,1,SANDRA,PECK,1,6,Documentary
4,1022,40,1,JOHNNY,CAGE,1,6,Documentary
...,...,...,...,...,...,...,...,...
995,3642,133,806,RICHARD,PENN,806,13,New
996,3716,136,806,ED,MANSFIELD,806,13,New
997,5191,190,806,AUDREY,BAILEY,806,13,New
998,5418,198,806,MARY,KEITEL,806,13,New


In [22]:
df_fact_casting = pd.merge(df_film_actor, df_film, on = 'film_key', how='inner')
df_fact_casting.rename(columns = {"special_features":"features"}, inplace=True)
df_fact_casting.head(80)

Unnamed: 0,casting_key,actor_key,film_key,first_name,last_name,categorization_key,category_key,genre,title,description,release_year,length,rating,features
0,1,1,1,PENELOPE,GUINESS,1,6,Documentary,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,"Deleted Scenes,Behind the Scenes"
1,213,10,1,CHRISTIAN,GABLE,1,6,Documentary,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,"Deleted Scenes,Behind the Scenes"
2,490,20,1,LUCILLE,TRACY,1,6,Documentary,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,"Deleted Scenes,Behind the Scenes"
3,781,30,1,SANDRA,PECK,1,6,Documentary,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,"Deleted Scenes,Behind the Scenes"
4,1022,40,1,JOHNNY,CAGE,1,6,Documentary,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,"Deleted Scenes,Behind the Scenes"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,132,6,605,BETTE,NICHOLSON,605,9,Foreign,MULHOLLAND BEAST,A Awe-Inspiring Display of a Husband And a Squ...,2006,157,PG,"Trailers,Deleted Scenes,Behind the Scenes"
76,1125,43,605,KIRK,JOVOVICH,605,9,Foreign,MULHOLLAND BEAST,A Awe-Inspiring Display of a Husband And a Squ...,2006,157,PG,"Trailers,Deleted Scenes,Behind the Scenes"
77,1501,57,605,JUDE,CRUISE,605,9,Foreign,MULHOLLAND BEAST,A Awe-Inspiring Display of a Husband And a Squ...,2006,157,PG,"Trailers,Deleted Scenes,Behind the Scenes"
78,2744,102,605,WALTER,TORN,605,9,Foreign,MULHOLLAND BEAST,A Awe-Inspiring Display of a Husband And a Squ...,2006,157,PG,"Trailers,Deleted Scenes,Behind the Scenes"


## Final transformations on df_fact_casting

In [23]:
#drop unimportant columns
drop_columns = ['categorization_key', 'category_key']
df_fact_casting.drop(drop_columns, axis=1, inplace=True)

#reorder column order
ordered_columns = ['casting_key', 'actor_key', 'film_key', 'last_name', 'first_name', 'title', 'description', 'genre', 'rating', 'release_year', 'length', 'features']
df_fact_casting = df_fact_casting[ordered_columns]
df_fact_casting.head(1000)

Unnamed: 0,casting_key,actor_key,film_key,last_name,first_name,title,description,genre,rating,release_year,length,features
0,1,1,1,GUINESS,PENELOPE,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,Documentary,PG,2006,86,"Deleted Scenes,Behind the Scenes"
1,213,10,1,GABLE,CHRISTIAN,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,Documentary,PG,2006,86,"Deleted Scenes,Behind the Scenes"
2,490,20,1,TRACY,LUCILLE,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,Documentary,PG,2006,86,"Deleted Scenes,Behind the Scenes"
3,781,30,1,PECK,SANDRA,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,Documentary,PG,2006,86,"Deleted Scenes,Behind the Scenes"
4,1022,40,1,CAGE,JOHNNY,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,Documentary,PG,2006,86,"Deleted Scenes,Behind the Scenes"
...,...,...,...,...,...,...,...,...,...,...,...,...
995,3642,133,806,PENN,RICHARD,SLEEPY JAPANESE,A Emotional Epistle of a Moose And a Composer ...,New,PG,2006,137,"Trailers,Commentaries,Deleted Scenes"
996,3716,136,806,MANSFIELD,ED,SLEEPY JAPANESE,A Emotional Epistle of a Moose And a Composer ...,New,PG,2006,137,"Trailers,Commentaries,Deleted Scenes"
997,5191,190,806,BAILEY,AUDREY,SLEEPY JAPANESE,A Emotional Epistle of a Moose And a Composer ...,New,PG,2006,137,"Trailers,Commentaries,Deleted Scenes"
998,5418,198,806,KEITEL,MARY,SLEEPY JAPANESE,A Emotional Epistle of a Moose And a Composer ...,New,PG,2006,137,"Trailers,Commentaries,Deleted Scenes"


## Write fact_casting table into sakila.dw

In [24]:
table_name = 'fact_casting'
primary_key = 'casting_key'
db_operation = 'insert'

set_dataframe(user_id, pwd, host_name, dst_dbname, df_fact_casting, table_name, primary_key, db_operation)

# Test Statements

## List all movies an actor has been in

In [25]:
sql_test = """
    SELECT dim_actor.last_name, GROUP_CONCAT(dim_film.title SEPARATOR ', ') AS movies
    FROM fact_casting
    JOIN dim_actor ON fact_casting.actor_key = dim_actor.actor_key
    JOIN dim_film ON fact_casting.film_key = dim_film.film_key
    GROUP BY dim_actor.actor_key;
"""

df_test = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_test)

In [26]:
df_test.head(80)

Unnamed: 0,last_name,movies
0,GUINESS,"ACADEMY DINOSAUR, ANACONDA CONFESSIONS, ANGELS..."
1,WAHLBERG,"ADAPTATION HOLES, APACHE DIVINE, BABY HALL, BU..."
2,CHASE,"ALONE TRIP, ARMY FLINTSTONES, ARTIST COLDBLOOD..."
3,DAVIS,"ANACONDA CONFESSIONS, ANGELS LIFE, BAREFOOT MA..."
4,LOLLOBRIGIDA,"AMADEUS HOLY, BANGER PINOCCHIO, BONNIE HOLOCAU..."
...,...,...
75,ASTAIRE,"BEAST HUNCHBACK, BENEATH RUSH, BETRAYED REAR, ..."
76,MCCONAUGHEY,"ALI FOREVER, AMISTAD MIDSUMMER, ARMY FLINTSTON..."
77,SINATRA,"BOOGIE AMELIE, DOGMA FAMILY, DUDE BLINDNESS, D..."
78,HOFFMAN,"APOCALYPSE FLAMINGOS, APOLLO TEEN, ARMY FLINTS..."


## SELECT-all statements to print dim tables and fact_casting

In [27]:
sql_test = """
SELECT `dim_actor`.`actor_key`,
    `dim_actor`.`first_name`,
    `dim_actor`.`last_name`
FROM `sakila_dw`.`dim_actor`;
"""

df_test_dim_actor = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_test)
df_test_dim_actor.head()

Unnamed: 0,actor_key,first_name,last_name
0,1,PENELOPE,GUINESS
1,2,NICK,WAHLBERG
2,3,ED,CHASE
3,4,JENNIFER,DAVIS
4,5,JOHNNY,LOLLOBRIGIDA


In [28]:
sql_test = """
SELECT `dim_film`.`film_key`,
    `dim_film`.`title`,
    `dim_film`.`description`,
    `dim_film`.`release_year`,
    `dim_film`.`length`,
    `dim_film`.`rating`,
    `dim_film`.`special_features`
FROM `sakila_dw`.`dim_film`;
"""

df_test_dim_film = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_test)
df_test_dim_film.head()

Unnamed: 0,film_key,title,description,release_year,length,rating,special_features
0,1,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,"Deleted Scenes,Behind the Scenes"
1,2,ACE GOLDFINGER,A Astounding Epistle of a Database Administrat...,2006,48,G,"Trailers,Deleted Scenes"
2,3,ADAPTATION HOLES,A Astounding Reflection of a Lumberjack And a ...,2006,50,NC-17,"Trailers,Deleted Scenes"
3,4,AFFAIR PREJUDICE,A Fanciful Documentary of a Frisbee And a Lumb...,2006,117,G,"Commentaries,Behind the Scenes"
4,5,AFRICAN EGG,A Fast-Paced Documentary of a Pastry Chef And ...,2006,130,G,Deleted Scenes


In [29]:
sql_test = """
SELECT `dim_film_actor`.`casting_key`,
    `dim_film_actor`.`actor_key`,
    `dim_film_actor`.`film_key`
FROM `sakila_dw`.`dim_film_actor`;
"""

df_test_dim_film_actor = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_test)
df_test_dim_film_actor.head()

Unnamed: 0,casting_key,actor_key,film_key
0,1,1,1
1,2,1,23
2,3,1,25
3,4,1,106
4,5,1,140


In [30]:
sql_test = """
SELECT `dim_category`.`category_key`,
    `dim_category`.`name`
FROM `sakila_dw`.`dim_category`;
"""

df_test_dim_category = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_test)
df_test_dim_category.head()

Unnamed: 0,category_key,name
0,1,Action
1,2,Animation
2,3,Children
3,4,Classics
4,5,Comedy


In [31]:
sql_test = """
SELECT `dim_film_category`.`categorization_key`,
    `dim_film_category`.`film_key`,
    `dim_film_category`.`category_key`
FROM `sakila_dw`.`dim_film_category`;
"""

df_test_dim_film_category = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_test)
df_test_dim_film_category.head()

Unnamed: 0,categorization_key,film_key,category_key
0,1,1,6
1,2,2,11
2,3,3,6
3,4,4,11
4,5,5,8


In [32]:
sql_test = """
SELECT `fact_casting`.`casting_key`,
    `fact_casting`.`actor_key`,
    `fact_casting`.`film_key`,
    `fact_casting`.`last_name`,
    `fact_casting`.`first_name`,
    `fact_casting`.`title`,
    `fact_casting`.`description`,
    `fact_casting`.`genre`,
    `fact_casting`.`rating`,
    `fact_casting`.`release_year`,
    `fact_casting`.`length`,
    `fact_casting`.`features`
FROM `sakila_dw`.`fact_casting`;
"""

df_test_fact_casting = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_test)
df_test_fact_casting.head()

Unnamed: 0,casting_key,actor_key,film_key,last_name,first_name,title,description,genre,rating,release_year,length,features
0,1,1,1,GUINESS,PENELOPE,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,Documentary,PG,2006,86,"Deleted Scenes,Behind the Scenes"
1,2,1,23,GUINESS,PENELOPE,ANACONDA CONFESSIONS,A Lacklusture Display of a Dentist And a Denti...,Animation,R,2006,92,"Trailers,Deleted Scenes"
2,3,1,25,GUINESS,PENELOPE,ANGELS LIFE,A Thoughtful Display of a Woman And a Astronau...,New,G,2006,74,Trailers
3,4,1,106,GUINESS,PENELOPE,BULWORTH COMMANDMENTS,A Amazing Display of a Mad Cow And a Pioneer w...,Games,G,2006,61,Trailers
4,5,1,140,GUINESS,PENELOPE,CHEAPER CLYDE,A Emotional Character Study of a Pioneer And a...,Sci-Fi,G,2006,87,"Trailers,Commentaries,Behind the Scenes"
