# Deliverable #3

## Objective: 
Extract and transform the Kaggle metadata and MovieLens rating data, then convert the transformed data into separate DataFrames. Then, merge the Kaggle metadata DataFrame with the Wikipedia movies DataFrame to create the movies_df DataFrame. Finally, merge the MovieLens rating data DataFrame with the movies_df DataFrame to create the movies_with_ratings_df.

In [2]:
import json
import pandas as pd
import numpy as np

import re

from sqlalchemy import create_engine
# import psycopg2

# from config import db_password

import time

In [3]:
#  Add the clean movie function that takes in the argument, "movie".

def clean_movie(movie):
    """Return a tidied copy of one scraped Wikipedia movie record.

    Alternate-title fields are gathered into a single ``alt_titles`` dict and
    a set of synonymous keys is renamed to one canonical key each. The dict
    passed in by the caller is left untouched.
    """
    movie = dict(movie)  # shallow copy, so the caller's record is not mutated

    # Move every alternate-title field that is present into one nested dict.
    alt_title_keys = ('Also known as', 'Arabic', 'Cantonese', 'Chinese', 'French',
                      'Hangul', 'Hebrew', 'Hepburn', 'Japanese', 'Literally',
                      'Mandarin', 'McCune-Reischauer', 'Original title', 'Polish',
                      'Revised Romanization', 'Romanized', 'Russian',
                      'Simplified', 'Traditional', 'Yiddish')
    alt_titles = {key: movie.pop(key) for key in alt_title_keys if key in movie}
    if alt_titles:
        movie['alt_titles'] = alt_titles

    # (old key, canonical key) pairs, applied in this order. When several old
    # keys map to the same canonical key, the last one present in the record
    # overwrites the earlier ones.
    renames = (
        ('Adaptation by', 'Writer(s)'),
        ('Country of origin', 'Country'),
        ('Directed by', 'Director'),
        ('Distributed by', 'Distributor'),
        ('Edited by', 'Editor(s)'),
        ('Length', 'Running time'),
        ('Original release', 'Release date'),
        ('Music by', 'Composer(s)'),
        ('Produced by', 'Producer(s)'),
        ('Producer', 'Producer(s)'),
        ('Productioncompanies ', 'Production company(s)'),
        ('Productioncompany ', 'Production company(s)'),
        ('Released', 'Release Date'),
        ('Release Date', 'Release date'),
        ('Screen story by', 'Writer(s)'),
        ('Screenplay by', 'Writer(s)'),
        ('Story by', 'Writer(s)'),
        ('Theme music composer', 'Composer(s)'),
        ('Written by', 'Writer(s)'),
    )
    for old_name, new_name in renames:
        if old_name in movie:
            movie[new_name] = movie.pop(old_name)

    return movie

In [4]:
# 10. Create the path to your file directory and variables for the three files.
file_dir = '../Movies-ETL'
# The Wikipedia data. The loading code in this notebook opens
# 'wikipedia-movies.json' (hyphenated), so the path variable uses that name.
wiki_file = f'{file_dir}/wikipedia-movies.json'
# The Kaggle metadata
kaggle_file = f'{file_dir}/movies_metadata.csv'
# The MovieLens rating data.
ratings_file = f'{file_dir}/ratings.csv'

In [5]:
# 1. Load the three raw sources at notebook level so they can be inspected
# before the ETL function below is defined.

# Kaggle metadata and MovieLens ratings are CSV files read straight into DataFrames.
kaggle_metadata = pd.read_csv(
    f'{file_dir}/movies_metadata.csv',
    low_memory=False,
)
ratings = pd.read_csv(f'{file_dir}/ratings.csv')

# The Wikipedia data is a JSON file; parse it into Python objects first.
with open(f'{file_dir}/wikipedia-movies.json', 'r') as file:
    wiki_movies_raw = json.load(file)

# Build a DataFrame from the raw (uncleaned) Wikipedia records.
wiki_movies_df = pd.DataFrame(data=wiki_movies_raw)
    


## Confirming Tables Work

In [6]:
# Preview the first rows of the raw Wikipedia DataFrame to confirm the JSON loaded.
wiki_movies_df.head()

Unnamed: 0,url,year,imdb_link,title,Directed by,Produced by,Screenplay by,Story by,Based on,Starring,...,Predecessor,Founders,Area served,Products,Services,Russian,Hebrew,Revenue,Operating income,Polish
0,https://en.wikipedia.org/wiki/The_Adventures_o...,1990.0,https://www.imdb.com/title/tt0098987/,The Adventures of Ford Fairlane,Renny Harlin,"[Steve Perry, Joel Silver]","[David Arnott, James Cappe, Daniel Waters]","[David Arnott, James Cappe]","[Characters, by Rex Weiner]","[Andrew Dice Clay, Wayne Newton, Priscilla Pre...",...,,,,,,,,,,
1,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",1990.0,https://www.imdb.com/title/tt0098994/,"After Dark, My Sweet",James Foley,"[Ric Kidney, Robert Redlin]","[James Foley, Robert Redlin]",,"[the novel, After Dark, My Sweet, by, Jim Thom...","[Jason Patric, Rachel Ward, Bruce Dern, George...",...,,,,,,,,,,
2,https://en.wikipedia.org/wiki/Air_America_(film),1990.0,https://www.imdb.com/title/tt0099005/,Air America,Roger Spottiswoode,Daniel Melnick,"[John Eskow, Richard Rush]",,"[Air America, by, Christopher Robbins]","[Mel Gibson, Robert Downey Jr., Nancy Travis, ...",...,,,,,,,,,,
3,https://en.wikipedia.org/wiki/Alice_(1990_film),1990.0,https://www.imdb.com/title/tt0099012/,Alice,Woody Allen,Robert Greenhut,,,,"[Alec Baldwin, Blythe Danner, Judy Davis, Mia ...",...,,,,,,,,,,
4,https://en.wikipedia.org/wiki/Almost_an_Angel,1990.0,https://www.imdb.com/title/tt0099018/,Almost an Angel,John Cornell,John Cornell,,,,"[Paul Hogan, Elias Koteas, Linda Kozlowski]",...,,,,,,,,,,


In [7]:
# Preview the first rows of the Kaggle metadata DataFrame to confirm the CSV loaded.
kaggle_metadata.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [8]:
# Preview the first rows of the MovieLens ratings DataFrame to confirm the CSV loaded.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


In [9]:
#KEY CODE!
def extract_transform_load(testing):
    """Read the raw source files and clean the Wikipedia movie records.

    The function reads the Kaggle metadata CSV, the MovieLens ratings CSV and
    the Wikipedia JSON from ``file_dir``, removes TV shows and records that
    lack a director or an IMDb link, normalises each remaining record with
    ``clean_movie``, and then parses the box office, release date and running
    time columns into numeric / datetime columns.

    Parameters
    ----------
    testing : object
        Passed straight through to the return value.

    Returns
    -------
    object
        The ``testing`` argument, unchanged.

    Notes
    -----
    NOTE(review): the cleaned ``wiki_movies_df`` built here is local and is
    not returned, and the Kaggle / ratings frames are read but not used yet.
    The signature and return value are kept as they were so existing calls
    behave the same; returning the DataFrames is presumably the intended
    next step -- confirm.
    """
    def join_if_list(value):
        # Some scraped fields are lists of fragments; flatten them to one string.
        return ' '.join(value) if isinstance(value, list) else value

    # Read in the kaggle metadata and MovieLens ratings CSV files as Pandas DataFrames.
    kaggle_metadata = pd.read_csv(f'{file_dir}/movies_metadata.csv', low_memory=False)
    ratings = pd.read_csv(f'{file_dir}/ratings.csv')

    # Open and read the Wikipedia data JSON file.
    with open(f'{file_dir}/wikipedia-movies.json', mode='r') as file:
        wiki_movies_raw = json.load(file)

    # Filter out TV shows and records without a director or IMDb link.
    wiki_movies = [movie for movie in wiki_movies_raw
                   if ('Director' in movie or 'Directed by' in movie)
                   and 'imdb_link' in movie
                   and 'No. of episodes' not in movie]

    # Clean the FILTERED list (not the raw list), so the filter above is
    # actually reflected in the DataFrame.
    clean_movies = [clean_movie(movie) for movie in wiki_movies]
    wiki_movies_df = pd.DataFrame(clean_movies)

    # Extract the IMDb ID from the link and drop duplicate IDs. This step is
    # best-effort: on failure the exception is printed and processing continues.
    try:
        wiki_movies_df['imdb_id'] = wiki_movies_df['imdb_link'].str.extract(r'(tt\d{7})')
        wiki_movies_df = wiki_movies_df.drop_duplicates(subset='imdb_id')
    except Exception as error:
        print(f"An exception occurred: {error}")

    # Keep only the columns that are less than 90% null. The explicit copy
    # makes the result an independent frame, so the column assignments below
    # do not operate on a slice of the wider frame.
    null_limit = len(wiki_movies_df) * 0.9
    wiki_columns_to_keep = [column for column in wiki_movies_df.columns
                            if wiki_movies_df[column].isnull().sum() < null_limit]
    wiki_movies_df = wiki_movies_df[wiki_columns_to_keep].copy()

    # ---- Box office -------------------------------------------------------
    # Non-null box office values, with list values joined into one string.
    box_office = wiki_movies_df['Box office'].dropna().apply(join_if_list)

    # "form_one": "$123.4 million" / "$1.2 billion".
    form_one = r'\$\d+\.?\d*\s*[mb]illion'
    # "form_two": "$123,456,789".
    form_two = r'\$\d{1,3}(?:,\d{3})+'

    def parse_dollars(s):
        """Convert a matched dollar string to a float; NaN if it cannot be parsed."""
        # Non-strings (e.g. NaN from a failed extract) have nothing to parse.
        if not isinstance(s, str):
            return np.nan

        # $###.# million
        if re.match(r'\$\s*\d+\.?\d*\s*milli?on', s, flags=re.IGNORECASE):
            # Strip the dollar sign, whitespace and the word, then scale.
            return float(re.sub(r'\$|\s|[a-zA-Z]', '', s)) * 10**6

        # $###.# billion
        if re.match(r'\$\s*\d+\.?\d*\s*billi?on', s, flags=re.IGNORECASE):
            return float(re.sub(r'\$|\s|[a-zA-Z]', '', s)) * 10**9

        # $###,###,###
        if re.match(r'\$\s*\d{1,3}(?:[,\.]\d{3})+(?!\s[mb]illion)', s, flags=re.IGNORECASE):
            # Strip the dollar sign and thousands separators.
            return float(re.sub(r'\$|,', '', s))

        return np.nan

    # Parse the box office strings into a numeric column and drop the raw one.
    wiki_movies_df['box_office'] = (
        box_office
        .str.extract(f'({form_one}|{form_two})', flags=re.IGNORECASE)[0]
        .apply(parse_dollars)
    )
    wiki_movies_df = wiki_movies_df.drop(columns='Box office')

    # ---- Release date -----------------------------------------------------
    release_date = wiki_movies_df['Release date'].dropna().apply(join_if_list)
    # "Month D, YYYY": the day accepts one or two digits, so a date such as
    # "July 4, 1990" is captured in full instead of falling through to the
    # year-only pattern.
    date_form_one = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s[123]?\d,\s\d{4}'
    # "YYYY-MM-DD" (any single-character separator); the day may start with 0.
    date_form_two = r'\d{4}.[01]\d.[0123]\d'
    # "Month YYYY"
    date_form_three = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{4}'
    # "YYYY"
    date_form_four = r'\d{4}'
    # NOTE(review): infer_datetime_format is deprecated in pandas 2.x --
    # revisit this call when the pandas version is upgraded.
    wiki_movies_df['release_date'] = pd.to_datetime(
        release_date.str.extract(
            f'({date_form_one}|{date_form_two}|{date_form_three}|{date_form_four})')[0],
        infer_datetime_format=True)

    # ---- Running time -----------------------------------------------------
    running_time = wiki_movies_df['Running time'].dropna().apply(join_if_list)
    # Groups: 0 = hours, 1 = minutes following the hours, 2 = minutes only.
    running_time_extract = running_time.str.extract(r'(\d+)\s*ho?u?r?s?\s*(\d*)|(\d+)\s*m')
    running_time_extract = running_time_extract.apply(
        lambda col: pd.to_numeric(col, errors='coerce')).fillna(0)
    # Use the minutes-only value when present, otherwise hours * 60 + minutes.
    wiki_movies_df['running_time'] = running_time_extract[2].where(
        running_time_extract[2] != 0,
        running_time_extract[0] * 60 + running_time_extract[1])
    wiki_movies_df = wiki_movies_df.drop(columns='Running time')

    return testing

In [10]:
# 2. Clean the Kaggle metadata.
# Keep only rows whose 'adult' flag is the string 'False', drop that column,
# then convert the remaining columns to proper dtypes:
#   video -> bool, budget -> int, id / popularity -> numeric, release_date -> datetime.
kaggle_metadata = (
    kaggle_metadata
    .loc[kaggle_metadata['adult'] == 'False']
    .drop(columns='adult')
    .assign(
        video=lambda frame: frame['video'] == 'True',
        budget=lambda frame: frame['budget'].astype(int),
        id=lambda frame: pd.to_numeric(frame['id'], errors='raise'),
        popularity=lambda frame: pd.to_numeric(frame['popularity'], errors='raise'),
        release_date=lambda frame: pd.to_datetime(frame['release_date']),
    )
)

In [11]:
# 3. Merge the Wikipedia and Kaggle DataFrames into the movies DataFrame.

# Part A - Create an imdb_id column on wiki_movies_df by extracting the
# "tt" + seven-digit identifier from each imdb_link.
wiki_movies_df['imdb_id'] = wiki_movies_df['imdb_link'].str.extract(r'(tt\d{7})')

# Part B - Inner-join the two tables on imdb_id; overlapping column names get
# the '_wiki' / '_kaggle' suffixes.
movies_df = wiki_movies_df.merge(
    kaggle_metadata,
    how='inner',
    on='imdb_id',
    suffixes=['_wiki', '_kaggle'],
)

In [12]:
# 4. Drop unnecessary columns from the merged DataFrame (in place):
# title_wiki, Language and Production company(s).
movies_df.drop(
    ['title_wiki', 'Language', 'Production company(s)'],
    axis='columns',
    inplace=True,
)

# Decision point - the original list of columns to drop was:
#   'title_wiki', 'release_date_wiki', 'Language', 'Production company(s)'
# 'release_date_wiki' was removed from the list because that column was no
# longer present after the cells were reformatted.

In [13]:
# 5. Add in the function to fill in the missing Kaggle data.
def fill_missing_kaggle_data(df, kaggle_column, wiki_column):
    """Back-fill zero Kaggle values from the Wikipedia column, in place.

    For every row where ``df[kaggle_column]`` equals 0, the value from
    ``df[wiki_column]`` is used instead. Afterwards ``wiki_column`` is dropped
    from ``df``. The DataFrame is mutated and nothing is returned.
    """
    def pick_value(row):
        # A zero in the Kaggle column is treated as "missing".
        kaggle_value = row[kaggle_column]
        if kaggle_value == 0:
            return row[wiki_column]
        return kaggle_value

    df[kaggle_column] = df.apply(pick_value, axis=1)
    df.drop(columns=wiki_column, inplace=True)

In [14]:
# 6. Call the Step 5 helper with the merged DataFrame and the column names.
# Both column arguments are 'Budget' here, so the fill step leaves every value
# as it is and the helper then drops the 'Budget' column from movies_df.
# The runtime and revenue fills are currently disabled:
# fill_missing_kaggle_data(movies_df, 'runtime', 'running_time')
# fill_missing_kaggle_data(movies_df, 'revenue', 'box_office')
fill_missing_kaggle_data(df=movies_df, kaggle_column='Budget', wiki_column='Budget')

In [15]:
    # 7. Display a subset of the movies DataFrame columns.
    # The selection is only displayed, not assigned, so movies_df itself keeps
    # all of its columns.
    movies_df.loc[:, [
        'id',
        'title_kaggle',
        'url',
        'release_date',
        'Country',
        'Distributor',
        'Producer(s)',
        'Director',
        'Starring',
        'Cinematography',
        'Editor(s)',
        'Writer(s)',
        'Composer(s)',
        'Based on',
    ]]

Unnamed: 0,id,title_kaggle,url,release_date,Country,Distributor,Producer(s),Director,Starring,Cinematography,Editor(s),Writer(s),Composer(s),Based on
0,9548,The Adventures of Ford Fairlane,https://en.wikipedia.org/wiki/The_Adventures_o...,1990-07-11,United States,,,,"[Andrew Dice Clay, Wayne Newton, Priscilla Pre...",Oliver Wood,,,,"[Characters, by Rex Weiner]"
1,25501,"After Dark, My Sweet","https://en.wikipedia.org/wiki/After_Dark,_My_S...",1990-08-24,United States,,,,"[Jason Patric, Rachel Ward, Bruce Dern, George...",Mark Plummer,,,,"[the novel, After Dark, My Sweet, by, Jim Thom..."
2,11856,Air America,https://en.wikipedia.org/wiki/Air_America_(film),1990-08-10,United States,,,,"[Mel Gibson, Robert Downey Jr., Nancy Travis, ...",Roger Deakins,,,,"[Air America, by, Christopher Robbins]"
3,8217,Alice,https://en.wikipedia.org/wiki/Alice_(1990_film),1990-12-25,United States,,,,"[Alec Baldwin, Blythe Danner, Judy Davis, Mia ...",Carlo Di Palma,,,,
4,25943,Almost an Angel,https://en.wikipedia.org/wiki/Almost_an_Angel,1990-12-21,US,,,,"[Paul Hogan, Elias Koteas, Linda Kozlowski]",Russell Boyd,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9683,429191,A Fantastic Woman,https://en.wikipedia.org/wiki/A_Fantastic_Woman,2017-04-06,"[Chile, Germany, Spain, United States, [2]]",,,,"[Daniela Vega, Francisco Reyes]",Benjamín Echazarreta,,,,
9684,390059,Permission,https://en.wikipedia.org/wiki/Permission_(film),2017-04-22,United States,,,,"[Rebecca Hall, Dan Stevens, Morgan Spector, Fr...",Adam Bricker,,,,
9685,429174,Loveless,https://en.wikipedia.org/wiki/Loveless_(film),2017-06-01,"[Russia, France, Belgium, Germany, [3]]",,,,"[Maryana Spivak, Aleksey Rozin, Matvey Novikov...",Mikhail Krichman,,,,
9686,412302,Gemini,https://en.wikipedia.org/wiki/Gemini_(2017_film),2017-03-12,United States,,,,"[Lola Kirke, Zoë Kravitz, Greta Lee, Michelle ...",Andrew Reed,,,,


In [16]:
    # 8. Rename the columns in the movies DataFrame (in place).
    # 'id' is left as-is (its rename to 'kaggle_id' is commented out below);
    # the later ratings merge joins on 'id'.
    # NOTE(review): 'release_date_kaggle' appears to have no matching column
    # (the frame already exposes 'release_date'), so that entry is presumably
    # a no-op -- confirm.
    movies_df.rename(
        columns={
            # 'id': 'kaggle_id',
            'title_kaggle': 'title',
            'url': 'wikipedia_url',
            'release_date_kaggle': 'release_date',
            'Country': 'country',
            'Distributor': 'distributor',
            'Producer(s)': 'producers',
            'Director': 'director',
            'Starring': 'starring',
            'Cinematography': 'cinematography',
            'Editor(s)': 'editors',
            'Writer(s)': 'writers',
            'Composer(s)': 'composers',
            'Based on': 'based_on',
        },
        inplace=True,
    )

## Preparation for Merging
Which column can I join on?

In [17]:
#ratings

In [18]:
# my_list = df.columns.values.tolist()
# learned here: https://datatofish.com/list-column-names-pandas-dataframe/
# Method used: https://www.google.com/search?client=safari&rls=en&sxsrf=ALeKk00iA_2mw4JgVB8BQaGQ7LfxePRMBw%3A1609643145533&ei=iTTxX8OJIJHQtAWh5p2IBw&q=obtain+all+column+names+python+in+alphabetical+order&oq=obtain+all+column+names+python+in+al&gs_lcp=CgZwc3ktYWIQAxgCMgUIIRCgATIFCCEQoAEyBQghEKABMgUIIRCgAToECAAQRzoICCEQFhAdEB46CQgAEMkDEBYQHlDdHFiCJmDENmgAcAF4AIABggGIAecEkgEDNS4xmAEAoAEBqgEHZ3dzLXdpesgBCMABAQ&sclient=psy-ab#kpvalbx=_pTXxX4u-IouUsgX8u5m4CA15

#sorted(movies_df)

In [19]:
# 9. Merge the ratings DataFrame with the movies_df DataFrame and name the
# result movies_with_ratings_df.
# This is a right join: every movies_df row is kept, and rating columns are
# NaN for movies that have no matching rating row.
# NOTE(review): the join pairs ratings.movieId with movies_df.id -- confirm
# that both columns use the same id space.
movies_with_ratings_df = pd.merge(
    ratings,
    movies_df,
    how='right',
    left_on='movieId',
    right_on='id',
)
#movies_with_ratings_df[rating.columns] = movies_with_ratings_df[rating.columns].fillna(0)
movies_with_ratings_df
 

Unnamed: 0,userId,movieId,rating,timestamp,wikipedia_url,year,imdb_link,Directed by,Produced by,Screenplay by,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,1.0,858.0,5.0,1.425942e+09,https://en.wikipedia.org/wiki/Sleepless_in_Sea...,1993.0,https://www.imdb.com/title/tt0108160/,Nora Ephron,Gary Foster,"[Jeff Arch, Nora Ephron, David S. Ward]",...,1993-06-24,227799884.0,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"What if someone you never met, someone you nev...",Sleepless in Seattle,False,6.5,630.0
1,3.0,858.0,4.0,1.048077e+09,https://en.wikipedia.org/wiki/Sleepless_in_Sea...,1993.0,https://www.imdb.com/title/tt0108160/,Nora Ephron,Gary Foster,"[Jeff Arch, Nora Ephron, David S. Ward]",...,1993-06-24,227799884.0,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"What if someone you never met, someone you nev...",Sleepless in Seattle,False,6.5,630.0
2,5.0,858.0,5.0,9.494242e+08,https://en.wikipedia.org/wiki/Sleepless_in_Sea...,1993.0,https://www.imdb.com/title/tt0108160/,Nora Ephron,Gary Foster,"[Jeff Arch, Nora Ephron, David S. Ward]",...,1993-06-24,227799884.0,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"What if someone you never met, someone you nev...",Sleepless in Seattle,False,6.5,630.0
3,12.0,858.0,4.0,9.439118e+08,https://en.wikipedia.org/wiki/Sleepless_in_Sea...,1993.0,https://www.imdb.com/title/tt0108160/,Nora Ephron,Gary Foster,"[Jeff Arch, Nora Ephron, David S. Ward]",...,1993-06-24,227799884.0,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"What if someone you never met, someone you nev...",Sleepless in Seattle,False,6.5,630.0
4,20.0,858.0,4.5,1.428912e+09,https://en.wikipedia.org/wiki/Sleepless_in_Sea...,1993.0,https://www.imdb.com/title/tt0108160/,Nora Ephron,Gary Foster,"[Jeff Arch, Nora Ephron, David S. Ward]",...,1993-06-24,227799884.0,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"What if someone you never met, someone you nev...",Sleepless in Seattle,False,6.5,630.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4356560,,,,,https://en.wikipedia.org/wiki/A_Fantastic_Woman,2018.0,https://www.imdb.com/title/tt5639354/,Sebastián Lelio,"[Juan de Dios Larraín, Pablo Larraín]",,...,2017-04-06,0.0,104.0,"[{'iso_639_1': 'es', 'name': 'Español'}]",Released,,A Fantastic Woman,False,7.2,13.0
4356561,,,,,https://en.wikipedia.org/wiki/Permission_(film),2018.0,https://www.imdb.com/title/tt5390066/,Brian Crano,"[Brian Crano, Rebecca Hall]",,...,2017-04-22,0.0,96.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Permission,False,0.0,1.0
4356562,,,,,https://en.wikipedia.org/wiki/Loveless_(film),2018.0,https://www.imdb.com/title/tt6304162/,Andrey Zvyagintsev,"[Alexander Rodnyansky, Sergey Melkumov, Gleb F...","[Oleg Negin, Andrey Zvyagintsev]",...,2017-06-01,0.0,128.0,"[{'iso_639_1': 'ru', 'name': 'Pусский'}]",Released,,Loveless,False,7.8,26.0
4356563,,,,,https://en.wikipedia.org/wiki/Gemini_(2017_film),2018.0,https://www.imdb.com/title/tt5795086/,Aaron Katz,"[Mynette Louie, Sara Murphy, Adele Romanski]",,...,2017-03-12,0.0,92.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Post Production,,Gemini,False,0.0,0.0


In [20]:
# 11. Set the three variables equal to the function created in D1.
# FIXME: extract_transform_load is defined with one required positional
# parameter (`testing`) and returns that argument unchanged, so this
# zero-argument call raises TypeError and there is no three-item result to
# unpack. A successful unpack would also rebind wiki_file, kaggle_file and
# ratings_file, which currently hold the input file paths.
wiki_file, kaggle_file, ratings_file = extract_transform_load()

TypeError: extract_transform_load() missing 1 required positional argument: 'testing'

In [None]:
# 12. Bind the three values unpacked in Step 11 to their DataFrame names:
#   wiki_file    -> wiki_movies_df
#   kaggle_file  -> movies_with_ratings_df
#   ratings_file -> movies_df
wiki_movies_df, movies_with_ratings_df, movies_df = wiki_file, kaggle_file, ratings_file

In [None]:
# 13. Check the wiki_movies_df DataFrame by previewing its first rows.
wiki_movies_df.head()

In [None]:
# 14. Check the movies_with_ratings_df DataFrame by previewing its first rows.
movies_with_ratings_df.head()

In [None]:
# 15. Check the movies_df DataFrame by previewing its first rows.
movies_df.head()