# Setting Up The SQLite Server
Below is the code to import the sqlite3 library that is included with the Python standard library package.

In [1]:
import sqlite3

Next is the code to create/connect with the database

In [2]:
connection = sqlite3.connect('movies.db')

Now the next code will create a cursor object so we can execute SQL statements

In [3]:
cursor = connection.cursor()

## Drop Tables
Drop all the tables from the database to start fresh.

In [4]:
cursor.execute('DROP TABLE IF EXISTS movie_stars')
cursor.execute('DROP TABLE IF EXISTS stars')
cursor.execute('DROP TABLE IF EXISTS movie_genres')
cursor.execute('DROP TABLE IF EXISTS genres')
cursor.execute('DROP TABLE IF EXISTS movie_directors')
cursor.execute('DROP TABLE IF EXISTS directors')
cursor.execute('DROP TABLE IF EXISTS imdb_movies')
cursor.execute('DROP TABLE IF EXISTS certifications')

<sqlite3.Cursor at 0x2837fdf1140>

## Create Tables 

### Main Table From imdb dataset (imdb_movies)

In [5]:
cursor.execute('''CREATE TABLE IF NOT EXISTS imdb_movies
               (
                    imdb_movies_id INTEGER PRIMARY KEY NOT NULL, 
                    movie_name VARCHAR(100) NOT NULL, 
                    year_released TEXT NOT NULL,
                    run_time INTEGER NOT NULL,
                    meta_score INTEGER,
                    description TEXT NOT NULL,
                    certification VARCHAR(25)
                )''')

<sqlite3.Cursor at 0x2837fdf1140>

### Create table for the stars

In [6]:
cursor.execute('''CREATE TABLE IF NOT EXISTS stars
               (
                    star_id INTEGER PRIMARY KEY NOT NULL, 
                    star_name VARCHAR(100) 
                )''')

<sqlite3.Cursor at 0x2837fdf1140>

### Make table that connects movies and stars
We are connecting movies and stars in a separate table because there can be multiple stars per movie and each star can be in multiple movies. Since there is a many-to-many relationship between the movies and stars tables, we need to make a new table that contains the id from the movies table and the id from the stars table. This will also be the same with movie directors and genres.

In [7]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_stars
               (
                    movie_star_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    star_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (star_id) REFERENCES stars(star_id)
                )''')

<sqlite3.Cursor at 0x2837fdf1140>

### Make table for Directors

In [8]:
cursor.execute('''CREATE TABLE IF NOT EXISTS directors
               (
                    director_id INTEGER PRIMARY KEY NOT NULL, 
                    director_name VARCHAR(100) 
                )''')

<sqlite3.Cursor at 0x2837fdf1140>

### Make a table that connects Movies and Directors
We need to create a table that connects movies and directors like we did with movies and stars

In [9]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_directors
               (
                    movie_director_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    director_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (director_id) REFERENCES directors(director_id)
                )''')

<sqlite3.Cursor at 0x2837fdf1140>

### Create table containing all the different genres

In [10]:
cursor.execute('''CREATE TABLE IF NOT EXISTS genres
               (
                    genre_id INTEGER PRIMARY KEY NOT NULL, 
                    genre_name VARCHAR(20) 
                )''')

<sqlite3.Cursor at 0x2837fdf1140>

### Make a table connecting movies and genres
Like we did with movies and stars, and movies and directors, we will make a table connecting the movie values with the genre values.

In [11]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_genres
               (
                    movie_genre_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    genre_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (genre_id) REFERENCES genres(genre_id)
                )''')

<sqlite3.Cursor at 0x2837fdf1140>

## Use pandas to clean up our data
Using pandas we will clean up our data to only have movies released from 2018 to 2022

In [12]:
# import pandas package
import pandas as pd

# path to our movie data file
path_to_data = './datasets/imdb/imdb-data.csv'

# open the csv file
imdb_df = pd.read_csv(path_to_data, index_col=0)

movies_afer_2018 = imdb_df["Year of Release"] >= 2018
movies_before_2023 = imdb_df["Year of Release"] <= 2022

movies_in_range_df = imdb_df[movies_afer_2018 & movies_before_2023]
movies_in_range_df = movies_in_range_df.sort_values(by="Year of Release")

movies_in_range_df

Unnamed: 0,Movie Name,Year of Release,Run Time in minutes,Movie Rating,Votes,MetaScore,Gross,Genre,Certification,Director,Stars,Description
8947,The Package,2018,94,5.6,20342,,,"['Comedy', ' Thriller']",TV-MA,['Jake Szymanski'],"['Daniel Doheny', 'Geraldine Viswanathan', 'Sa...","['When', 'a', 'group', 'of', 'teenage', 'frien..."
5382,Sarkar,2018,163,6.7,21141,,,"['Action', ' Drama']",Not Rated,['A.R. Murugadoss'],"['Joseph Vijay', 'Keerthy Suresh', 'Yogi Babu'...","['An', 'NRI', 'businessman', 'learns', 'his', ..."
9263,The Little Stranger,2018,111,5.5,10423,67.0,710000.0,"['Drama', ' Horror', ' Mystery']",R,['Lenny Abrahamson'],"['Domhnall Gleeson', 'Will Poulter', 'Ruth Wil...","['After', 'a', 'doctor', 'is', 'called', 'to',..."
5401,Red Sparrow,2018,140,6.6,196091,53.0,46870000.0,"['Action', ' Drama', ' Thriller']",R,['Francis Lawrence'],"['Jennifer Lawrence', 'Joel Edgerton', 'Matthi...","['Ballerina', 'Dominika', 'Egorova', 'is', 're..."
2861,Gold,2018,151,7.3,14531,,,"['Drama', ' History', ' Sport']",Not Rated,['Reema Kagti'],"['Krishan Tandon', 'Hubertus Geller', 'Siddhar...","['The', 'journey', 'of', 'a', 'man', 'who', 'w..."
...,...,...,...,...,...,...,...,...,...,...,...,...
5924,Dog,2022,101,6.5,42798,61.0,,"['Comedy', ' Drama']",PG-13,"['Reid Carolin', 'Channing Tatum']","['Channing Tatum', 'Ryder McLaughlin', 'Aavi H...","['Two', 'former', 'Army', 'Rangers', 'are', 'p..."
5922,Minions: The Rise of Gru,2022,87,6.5,78997,56.0,369700000.0,"['Animation', ' Adventure', ' Comedy']",PG,"['Kyle Balda', 'Brad Ableson', 'Jonathan del V...","['Steve Carell', 'Pierre Coffin', 'Alan Arkin'...","['The', 'untold', 'story', 'of', 'one', ""twelv..."
5910,"Confess, Fletch",2022,98,6.5,16520,64.0,,"['Comedy', ' Crime', ' Mystery']",R,['Greg Mottola'],"['Jon Hamm', 'Caitlin Zerra Rose', 'Roy Wood J...","['After', 'becoming', 'the', 'prime', 'suspect..."
5849,Sonic the Hedgehog 2,2022,122,6.5,80046,47.0,190870000.0,"['Action', ' Adventure', ' Comedy']",PG,['Jeff Fowler'],"['James Marsden', 'Jim Carrey', 'Ben Schwartz'...","['When', 'the', 'manic', 'Dr.', 'Robotnik', 'r..."


In [13]:
import ast

genre_s = movies_in_range_df["Genre"]

genre_list = []

for genres in genre_s:
    lit_genres = ast.literal_eval(genres)
    for genre in lit_genres:
        genre_list.append(genre.strip())

genre_set = set(genre_list)

for genre in genre_set:
    print(genre)

History
Romance
Fantasy
Animation
Crime
Comedy
Horror
Mystery
War
Drama
Adventure
Musical
Biography
Family
Music
Sport
Action
Thriller
Western
Sci-Fi


#### Adding Genres to the genres table

In [14]:
id = 1

for genre in genre_set:
    cursor.execute('INSERT INTO genres (genre_id, genre_name) VALUES (?, ?)', (id, genre))
    connection.commit()
    id += 1

In [15]:
cursor.execute('SELECT * FROM genres')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'History')
(2, 'Romance')
(3, 'Fantasy')
(4, 'Animation')
(5, 'Crime')
(6, 'Comedy')
(7, 'Horror')
(8, 'Mystery')
(9, 'War')
(10, 'Drama')
(11, 'Adventure')
(12, 'Musical')
(13, 'Biography')
(14, 'Family')
(15, 'Music')
(16, 'Sport')
(17, 'Action')
(18, 'Thriller')
(19, 'Western')
(20, 'Sci-Fi')


## Get All the Directors
Get all the directors from the DataFrame like was done with the genres and insert them into the directors table.

In [16]:
director_s = movies_in_range_df["Director"]

director_list = []

for director in director_s:
    lit_director = ast.literal_eval(director)
    for each in lit_director:
        director_list.append(each.strip())


director_set = set(director_list)

print(director_set)

{'Chloé Zhao', 'Janicza Bravo', 'Hideaki Anno', 'Sujeeth', 'Jon S. Baird', 'James Kent', 'Philip Barantini', 'Tom Gormican', 'Matteo Garrone', "Alma Har'el", 'S.K. Dale', 'Kelly Reichardt', 'Gareth Evans', 'Nick Powell', 'Dominique Rocher', 'Vikas Bahl', 'Glen Keane', 'Kookie Gulati', 'Josh Safdie', 'Anurag Kashyap', 'John Hyams', 'Catherine Hardwicke', 'Jacob Chase', 'Marius Vaysberg', 'Kevin Lewis', 'David F. Sandberg', 'Ariel Schulman', 'Edgar Wright', 'Darren Lynn Bousman', 'Sam Levinson', 'Wanuri Kahiu', 'Luc Besson', 'Derek Tsang', 'Anvita Dutt', 'Parker Finn', 'Octavio E. Rodriguez', 'Drew Goddard', 'Akan Satayev', 'Joe Cornish', 'Jerry', 'David Michôd', 'Vinil Mathew', 'Patty Jenkins', 'Jason Reitman', 'Barry Jenkins', 'Ernie Barbarash', 'Yilmaz Erdogan', 'Michael Grandage', 'Adesh Prasad', 'Zu Quirke', 'Paul Thomas Anderson', 'Rob Cohen', 'Wayne Roberts', 'Lulu Wang', 'Chiwetel Ejiofor', 'Edward Berger', 'Nitesh Tiwari', 'Guy Nattiv', 'Josh Gordon', 'Dexter Fletcher', 'Jaume B

### Adding Directors to the Directors Table

In [17]:
id = 1

for director in director_set:
    cursor.execute('INSERT INTO directors (director_id, director_name) VALUES (?, ?)', (id, director))
    connection.commit()
    id += 1

In [18]:
cursor.execute('SELECT * FROM directors')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'Chloé Zhao')
(2, 'Janicza Bravo')
(3, 'Hideaki Anno')
(4, 'Sujeeth')
(5, 'Jon S. Baird')
(6, 'James Kent')
(7, 'Philip Barantini')
(8, 'Tom Gormican')
(9, 'Matteo Garrone')
(10, "Alma Har'el")
(11, 'S.K. Dale')
(12, 'Kelly Reichardt')
(13, 'Gareth Evans')
(14, 'Nick Powell')
(15, 'Dominique Rocher')
(16, 'Vikas Bahl')
(17, 'Glen Keane')
(18, 'Kookie Gulati')
(19, 'Josh Safdie')
(20, 'Anurag Kashyap')
(21, 'John Hyams')
(22, 'Catherine Hardwicke')
(23, 'Jacob Chase')
(24, 'Marius Vaysberg')
(25, 'Kevin Lewis')
(26, 'David F. Sandberg')
(27, 'Ariel Schulman')
(28, 'Edgar Wright')
(29, 'Darren Lynn Bousman')
(30, 'Sam Levinson')
(31, 'Wanuri Kahiu')
(32, 'Luc Besson')
(33, 'Derek Tsang')
(34, 'Anvita Dutt')
(35, 'Parker Finn')
(36, 'Octavio E. Rodriguez')
(37, 'Drew Goddard')
(38, 'Akan Satayev')
(39, 'Joe Cornish')
(40, 'Jerry')
(41, 'David Michôd')
(42, 'Vinil Mathew')
(43, 'Patty Jenkins')
(44, 'Jason Reitman')
(45, 'Barry Jenkins')
(46, 'Ernie Barbarash')
(47, 'Yilmaz Erdogan')
(

## Get all the Stars
Get all the stars from the DataFrame to insert into the stars table

In [19]:
star_s = movies_in_range_df["Stars"]

stars_list = []

for star in star_s:
    lit_star = ast.literal_eval(star)
    for each in lit_star:
        stars_list.append(each.strip())


stars_set = set(stars_list)

print(stars_set)

{'Alicia von Rittberg', 'Jonah Hauer-King', 'Sam Worthington', 'Luke Evans', 'Leo Long', 'Adarsh Gourav', 'Raj B. Shetty', 'Luke Bracey', 'Ian McKellen', 'Stormee Kipp', 'Garance Marillier', 'Naya Rivera', 'Mia Quiney', 'Bob Balaban', 'Aditya Roy Kapoor', 'Akshaye Khanna', 'Zahra Anderson', 'Karam Taher', 'Aisling Franciosi', 'Bowen Yang', 'Aayush Sharma', 'Chiara Aurelia', 'Jeff Bridges', 'Welker White', 'Himesh Patel', 'Jack Quaid', 'Levi Eisenblätter', 'Lara Dutta', 'Elizabeth Lail', 'Brendan Sexton III', 'Kaley Cuoco', 'Samuel Leakey', 'Kelly Asbury', 'Michael Kenneth Williams', 'Elsie Fisher', 'Priscilla Quintana', 'Dylan Sprouse', 'Tom Sturridge', 'Emmanuelle Chriqui', 'Toby Jones', 'Alexander Ludwig', "Brian O'Halloran", 'Mohanlal', 'Amrita Singh', 'John Higgins', 'Eva Melander', 'Glen Keane', 'Indira Varma', 'Linda May', 'Billy Burke', 'AnnaSophia Robb', 'Vanessa Kirby', 'Kuwaarjeet Chopraa', 'Kim Tae-ri', 'Harriet Dyer', 'Cuba Gooding Jr.', "Y'lan Noel", 'Sophia Di Martino', '

#### Adding stars to the stars table

In [20]:
id = 1

for star in stars_set:
    cursor.execute('INSERT INTO stars (star_id, star_name) VALUES (?, ?)', (id, star))
    connection.commit()
    id += 1

In [21]:
cursor.execute('SELECT * FROM stars')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'Alicia von Rittberg')
(2, 'Jonah Hauer-King')
(3, 'Sam Worthington')
(4, 'Luke Evans')
(5, 'Leo Long')
(6, 'Adarsh Gourav')
(7, 'Raj B. Shetty')
(8, 'Luke Bracey')
(9, 'Ian McKellen')
(10, 'Stormee Kipp')
(11, 'Garance Marillier')
(12, 'Naya Rivera')
(13, 'Mia Quiney')
(14, 'Bob Balaban')
(15, 'Aditya Roy Kapoor')
(16, 'Akshaye Khanna')
(17, 'Zahra Anderson')
(18, 'Karam Taher')
(19, 'Aisling Franciosi')
(20, 'Bowen Yang')
(21, 'Aayush Sharma')
(22, 'Chiara Aurelia')
(23, 'Jeff Bridges')
(24, 'Welker White')
(25, 'Himesh Patel')
(26, 'Jack Quaid')
(27, 'Levi Eisenblätter')
(28, 'Lara Dutta')
(29, 'Elizabeth Lail')
(30, 'Brendan Sexton III')
(31, 'Kaley Cuoco')
(32, 'Samuel Leakey')
(33, 'Kelly Asbury')
(34, 'Michael Kenneth Williams')
(35, 'Elsie Fisher')
(36, 'Priscilla Quintana')
(37, 'Dylan Sprouse')
(38, 'Tom Sturridge')
(39, 'Emmanuelle Chriqui')
(40, 'Toby Jones')
(41, 'Alexander Ludwig')
(42, "Brian O'Halloran")
(43, 'Mohanlal')
(44, 'Amrita Singh')
(45, 'John Higgins')
(46

### Add the Movies to the imdb_movies Table
Now we will add all the Movies and the proper data to the imdb_movies table

In [22]:
import re

for index, row in movies_in_range_df.iterrows():
    # Declare a variable for each field and assign it to value from DF
    movie_id = index
    movie_name = re.sub(r'[^\w\s]', '',row["Movie Name"]).upper()
    year_released = row["Year of Release"]
    run_time = row["Run Time in minutes"]
    meta_score = row["MetaScore"]
    description = " ".join(ast.literal_eval(row["Description"]))
    certification = row["Certification"]
    print(f"{movie_id}, {movie_name}, {year_released}, {run_time}, {meta_score}, {description}, {certification}")
    cursor.execute('''INSERT INTO imdb_movies (imdb_movies_id, movie_name, year_released, run_time, meta_score, description, certification) 
                      VALUES (?, ?, ?, ?, ?, ?, ?)''', (movie_id, movie_name, year_released, run_time, meta_score, description, certification))
    connection.commit()

    # Make movie names with all caps and no special characters to match with other datasets
    # Use INSERT statement to add values to the database

8947, THE PACKAGE, 2018, 94, nan, When a group of teenage friends go on a spring break camping trip, an unfortunate accident sets off a race against time to save their friend's most prized possession., TV-MA
5382, SARKAR, 2018, 163, nan, An NRI businessman learns his vote has been cast by someone else and decides to investigate the matter, eventually finding himself pitted against two corrupt politicians., Not Rated
9263, THE LITTLE STRANGER, 2018, 111, 67.0, After a doctor is called to visit a crumbling manor, strange things begin to occur., R
5401, RED SPARROW, 2018, 140, 53.0, Ballerina Dominika Egorova is recruited to 'Sparrow School,' a Russian intelligence service where she is forced to use her body as a weapon. Her first mission, targeting a C.I.A. agent, threatens to unravel the security of both nations., R
2861, GOLD, 2018, 151, nan, The journey of a man who was instrumental in making India win its first Olympic gold medal as a free nation., Not Rated
2860, MULK, 2018, 140, na

In [23]:
cursor.execute('SELECT * FROM imdb_movies')
rows = cursor.fetchall()
for row in rows:
    print(row)

(10, 'DAMAN', '2022', 121, None, "The film is set in 2015. Sid, is a young doctor who completed his MBBS has been posted to the cut-off area of Malkangiri district Odisha having 151 villages & infamous for Naxals dominance & no basic facilities. It's a real life tale of perseverance & dedication.", None)
(12, 'DEMON SLAYER KIMETSU NO YAIBA  TSUZUMI MANSION ARC', '2021', 87, None, 'Tanjiro ventures to the south-southeast where he encounters a cowardly young man named Zenitsu Agatsuma. He is a fellow survivor from Final Selection and his sparrow asks Tanjiro to help keep him in line.', None)
(22, 'JAI BHIM', '2021', 164, None, 'When a tribal man is arrested for a case of alleged theft, his wife turns to a human-rights lawyer to help bring justice.', 'TV-MA')
(24, '777 CHARLIE', '2022', 164, None, 'Dharma is stuck in a rut with his negative and lonely lifestyle and spends each day in the comfort of his loneliness. A pup named Charlie enters his life and gives him a new perspective towards

## Get IDs from each movie and Directors to insert into movie_directors table
Loop through the pandas dataframe holding all of the data, get the ID of the movie and the ID of the director and insert into the movie_directors. Will have to loop through the directors since it is a list inside of the DataFrame and individually get the id of each from the directors table. 

In [24]:
movie_director_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    directors = ast.literal_eval(row["Director"])
    for director in directors:
        cursor.execute("SELECT director_id FROM directors WHERE director_name = ?", (director.strip(),))
        dir_ids = cursor.fetchall()
        for dir_id in dir_ids:
            cursor.execute("INSERT INTO movie_directors VALUES (?,?,?)", (movie_director_index, movie_id, dir_id[0]))
            connection.commit()
            movie_director_index += 1
        

In [25]:
cursor.execute("SELECT * FROM movie_directors")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 1238)
(2, 5382, 420)
(3, 9263, 698)
(4, 5401, 365)
(5, 2861, 1028)
(6, 2860, 353)
(7, 5423, 446)
(8, 2842, 90)
(9, 7570, 636)
(10, 8878, 210)
(11, 5431, 1031)
(12, 8160, 901)
(13, 2807, 685)
(14, 9199, 349)
(15, 5466, 1137)
(16, 7534, 1077)
(17, 5472, 1233)
(18, 8207, 1067)
(19, 2676, 597)
(20, 2676, 270)
(21, 7533, 350)
(22, 2593, 557)
(23, 7531, 1162)
(24, 2562, 929)
(25, 9213, 1144)
(26, 5491, 670)
(27, 7588, 1254)
(28, 5373, 1228)
(29, 3165, 1263)
(30, 5162, 53)
(31, 5166, 773)
(32, 5200, 664)
(33, 3114, 9)
(34, 3113, 1040)
(35, 3111, 5)
(36, 5206, 633)
(37, 5206, 1230)
(38, 9285, 617)
(39, 5241, 1197)
(40, 2915, 467)
(41, 7605, 254)
(42, 5308, 135)
(43, 3044, 694)
(44, 5315, 334)
(45, 3032, 1321)
(46, 8871, 1244)
(47, 9270, 918)
(48, 5343, 535)
(49, 5343, 1291)
(50, 9624, 1310)
(51, 8148, 1221)
(52, 9269, 1140)
(53, 8125, 423)
(54, 7615, 637)
(55, 2544, 936)
(56, 2539, 419)
(57, 7510, 560)
(58, 5630, 746)
(59, 5647, 426)
(60, 2260, 162)
(61, 9687, 870)
(62, 5708, 736)
(6

## Get IDs from Movies and Stars to insert into movie_stars Table
Loop through the movies table and stars table to get their ids and then insert them into a new row in the movie_stars table.

In [26]:
movie_star_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    stars = ast.literal_eval(row["Stars"])
    for star in stars:
        cursor.execute("SELECT star_id FROM stars WHERE star_name = ?", (star.strip(),))
        star_ids = cursor.fetchall()
        for star_id in star_ids:
            cursor.execute("INSERT INTO movie_stars VALUES (?,?,?)", (movie_star_index, movie_id, star_id[0]))
            connection.commit()
            movie_star_index += 1

In [27]:
cursor.execute("SELECT * FROM movie_stars")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 3235)
(2, 8947, 2938)
(3, 8947, 1171)
(4, 8947, 2950)
(5, 5382, 3079)
(6, 5382, 138)
(7, 5382, 2682)
(8, 5382, 1227)
(9, 9263, 1340)
(10, 9263, 1558)
(11, 9263, 3050)
(12, 9263, 3531)
(13, 5401, 1253)
(14, 5401, 1277)
(15, 5401, 1899)
(16, 5401, 1772)
(17, 2861, 2279)
(18, 2861, 2304)
(19, 2861, 809)
(20, 2861, 3295)
(21, 2860, 1342)
(22, 2860, 1122)
(23, 2860, 2173)
(24, 2860, 3519)
(25, 5423, 1712)
(26, 5423, 2222)
(27, 5423, 1316)
(28, 5423, 2357)
(29, 2842, 1886)
(30, 2842, 2493)
(31, 2842, 1364)
(32, 2842, 3492)
(33, 7570, 2449)
(34, 7570, 3402)
(35, 7570, 2722)
(36, 7570, 3225)
(37, 8878, 1330)
(38, 8878, 838)
(39, 8878, 951)
(40, 8878, 169)
(41, 5431, 200)
(42, 5431, 1820)
(43, 5431, 2411)
(44, 5431, 1798)
(45, 8160, 1916)
(46, 8160, 124)
(47, 8160, 2307)
(48, 8160, 670)
(49, 2807, 2647)
(50, 2807, 123)
(51, 2807, 1623)
(52, 2807, 2242)
(53, 9199, 2166)
(54, 9199, 3388)
(55, 9199, 3134)
(56, 9199, 1681)
(57, 5466, 2263)
(58, 5466, 3262)
(59, 5466, 2839)
(60, 5466, 315)

## Get IDs from Movies and Genres and Place them into the movie_genres table
Get the id from the imdb_movies table and each genre and place them in rows in the movie_genres table

In [28]:
movie_genre_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    genres = ast.literal_eval(row["Genre"])
    for genre in genres:
        cursor.execute("SELECT genre_id FROM genres WHERE genre_name = ?", (genre.strip(),))
        genre_ids = cursor.fetchall()
        #print(genre_ids)
        for genre_id in genre_ids:
            cursor.execute("INSERT INTO movie_genres VALUES (?,?,?)", (movie_genre_index, movie_id, genre_id[0]))
            connection.commit()
            movie_genre_index += 1

In [29]:
cursor.execute("SELECT * FROM movie_genres")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 6)
(2, 8947, 18)
(3, 5382, 17)
(4, 5382, 10)
(5, 9263, 10)
(6, 9263, 7)
(7, 9263, 8)
(8, 5401, 17)
(9, 5401, 10)
(10, 5401, 18)
(11, 2861, 10)
(12, 2861, 1)
(13, 2861, 16)
(14, 2860, 10)
(15, 5423, 17)
(16, 5423, 11)
(17, 5423, 20)
(18, 2842, 6)
(19, 2842, 10)
(20, 7570, 10)
(21, 7570, 8)
(22, 7570, 20)
(23, 8878, 5)
(24, 8878, 10)
(25, 8878, 2)
(26, 5431, 7)
(27, 5431, 8)
(28, 5431, 20)
(29, 8160, 10)
(30, 8160, 8)
(31, 8160, 18)
(32, 2807, 4)
(33, 2807, 17)
(34, 2807, 11)
(35, 9199, 7)
(36, 9199, 18)
(37, 5466, 6)
(38, 5466, 12)
(39, 5466, 2)
(40, 7534, 17)
(41, 7534, 11)
(42, 7534, 5)
(43, 5472, 17)
(44, 5472, 18)
(45, 8207, 10)
(46, 8207, 7)
(47, 8207, 8)
(48, 2676, 13)
(49, 2676, 10)
(50, 2676, 14)
(51, 7533, 6)
(52, 7533, 10)
(53, 7533, 2)
(54, 2593, 10)
(55, 2593, 2)
(56, 2593, 9)
(57, 7531, 17)
(58, 7531, 11)
(59, 7531, 20)
(60, 2562, 11)
(61, 2562, 10)
(62, 2562, 18)
(63, 9213, 17)
(64, 9213, 6)
(65, 9213, 5)
(66, 5491, 13)
(67, 5491, 10)
(68, 5491, 1)
(69, 7588, 5)


## Close Connection to the Database

In [30]:
connection.close()