# Setting Up The SQLite Server
Below is the code to import the sqlite3 library that is included with the Python standard library package.

In [1]:
import sqlite3

Next is the code to create/connect with the database

In [2]:
connection = sqlite3.connect('movies.db')

Now the next code will create a cursor object so we can execute SQL statements

In [3]:
cursor = connection.cursor()

## Drop Tables
Drop all the tables from the database to start fresh.

In [4]:
cursor.execute('DROP TABLE IF EXISTS movie_stars')
cursor.execute('DROP TABLE IF EXISTS stars')
cursor.execute('DROP TABLE IF EXISTS movie_genres')
cursor.execute('DROP TABLE IF EXISTS genres')
cursor.execute('DROP TABLE IF EXISTS movie_directors')
cursor.execute('DROP TABLE IF EXISTS directors')
cursor.execute('DROP TABLE IF EXISTS imdb_movies')
cursor.execute('DROP TABLE IF EXISTS certifications')

<sqlite3.Cursor at 0x201a2968fc0>

## Create Tables 

### Main Table From imdb dataset (imdb_movies)

In [5]:
cursor.execute('''CREATE TABLE IF NOT EXISTS imdb_movies
               (
                    imdb_movies_id INTEGER PRIMARY KEY NOT NULL, 
                    movie_name VARCHAR(100) NOT NULL, 
                    year_released TEXT NOT NULL,
                    run_time INTEGER NOT NULL,
                    meta_score INTEGER,
                    description TEXT NOT NULL,
                    certification VARCHAR(25)
                )''')

<sqlite3.Cursor at 0x201a2968fc0>

### Create table for the stars

In [6]:
cursor.execute('''CREATE TABLE IF NOT EXISTS stars
               (
                    star_id INTEGER PRIMARY KEY NOT NULL, 
                    star_name VARCHAR(100) 
                )''')

<sqlite3.Cursor at 0x201a2968fc0>

### Make table that connects movies and stars
We are connecting movies and stars in a separate table because there can be multiple stars per movie and each star can be in multiple movies. Since there is a many-to-many relationship between the movies and stars tables, we need to make a new table that contains the id from the movies table and the id from the stars table. This will also be the same with movie directors and genres.

In [7]:
cursor.execute('DROP TABLE IF EXISTS movie_stars')
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_stars
               (
                    movie_star_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    star_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (star_id) REFERENCES stars(star_id)
                )''')

<sqlite3.Cursor at 0x201a2968fc0>

### Make table for Directors

In [8]:
cursor.execute('''CREATE TABLE IF NOT EXISTS directors
               (
                    director_id INTEGER PRIMARY KEY NOT NULL, 
                    director_name VARCHAR(100) 
                )''')

<sqlite3.Cursor at 0x201a2968fc0>

### Make a table that connects Movies and Directors
We need to create a table that connects movies and directors like we did with movies and stars

In [9]:
cursor.execute('DROP TABLE IF EXISTS movie_directors')
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_directors
               (
                    movie_director_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    director_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (director_id) REFERENCES directors(director_id)
                )''')

<sqlite3.Cursor at 0x201a2968fc0>

### Create table containing all the different genres

In [10]:
cursor.execute('''CREATE TABLE IF NOT EXISTS genres
               (
                    genre_id INTEGER PRIMARY KEY NOT NULL, 
                    genre_name VARCHAR(20) 
                )''')

<sqlite3.Cursor at 0x201a2968fc0>

### Make a table connecting movies and genres
Like we did with movies and stars, and movies and directors, we will make a table connecting the movie values with the genre values.

In [11]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_genres
               (
                    movie_genre_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    genre_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (genre_id) REFERENCES genres(genre_id)
                )''')

<sqlite3.Cursor at 0x201a2968fc0>

## Use pandas to clean up our data
Using pandas we will clean up our data to only have movies released from 2018 to 2022

In [12]:
# import pandas package
import pandas as pd

# path to our movie data file
path_to_data = './datasets/imdb/imdb-data.csv'

# open the csv file
imdb_df = pd.read_csv(path_to_data, index_col=0)

movies_afer_2018 = imdb_df["Year of Release"] >= 2018
movies_before_2023 = imdb_df["Year of Release"] <= 2022

movies_in_range_df = imdb_df[movies_afer_2018 & movies_before_2023]
movies_in_range_df = movies_in_range_df.sort_values(by="Year of Release")

movies_in_range_df

Unnamed: 0,Movie Name,Year of Release,Run Time in minutes,Movie Rating,Votes,MetaScore,Gross,Genre,Certification,Director,Stars,Description
8947,The Package,2018,94,5.6,20342,,,"['Comedy', ' Thriller']",TV-MA,['Jake Szymanski'],"['Daniel Doheny', 'Geraldine Viswanathan', 'Sa...","['When', 'a', 'group', 'of', 'teenage', 'frien..."
5382,Sarkar,2018,163,6.7,21141,,,"['Action', ' Drama']",Not Rated,['A.R. Murugadoss'],"['Joseph Vijay', 'Keerthy Suresh', 'Yogi Babu'...","['An', 'NRI', 'businessman', 'learns', 'his', ..."
9263,The Little Stranger,2018,111,5.5,10423,67.0,710000.0,"['Drama', ' Horror', ' Mystery']",R,['Lenny Abrahamson'],"['Domhnall Gleeson', 'Will Poulter', 'Ruth Wil...","['After', 'a', 'doctor', 'is', 'called', 'to',..."
5401,Red Sparrow,2018,140,6.6,196091,53.0,46870000.0,"['Action', ' Drama', ' Thriller']",R,['Francis Lawrence'],"['Jennifer Lawrence', 'Joel Edgerton', 'Matthi...","['Ballerina', 'Dominika', 'Egorova', 'is', 're..."
2861,Gold,2018,151,7.3,14531,,,"['Drama', ' History', ' Sport']",Not Rated,['Reema Kagti'],"['Krishan Tandon', 'Hubertus Geller', 'Siddhar...","['The', 'journey', 'of', 'a', 'man', 'who', 'w..."
...,...,...,...,...,...,...,...,...,...,...,...,...
5924,Dog,2022,101,6.5,42798,61.0,,"['Comedy', ' Drama']",PG-13,"['Reid Carolin', 'Channing Tatum']","['Channing Tatum', 'Ryder McLaughlin', 'Aavi H...","['Two', 'former', 'Army', 'Rangers', 'are', 'p..."
5922,Minions: The Rise of Gru,2022,87,6.5,78997,56.0,369700000.0,"['Animation', ' Adventure', ' Comedy']",PG,"['Kyle Balda', 'Brad Ableson', 'Jonathan del V...","['Steve Carell', 'Pierre Coffin', 'Alan Arkin'...","['The', 'untold', 'story', 'of', 'one', ""twelv..."
5910,"Confess, Fletch",2022,98,6.5,16520,64.0,,"['Comedy', ' Crime', ' Mystery']",R,['Greg Mottola'],"['Jon Hamm', 'Caitlin Zerra Rose', 'Roy Wood J...","['After', 'becoming', 'the', 'prime', 'suspect..."
5849,Sonic the Hedgehog 2,2022,122,6.5,80046,47.0,190870000.0,"['Action', ' Adventure', ' Comedy']",PG,['Jeff Fowler'],"['James Marsden', 'Jim Carrey', 'Ben Schwartz'...","['When', 'the', 'manic', 'Dr.', 'Robotnik', 'r..."


In [13]:
import ast

genre_s = movies_in_range_df["Genre"]

genre_list = []

for genres in genre_s:
    lit_genres = ast.literal_eval(genres)
    for genre in lit_genres:
        genre_list.append(genre.strip())

genre_set = set(genre_list)

for genre in genre_set:
    print(genre)

Animation
Romance
Sci-Fi
Western
Biography
Mystery
Crime
Action
War
Horror
Comedy
Drama
Family
Sport
Thriller
Fantasy
Adventure
History
Music
Musical


#### Adding Genres to the genres table

In [14]:
id = 1

for genre in genre_set:
    cursor.execute('INSERT INTO genres (genre_id, genre_name) VALUES (?, ?)', (id, genre))
    id += 1
connection.commit()

In [15]:
cursor.execute('SELECT * FROM genres')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'Animation')
(2, 'Romance')
(3, 'Sci-Fi')
(4, 'Western')
(5, 'Biography')
(6, 'Mystery')
(7, 'Crime')
(8, 'Action')
(9, 'War')
(10, 'Horror')
(11, 'Comedy')
(12, 'Drama')
(13, 'Family')
(14, 'Sport')
(15, 'Thriller')
(16, 'Fantasy')
(17, 'Adventure')
(18, 'History')
(19, 'Music')
(20, 'Musical')


## Get All the Directors
Get all the directors from the DataFrame like was done with the genres and insert them into the directors table.

In [16]:
director_s = movies_in_range_df["Director"]

director_list = []

for director in director_s:
    lit_director = ast.literal_eval(director)
    for each in lit_director:
        director_list.append(each.strip())


director_set = set(director_list)

print(director_set)

{'Céline Sciamma', 'Trevor Nunn', 'Edward Berger', 'Andrea Berloff', 'Emmanuel Osei-Kuffour', 'Lila Neugebauer', 'Abhishek Sharma', 'Lenka Debiprasad', 'Shanker Raman', 'Charlie Kaufman', 'Conrad Vernon', 'Tomm Moore', 'Zack Snyder', 'Sean Durkin', 'Hernán Jiménez', 'Basil Joseph', 'Jesse V. Johnson', 'Matthijs van Heijningen Jr.', 'M. Night Shyamalan', 'Daniel Stamm', 'Danny Philippou', 'Bertie', 'Kleber Mendonça Filho', 'Qui Nguyen', 'Alex Hardcastle', 'Yorgos Lanthimos', 'Terrence Malick', 'Mukesh Chhabra', 'Trivikram Srinivas', 'Ali Abbasi', 'Clint Eastwood', 'Pablo Larraín', 'David Slade', 'Robert Connolly', 'Scott Derrickson', 'Peter Thorwarth', 'Sam Levinson', 'Bridget Savage Cole', 'David F. Sandberg', 'Gavin Hood', 'Andrew Fleming', 'Masashi Koizuka', 'Sukumar', 'Ross Stewart', 'David Yarovesky', 'Gustav Möller', 'Kiranraj K', 'Albert Hughes', 'Peggy Holmes', 'Kay Cannon', 'Edgar Wright', 'Mohit Suri', 'Ben Lewin', 'Luis Ortega', 'Danis Tanovic', 'Octavio E. Rodriguez', 'Anvit

### Adding Directors to the Directors Table

In [17]:
id = 1

for director in director_set:
    cursor.execute('INSERT INTO directors (director_id, director_name) VALUES (?, ?)', (id, director))
    id += 1
connection.commit()

In [18]:
cursor.execute('SELECT * FROM directors')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'Céline Sciamma')
(2, 'Trevor Nunn')
(3, 'Edward Berger')
(4, 'Andrea Berloff')
(5, 'Emmanuel Osei-Kuffour')
(6, 'Lila Neugebauer')
(7, 'Abhishek Sharma')
(8, 'Lenka Debiprasad')
(9, 'Shanker Raman')
(10, 'Charlie Kaufman')
(11, 'Conrad Vernon')
(12, 'Tomm Moore')
(13, 'Zack Snyder')
(14, 'Sean Durkin')
(15, 'Hernán Jiménez')
(16, 'Basil Joseph')
(17, 'Jesse V. Johnson')
(18, 'Matthijs van Heijningen Jr.')
(19, 'M. Night Shyamalan')
(20, 'Daniel Stamm')
(21, 'Danny Philippou')
(22, 'Bertie')
(23, 'Kleber Mendonça Filho')
(24, 'Qui Nguyen')
(25, 'Alex Hardcastle')
(26, 'Yorgos Lanthimos')
(27, 'Terrence Malick')
(28, 'Mukesh Chhabra')
(29, 'Trivikram Srinivas')
(30, 'Ali Abbasi')
(31, 'Clint Eastwood')
(32, 'Pablo Larraín')
(33, 'David Slade')
(34, 'Robert Connolly')
(35, 'Scott Derrickson')
(36, 'Peter Thorwarth')
(37, 'Sam Levinson')
(38, 'Bridget Savage Cole')
(39, 'David F. Sandberg')
(40, 'Gavin Hood')
(41, 'Andrew Fleming')
(42, 'Masashi Koizuka')
(43, 'Sukumar')
(44, 'Ross St

## Get all the Stars
Get all the stars from the DataFrame to insert into the stars table

In [19]:
star_s = movies_in_range_df["Stars"]

stars_list = []

for star in star_s:
    lit_star = ast.literal_eval(star)
    for each in lit_star:
        stars_list.append(each.strip())


stars_set = set(stars_list)

print(stars_set)

{'Nicholas Hoult', 'Kevin Janssens', 'Ronak Singh Chadha Berges', 'Jacqueline Fernandez', 'Sheila Vand', 'Maria Bock', 'Stephen Campbell Moore', 'Keenan Tracey', 'Zawe Ashton', 'Jason Isaacs', 'Dan Mintz', 'David Hayman', 'Alison Sudol', 'Hong Lu', 'Miles Teller', 'Cory Gruter-Andrew', 'Bhuvan Arora', 'Kasia Madera', 'Tabu', 'Cameron Seely', 'Stoya', 'Shahid Kapoor', 'Arnold Schwarzenegger', 'Malgorzata Bela', 'Colin Blumenau', 'Lin Shaye', 'Giles Matthey', 'Khary Payton', 'Ronny Chieng', "Beverly D'Angelo", 'Sofia Boutella', 'Minnie Driver', 'Rob Malone', 'Ke Huy Quan', 'Glynn Turman', 'Ben Platt', 'Carlease Burke', 'Adam Driver', 'Peter Mooney', 'Tom Glynn-Carney', 'Megumi Ogata', 'Teresa Ruiz', 'June Squibb', 'Taylor Schilling', 'Danny Huston', 'Parambrata Chattopadhyay', 'Michael Wincott', 'Mbulelo Grootboom', 'Felix Lemburo', 'Basil Joseph', 'Mouryaani', 'Hafsa Ashraf', 'Mason Gooding', 'Djebril Zonga', 'Angus Sampson', 'Radina Drandova', 'Jacques Colimon', 'Kalyani Priyadarshan',

#### Adding stars to the stars table

In [20]:
id = 1

for star in stars_set:
    cursor.execute('INSERT INTO stars (star_id, star_name) VALUES (?, ?)', (id, star))
    id += 1
connection.commit()

In [21]:
cursor.execute('SELECT * FROM stars')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'Nicholas Hoult')
(2, 'Kevin Janssens')
(3, 'Ronak Singh Chadha Berges')
(4, 'Jacqueline Fernandez')
(5, 'Sheila Vand')
(6, 'Maria Bock')
(7, 'Stephen Campbell Moore')
(8, 'Keenan Tracey')
(9, 'Zawe Ashton')
(10, 'Jason Isaacs')
(11, 'Dan Mintz')
(12, 'David Hayman')
(13, 'Alison Sudol')
(14, 'Hong Lu')
(15, 'Miles Teller')
(16, 'Cory Gruter-Andrew')
(17, 'Bhuvan Arora')
(18, 'Kasia Madera')
(19, 'Tabu')
(20, 'Cameron Seely')
(21, 'Stoya')
(22, 'Shahid Kapoor')
(23, 'Arnold Schwarzenegger')
(24, 'Malgorzata Bela')
(25, 'Colin Blumenau')
(26, 'Lin Shaye')
(27, 'Giles Matthey')
(28, 'Khary Payton')
(29, 'Ronny Chieng')
(30, "Beverly D'Angelo")
(31, 'Sofia Boutella')
(32, 'Minnie Driver')
(33, 'Rob Malone')
(34, 'Ke Huy Quan')
(35, 'Glynn Turman')
(36, 'Ben Platt')
(37, 'Carlease Burke')
(38, 'Adam Driver')
(39, 'Peter Mooney')
(40, 'Tom Glynn-Carney')
(41, 'Megumi Ogata')
(42, 'Teresa Ruiz')
(43, 'June Squibb')
(44, 'Taylor Schilling')
(45, 'Danny Huston')
(46, 'Parambrata Chattopadh

### Add the Movies to the imdb_movies Table
Now we will add all the Movies and the proper data to the imdb_movies table

In [22]:
import re

for index, row in movies_in_range_df.iterrows():
    # Declare a variable for each field and assign it to value from DF
    movie_id = index
    movie_name = re.sub(r'[^\w\s]', '',row["Movie Name"]).upper()
    year_released = row["Year of Release"]
    run_time = row["Run Time in minutes"]
    meta_score = row["MetaScore"]
    description = " ".join(ast.literal_eval(row["Description"]))
    certification = row["Certification"]
    print(f"{movie_id}, {movie_name}, {year_released}, {run_time}, {meta_score}, {description}, {certification}")
    cursor.execute('''INSERT INTO imdb_movies (imdb_movies_id, movie_name, year_released, run_time, meta_score, description, certification) 
                      VALUES (?, ?, ?, ?, ?, ?, ?)''', (movie_id, movie_name, year_released, run_time, meta_score, description, certification))
connection.commit()

    # Make movie names with all caps and no special characters to match with other datasets
    # Use INSERT statement to add values to the database

8947, THE PACKAGE, 2018, 94, nan, When a group of teenage friends go on a spring break camping trip, an unfortunate accident sets off a race against time to save their friend's most prized possession., TV-MA
5382, SARKAR, 2018, 163, nan, An NRI businessman learns his vote has been cast by someone else and decides to investigate the matter, eventually finding himself pitted against two corrupt politicians., Not Rated
9263, THE LITTLE STRANGER, 2018, 111, 67.0, After a doctor is called to visit a crumbling manor, strange things begin to occur., R
5401, RED SPARROW, 2018, 140, 53.0, Ballerina Dominika Egorova is recruited to 'Sparrow School,' a Russian intelligence service where she is forced to use her body as a weapon. Her first mission, targeting a C.I.A. agent, threatens to unravel the security of both nations., R
2861, GOLD, 2018, 151, nan, The journey of a man who was instrumental in making India win its first Olympic gold medal as a free nation., Not Rated
2860, MULK, 2018, 140, na

In [23]:
cursor.execute('SELECT * FROM imdb_movies')
rows = cursor.fetchall()
for row in rows:
    print(row)

(10, 'DAMAN', '2022', 121, None, "The film is set in 2015. Sid, is a young doctor who completed his MBBS has been posted to the cut-off area of Malkangiri district Odisha having 151 villages & infamous for Naxals dominance & no basic facilities. It's a real life tale of perseverance & dedication.", None)
(12, 'DEMON SLAYER KIMETSU NO YAIBA  TSUZUMI MANSION ARC', '2021', 87, None, 'Tanjiro ventures to the south-southeast where he encounters a cowardly young man named Zenitsu Agatsuma. He is a fellow survivor from Final Selection and his sparrow asks Tanjiro to help keep him in line.', None)
(22, 'JAI BHIM', '2021', 164, None, 'When a tribal man is arrested for a case of alleged theft, his wife turns to a human-rights lawyer to help bring justice.', 'TV-MA')
(24, '777 CHARLIE', '2022', 164, None, 'Dharma is stuck in a rut with his negative and lonely lifestyle and spends each day in the comfort of his loneliness. A pup named Charlie enters his life and gives him a new perspective towards

## Get IDs from each movie and Directors to insert into movie_directors table
Loop through the pandas dataframe holding all of the data, get the ID of the movie and the ID of the director and insert into the movie_directors. Will have to loop through the directors since it is a list inside of the DataFrame and individually get the id of each from the directors table. 

In [24]:
movie_director_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    directors = ast.literal_eval(row["Director"])
    for director in directors:
        cursor.execute("SELECT director_id FROM directors WHERE director_name = ?", (director.strip(),))
        dir_ids = cursor.fetchall()
        for dir_id in dir_ids:
            cursor.execute("INSERT INTO movie_directors VALUES (?,?,?)", (movie_director_index, movie_id, dir_id[0]))
            movie_director_index += 1

    connection.commit()
        

In [25]:
cursor.execute("SELECT * FROM movie_directors")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 619)
(2, 5382, 383)
(3, 9263, 1192)
(4, 5401, 801)
(5, 2861, 602)
(6, 2860, 796)
(7, 5423, 902)
(8, 2842, 833)
(9, 7570, 148)
(10, 8878, 455)
(11, 5431, 1006)
(12, 8160, 1262)
(13, 2807, 1281)
(14, 9199, 62)
(15, 5466, 1285)
(16, 7534, 597)
(17, 5472, 629)
(18, 8207, 254)
(19, 2676, 65)
(20, 2676, 828)
(21, 7533, 1240)
(22, 2593, 1132)
(23, 7531, 1282)
(24, 2562, 361)
(25, 9213, 224)
(26, 5491, 1211)
(27, 7588, 668)
(28, 5373, 863)
(29, 3165, 1039)
(30, 5162, 371)
(31, 5166, 88)
(32, 5200, 819)
(33, 3114, 446)
(34, 3113, 1009)
(35, 3111, 163)
(36, 5206, 214)
(37, 5206, 437)
(38, 9285, 185)
(39, 5241, 270)
(40, 2915, 1012)
(41, 7605, 470)
(42, 5308, 286)
(43, 3044, 561)
(44, 5315, 1018)
(45, 3032, 393)
(46, 8871, 544)
(47, 9270, 397)
(48, 5343, 726)
(49, 5343, 60)
(50, 9624, 271)
(51, 8148, 1304)
(52, 9269, 905)
(53, 8125, 1298)
(54, 7615, 647)
(55, 2544, 526)
(56, 2539, 1122)
(57, 7510, 1305)
(58, 5630, 898)
(59, 5647, 1091)
(60, 2260, 935)
(61, 9687, 408)
(62, 5708, 244)
(63

## Get IDs from Movies and Stars to insert into movie_stars Table
Loop through the movies table and stars table to get their ids and then insert them into a new row in the movie_stars table.

In [26]:
movie_star_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    stars = ast.literal_eval(row["Stars"])
    for star in stars:
        cursor.execute("SELECT star_id FROM stars WHERE star_name = ?", (star.strip(),))
        star_ids = cursor.fetchall()
        for star_id in star_ids:
            cursor.execute("INSERT INTO movie_stars VALUES (?,?,?)", (movie_star_index, movie_id, star_id[0]))
            movie_star_index += 1
        
    connection.commit()

In [27]:
cursor.execute("SELECT * FROM movie_stars")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 2076)
(2, 8947, 3055)
(3, 8947, 381)
(4, 8947, 1790)
(5, 5382, 3150)
(6, 5382, 2623)
(7, 5382, 2169)
(8, 5382, 1115)
(9, 9263, 3644)
(10, 9263, 3216)
(11, 9263, 3283)
(12, 9263, 2174)
(13, 5401, 2338)
(14, 5401, 1295)
(15, 5401, 1998)
(16, 5401, 3258)
(17, 2861, 318)
(18, 2861, 1504)
(19, 2861, 2095)
(20, 2861, 886)
(21, 2860, 169)
(22, 2860, 3013)
(23, 2860, 3679)
(24, 2860, 559)
(25, 5423, 2082)
(26, 5423, 1425)
(27, 5423, 3686)
(28, 5423, 2153)
(29, 2842, 1883)
(30, 2842, 1018)
(31, 2842, 609)
(32, 2842, 3710)
(33, 7570, 1307)
(34, 7570, 1311)
(35, 7570, 1482)
(36, 7570, 1200)
(37, 8878, 1275)
(38, 8878, 693)
(39, 8878, 1285)
(40, 8878, 2100)
(41, 5431, 2045)
(42, 5431, 2321)
(43, 5431, 1693)
(44, 5431, 1456)
(45, 8160, 1103)
(46, 8160, 61)
(47, 8160, 2121)
(48, 8160, 2233)
(49, 2807, 696)
(50, 2807, 3185)
(51, 2807, 1486)
(52, 2807, 934)
(53, 9199, 2142)
(54, 9199, 1117)
(55, 9199, 1671)
(56, 9199, 3297)
(57, 5466, 2001)
(58, 5466, 1169)
(59, 5466, 78)
(60, 5466, 462)
(61

## Get IDs from Movies and Genres and Place them into the movie_genres table
Get the id from the imdb_movies table and each genre and place them in rows in the movie_genres table

In [28]:
movie_genre_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    genres = ast.literal_eval(row["Genre"])
    for genre in genres:
        cursor.execute("SELECT genre_id FROM genres WHERE genre_name = ?", (genre.strip(),))
        genre_ids = cursor.fetchall()
        #print(genre_ids)
        for genre_id in genre_ids:
            cursor.execute("INSERT INTO movie_genres VALUES (?,?,?)", (movie_genre_index, movie_id, genre_id[0]))
            movie_genre_index += 1
        
    connection.commit()

In [29]:
cursor.execute("SELECT * FROM movie_genres")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 11)
(2, 8947, 15)
(3, 5382, 8)
(4, 5382, 12)
(5, 9263, 12)
(6, 9263, 10)
(7, 9263, 6)
(8, 5401, 8)
(9, 5401, 12)
(10, 5401, 15)
(11, 2861, 12)
(12, 2861, 18)
(13, 2861, 14)
(14, 2860, 12)
(15, 5423, 8)
(16, 5423, 17)
(17, 5423, 3)
(18, 2842, 11)
(19, 2842, 12)
(20, 7570, 12)
(21, 7570, 6)
(22, 7570, 3)
(23, 8878, 7)
(24, 8878, 12)
(25, 8878, 2)
(26, 5431, 10)
(27, 5431, 6)
(28, 5431, 3)
(29, 8160, 12)
(30, 8160, 6)
(31, 8160, 15)
(32, 2807, 1)
(33, 2807, 8)
(34, 2807, 17)
(35, 9199, 10)
(36, 9199, 15)
(37, 5466, 11)
(38, 5466, 20)
(39, 5466, 2)
(40, 7534, 8)
(41, 7534, 17)
(42, 7534, 7)
(43, 5472, 8)
(44, 5472, 15)
(45, 8207, 12)
(46, 8207, 10)
(47, 8207, 6)
(48, 2676, 5)
(49, 2676, 12)
(50, 2676, 13)
(51, 7533, 11)
(52, 7533, 12)
(53, 7533, 2)
(54, 2593, 12)
(55, 2593, 2)
(56, 2593, 9)
(57, 7531, 8)
(58, 7531, 17)
(59, 7531, 3)
(60, 2562, 17)
(61, 2562, 12)
(62, 2562, 15)
(63, 9213, 8)
(64, 9213, 11)
(65, 9213, 7)
(66, 5491, 5)
(67, 5491, 12)
(68, 5491, 18)
(69, 7588, 7)
(70

## Close Connection to the Database

In [30]:
connection.close()