# Setting Up The SQLite Server
Below is the code to import the sqlite3 library that is included with the Python standard library package.

In [1]:
import sqlite3

Next is the code to create/connect with the database

In [2]:
connection = sqlite3.connect('movies.db')

Now the next code will create a cursor object so we can execute SQL statements

In [3]:
cursor = connection.cursor()

## Drop Tables
Drop all the tables from the database to start fresh.

In [4]:
cursor.execute('DROP TABLE IF EXISTS movie_stars')
cursor.execute('DROP TABLE IF EXISTS stars')
cursor.execute('DROP TABLE IF EXISTS movie_genres')
cursor.execute('DROP TABLE IF EXISTS genres')
cursor.execute('DROP TABLE IF EXISTS movie_directors')
cursor.execute('DROP TABLE IF EXISTS directors')
cursor.execute('DROP TABLE IF EXISTS imdb_movies')
cursor.execute('DROP TABLE IF EXISTS certifications')

<sqlite3.Cursor at 0x267fee61340>

## Create Tables 

### Main Table From imdb dataset (imdb_movies)

In [5]:
cursor.execute('''CREATE TABLE IF NOT EXISTS imdb_movies
               (
                    imdb_movies_id INTEGER PRIMARY KEY NOT NULL, 
                    movie_name VARCHAR(100) NOT NULL, 
                    year_released TEXT NOT NULL,
                    run_time INTEGER NOT NULL,
                    meta_score INTEGER,
                    description TEXT NOT NULL,
                    certification VARCHAR(25)
                )''')

<sqlite3.Cursor at 0x267fee61340>

### Create table for the stars

In [6]:
cursor.execute('''CREATE TABLE IF NOT EXISTS stars
               (
                    star_id INTEGER PRIMARY KEY NOT NULL, 
                    star_name VARCHAR(100) 
                )''')

<sqlite3.Cursor at 0x267fee61340>

### Make table that connects movies and stars
We are connecting movies and stars in a separate table because there can be multiple stars per movie and each star can be in multiple movies. Since there is a many-to-many relationship between the movies and stars tables, we need to make a new table that contains the id from the movies table and the id from the stars table. This will also be the same with movie directors and genres.

In [7]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_stars
               (
                    movie_star_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    star_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (star_id) REFERENCES stars(star_id)
                )''')

<sqlite3.Cursor at 0x267fee61340>

### Make table for Directors

In [8]:
cursor.execute('''CREATE TABLE IF NOT EXISTS directors
               (
                    director_id INTEGER PRIMARY KEY NOT NULL, 
                    director_name VARCHAR(100) 
                )''')

<sqlite3.Cursor at 0x267fee61340>

### Make a table that connects Movies and Directors
We need to create a table that connects movies and directors like we did with movies and stars

In [9]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_directors
               (
                    movie_director_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    director_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (director_id) REFERENCES directors(director_id)
                )''')

<sqlite3.Cursor at 0x267fee61340>

### Create table containing all the different genres

In [10]:
cursor.execute('''CREATE TABLE IF NOT EXISTS genres
               (
                    genre_id INTEGER PRIMARY KEY NOT NULL, 
                    genre_name VARCHAR(20) 
                )''')

<sqlite3.Cursor at 0x267fee61340>

### Make a table connecting movies and genres
Like we did with movies and stars, and movies and directors, we will make a table connecting the movie values with the genre values.

In [11]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_genres
               (
                    movie_genre_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                    movie_id INTEGER NOT NULL,
                    genre_id INTEGER NOT NULL,
                    FOREIGN KEY (movie_id) REFERENCES imdb_movies(imdb_movies_id)
                    FOREIGN KEY (genre_id) REFERENCES genres(genre_id)
                )''')

<sqlite3.Cursor at 0x267fee61340>

## Use pandas to clean up our data
Using pandas we will clean up our data to only have movies released from 2018 to 2022

In [12]:
# import csv package
import pandas as pd

# path to our movie data file
path_to_data = './datasets/imdb/imdb-data.csv'

# open the csv file
imdb_df = pd.read_csv(path_to_data, index_col=0)

movies_afer_2018 = imdb_df["Year of Release"] >= 2018
movies_before_2023 = imdb_df["Year of Release"] <= 2022

movies_in_range_df = imdb_df[movies_afer_2018 & movies_before_2023]
movies_in_range_df = movies_in_range_df.sort_values(by="Year of Release")

movies_in_range_df

Unnamed: 0,Movie Name,Year of Release,Run Time in minutes,Movie Rating,Votes,MetaScore,Gross,Genre,Certification,Director,Stars,Description
8947,The Package,2018,94,5.6,20342,,,"['Comedy', ' Thriller']",TV-MA,['Jake Szymanski'],"['Daniel Doheny', 'Geraldine Viswanathan', 'Sa...","['When', 'a', 'group', 'of', 'teenage', 'frien..."
5382,Sarkar,2018,163,6.7,21141,,,"['Action', ' Drama']",Not Rated,['A.R. Murugadoss'],"['Joseph Vijay', 'Keerthy Suresh', 'Yogi Babu'...","['An', 'NRI', 'businessman', 'learns', 'his', ..."
9263,The Little Stranger,2018,111,5.5,10423,67.0,710000.0,"['Drama', ' Horror', ' Mystery']",R,['Lenny Abrahamson'],"['Domhnall Gleeson', 'Will Poulter', 'Ruth Wil...","['After', 'a', 'doctor', 'is', 'called', 'to',..."
5401,Red Sparrow,2018,140,6.6,196091,53.0,46870000.0,"['Action', ' Drama', ' Thriller']",R,['Francis Lawrence'],"['Jennifer Lawrence', 'Joel Edgerton', 'Matthi...","['Ballerina', 'Dominika', 'Egorova', 'is', 're..."
2861,Gold,2018,151,7.3,14531,,,"['Drama', ' History', ' Sport']",Not Rated,['Reema Kagti'],"['Krishan Tandon', 'Hubertus Geller', 'Siddhar...","['The', 'journey', 'of', 'a', 'man', 'who', 'w..."
...,...,...,...,...,...,...,...,...,...,...,...,...
5924,Dog,2022,101,6.5,42798,61.0,,"['Comedy', ' Drama']",PG-13,"['Reid Carolin', 'Channing Tatum']","['Channing Tatum', 'Ryder McLaughlin', 'Aavi H...","['Two', 'former', 'Army', 'Rangers', 'are', 'p..."
5922,Minions: The Rise of Gru,2022,87,6.5,78997,56.0,369700000.0,"['Animation', ' Adventure', ' Comedy']",PG,"['Kyle Balda', 'Brad Ableson', 'Jonathan del V...","['Steve Carell', 'Pierre Coffin', 'Alan Arkin'...","['The', 'untold', 'story', 'of', 'one', ""twelv..."
5910,"Confess, Fletch",2022,98,6.5,16520,64.0,,"['Comedy', ' Crime', ' Mystery']",R,['Greg Mottola'],"['Jon Hamm', 'Caitlin Zerra Rose', 'Roy Wood J...","['After', 'becoming', 'the', 'prime', 'suspect..."
5849,Sonic the Hedgehog 2,2022,122,6.5,80046,47.0,190870000.0,"['Action', ' Adventure', ' Comedy']",PG,['Jeff Fowler'],"['James Marsden', 'Jim Carrey', 'Ben Schwartz'...","['When', 'the', 'manic', 'Dr.', 'Robotnik', 'r..."


In [13]:
import ast

genre_s = movies_in_range_df["Genre"]

genre_list = []

for genres in genre_s:
    lit_genres = ast.literal_eval(genres)
    for genre in lit_genres:
        genre_list.append(genre.strip())

genre_set = set(genre_list)

for genre in genre_set:
    print(genre)

History
Comedy
Thriller
Western
Action
Biography
Adventure
Musical
Horror
Music
Sci-Fi
Animation
Romance
Crime
Sport
Mystery
Family
Drama
Fantasy
War


#### Adding Genres to the genres table

In [14]:
id = 1

for genre in genre_set:
    cursor.execute('INSERT INTO genres (genre_id, genre_name) VALUES (?, ?)', (id, genre))
    id += 1

In [15]:
cursor.execute('SELECT * FROM genres')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'History')
(2, 'Comedy')
(3, 'Thriller')
(4, 'Western')
(5, 'Action')
(6, 'Biography')
(7, 'Adventure')
(8, 'Musical')
(9, 'Horror')
(10, 'Music')
(11, 'Sci-Fi')
(12, 'Animation')
(13, 'Romance')
(14, 'Crime')
(15, 'Sport')
(16, 'Mystery')
(17, 'Family')
(18, 'Drama')
(19, 'Fantasy')
(20, 'War')


## Get All the Directors
Get all the directors from the DataFrame like was done with the genres and insert them into the directors table.

In [16]:
director_s = movies_in_range_df["Director"]

director_list = []

for director in director_s:
    lit_director = ast.literal_eval(director)
    for each in lit_director:
        director_list.append(each.strip())


director_set = set(director_list)

print(director_set)

{'Anna Foerster', 'Lasse Hallström', 'Radha Krishna Jagarlamudi', 'Brad Peyton', 'Drew Pearce', 'Gerard Bush', 'Cory Evans', 'Lakshya Raj Anand', 'Taika Waititi', 'Albert Calleros', 'Reid Carolin', 'Brett Haley', 'Eléonore Pourriat', 'Michael Sucsy', 'Predrag Antonijevic', 'Kristina Buozyte', 'Sung-hee Jo', 'Charles Martin Smith', 'Tanya Wexler', 'Yagmur Taylan', 'Erik Poppe', 'Brandon Trost', 'José Padilha', 'Anurag Singh', 'Casey Affleck', 'Jeremiah Zagar', 'Declan Lawn', 'Kenji Nagasaki', 'Jessica M. Thompson', 'Rodrigo Sorogoyen', 'Garth Jennings', 'Chandra Prakash Dwivedi', 'Otto Bathurst', 'Shaka King', 'Aanand L. Rai', 'Mike Newell', 'Marjane Satrapi', 'Nadine Labaki', 'A.R. Murugadoss', 'Kookie Gulati', 'Joel Edgerton', 'Wes Ball', 'Jeff Fowler', 'Ryûsuke Hamaguchi', 'William Eubank', 'Evan Spiliotopoulos', 'Hirokazu Koreeda', 'Judd Apatow', 'Lin Oeding', 'Peter Facinelli', 'Anthony Byrne', 'Johannes Roberts', 'Steven Brill', 'Tomm Moore', 'Adam Wingard', 'Tetsurô Araki', 'Ánge

### Adding Directors to the Directors Table

In [17]:
id = 1

for director in director_set:
    cursor.execute('INSERT INTO directors (director_id, director_name) VALUES (?, ?)', (id, director))
    id += 1

In [18]:
cursor.execute('SELECT * FROM directors')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'Anna Foerster')
(2, 'Lasse Hallström')
(3, 'Radha Krishna Jagarlamudi')
(4, 'Brad Peyton')
(5, 'Drew Pearce')
(6, 'Gerard Bush')
(7, 'Cory Evans')
(8, 'Lakshya Raj Anand')
(9, 'Taika Waititi')
(10, 'Albert Calleros')
(11, 'Reid Carolin')
(12, 'Brett Haley')
(13, 'Eléonore Pourriat')
(14, 'Michael Sucsy')
(15, 'Predrag Antonijevic')
(16, 'Kristina Buozyte')
(17, 'Sung-hee Jo')
(18, 'Charles Martin Smith')
(19, 'Tanya Wexler')
(20, 'Yagmur Taylan')
(21, 'Erik Poppe')
(22, 'Brandon Trost')
(23, 'José Padilha')
(24, 'Anurag Singh')
(25, 'Casey Affleck')
(26, 'Jeremiah Zagar')
(27, 'Declan Lawn')
(28, 'Kenji Nagasaki')
(29, 'Jessica M. Thompson')
(30, 'Rodrigo Sorogoyen')
(31, 'Garth Jennings')
(32, 'Chandra Prakash Dwivedi')
(33, 'Otto Bathurst')
(34, 'Shaka King')
(35, 'Aanand L. Rai')
(36, 'Mike Newell')
(37, 'Marjane Satrapi')
(38, 'Nadine Labaki')
(39, 'A.R. Murugadoss')
(40, 'Kookie Gulati')
(41, 'Joel Edgerton')
(42, 'Wes Ball')
(43, 'Jeff Fowler')
(44, 'Ryûsuke Hamaguchi')
(45,

## Get all the Stars
Get all the stars from the DataFrame to insert into the stars table

In [19]:
star_s = movies_in_range_df["Stars"]

stars_list = []

for star in star_s:
    lit_star = ast.literal_eval(star)
    for each in lit_star:
        stars_list.append(each.strip())


stars_set = set(stars_list)

print(stars_set)

{'Omari Hardwick', "Jason O'Mara", 'Robyn Nevin', 'Liv Tyler', 'Jada Alberts', 'Addison Christie', 'Fanny Ardant', 'Karl Urban', 'Zafer Algöz', 'Emily Ratajkowski', 'Luàna Bajrami', 'Alex Hassell', 'Rachael Leigh Cook', 'Hannah John-Kamen', 'Vondie Curtis-Hall', 'Ed Skrein', 'Aleksandra Konieczna', 'Park Shin-Hye', 'Sadhana', 'Natalia Azahara', 'Joe Cole', 'Travis Tritt', 'Liam Hemsworth', 'Anna Hopkins', 'Harrison Ford', 'Bruna Cusí', 'Louise Orry-Diquéro', 'Kishore Kumar G.', 'Rihanna', 'Kate Hudson', 'Ki Hong Lee', 'Taika Waititi', 'Nora Navas', 'Kirin Kiki', 'Anthony Hopkins', 'Thuso Mbedu', 'Liza Koshy', 'Adèle Haenel', 'Harshvardhan Rane', 'Caoilfhionn Dunne', 'Diana Pozharskaya', 'Stephen Graham', 'Tracy Letts', 'Aidan Turner', 'Sadia Khateeb', 'Raj Arjun', 'Rebecca Romijn', 'Kelly Asbury', 'Nasim Pedrad', 'Jordan Fisher', 'Alba Baptista', 'Rafael Casal', 'Emily Meade', 'David Crowley', 'Taylor Nichols', 'Stéphane Varupenne', 'Alexandra Daddario', 'Carlos Peralta', 'Maria Thelma

#### Adding stars to the stars table

In [20]:
id = 1

for star in stars_set:
    cursor.execute('INSERT INTO stars (star_id, star_name) VALUES (?, ?)', (id, star))
    id += 1

In [21]:
cursor.execute('SELECT * FROM stars')
rows = cursor.fetchall()
for row in rows:
    print(row)

(1, 'Omari Hardwick')
(2, "Jason O'Mara")
(3, 'Robyn Nevin')
(4, 'Liv Tyler')
(5, 'Jada Alberts')
(6, 'Addison Christie')
(7, 'Fanny Ardant')
(8, 'Karl Urban')
(9, 'Zafer Algöz')
(10, 'Emily Ratajkowski')
(11, 'Luàna Bajrami')
(12, 'Alex Hassell')
(13, 'Rachael Leigh Cook')
(14, 'Hannah John-Kamen')
(15, 'Vondie Curtis-Hall')
(16, 'Ed Skrein')
(17, 'Aleksandra Konieczna')
(18, 'Park Shin-Hye')
(19, 'Sadhana')
(20, 'Natalia Azahara')
(21, 'Joe Cole')
(22, 'Travis Tritt')
(23, 'Liam Hemsworth')
(24, 'Anna Hopkins')
(25, 'Harrison Ford')
(26, 'Bruna Cusí')
(27, 'Louise Orry-Diquéro')
(28, 'Kishore Kumar G.')
(29, 'Rihanna')
(30, 'Kate Hudson')
(31, 'Ki Hong Lee')
(32, 'Taika Waititi')
(33, 'Nora Navas')
(34, 'Kirin Kiki')
(35, 'Anthony Hopkins')
(36, 'Thuso Mbedu')
(37, 'Liza Koshy')
(38, 'Adèle Haenel')
(39, 'Harshvardhan Rane')
(40, 'Caoilfhionn Dunne')
(41, 'Diana Pozharskaya')
(42, 'Stephen Graham')
(43, 'Tracy Letts')
(44, 'Aidan Turner')
(45, 'Sadia Khateeb')
(46, 'Raj Arjun')
(47, 

### Add the Movies to the imdb_movies Table
Now we will add all the Movies and the proper data to the imdb_movies table

In [22]:
import re

for index, row in movies_in_range_df.iterrows():
    # Declare a variable for each field and assign it to value from DF
    movie_id = index
    movie_name = re.sub(r'[^\w\s]', '',row["Movie Name"]).upper()
    year_released = row["Year of Release"]
    run_time = row["Run Time in minutes"]
    meta_score = row["MetaScore"]
    description = " ".join(ast.literal_eval(row["Description"]))
    certification = row["Certification"]
    print(f"{movie_id}, {movie_name}, {year_released}, {run_time}, {meta_score}, {description}, {certification}")
    cursor.execute('''INSERT INTO imdb_movies (imdb_movies_id, movie_name, year_released, run_time, meta_score, description, certification) 
                      VALUES (?, ?, ?, ?, ?, ?, ?)''', (movie_id, movie_name, year_released, run_time, meta_score, description, certification))

    # Make movie names with all caps and no special characters to match with other datasets
    # Use INSERT statement to add values to the database

8947, THE PACKAGE, 2018, 94, nan, When a group of teenage friends go on a spring break camping trip, an unfortunate accident sets off a race against time to save their friend's most prized possession., TV-MA
5382, SARKAR, 2018, 163, nan, An NRI businessman learns his vote has been cast by someone else and decides to investigate the matter, eventually finding himself pitted against two corrupt politicians., Not Rated
9263, THE LITTLE STRANGER, 2018, 111, 67.0, After a doctor is called to visit a crumbling manor, strange things begin to occur., R
5401, RED SPARROW, 2018, 140, 53.0, Ballerina Dominika Egorova is recruited to 'Sparrow School,' a Russian intelligence service where she is forced to use her body as a weapon. Her first mission, targeting a C.I.A. agent, threatens to unravel the security of both nations., R
2861, GOLD, 2018, 151, nan, The journey of a man who was instrumental in making India win its first Olympic gold medal as a free nation., Not Rated
2860, MULK, 2018, 140, na

In [23]:
cursor.execute('SELECT * FROM imdb_movies')
rows = cursor.fetchall()
for row in rows:
    print(row)

(10, 'DAMAN', '2022', 121, None, "The film is set in 2015. Sid, is a young doctor who completed his MBBS has been posted to the cut-off area of Malkangiri district Odisha having 151 villages & infamous for Naxals dominance & no basic facilities. It's a real life tale of perseverance & dedication.", None)
(12, 'DEMON SLAYER KIMETSU NO YAIBA  TSUZUMI MANSION ARC', '2021', 87, None, 'Tanjiro ventures to the south-southeast where he encounters a cowardly young man named Zenitsu Agatsuma. He is a fellow survivor from Final Selection and his sparrow asks Tanjiro to help keep him in line.', None)
(22, 'JAI BHIM', '2021', 164, None, 'When a tribal man is arrested for a case of alleged theft, his wife turns to a human-rights lawyer to help bring justice.', 'TV-MA')
(24, '777 CHARLIE', '2022', 164, None, 'Dharma is stuck in a rut with his negative and lonely lifestyle and spends each day in the comfort of his loneliness. A pup named Charlie enters his life and gives him a new perspective towards

## Get IDs from each movie and Directors to insert into movie_directors table
Loop through the pandas dataframe holding all of the data, get the ID of the movie and the ID of the director and insert into the movie_directors. Will have to loop through the directors since it is a list inside of the DataFrame and individually get the id of each from the directors table. 

In [24]:
movie_director_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    directors = ast.literal_eval(row["Director"])
    for director in directors:
        cursor.execute("SELECT director_id FROM directors WHERE director_name = ?", (director.strip(),))
        dir_ids = cursor.fetchall()
        for dir_id in dir_ids:
            cursor.execute("INSERT INTO movie_directors VALUES (?,?,?)", (movie_director_index, movie_id, dir_id[0]))
            movie_director_index += 1
        

In [25]:
cursor.execute("SELECT * FROM movie_directors")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 1167)
(2, 5382, 39)
(3, 9263, 111)
(4, 5401, 587)
(5, 2861, 651)
(6, 2860, 972)
(7, 5423, 743)
(8, 2842, 840)
(9, 7570, 686)
(10, 8878, 724)
(11, 5431, 622)
(12, 8160, 206)
(13, 2807, 28)
(14, 9199, 89)
(15, 5466, 1052)
(16, 7534, 719)
(17, 5472, 1118)
(18, 8207, 527)
(19, 2676, 1270)
(20, 2676, 466)
(21, 7533, 1268)
(22, 2593, 36)
(23, 7531, 4)
(24, 2562, 59)
(25, 9213, 1212)
(26, 5491, 668)
(27, 7588, 261)
(28, 5373, 730)
(29, 3165, 415)
(30, 5162, 1311)
(31, 5166, 694)
(32, 5200, 911)
(33, 3114, 243)
(34, 3113, 928)
(35, 3111, 615)
(36, 5206, 1042)
(37, 5206, 1274)
(38, 9285, 224)
(39, 5241, 783)
(40, 2915, 439)
(41, 7605, 5)
(42, 5308, 726)
(43, 3044, 557)
(44, 5315, 69)
(45, 3032, 520)
(46, 8871, 323)
(47, 9270, 1079)
(48, 5343, 1038)
(49, 5343, 1035)
(50, 9624, 766)
(51, 8148, 1255)
(52, 9269, 1305)
(53, 8125, 334)
(54, 7615, 154)
(55, 2544, 482)
(56, 2539, 779)
(57, 7510, 381)
(58, 5630, 1064)
(59, 5647, 1216)
(60, 2260, 838)
(61, 9687, 873)
(62, 5708, 103)
(63, 5708, 

## Get IDs from Movies and Stars to insert into movie_stars Table
Loop through the movies table and stars table to get their ids and then insert them into a new row in the movie_stars table.

In [26]:
movie_star_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    stars = ast.literal_eval(row["Stars"])
    for star in stars:
        cursor.execute("SELECT star_id FROM stars WHERE star_name = ?", (star.strip(),))
        star_ids = cursor.fetchall()
        for star_id in star_ids:
            cursor.execute("INSERT INTO movie_stars VALUES (?,?,?)", (movie_star_index, movie_id, star_id[0]))
            movie_star_index += 1

In [27]:
cursor.execute("SELECT * FROM movie_stars")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 1858)
(2, 8947, 88)
(3, 8947, 1466)
(4, 8947, 1927)
(5, 5382, 3168)
(6, 5382, 2232)
(7, 5382, 3289)
(8, 5382, 92)
(9, 9263, 2358)
(10, 9263, 2170)
(11, 9263, 1998)
(12, 9263, 1252)
(13, 5401, 2485)
(14, 5401, 99)
(15, 5401, 144)
(16, 5401, 1560)
(17, 2861, 1447)
(18, 2861, 3188)
(19, 2861, 1802)
(20, 2861, 1228)
(21, 2860, 666)
(22, 2860, 2851)
(23, 2860, 2886)
(24, 2860, 3584)
(25, 5423, 1180)
(26, 5423, 2004)
(27, 5423, 1521)
(28, 5423, 3240)
(29, 2842, 155)
(30, 2842, 717)
(31, 2842, 3734)
(32, 2842, 2444)
(33, 7570, 1454)
(34, 7570, 2441)
(35, 7570, 2709)
(36, 7570, 2599)
(37, 8878, 1673)
(38, 8878, 2700)
(39, 8878, 94)
(40, 8878, 1430)
(41, 5431, 414)
(42, 5431, 2443)
(43, 5431, 1543)
(44, 5431, 2915)
(45, 8160, 3643)
(46, 8160, 1150)
(47, 8160, 2937)
(48, 8160, 3309)
(49, 2807, 3778)
(50, 2807, 3561)
(51, 2807, 2196)
(52, 2807, 726)
(53, 9199, 3209)
(54, 9199, 2510)
(55, 9199, 3202)
(56, 9199, 111)
(57, 5466, 2939)
(58, 5466, 992)
(59, 5466, 138)
(60, 5466, 3238)
(61, 7

## Get IDs from Movies and Genres and Place them into the movie_genres table
Get the id from the imdb_movies table and each genre and place them in rows in the movie_genres table

In [28]:
movie_genre_index = 1

for index, row in movies_in_range_df.iterrows():
    movie_id = index
    genres = ast.literal_eval(row["Genre"])
    for genre in genres:
        cursor.execute("SELECT genre_id FROM genres WHERE genre_name = ?", (genre.strip(),))
        genre_ids = cursor.fetchall()
        #print(genre_ids)
        for genre_id in genre_ids:
            cursor.execute("INSERT INTO movie_genres VALUES (?,?,?)", (movie_genre_index, movie_id, genre_id[0]))
            movie_genre_index += 1

In [29]:
cursor.execute("SELECT * FROM movie_genres")
rows = cursor.fetchall()

for row in rows:
    print(row)

(1, 8947, 2)
(2, 8947, 3)
(3, 5382, 5)
(4, 5382, 18)
(5, 9263, 18)
(6, 9263, 9)
(7, 9263, 16)
(8, 5401, 5)
(9, 5401, 18)
(10, 5401, 3)
(11, 2861, 18)
(12, 2861, 1)
(13, 2861, 15)
(14, 2860, 18)
(15, 5423, 5)
(16, 5423, 7)
(17, 5423, 11)
(18, 2842, 2)
(19, 2842, 18)
(20, 7570, 18)
(21, 7570, 16)
(22, 7570, 11)
(23, 8878, 14)
(24, 8878, 18)
(25, 8878, 13)
(26, 5431, 9)
(27, 5431, 16)
(28, 5431, 11)
(29, 8160, 18)
(30, 8160, 16)
(31, 8160, 3)
(32, 2807, 12)
(33, 2807, 5)
(34, 2807, 7)
(35, 9199, 9)
(36, 9199, 3)
(37, 5466, 2)
(38, 5466, 8)
(39, 5466, 13)
(40, 7534, 5)
(41, 7534, 7)
(42, 7534, 14)
(43, 5472, 5)
(44, 5472, 3)
(45, 8207, 18)
(46, 8207, 9)
(47, 8207, 16)
(48, 2676, 6)
(49, 2676, 18)
(50, 2676, 17)
(51, 7533, 2)
(52, 7533, 18)
(53, 7533, 13)
(54, 2593, 18)
(55, 2593, 13)
(56, 2593, 20)
(57, 7531, 5)
(58, 7531, 7)
(59, 7531, 11)
(60, 2562, 7)
(61, 2562, 18)
(62, 2562, 3)
(63, 9213, 5)
(64, 9213, 2)
(65, 9213, 14)
(66, 5491, 6)
(67, 5491, 18)
(68, 5491, 1)
(69, 7588, 14)
(70, 75