# Setting Up The SQLite Server
Below is the code to import the sqlite3 library that is included with the Python standard library package.

In [1]:
import sqlite3

Next is the code to create/connect with the database

In [2]:
connection = sqlite3.connect('movies.db')

Now the next code will create a cursor object so we can execute SQL statements

In [3]:
cursor = connection.cursor()

## Drop Tables
Drop all the tables from the database to start fresh.

In [4]:
cursor.execute('DROP TABLE IF EXISTS movie_reviews')

<sqlite3.Cursor at 0x1fdfb0017c0>

## Create Table for Rotten Tomato Movie Reviews and call it movie_reviews

In [5]:
cursor.execute('''CREATE TABLE IF NOT EXISTS movie_reviews
               (
                    movies_id INTEGER PRIMARY KEY NOT NULL,
                    movie_name VARCHAR(100) NOT NULL,
                    audience_score INTEGER, 
                    tomato_meter INTEGER
                )''')

<sqlite3.Cursor at 0x1fdfb0017c0>

## Use pandas to clean up our data
Using pandas we will clean up our data to only have movies released from 2018 to 2022

In [6]:
# import pandas
import pandas as pd

# path to data file
path_to_file = "./datasets/rotten-tomatoes/rotten_tomatoes_movies.csv"

# open csv file
movie_reviews_df = pd.read_csv(path_to_file)

movie_reviews_df.index.name = 'review_id'

# get movies before 2023 and after 2017
movies_afer_2018 = movie_reviews_df["releaseDateStreaming"] >= '2018-01-01'
movies_before_2023 = movie_reviews_df["releaseDateStreaming"] <= '2022-12-31'

reviews_in_range_df = movie_reviews_df[movies_afer_2018 & movies_before_2023]
reviews_in_range_df = reviews_in_range_df.sort_values(by="releaseDateStreaming")

# Get rid of rows that have no audience score and tomato meter score
cleaned_reviews_df = reviews_in_range_df.dropna(subset=['audienceScore', 'tomatoMeter'])

cleaned_reviews_df

Unnamed: 0_level_0,id,title,audienceScore,tomatoMeter,rating,ratingContents,releaseDateTheaters,releaseDateStreaming,runtimeMinutes,genre,originalLanguage,director,writer,boxOffice,distributor,soundMix
review_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
30251,short_time,Short Time,54.0,29.0,,,,2018-01-01,97.0,"Comedy, Drama",English,Gregg Champion,,$3.5M,,Stereo
86042,the_rehearsal,The Rehearsal,44.0,83.0,,,,2018-01-01,102.0,Comedy,English,Alison Maclean,"Alison Maclean,Emily Perkins",,,
84545,without_a_trace,Without a Trace,75.0,86.0,,,,2018-01-01,120.0,"Drama, Lgbtq+",English,Stanley R. Jaffe,,,,
2576,godmonster_of_indian_flats,Godmonster of Indian Flats,31.0,83.0,,,1973-12-31,2018-01-02,89.0,Horror,English,Fredric Hobbs,Fredric Hobbs,,,
128891,future_38_2017,Future '38,71.0,75.0,,,,2018-01-02,75.0,"Romance, Comedy, Sci-fi",English,Jamie Greenberg,Jamie Greenberg,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125048,broadway_rising,Broadway Rising,100.0,78.0,,,2022-12-05,2022-12-27,93.0,"Drama, Documentary",English,Amy Rice,,,Vertical Entertainment,
78369,7_women_and_a_murder,7 Women and a Murder,41.0,50.0,,,,2022-12-28,82.0,"Comedy, Drama",Italian,Alessandro Genovesi,Lisa Nur Sultan,,,
105999,white_noise_2022,White Noise,31.0,63.0,R,['Brief Violence and Language'],2022-11-25,2022-12-30,136.0,"Comedy, Drama, Horror",English,Noah Baumbach,Noah Baumbach,,Netflix,Dolby Digital
116768,armageddon_time,Armageddon Time,48.0,76.0,R,"['Some Drug Use Involving Minors', 'Language']",2022-11-04,2022-12-30,114.0,Drama,English,James Gray,James Gray,$1.8M,Focus Features,Dolby Digital


## Take Data from DataFrame and insert the Data into the Database Table

In [7]:
# import package for regular expressions
import re

for index, row in cleaned_reviews_df.iterrows():
    movie_review_id = index
    movie_name = re.sub(r'[^\w\s]', '',row['title']).upper()
    audience_score = int(row['audienceScore'])
    tomato_meter = int(row['tomatoMeter'])
    print(movie_review_id, ', ', movie_name, ', ', audience_score, ', ', tomato_meter)
    cursor.execute('INSERT INTO movie_reviews(movies_id, movie_name, audience_score, tomato_meter) VALUES(?, ?, ?, ?)', (movie_review_id, movie_name, audience_score, tomato_meter))

connection.commit()

30251 ,  SHORT TIME ,  54 ,  29
86042 ,  THE REHEARSAL ,  44 ,  83
84545 ,  WITHOUT A TRACE ,  75 ,  86
2576 ,  GODMONSTER OF INDIAN FLATS ,  31 ,  83
128891 ,  FUTURE 38 ,  71 ,  75
126088 ,  PSYCHOPATHS ,  14 ,  24
130572 ,  WHAT HAPPENED TO MONDAY ,  67 ,  59
135564 ,  THE LIGHT OF THE MOON ,  93 ,  97
98469 ,  BREATHE ,  72 ,  68
46239 ,  THE HOUSES OCTOBER BUILT 2 ,  43 ,  13
24452 ,  BRADS STATUS ,  55 ,  79
47079 ,  UNA ,  50 ,  76
52417 ,  CHAVELA ,  81 ,  95
19142 ,  SHOCK WAVE ,  57 ,  67
82810 ,  A QUESTION OF FAITH ,  77 ,  40
128076 ,  HAPPY DEATH DAY ,  67 ,  71
17231 ,  SYLVIO ,  70 ,  87
87893 ,  WONDERSTRUCK ,  55 ,  68
129708 ,  FUTURE 38 ,  71 ,  75
133412 ,  SNAKE IN THE EAGLES SHADOW ,  82 ,  80
134063 ,  THE STOLEN ,  25 ,  17
134099 ,  THE SNOWMAN ,  18 ,  6
130843 ,  THE HOUSE NEXT DOOR ,  54 ,  100
7099 ,  REBEL IN THE RYE ,  64 ,  29
11677 ,  THE ADVENTURERS ,  24 ,  14
6943 ,  BOBBI JENE ,  67 ,  61
2660 ,  MAY IT LAST A PORTRAIT OF THE AVETT BROTHERS ,  98 ,

In [8]:
cursor.execute('SELECT * FROM movie_reviews')
review_rows = cursor.fetchall()
for review in review_rows:
    print(review)

(5, 'ADRIFT', 65, 69)
(13, 'LEAP OF FAITH WILLIAM FRIEDKIN ON THE EXORCIST', 86, 93)
(103, 'VIOLET', 56, 85)
(106, 'SMALL TOWN WISCONSIN', 88, 83)
(132, 'THE MAN WHO KILLED DON QUIXOTE', 60, 66)
(146, 'ROBIN HOOD', 40, 15)
(162, 'THINK LIKE A DOG', 52, 70)
(175, 'SEEING ALLRED', 77, 100)
(207, 'ELEPHANTS', 95, 83)
(256, 'HOSPITALITY', 40, 17)
(265, 'THE ADVENT CALENDAR', 63, 81)
(273, 'WEST SIDE STORY', 93, 91)
(288, 'PADDLETON', 86, 89)
(294, 'L FOR LEISURE', 67, 88)
(360, 'HES OUT THERE', 32, 43)
(361, 'THE EDGE OF DEMOCRACY', 68, 97)
(409, 'LUPIN III THE FIRST', 87, 95)
(453, 'THE SONATA', 64, 48)
(468, 'JURASSIC WORLD DOMINION', 77, 29)
(492, 'THE AGRONOMIST', 88, 94)
(504, 'PERFECT BID THE CONTESTANT WHO KNEW TOO MUCH', 57, 100)
(524, 'SKATE KITCHEN', 76, 89)
(528, 'GHOST', 75, 100)
(529, 'CIELO', 82, 80)
(599, 'LOVE SARAH', 61, 61)
(627, 'THIRST', 52, 100)
(630, 'GOODBYE CHRISTOPHER ROBIN', 71, 64)
(653, 'LAMB', 61, 86)
(675, 'DOWNHILL', 14, 36)
(707, 'TIGER 24', 95, 100)
(710, '

In [9]:
connection.close()