# Setting Up the SQLite Database
The code below imports the `sqlite3` module, which is part of the Python standard library, so nothing extra needs to be installed.

In [None]:
import sqlite3

Next, connect to the database. The `movies.db` file is created if it does not already exist.

In [None]:
# Open the movies.db SQLite file (the path is relative to the current working
# directory); this connection is reused by every cell that follows.
connection = sqlite3.connect('movies.db')

Now the next code will create a cursor object so we can execute SQL statements

In [None]:
# Cursor bound to the connection; the later cells run all of their SQL through it.
cursor = connection.cursor()

## Drop Tables
Drop the `movie_reviews` table (the only table this notebook creates) if it already exists, so we start fresh.

In [None]:
# IF EXISTS keeps this from failing on a database that has no movie_reviews table yet.
cursor.execute('DROP TABLE IF EXISTS movie_reviews')

## Create the `movie_reviews` Table for Rotten Tomatoes Movie Reviews

In [None]:
# Schema for the reviews table: movies_id is the primary key, movie_name is
# required, and the two score columns may be left NULL.
create_movie_reviews_sql = '''CREATE TABLE IF NOT EXISTS movie_reviews
               (
                    movies_id INTEGER PRIMARY KEY NOT NULL,
                    movie_name VARCHAR(100) NOT NULL,
                    audience_score INTEGER, 
                    tomato_meter INTEGER
                )'''

# IF NOT EXISTS makes the statement safe to run when the table is already there.
cursor.execute(create_movie_reviews_sql)

## Use pandas to clean up our data
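Using pandas, we keep only the movies whose streaming release date falls between 2018-01-01 and 2022-12-31 (inclusive), and then drop any rows that are missing an audience score or a Tomatometer score.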

In [None]:
# pandas does the CSV loading and row filtering
import pandas as pd

# location of the Rotten Tomatoes movies export
path_to_file = "./datasets/rotten-tomatoes/rotten_tomatoes_movies.csv"

# load the CSV and give the default row index a name
movie_reviews_df = pd.read_csv(path_to_file)
movie_reviews_df.index.name = 'review_id'

# Keep rows whose streaming release date lies in 2018-2022, both ends included.
# NOTE(review): the bounds are plain strings, so this assumes the column holds
# ISO "YYYY-MM-DD" text that sorts chronologically - confirm against the CSV.
streamed_2018_to_2022 = movie_reviews_df["releaseDateStreaming"].between('2018-01-01', '2022-12-31')

# filter, then order from the earliest streaming release to the latest
reviews_in_range_df = movie_reviews_df[streamed_2018_to_2022].sort_values(by="releaseDateStreaming")

# a row is only useful when both scores are present
score_columns = ['audienceScore', 'tomatoMeter']
cleaned_reviews_df = reviews_in_range_df.dropna(subset=score_columns)

# last expression of the cell, so the notebook displays the cleaned frame
cleaned_reviews_df

## Take Data from DataFrame and insert the Data into the Database Table

In [None]:
# import package for regular expressions
import re

# Compiled once, outside the loop: matches every character that is neither a
# word character nor whitespace, i.e. the punctuation removed from titles.
punctuation_pattern = re.compile(r'[^\w\s]')

insert_sql = 'INSERT INTO movie_reviews(movies_id, movie_name, audience_score, tomato_meter) VALUES(?, ?, ?, ?)'

# Build every row first so a bad value fails before anything is written.
rows_to_insert = []
for index, row in cleaned_reviews_df.iterrows():
    title = row['title']
    # Only the score columns went through dropna, so a title can still be
    # missing (NaN is a float). re.sub would raise TypeError on it and
    # movie_name is declared NOT NULL, so such rows are skipped explicitly.
    if not isinstance(title, str):
        print('Skipping row', index, '- no title')
        continue
    movie_review_id = index
    # strip punctuation and upper-case the title before storing it
    movie_name = punctuation_pattern.sub('', title).upper()
    audience_score = int(row['audienceScore'])
    tomato_meter = int(row['tomatoMeter'])
    print(movie_review_id, ', ', movie_name, ', ', audience_score, ', ', tomato_meter)
    rows_to_insert.append((movie_review_id, movie_name, audience_score, tomato_meter))

# Used as a context manager, the connection commits when the block succeeds
# and rolls back if an insert raises, so a failure cannot leave a partial
# batch pending on the open connection.
with connection:
    cursor.executemany(insert_sql, rows_to_insert)

In [None]:
# Read back every stored row to confirm the inserts landed, one tuple per line.
review_rows = cursor.execute('SELECT * FROM movie_reviews').fetchall()
for stored_review in review_rows:
    print(stored_review)

In [None]:
# Done with the database: release the connection opened at the top of the notebook.
connection.close()