In [37]:
import pandas as pd
import mysql.connector as mysql

def connect_to_mysql(host, user, password, database):
    """
    Open a MySQL connection and hand it back to the caller.

    Args:
        host: Server host name passed to the connector.
        user: Account name used to log in.
        password: Password for that account.
        database: Schema selected for the session.

    Returns:
        The connection object produced by ``mysql.connect``.

    Raises:
        mysql.Error: Re-raised unchanged after the failure is printed.
    """
    settings = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
    }
    try:
        handle = mysql.connect(**settings)
    except mysql.Error as err:
        # Report the failure, then let the caller decide what to do with it.
        print("Error connecting to MySQL:", err)
        raise
    else:
        print("Connection to MySQL established successfully.")
        return handle

def _none_if_missing(value):
    """Map a missing CSV cell (None, NaN/NA, or the text "nan") to None."""
    if value is None:
        return None
    if isinstance(value, str):
        return None if value == "nan" else value
    # Remaining cells are scalars; pd.isna recognises float NaN, pd.NA and NaT.
    return None if pd.isna(value) else value


def _parse_literal_list(raw):
    """Parse a cell holding a Python-literal list; missing or empty cells give []."""
    import ast

    raw = _none_if_missing(raw)
    if raw is None or not raw.strip():
        return []
    # SECURITY: this used to be eval(), which executes whatever text the CSV
    # contains. literal_eval only accepts plain literals (lists, dicts,
    # strings, numbers) and raises on anything else.
    parsed = ast.literal_eval(raw)
    return parsed if parsed else []


def _lookup_or_insert(cursor, select_query, insert_query, name):
    """Return the id of the row named `name`, inserting it first if absent."""
    cursor.execute(select_query, (name,))
    found = cursor.fetchone()
    if not found:
        cursor.execute(insert_query, (name,))
        # Re-select on the same cursor to read back the id of the new row.
        cursor.execute(select_query, (name,))
        found = cursor.fetchone()
    return found['id'] if found else None


def _type_name_for(first_genre_name):
    """Derive the media type name from the first genre name (may be None)."""
    if first_genre_name == "TV Movie":
        return "Television"
    if first_genre_name == "Documentary":
        return "Documentary"
    return "Movie"


def populate_media_table(csv_file, connection):
    """
    Reads a CSV file and inserts data into the MySQL `media` table.

    For every CSV row, up to three genres and one type are looked up in (or
    added to) the `genre` and `type` tables, and one row is inserted into
    `media`. Everything is committed once at the end; on any error the
    transaction is rolled back and the exception is re-raised.

    Args:
        csv_file: Path (or anything ``pandas.read_csv`` accepts) of a CSV with
            the columns ``title``, ``original_title``, ``genres``,
            ``production_companies``, ``release_date``, ``vote_average`` and
            ``vote_count``. ``genres`` and ``production_companies`` hold
            Python-literal lists of dicts that have a ``name`` key.
        connection: Open database connection providing
            ``cursor(dictionary=True)``, ``commit()`` and ``rollback()``.

    Raises:
        Exception: Any parsing or database error, re-raised after rollback.
    """
    # Load CSV into a Pandas DataFrame
    data = pd.read_csv(csv_file)

    # Replace null values in 'title' with 'original_title'
    data['title'] = data['title'].fillna(data['original_title'])

    # Prepare the MySQL queries
    insert_media_query = """
    INSERT INTO media (type, genre, genre2, genre3, date_released, studio, name, full_average, total_reviews)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    insert_genre_query = """
    INSERT IGNORE INTO genre (name) VALUES (%s)
    """

    insert_type_query = """
    INSERT IGNORE INTO type (name) VALUES (%s)
    """

    select_genre_query = "SELECT id FROM genre WHERE name = %s"
    select_type_query = "SELECT id FROM type WHERE name = %s"

    # Bound before the try so the error handler can always refer to it, even
    # when the failure happens before the first row is read.
    row = None
    try:
        with connection.cursor(dictionary=True) as cursor:
            for serial, (_, row) in enumerate(data.iterrows()):
                genres = _parse_literal_list(row['genres'])

                # Genre IDs and names, padded with None (maximum 3)
                genre_ids = [None, None, None]
                genre_names = [None, None, None]
                for i, genre in enumerate(genres[:3]):
                    genre_name = genre['name']
                    genre_id = _lookup_or_insert(
                        cursor, select_genre_query, insert_genre_query, genre_name
                    )
                    if genre_id is not None:
                        genre_ids[i] = genre_id
                        genre_names[i] = genre_name

                # Resolve the type for EVERY row. Rows without genres fall
                # through to "Movie" instead of reusing the previous row's
                # type_id (or failing on the first row).
                type_id = _lookup_or_insert(
                    cursor,
                    select_type_query,
                    insert_type_query,
                    _type_name_for(genre_names[0]),
                )

                # Use the first studio's name only; None when the row has no
                # studios, rather than the previous row's studio.
                studios = _parse_literal_list(row['production_companies'])
                first_studio_name = studios[0]['name'] if studios else None

                # Insert into the media table, turning NaN cells into NULLs
                cursor.execute(insert_media_query, (
                    type_id,
                    genre_ids[0],
                    genre_ids[1],
                    genre_ids[2],
                    _none_if_missing(row['release_date']),
                    first_studio_name,
                    _none_if_missing(row['title']),
                    _none_if_missing(row['vote_average']),
                    _none_if_missing(row['vote_count']),
                ))

                print("Row " + str(serial) + " inserted.")
        # Single commit for the whole import. The lookups above run on the
        # same connection, so they see rows it has just inserted without an
        # intermediate commit, and on a transactional engine rollback() can
        # then undo a failed import completely.
        connection.commit()
        print("Data successfully inserted into MySQL.")
    except Exception as e:
        connection.rollback()
        print("Error inserting data:", e)
        if row is not None:
            print("Problematic row: " + str(row))
        raise

def main():
    """
    Load the movies CSV into MySQL using settings taken from the environment.

    Environment variables (all optional):
        MYSQL_HOST: Server host, default ``localhost``.
        MYSQL_USER: Login name, default ``root``.
        MYSQL_PASSWORD: Login password; prompted for interactively when unset.
        MYSQL_DATABASE: Schema name, default ``cp317_schema``.
        MOVIES_CSV: Path of the CSV file to import.

    The connection is always closed, even when the import fails.
    """
    import getpass
    import os

    # MySQL connection parameters
    host = os.environ.get("MYSQL_HOST", "localhost")
    user = os.environ.get("MYSQL_USER", "root")
    database = os.environ.get("MYSQL_DATABASE", "cp317_schema")

    # The password is deliberately NOT stored in source code: take it from the
    # environment, or ask for it without echoing it to the screen.
    password = os.environ.get("MYSQL_PASSWORD")
    if password is None:
        password = getpass.getpass("MySQL password for " + user + "@" + host + ": ")

    # Path to your CSV file
    csv_file = os.environ.get(
        "MOVIES_CSV",
        r"C:\Users\zachr\OneDrive\Desktop\CP317 - Software Engineering\movies_data.csv",
    )

    # Connect to MySQL and insert data
    connection = connect_to_mysql(host, user, password, database)
    try:
        populate_media_table(csv_file, connection)
    finally:
        connection.close()

# Run the import only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Connection to MySQL established successfully.
Row 0 inserted.
Row 1 inserted.
Row 2 inserted.
Row 3 inserted.
Row 4 inserted.
Row 5 inserted.
Row 6 inserted.
Row 7 inserted.
Row 8 inserted.
Row 9 inserted.
Row 10 inserted.
Row 11 inserted.
Row 12 inserted.
Row 13 inserted.
Row 14 inserted.
Row 15 inserted.
Row 16 inserted.
Row 17 inserted.
Row 18 inserted.
Row 19 inserted.
Row 20 inserted.
Row 21 inserted.
Row 22 inserted.
Row 23 inserted.
Row 24 inserted.
Row 25 inserted.
Row 26 inserted.
Row 27 inserted.
Row 28 inserted.
Row 29 inserted.
Row 30 inserted.
Row 31 inserted.
Row 32 inserted.
Row 33 inserted.
Row 34 inserted.
Row 35 inserted.
Row 36 inserted.
Row 37 inserted.
Row 38 inserted.
Row 39 inserted.
Row 40 inserted.
Row 41 inserted.
Row 42 inserted.
Row 43 inserted.
Row 44 inserted.
Row 45 inserted.
Row 46 inserted.
Row 47 inserted.
Row 48 inserted.
Row 49 inserted.
Row 50 inserted.
Row 51 inserted.
Row 52 inserted.
Row 53 inserted.
Row 54 inserted.
Row 55 inserted.
Row 56 inser