In [28]:
# Importing all the libraries I'll need
import os  # Gives access to environment variables
import time  # Used to measure how long certain tasks take

import mysql.connector  # Connects Python to my MySQL database
import pandas as pd  # Helps me easily handle and analyse my data
from tabulate import tabulate  # Formats my data into clear, readable tables

# Connecting to my MySQL database.
# Each setting can be overridden through an environment variable so the
# script can run against another server without editing the source. The
# fallbacks are the original values, so existing runs behave the same.
conn = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    # SECURITY: a real password should not live in source code. Prefer setting
    # MYSQL_PASSWORD; the literal fallback is kept only for backward
    # compatibility and should be removed (and the password rotated).
    password=os.environ.get("MYSQL_PASSWORD", "Aliza.123"),
    database=os.environ.get("MYSQL_DATABASE", "bookstore_db"),
)

# Creating a cursor to interact with the database
cursor = conn.cursor()

# Ensuring the table for authors exists
cursor.execute("""
    CREATE TABLE IF NOT EXISTS authors (
        AuthorID INT AUTO_INCREMENT PRIMARY KEY,  # Unique identifier for each author
        AuthorName VARCHAR(255) UNIQUE  # Ensures no duplicate author names
    )
""")

# Ensuring the table for books exists
cursor.execute("""
    CREATE TABLE IF NOT EXISTS books (
        BookID INT AUTO_INCREMENT PRIMARY KEY,  # Unique identifier for each book
        Title VARCHAR(255) UNIQUE  # Each book title is unique
    )
""")

# Ensuring the bookstore table exists. It references both tables above, so
# it has to be created after them.
cursor.execute("""
    CREATE TABLE IF NOT EXISTS bookstore (
        StoreID INT AUTO_INCREMENT PRIMARY KEY,  # Unique identifier for each entry in the bookstore
        BookID INT,  # Links to a specific book
        AuthorID INT,  # Links to an author
        Price DECIMAL(10,2),  # Price of the book
        Rating FLOAT,  # Rating given to the book
        FOREIGN KEY (BookID) REFERENCES books(BookID),  # Linking to books table
        FOREIGN KEY (AuthorID) REFERENCES authors(AuthorID)  # Linking to authors table
    )
""")
conn.commit()  # Saves these changes in the database

print("✅ Database and tables are ready!")

# Loading book data from a CSV file into a DataFrame.
# The load below reads the Title, Author, Price and Rating columns.
df = pd.read_csv("amazon_data_engineering_books.csv")

# Only rows that have every required field are loaded; incomplete rows are
# skipped rather than inserted with NULLs.
required_columns = ["Title", "Author", "Rating", "Price"]
complete_rows = df.dropna(subset=required_columns)

# Now, I'll insert this data into the MySQL tables.
# Everything is written in a single transaction: one commit at the end instead
# of three per row, and a rollback if anything fails part-way through so a
# broken run does not leave a half-loaded database behind.
try:
    for _, row in complete_rows.iterrows():
        # Inserting the author; the UNIQUE key on AuthorName makes
        # INSERT IGNORE skip names that already exist.
        cursor.execute(
            "INSERT IGNORE INTO authors (AuthorName) VALUES (%s)",
            (row['Author'],),
        )

        # Retrieving the AuthorID to link with books
        cursor.execute(
            "SELECT AuthorID FROM authors WHERE AuthorName = %s",
            (row['Author'],),
        )
        author_id = cursor.fetchone()[0]

        # Inserting the book title; the UNIQUE key on Title makes
        # INSERT IGNORE skip titles that already exist.
        cursor.execute(
            "INSERT IGNORE INTO books (Title) VALUES (%s)",
            (row['Title'],),
        )

        # Getting the BookID to link with the bookstore entry
        cursor.execute(
            "SELECT BookID FROM books WHERE Title = %s",
            (row['Title'],),
        )
        book_id = cursor.fetchone()[0]

        # The bookstore table has no unique key besides its auto-increment
        # StoreID, so INSERT IGNORE cannot detect duplicates there and every
        # re-run of this script would add all rows again. Check explicitly
        # whether this (book, author) pair is already stored.
        cursor.execute(
            "SELECT 1 FROM bookstore WHERE BookID = %s AND AuthorID = %s LIMIT 1",
            (book_id, author_id),
        )
        already_stored = cursor.fetchone() is not None

        if not already_stored:
            # Inserting book details into the bookstore table
            cursor.execute("""
                INSERT INTO bookstore (BookID, AuthorID, Price, Rating)
                VALUES (%s, %s, %s, %s)
            """, (book_id, author_id, row['Price'], row['Rating']))

    conn.commit()  # Saves all entries to the database in one go
except Exception:
    # Undo the partial load, then let the original error surface.
    conn.rollback()
    raise

print("✅ Data successfully inserted into MySQL!")

# Writing an SQL query to find books priced below $50.
# It joins each bookstore entry to its book title and author name.
sql_query = """
    SELECT authors.AuthorName, books.Title, bookstore.Rating, bookstore.Price
    FROM bookstore
    JOIN books ON bookstore.BookID = books.BookID
    JOIN authors ON bookstore.AuthorID = authors.AuthorID
    WHERE bookstore.Price < 50
"""

try:
    # Measuring how quickly this query executes.
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() is a wall clock that can be too coarse for
    # sub-millisecond measurements and can jump if the system clock changes.
    start_time = time.perf_counter()  # Starting the timer
    cursor.execute(sql_query)  # Running the query
    sql_results = cursor.fetchall()  # Getting all results
    sql_time = time.perf_counter() - start_time  # Elapsed time, fetch included

    # Turning query results into a neat DataFrame; the column labels follow
    # the order of the SELECT list above.
    sql_results_df = pd.DataFrame(
        sql_results, columns=["Author", "Title", "Rating", "Price"]
    )

    # Showing how long the query took to run
    print(f"📌 MySQL Query Execution Time: {sql_time:.5f} seconds")

    # Displaying the query results clearly
    print(tabulate(sql_results_df, headers='keys', tablefmt='psql'))
finally:
    # Closing connections to free up resources, even if the query or the
    # printing above raised an error.
    cursor.close()
    conn.close()


✅ Database and tables are ready!
✅ Data successfully inserted into MySQL!
📌 MySQL Query Execution Time: 0.00057 seconds
+----+----------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+---------+
|    | Author                                                                           | Title                                                                                                                                                                              |   Rating |   Price |
|----+----------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+---------|
|  0