In [2]:
import os
import time  # For measuring query execution time

import mysql.connector
import pandas as pd
from tabulate import tabulate

# Connect to MySQL.
# Connection settings can be overridden through environment variables so that
# credentials do not have to live in the source file. The fallbacks are the
# values this script originally hard-coded, so it keeps working unchanged when
# no variables are set.
# NOTE(review): the fallback password is still committed in plain text; rotate
# it and drop the default once MYSQL_PASSWORD is set wherever this runs.
conn = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", "Aliza.123"),
    database=os.environ.get("MYSQL_DATABASE", "bookstore_db"),
)

# Buffered cursor: per the Connector/Python docs, rows are fetched from the
# server as soon as a statement executes, so the script can issue the next
# statement without first draining the previous result set.
cursor = conn.cursor(buffered=True)

# Schema guard: look for a PublicationDate column on the books table and add
# it (as VARCHAR(50)) when the lookup returns no row
cursor.execute("SHOW COLUMNS FROM books LIKE 'PublicationDate'")
column_info = cursor.fetchone()
if not column_info:
    cursor.execute("ALTER TABLE books ADD COLUMN PublicationDate VARCHAR(50)")
    conn.commit()
    print("✅ Added missing column: PublicationDate")

# Read the book catalogue and strip stray whitespace from every header name,
# so the column lookups further down match exactly
df = pd.read_csv("amazon_data_engineering_books.csv").rename(columns=str.strip)

# Header of the CSV column holding the publication date (must match the file)
publication_date_column = "Publication Date"

# Import the CSV rows into MySQL without creating duplicates.
#
# For every row that has all required fields, this looks up (or inserts) the
# author and the book, backfills PublicationDate on an existing book whose
# value is NULL, and links the pair in the bookstore table if that link is not
# already present.
#
# The import is committed once, after the last row, and rolled back if any row
# raises. With Connector/Python's default autocommit=False this makes the load
# all-or-nothing (on a transactional storage engine) and avoids a commit
# round-trip after every statement.
required_columns = ['Title', 'Author', 'Rating', 'Price', publication_date_column]

try:
    for _, row in df.iterrows():
        # Skip rows that are missing any field the tables need
        if row[required_columns].isnull().any():
            continue

        title = row['Title']
        author_name = row['Author']
        publication_date = row[publication_date_column]

        # Insert Author if not exists
        cursor.execute("SELECT AuthorID FROM authors WHERE AuthorName = %s", (author_name,))
        author = cursor.fetchone()
        if not author:
            cursor.execute("INSERT INTO authors (AuthorName) VALUES (%s)", (author_name,))
            # Read the generated key back; the uncommitted row is visible to
            # this same connection
            cursor.execute("SELECT AuthorID FROM authors WHERE AuthorName = %s", (author_name,))
            author = cursor.fetchone()
        author_id = author[0]

        # Insert Book with Publication Date, ensuring it is updated if missing
        cursor.execute("SELECT BookID, PublicationDate FROM books WHERE Title = %s", (title,))
        book = cursor.fetchone()

        if not book:
            # ON DUPLICATE KEY UPDATE only takes effect if books has a unique
            # key that collides (e.g. on Title) - schema not visible here
            cursor.execute("""
                INSERT INTO books (Title, PublicationDate)
                VALUES (%s, %s)
                ON DUPLICATE KEY UPDATE PublicationDate = VALUES(PublicationDate);
            """, (title, publication_date))
            cursor.execute("SELECT BookID FROM books WHERE Title = %s", (title,))
            book_id = cursor.fetchone()[0]
        else:
            book_id, existing_pub_date = book
            # Update if publication date is missing
            if existing_pub_date is None:
                cursor.execute("""
                    UPDATE books SET PublicationDate = %s WHERE BookID = %s;
                """, (publication_date, book_id))

        # Prevent duplicate bookstore entries
        cursor.execute("""
            SELECT StoreID FROM bookstore WHERE BookID = %s AND AuthorID = %s;
        """, (book_id, author_id))
        if cursor.fetchone() is None:
            cursor.execute("""
                INSERT INTO bookstore (BookID, AuthorID, Price, Rating)
                VALUES (%s, %s, %s, %s);
            """, (book_id, author_id, row['Price'], row['Rating']))

    # Persist the whole import in one go
    conn.commit()
except Exception:
    # Undo everything written so far, then let the original error surface
    conn.rollback()
    raise

print("✅ Data successfully updated in MySQL!")

# Report query: one line per bookstore entry with its author, title,
# publication date, rating and price, highest-rated first
query = """
    SELECT authors.AuthorName, books.Title, books.PublicationDate, bookstore.Rating, bookstore.Price
    FROM bookstore
    JOIN books ON bookstore.BookID = books.BookID
    JOIN authors ON bookstore.AuthorID = authors.AuthorID
    ORDER BY bookstore.Rating DESC;
"""

# Time the query with perf_counter(): it is monotonic and uses the highest
# resolution clock available, whereas time.time() is a wall clock that can jump
# and may be too coarse for a sub-millisecond measurement
start_time = time.perf_counter()

# Execute the query and pull every row, so the timing covers both steps
cursor.execute(query)
results = cursor.fetchall()

# Elapsed seconds for execute + fetch
execution_time = time.perf_counter() - start_time

# Convert to DataFrame; labels follow the SELECT column order
df_results = pd.DataFrame(results, columns=["Author", "Title", "Publication Date", "Rating", "Price"])

# Display execution time
print(f"📌 MySQL Query Execution Time: {execution_time:.5f} seconds")

# Display updated results as a psql-style table (DataFrame index included)
print(tabulate(df_results, headers='keys', tablefmt='psql'))

# Release database resources: the cursor first, then the connection
for handle in (cursor, conn):
    handle.close()


✅ Data successfully updated in MySQL!
📌 MySQL Query Execution Time: 0.00074 seconds
+-----+----------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+----------+---------+
|     | Author                                                                           | Title                                                                                                                                                                              | Publication Date   |   Rating |   Price |
|-----+----------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------