In [1]:
import sqlite3
import mysql.connector
import pandas as pd
from mysql.connector import MySQLConnection, Error

# Function to connect to MySQL database
def make_connection():
    """Open a connection to the class MySQL database.

    Connection settings are taken from the environment variables
    ``QUERYCREW_DB_HOST``, ``QUERYCREW_DB_NAME``, ``QUERYCREW_DB_USER`` and
    ``QUERYCREW_DB_PASSWORD``. Any variable that is unset falls back to the
    value that was previously hard-coded here, so existing usage keeps working.

    Returns:
        An open ``MySQLConnection`` on success, otherwise ``None`` (the reason
        is printed).
    """
    import os  # local import keeps this definition self-contained

    # NOTE(review): the fallback password is a secret committed to source.
    # Prefer setting QUERYCREW_DB_PASSWORD, then rotate and remove the literal.
    settings = {
        'host': os.environ.get('QUERYCREW_DB_HOST', 'IES-ADS-ClassDB.sjsu.edu'),
        'database': os.environ.get('QUERYCREW_DB_NAME', 'querycrew_db'),
        'user': os.environ.get('QUERYCREW_DB_USER', 'querycrew_user'),
        'password': os.environ.get('QUERYCREW_DB_PASSWORD', 'Pomegranate_746'),
    }

    conn = None
    try:
        conn = MySQLConnection(**settings)

        if conn.is_connected():
            print('Connected to the MySQL database!')
            return conn

        # The object was created but the server link is not usable; report it
        # instead of silently returning None.
        print('Connection failed: server did not report an active connection.')

    except Error as e:
        print('Connection failed:', e)

    # Failure path: release a half-open connection object, if one was created.
    if conn is not None:
        try:
            conn.close()
        except Error:
            # Best-effort cleanup only; the original failure was already reported.
            pass
    return None

# Establish database connection
conn = make_connection()
if conn is None:
    print("Database connection failed. Exiting...")
    # `exit()` is an interactive-shell helper and is not meant for programs;
    # raising SystemExit stops execution here and signals failure with a
    # non-zero status.
    raise SystemExit(1)

# Shared cursor used by the cells below.
cursor = conn.cursor()

Connected to the MySQL database!


In [2]:
# 1. INSERT INTO SELECT with CASE

# Define the source `vehicle` table; the statement is a no-op when the table
# already exists.
create_vehicle_sql = '''
CREATE TABLE IF NOT EXISTS vehicle (
    vin VARCHAR(17) NOT NULL PRIMARY KEY,
    model_year INT NOT NULL,
    base_msrp DECIMAL(10, 2),
    make VARCHAR(100) NOT NULL,
    model VARCHAR(100) NOT NULL
);
'''
cursor.execute(create_vehicle_sql)
conn.commit()
print("Vehicle table created successfully.")

# Insert sample data.
# The statement is an upsert keyed on `vin`: when a VIN already exists, every
# non-key column is refreshed from the sample row (not just model_year), so
# re-running this cell always leaves the sample vehicles in the state listed
# here.
cursor.execute('''
INSERT INTO vehicle (vin, model_year, base_msrp, make, model)
VALUES 
    ('1HGCM82633A123456', 2022, 25000.00, 'Toyota', 'Corolla'),
    ('1HGCM82633A654321', 2021, 35000.00, 'Honda', 'Civic'),
    ('5YJSA1E26HF123456', 2023, 60000.00, 'Tesla', 'Model S'),
    ('2FMDK49C59B123456', 2020, 28000.00, 'Ford', 'Escape'),
    ('3VW2K7AJ5JM123456', 2019, 18000.00, 'Volkswagen', 'Jetta'),
    ('4T1BE46K67U123456', 2021, 45000.00, 'Lexus', 'RX 350')
ON DUPLICATE KEY UPDATE
    model_year = VALUES(model_year),
    base_msrp = VALUES(base_msrp),
    make = VALUES(make),
    model = VALUES(model);
''')
conn.commit()

# Rebuild vehicle_transformed from scratch: drop any previous copy, then
# create it again with the extra price_category column and a foreign key
# back to vehicle(vin).
drop_transformed_sql = "DROP TABLE IF EXISTS vehicle_transformed;"
create_transformed_sql = '''
CREATE TABLE vehicle_transformed (
    vin VARCHAR(17) NOT NULL PRIMARY KEY,
    model_year INT NOT NULL,
    base_msrp DECIMAL(10, 2),
    make VARCHAR(100) NOT NULL,
    model VARCHAR(100) NOT NULL,
    price_category VARCHAR(20),
    CONSTRAINT fk_vehicle_vin FOREIGN KEY (vin) REFERENCES vehicle(vin) ON DELETE CASCADE
);
'''

cursor.execute(drop_transformed_sql)
conn.commit()

cursor.execute(create_transformed_sql)
conn.commit()
print("vehicle_transformed table created successfully.")

# INSERT INTO vehicle_transformed using SELECT WITH CASE
# Copies every row of `vehicle` and derives price_category from base_msrp:
#   below 30000 -> 'Low', 30000 to 50000 inclusive -> 'Mid', above -> 'High'.
# base_msrp is nullable, so a missing price is handled first and left
# uncategorised (NULL) instead of falling through to the ELSE branch and
# being labelled 'High'.
cursor.execute('''
INSERT INTO vehicle_transformed (vin, model_year, base_msrp, make, model, price_category)
SELECT 
    vin,
    model_year,
    base_msrp,
    make,
    model,
    CASE 
        WHEN base_msrp IS NULL THEN NULL
        WHEN base_msrp < 30000 THEN 'Low'
        WHEN base_msrp BETWEEN 30000 AND 50000 THEN 'Mid'
        ELSE 'High'
    END AS price_category
FROM vehicle;
''')
conn.commit()
print("Data inserted into vehicle_transformed with price categories.")

# Load the full `vehicle` table into a DataFrame and print it.
cursor.execute("SELECT * FROM vehicle;")
# The first field of each cursor.description entry is the column name.
columns_vehicle = [field[0] for field in cursor.description]
data_vehicle = cursor.fetchall()
df_vehicle = pd.DataFrame(data_vehicle, columns=columns_vehicle)
print("\nVehicle Table:", df_vehicle, sep="\n")

# Same again for `vehicle_transformed`, which adds price_category.
cursor.execute("SELECT * FROM vehicle_transformed;")
columns_transformed = [field[0] for field in cursor.description]
data_transformed = cursor.fetchall()
df_vehicle_transformed = pd.DataFrame(data_transformed, columns=columns_transformed)
print("\nVehicle_Transformed Table:", df_vehicle_transformed, sep="\n")

Vehicle table created successfully.
vehicle_transformed table created successfully.
Data inserted into vehicle_transformed with price categories.

Vehicle Table:
                 vin  model_year base_msrp        make    model
0  1HGCM82633A123456        2022  25000.00      Toyota  Corolla
1  1HGCM82633A654321        2021  35000.00       Honda    Civic
2  2FMDK49C59B123456        2020  28000.00        Ford   Escape
3  3VW2K7AJ5JM123456        2019  18000.00  Volkswagen    Jetta
4  4T1BE46K67U123456        2021  45000.00       Lexus   RX 350
5  5YJSA1E26HF123456        2023  60000.00       Tesla  Model S

Vehicle_Transformed Table:
                 vin  model_year base_msrp        make    model price_category
0  1HGCM82633A123456        2022  25000.00      Toyota  Corolla            Low
1  1HGCM82633A654321        2021  35000.00       Honda    Civic            Mid
2  2FMDK49C59B123456        2020  28000.00        Ford   Escape            Low
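3  3VW2K7AJ5JM123456        2019  18000.00  Volkswagen    Jetta            Low
4  4T1BE46K67U123456        2021  45000.00       Lexus   RX 350            Mid
5  5YJSA1E26HF123456        2023  60000.00       Tesla  Model S           High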

In [3]:
# 4. Nested query
# The inner SELECT computes the average base_msrp over the whole vehicle
# table; the outer SELECT keeps only the rows priced above that average.
nested_query = '''
SELECT vin, model_year, base_msrp, make, model
FROM vehicle
WHERE base_msrp > (
    SELECT AVG(base_msrp) FROM vehicle
);
'''

# Run the statement, then read the column names and result rows off the cursor
cursor.execute(nested_query)
columns = [field[0] for field in cursor.description]
data = cursor.fetchall()

# Build the DataFrame and print it under a heading
df_nested_query = pd.DataFrame(data, columns=columns)
print("\nVehicles with Base MSRP Higher Than Average:", df_nested_query, sep="\n")


Vehicles with Base MSRP Higher Than Average:
                 vin  model_year base_msrp   make    model
0  4T1BE46K67U123456        2021  45000.00  Lexus   RX 350
1  5YJSA1E26HF123456        2023  60000.00  Tesla  Model S


4.
A nested query (subquery) is used when one query relies on the result of another. In this case, the inner query computes the average base MSRP of all vehicles in the table, and the outer query returns only the vehicles whose base MSRP is higher than that average, which effectively filters out the lower-priced vehicles.

In [4]:
# Close the connection
# The cursor is released first; the `finally` clause guarantees the
# connection itself is closed even if closing the cursor raises.
try:
    cursor.close()
finally:
    conn.close()