In [45]:
import configparser
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, text

# Load credentials from querycrew.ini
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means the credentials were not loaded.
if not config.read("querycrew.ini"):
    raise FileNotFoundError("querycrew.ini not found or unreadable")

# Extract MySQL details
db_host = config["mysql"]["host"]
db_user = config["mysql"]["user"]
db_password = config["mysql"]["password"]
db_name = config["mysql"]["database"]

# Connect to MySQL
# The user and password are percent-encoded so characters such as '@', '/',
# ':' or '%' cannot be mistaken for URL delimiters. The host is left as-is so
# a "host:port" value keeps working.
# NOTE(review): assumes the ini file stores the raw (not already URL-encoded)
# password -- confirm.
engine = create_engine(
    f"mysql+mysqlconnector://{quote(db_user, safe='')}:{quote(db_password, safe='')}"
    f"@{db_host}/{db_name}"
)

# Test connection
# A failure is reported but not re-raised, so the notebook keeps running.
try:
    with engine.connect() as conn:
        print("Connected to MySQL successfully!")
except Exception as e:
    print("Connection failed:", e)


Connected to MySQL successfully!


In [46]:
# Load CSV file
csv_file = "/Users/keon/Downloads/Electric_Vehicle_Population_Data.csv"
df = pd.read_csv(csv_file)

# Maximum number of distinct rows staged for each table
MAX_ROWS = 100


def unique_rows(frame, source_cols, target_cols, limit=MAX_ROWS):
    """Return the first `limit` distinct, fully populated rows of `source_cols`.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to select from.
    source_cols : list of str
        Columns of `frame` to keep.
    target_cols : list of str
        New names for `source_cols`, in the same order.
    limit : int, optional
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns `target_cols`; the original row index labels
        are preserved.
    """
    return (
        frame[source_cols]
        # Rows with a missing value are dropped: the makes/counties name
        # columns are declared NOT NULL, and a NaN would otherwise survive
        # drop_duplicates() as a row of its own.
        .dropna()
        .drop_duplicates()
        .head(limit)
        .rename(columns=dict(zip(source_cols, target_cols)))
    )


# Select and clean Makes data
make_data = unique_rows(df, ["Make"], ["make_name"])

# Select and clean Counties data
county_data = unique_rows(df, ["County"], ["county_name"])

# Extract Make-County Relationships
make_county_data = unique_rows(df, ["Make", "County"], ["make_name", "county_name"])

# Display data
print("Makes Data (First 25 Rows):")
print(make_data.head(25))

print("\nCounties Data (First 25 Rows):")
print(county_data.head(25))

print("\nMake-County Relationships (First 25 Rows):")
print(make_county_data.head(25))


Makes Data (First 25 Rows):
         make_name
0             JEEP
1        CHEVROLET
2              BMW
3            TESLA
10          NISSAN
11           LEXUS
12             KIA
17        POLESTAR
19         HYUNDAI
25          TOYOTA
27          RIVIAN
28           VOLVO
30            FIAT
40           MAZDA
55            AUDI
79        CHRYSLER
92            FORD
136  MERCEDES-BENZ
161          HONDA
165     MITSUBISHI
240     VOLKSWAGEN
242         SUBARU
269        PORSCHE
333         JAGUAR
500           MINI

Counties Data (First 25 Rows):
        county_name
0            Kitsap
1         Snohomish
2              King
4            Yakima
6          Thurston
17           Island
95          Stevens
107           Grant
130        Kittitas
148         Spokane
150         Whitman
151          Skagit
156         Douglas
169     Walla Walla
175          Chelan
225      Williamson
253           Clark
260         Cowlitz
264       Jefferson
1061      Klickitat
2262        Clallam
37687 

In [47]:
# Rebuild the schema from scratch. Statements run in the order listed: the
# make_counties link table is dropped first because it holds the foreign keys
# that reference makes and counties, then the three tables are created.
schema_statements = (
    # Drop existing tables
    "DROP TABLE IF EXISTS make_counties;",
    "DROP TABLE IF EXISTS makes;",
    "DROP TABLE IF EXISTS counties;",
    # Makes table
    """
        CREATE TABLE makes (
            make_id INT AUTO_INCREMENT PRIMARY KEY,  -- makes_id is the Primary Key for makes table
            make_name VARCHAR(50) UNIQUE NOT NULL    -- Make names must be unique
        )
    """,
    # Counties table
    """
        CREATE TABLE counties (
            county_id INT AUTO_INCREMENT PRIMARY KEY,  -- county_id is the Primary Key for counties table
            county_name VARCHAR(100) UNIQUE NOT NULL   -- Each county has a unique name
        )
    """,
    # Make-County linking table
    """
        CREATE TABLE make_counties (
            make_id INT,    -- Foreign Key referencing the makes table
            county_id INT,  -- Foreign Key referencing the counties table
            PRIMARY KEY (make_id, county_id),  -- Composite Primary Key that ensures unique pairs
            FOREIGN KEY (make_id) REFERENCES makes(make_id) ON DELETE CASCADE,  -- Deletes linked records if make is removed
            FOREIGN KEY (county_id) REFERENCES counties(county_id) ON DELETE CASCADE  -- Ensures valid county refrences
        )
    """,
)

with engine.connect() as conn:
    for statement in schema_statements:
        conn.execute(text(statement))

    print("Tables created successfully!")


Tables created successfully!


In [48]:
# Each section below uses engine.begin(), which opens a transaction that is
# committed when the block exits cleanly and rolled back if it raises. With
# engine.connect() the inserts are only persisted when autocommit happens to
# be active; without it they are discarded when the connection closes.
# Passing a list of dicts runs each statement once per dict in a single
# execute() call instead of looping over DataFrame rows in Python.

# Insert Makes Data
make_rows = make_data.to_dict("records")
with engine.begin() as conn:
    if make_rows:  # skip when empty: the statement requires its bind parameter
        conn.execute(text("""
            INSERT IGNORE INTO makes (make_name) 
            VALUES (:make_name)
        """), make_rows)

    print("Makes inserted successfully!")

# Insert Counties Data
county_rows = county_data.to_dict("records")
with engine.begin() as conn:
    if county_rows:  # skip when empty: the statement requires its bind parameter
        conn.execute(text("""
            INSERT IGNORE INTO counties (county_name) 
            VALUES (:county_name)
        """), county_rows)

    print("Counties inserted successfully!")

# Insert Make-County Relationships
# The ids are looked up by name, so a pair whose make or county is not present
# in the makes/counties tables selects no row and is skipped.
make_county_rows = make_county_data.to_dict("records")
with engine.begin() as conn:
    if make_county_rows:  # skip when empty: the statement requires bind parameters
        conn.execute(text("""
            INSERT IGNORE INTO make_counties (make_id, county_id)
            SELECT m.make_id, c.county_id
            FROM makes m, counties c
            WHERE m.make_name = :make_name AND c.county_name = :county_name
        """), make_county_rows)

    print("Make-County relationships inserted successfully!")


Makes inserted successfully!
Counties inserted successfully!
Make-County relationships inserted successfully!


In [49]:
# Print up to 25 rows from each table. Every entry pairs the heading to print
# with the query whose rows are listed beneath it.
table_previews = [
    ("\nMakes Table (First 25 Rows):", "SELECT * FROM makes LIMIT 25;"),
    ("\nCounties Table (First 25 Rows):", "SELECT * FROM counties LIMIT 25;"),
    # The link table stores only ids, so join back to both tables for names
    ("\nMake-County Relationships (First 25 Rows):", """
        SELECT m.make_name, c.county_name 
        FROM make_counties mc
        JOIN makes m ON mc.make_id = m.make_id
        JOIN counties c ON mc.county_id = c.county_id
        LIMIT 25;
    """),
]

with engine.connect() as conn:
    for heading, query in table_previews:
        print(heading)
        result = conn.execute(text(query))
        for row in result:
            print(row)



Makes Table (First 25 Rows):
(36, 'ACURA')
(33, 'ALFA ROMEO')
(15, 'AUDI')
(41, 'AZURE DYNAMICS')
(44, 'BENTLEY')
(3, 'BMW')
(40, 'BRIGHTDROP')
(28, 'CADILLAC')
(2, 'CHEVROLET')
(16, 'CHRYSLER')
(35, 'DODGE')
(13, 'FIAT')
(34, 'FISKER')
(17, 'FORD')
(32, 'GENESIS')
(26, 'GMC')
(19, 'HONDA')
(9, 'HYUNDAI')
(24, 'JAGUAR')
(1, 'JEEP')
(7, 'KIA')
(45, 'LAMBORGHINI')
(31, 'LAND ROVER')
(6, 'LEXUS')
(30, 'LINCOLN')

Counties Table (First 25 Rows):
(35, 'Adams')
(63, 'Alameda')
(83, 'Allegheny')
(93, 'Anne Arundel')
(80, 'Arlington')
(34, 'Asotin')
(22, 'Benton')
(71, 'Berkeley')
(85, 'Bexar')
(82, 'Bristol')
(66, 'Brown')
(15, 'Chelan')
(91, 'Churchill')
(58, 'Clackamas')
(21, 'Clallam')
(17, 'Clark')
(67, 'Collin')
(42, 'Columbia')
(18, 'Cowlitz')
(53, 'Cuyahoga')
(100, 'DeKalb')
(97, 'District of Columbia')
(13, 'Douglas')
(98, 'El Paso')
(70, 'Essex')

Make-County Relationships (First 25 Rows):
('AUDI', 'Snohomish')
('AUDI', 'King')
('AUDI', 'Yakima')
('AUDI', 'Thurston')
('AUDI', 'Kitti