# Creating an Extract, Transform and Load process 

### Imports 

In [32]:
import pandas as pd
import mysql.connector
from dotenv import load_dotenv
import os
# Read the local .env file so the os.getenv("DB_*") lookups used for the
# database connection can find their values.
load_dotenv()

True

### Data

In [33]:
# Location of the cleaned sales extract, relative to the working directory.
sales_csv_path = "Data/Cleaned_Car_Sales_Data.csv"

# Source frame that every dimension below is derived from.
car_sales = pd.read_csv(sales_csv_path)

### Creating Connection to Database

In [5]:
# Connection settings are read from environment variables rather than being
# hardcoded in the notebook.
db_settings = {
    "host": os.getenv("DB_HOST"),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
    "database": os.getenv("DB_NAME"),
}
conn = mysql.connector.connect(**db_settings)

# One cursor, shared by the table-creation and insert cells that follow.
cur = conn.cursor()

### Creating Tables
- Fact_Tables_Sales
- Dim_Dealer
- Dim_Region
- Dim_Date
- Dim_Car
- Dim_Customer

### Dim_Customer Table

In [6]:
# Customer attributes destined for Dim_Customer. No de-duplication is applied,
# so this frame has one row per row of car_sales.
customer_columns = ['Customer_Name', 'Gender']
customer_data = car_sales[customer_columns]

In [37]:
# Create the customer dimension if it is not there yet; Customer_ID is a
# surrogate key generated by MySQL.
cur.execute("""
CREATE TABLE IF NOT EXISTS Dim_Customer (
            Customer_ID INT AUTO_INCREMENT PRIMARY KEY,
            Customer_Name VARCHAR(255),
            Gender VARCHAR(20)
            );""")

# The statement text is constant, so it is defined once rather than per row.
cust_sql = """INSERT INTO Dim_Customer (Customer_Name, Gender) VALUES (%s, %s)"""

# Plain tuples in DataFrame row order. Missing values are turned into None so
# they are sent as SQL NULL instead of a float NaN.
cust_rows = list(
    customer_data.astype(object)
    .where(customer_data.notna(), None)
    .itertuples(index=False, name=None)
)

# One batched call instead of one execute() round trip per customer row.
# NOTE: the table has no unique constraint, so re-running this cell appends
# the same customers again under new Customer_ID values.
cur.executemany(cust_sql, cust_rows)

conn.commit()


### Dim_Car Table

In [8]:
# Descriptive car attributes that make up one Dim_Car row.
car_columns = [
    'Car_Manufacturer',
    'Car_Model',
    'Engine_Type',
    'Transmission',
    'Colour',
    'Body_Style',
]
car_data = car_sales[car_columns]

In [11]:
# Keep one row per distinct attribute combination and renumber the index 0..n-1.
car_data = car_data.drop_duplicates(ignore_index=True)

In [12]:
# Create the car dimension if it is not there yet; Car_ID is a surrogate key
# generated by MySQL.
cur.execute("""
    CREATE TABLE IF NOT EXISTS Dim_Car (
                Car_ID INT AUTO_INCREMENT PRIMARY KEY,
                Car_Manufacturer VARCHAR(50),
                Car_Model VARCHAR(50),
                Engine_Type VARCHAR(50),
                Transmission VARCHAR(50),
                Colour VARCHAR(50),
                Body_Style VARCHAR(50)
            );
    """)

# The statement text is constant, so it is defined once rather than per row.
car_sql = """INSERT INTO Dim_Car (Car_Manufacturer, Car_Model, Engine_Type, Transmission,Colour, Body_Style) VALUES (%s, %s, %s, %s, %s, %s)"""

# Plain tuples in DataFrame row order. Missing values are turned into None so
# they are sent as SQL NULL instead of a float NaN.
car_rows = list(
    car_data.astype(object)
    .where(car_data.notna(), None)
    .itertuples(index=False, name=None)
)

# One batched call instead of one execute() round trip per car row.
# NOTE: the table has no unique constraint, so re-running this cell appends
# the same cars again under new Car_ID values.
cur.executemany(car_sql, car_rows)

conn.commit()

### Dim_Date

In [36]:
# Parse Sale_Date into pandas datetimes so the .dt accessors used for the date
# dimension are available.
car_sales["Sale_Date"] = car_sales["Sale_Date"].pipe(pd.to_datetime)

In [39]:
# One row per distinct sale date, as a single-column frame indexed 0..n-1.
date_data = (
    car_sales["Sale_Date"]
    .drop_duplicates()
    .reset_index(drop=True)
    .to_frame()
)

In [40]:
# Derive the calendar attributes loaded into Dim_Date from each distinct date.
# Going through pd.to_datetime keeps this cell re-runnable: once Sale_Date has
# been converted to plain date objects, a bare .dt accessor raises
# AttributeError.
sale_dt = pd.to_datetime(date_data["Sale_Date"]).dt
date_data["Year"] = sale_dt.year
date_data["Quarter"] = sale_dt.quarter
date_data["Month"] = sale_dt.month_name()  # month name, not month number
date_data["Day"] = sale_dt.day_name()  # weekday name, not day of month

In [45]:
# Replace the pandas timestamps with plain datetime.date objects for the DATE
# column of Dim_Date. pd.to_datetime makes the cell idempotent: on a second
# run the column already holds date objects, and .dt on it would otherwise fail.
date_data["Sale_Date"] = pd.to_datetime(date_data["Sale_Date"]).dt.date

In [49]:
# Create the date dimension if it is not there yet; Date_ID is a surrogate key
# generated by MySQL.
# NOTE(review): Year is declared VARCHAR(50) while the value inserted comes
# from .dt.year (an integer) and Quarter is INT; confirm whether Year should
# be numeric as well before anything depends on the schema.
cur.execute("""
    CREATE TABLE IF NOT EXISTS Dim_Date (
                Date_ID INT AUTO_INCREMENT PRIMARY KEY,
                Sale_Date DATE,
                Year VARCHAR(50),
                Quarter INT,
                Month VARCHAR(50),
                Day VARCHAR(50)
            );
    """)

# The statement text is constant, so it is defined once rather than per row.
date_sql = """INSERT INTO Dim_Date (Sale_Date, Year, Quarter, Month,Day) VALUES (%s, %s, %s, %s, %s)"""

# Plain tuples in DataFrame row order. astype(object) yields Python scalars,
# and missing values are turned into None so they are sent as SQL NULL.
date_rows = list(
    date_data.astype(object)
    .where(date_data.notna(), None)
    .itertuples(index=False, name=None)
)

# One batched call instead of one execute() round trip per date row.
# NOTE: the table has no unique constraint on Sale_Date, so re-running this
# cell appends the same dates again under new Date_ID values.
cur.executemany(date_sql, date_rows)

conn.commit()

### Dim_Region

In [52]:
# Distinct (region, state) pairs, indexed 0..n-1, for Dim_Region.
region_columns = ['Dealer_Region', 'Dealer_State']
region_data = car_sales[region_columns].drop_duplicates(ignore_index=True)

In [54]:
# Create the region dimension if it is not there yet; Region_ID is a surrogate
# key generated by MySQL.
cur.execute("""
    CREATE TABLE IF NOT EXISTS Dim_Region (
                Region_ID INT AUTO_INCREMENT PRIMARY KEY,
                Dealer_Region VARCHAR(50),
                Dealer_State VARCHAR(50)
            );
    """)

# The statement text is constant, so it is defined once rather than per row.
region_sql = """INSERT INTO Dim_Region (Dealer_Region, Dealer_State) VALUES (%s, %s)"""

# Plain tuples in DataFrame row order. Missing values are turned into None so
# they are sent as SQL NULL instead of a float NaN.
region_rows = list(
    region_data.astype(object)
    .where(region_data.notna(), None)
    .itertuples(index=False, name=None)
)

# One batched call instead of one execute() round trip per region row.
# NOTE: the table has no unique constraint, so re-running this cell appends
# the same regions again under new Region_ID values.
cur.executemany(region_sql, region_rows)

conn.commit()

### Dim_Dealer 

In [111]:
# Distinct (Dealer_ID, Dealer_Name) pairs, indexed 0..n-1, for Dim_Dealer.
dealer_columns = ['Dealer_ID', "Dealer_Name"]
dealer_data = car_sales[dealer_columns].drop_duplicates(ignore_index=True)

In [112]:
# Show the de-duplicated dealer rows before they are loaded into Dim_Dealer.
dealer_data

Unnamed: 0,Dealer_ID,Dealer_Name
0,D1,Buddy Storbeck's Diesel Service Inc
1,D2,C & M Motors Inc
2,D3,Capitol KIA
3,D4,Chrysler of Tri-Cities
4,D5,Chrysler Plymouth
5,D6,Classic Chevy
6,D7,Clay Johnson Auto Sales
7,D8,U-Haul CO
8,D9,Rabun Used Car Sales
9,D10,Race Car Help


In [113]:
# Create the dealer dimension if it is not there yet. Unlike the other
# dimensions, the key is the Dealer_ID taken from the source data, not an
# auto-increment surrogate.
cur.execute("""
    CREATE TABLE IF NOT EXISTS Dim_Dealer (
                Dealer_ID VARCHAR(10) PRIMARY KEY,
                Dealer_Name VARCHAR(200)
            );
    """)

# The statement text is constant, so it is defined once rather than per row.
dealer_sql = """INSERT INTO Dim_Dealer (Dealer_ID, Dealer_Name) VALUES (%s, %s)"""

# Plain tuples in DataFrame row order. Missing values are turned into None so
# they are sent as SQL NULL instead of a float NaN.
dealer_rows = list(
    dealer_data.astype(object)
    .where(dealer_data.notna(), None)
    .itertuples(index=False, name=None)
)

# One batched call instead of one execute() round trip per dealer row.
# NOTE: Dealer_ID is the primary key and this is a plain INSERT, so running
# the cell again against an already-loaded table fails on the duplicate key.
cur.executemany(dealer_sql, dealer_rows)

conn.commit()

### Creating Fact Sales Table

In [114]:
# Show the source frame for reference while building the fact table.
# NOTE(review): the rendered output includes customer names and phone numbers;
# consider clearing it before sharing the notebook.
car_sales

Unnamed: 0,Car_ID,Sale_Date,Customer_Name,Gender,Annual_Income,Dealer_Name,Car_Manufacturer,Car_Model,Engine_Type,Transmission,Colour,Sale_Price,Dealer_ID,Body_Style,Customer_Phone_Number,Dealer_Region,Dealer_State
0,C_CND_000001,2022-01-02,Geraldine,Male,13500,Buddy Storbeck's Diesel Service Inc,Ford,Expedition,Double Overhead Camshaft,Auto,Black,26000,D1,SUV,8264678,Middletown,Ohio
1,C_CND_000002,2022-01-02,Gia,Female,1480000,C & M Motors Inc,Dodge,Durango,Double Overhead Camshaft,Auto,Black,19000,D2,SUV,6848189,Aurora,Colarado
2,C_CND_000003,2022-01-02,Gianna,Female,1035000,Capitol KIA,Cadillac,Eldorado,Overhead Camshaft,Manual,Red,31500,D3,Passenger,7298798,Greenville,South Carolina
3,C_CND_000004,2022-01-02,Giselle,Male,13500,Chrysler of Tri-Cities,Toyota,Celica,Overhead Camshaft,Manual,Pale White,14000,D4,SUV,6257557,Pasco,Washington
4,C_CND_000005,2022-01-02,Grace,Female,1465000,Chrysler Plymouth,Acura,TL,Double Overhead Camshaft,Auto,Red,24500,D5,Hatchback,7081483,Janesville,Wisconsin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23901,C_CND_023902,2023-12-31,Martin,Male,13500,C & M Motors Inc,Plymouth,Voyager,Overhead Camshaft,Manual,Red,12000,D2,Passenger,8583598,Pasco,Washington
23902,C_CND_023903,2023-12-31,Jimmy,Female,900000,Ryder Truck Rental and Leasing,Chevrolet,Prizm,Double Overhead Camshaft,Auto,Black,16000,D18,Hardtop,7914229,Middletown,Ohio
23903,C_CND_023904,2023-12-31,Emma,Female,705000,Chrysler of Tri-Cities,BMW,328i,Overhead Camshaft,Manual,Red,21000,D4,Sedan,7659127,Scottsdale,Arizona
23904,C_CND_023905,2023-12-31,Victoire,Male,13500,Chrysler Plymouth,Chevrolet,Metro,Double Overhead Camshaft,Auto,Black,31000,D5,Passenger,6030764,Austin,Texas
