In [None]:
import pandas as pd
import sqlite3

Step 1: Extract data from CSV

In [None]:
# Extract: read the raw retail sales CSV into the working frame `df`.
source_csv = 'retail_sales_dataset.csv'
df = pd.read_csv(source_csv)

Step 2: Transform data - parse the Date column and derive the Month and Season columns (Total Amount is used as provided in the CSV)

In [None]:
# Parse Date into datetimes so the .dt accessor can be used below.
# Re-running this cell is safe: to_datetime on an already-parsed column is a no-op.
df['Date'] = pd.to_datetime(df['Date'])
df['Month'] = df['Date'].dt.month

# Month number -> season label (Dec-Feb Winter, Mar-May Spring, Jun-Aug Summer, Sep-Nov Fall).
season_by_month = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}

# Vectorized lookup instead of a nested-lambda apply. A row whose Date is missing
# (NaT -> missing Month) now gets a missing Season instead of silently being
# labelled 'Fall' by the catch-all else branch.
df['Season'] = df['Month'].map(season_by_month)

Split the data into dimension tables (the fact table is built further below, once the surrogate keys exist)

In [None]:
# DimCustomer: exactly one row per customer key. De-duplicating on the key alone
# (keeping the first Gender/Age seen) guarantees CustomerID is unique even if a
# customer's attributes differ between rows of the source data.
dim_customer = (
    df[['Customer ID', 'Gender', 'Age']]
    .drop_duplicates(subset='Customer ID')
    .rename(columns={'Customer ID': 'CustomerID'})
    .reset_index(drop=True)
)

# DimProduct: one row per distinct product category.
dim_product = (
    df[['Product Category']]
    .drop_duplicates()
    .rename(columns={'Product Category': 'ProductCategory'})
    .reset_index(drop=True)
)

# DimDate: one row per distinct (Date, Month, Season) combination.
# reset_index(drop=True) returns a fresh frame with a clean 0..N-1 index, so later
# column assignments do not write into something derived from a slice of `df`.
dim_date = (
    df[['Date', 'Month', 'Season']]
    .drop_duplicates()
    .reset_index(drop=True)
)

Assign ProductID and DateID surrogate keys; FactSales will reference them as foreign keys

In [None]:
# Give each dimension row a sequential surrogate key (1..N in current row order).
# .assign returns a new frame rather than writing a column into the existing one,
# which avoids pandas' SettingWithCopyWarning when the frame was produced by
# slicing/filtering `df`, and keeps the cell safe to re-run.
dim_product = dim_product.assign(ProductID=range(1, len(dim_product) + 1))
dim_date = dim_date.assign(DateID=range(1, len(dim_date) + 1))

Merge to create fact table with foreign keys

In [None]:
# Build the fact table: one row per source sale, carrying the dimension keys
# plus the measures.
#
# - CustomerID is just the source 'Customer ID' renamed, so no join against
#   dim_customer is needed (that join added no columns to the result and would
#   multiply fact rows if a CustomerID appeared more than once in the dimension).
# - Only the lookup column and the surrogate key are taken from each dimension,
#   so no overlapping Month/Season columns are dragged in and suffixed.
# - validate='many_to_one' makes pandas raise MergeError if a dimension key is
#   not unique, instead of silently duplicating sales rows.
#
# NOTE(review): 'Total Amount' keeps its original name (with a space), unlike the
# other renamed columns; left unchanged so existing consumers of FactSales keep working.
fact_sales = (
    df.rename(columns={'Customer ID': 'CustomerID', 'Price per Unit': 'PricePerUnit'})
    .merge(
        dim_product[['ProductCategory', 'ProductID']],
        left_on='Product Category',
        right_on='ProductCategory',
        validate='many_to_one',
    )
    .merge(dim_date[['Date', 'DateID']], on='Date', validate='many_to_one')
    [['DateID', 'CustomerID', 'ProductID', 'Quantity', 'PricePerUnit', 'Total Amount']]
)

Step 3: Load data into SQLite Database

In [None]:
# Open (or create) the SQLite data mart file; `conn` is reused by the cells below.
conn = sqlite3.connect('data_mart.db')

# Put the surrogate key first in the two keyed dimensions.
product_rows = dim_product[['ProductID', 'ProductCategory']]
date_rows = dim_date[['DateID', 'Date', 'Month', 'Season']]

# Shared write settings: replace any existing table of the same name and
# do not write the DataFrame index as a column.
write_options = {'con': conn, 'if_exists': 'replace', 'index': False}

dim_customer.to_sql('DimCustomer', **write_options)
product_rows.to_sql('DimProduct', **write_options)
date_rows.to_sql('DimDate', **write_options)
fact_sales.to_sql('FactSales', **write_options)

In [None]:
# Sanity check: report the row count of every table that was loaded, not just
# two of them. Table names are fixed literals (SQL identifiers cannot be bound
# as query parameters), so building the statement with an f-string is safe here.
for table_name in ('DimCustomer', 'DimProduct', 'DimDate', 'FactSales'):
    row_count = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
    print(f"{table_name} row count:", row_count)

In [None]:
# Persist the loaded tables, and always release the database handle:
# without the finally, a failing commit would leave the connection open.
try:
    conn.commit()
finally:
    conn.close()

In [None]:
# Final status line, reached only if every cell above ran without raising.
completion_message = "ETL process completed successfully."
print(completion_message)