### 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

# Notebook-wide plotting defaults: seaborn "whitegrid" theme and a 10x6 figure size.
sns.set_theme(style="whitegrid")
plt.rcParams.update({"figure.figsize": (10, 6)})


### 2. Load Dataset

In [None]:
# Read the raw retail CSV into a DataFrame and report its (rows, columns) shape.
raw_csv_path = '../data/raw/raw_retail_data.csv'
df = pd.read_csv(raw_csv_path)
print("Dataset shape:", df.shape)

# Preview the first rows (rendered as the cell output).
df.head()


### 3. Data Cleaning

In [None]:
# Cleaning pipeline, applied in this order:
#   1. drop rows missing any field needed for the customer-level features,
#   2. parse the 'Date' column into datetimes,
#   3. remove fully duplicated rows.
required_columns = ['Customer ID', 'Age', 'Gender', 'Total Amount']
df = (
    df.dropna(subset=required_columns)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .drop_duplicates()
)

# Preview the cleaned rows (rendered as the cell output).
df.head()


### 4. Feature Engineering: Customer-Level Aggregation

In [None]:
# Collapse the transaction rows into one row per customer:
#   Age / Gender      -> first value seen for that customer
#   Total_Spent       -> sum of 'Total Amount'
#   Avg_Spent         -> mean of 'Total Amount'
#   Num_Transactions  -> count of 'Transaction ID' entries
customer_df = (
    df.groupby('Customer ID')
    .agg(
        Age=('Age', 'first'),
        Gender=('Gender', 'first'),
        Total_Spent=('Total Amount', 'sum'),
        Avg_Spent=('Total Amount', 'mean'),
        Num_Transactions=('Transaction ID', 'count'),
    )
    .reset_index()
    .rename(columns={'Customer ID': 'Customer_ID'})
)

# Encode gender numerically; any label other than 'Male'/'Female' maps to NaN.
gender_codes = {'Male': 0, 'Female': 1}
customer_df['Gender'] = customer_df['Gender'].map(gender_codes)

# Preview the per-customer table (rendered as the cell output).
customer_df.head()


### 5. Exploratory Data Analysis

In [None]:
# Box plot of each customer's total spend, split by the numerically encoded Gender.
ax = sns.boxplot(data=customer_df, x='Gender', y='Total_Spent')
ax.set_title("Total Spending by Gender")

# Gender is encoded as Male=0 / Female=1, so relabel the two tick positions
# with the readable category names.
ax.set_xticks([0, 1])
ax.set_xticklabels(['Male', 'Female'])
plt.show()


### 6. Feature Scaling

In [None]:
# Customer-level columns that make up the feature matrix.
features = ['Age', 'Gender', 'Total_Spent', 'Avg_Spent', 'Num_Transactions']
X = customer_df.loc[:, features]

# Fit the scaler on X, then apply it to the same data. The fitted `scaler`
# stays bound so the same transformation can be reused later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
