# Importing required libraries


In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

## Loading the Dataset


In [2]:
# Location of the input CSV, relative to the notebook's working directory.
data_path = '../data/cleaned_xente_data.csv'

# Read the CSV into the frame that the following cells extend with new columns.
df = pd.read_csv(data_path)

## Creating Aggregate Features


In [3]:
# Per-customer aggregates, broadcast back onto every transaction row with
# `transform` so the frame keeps one row per transaction.
# The grouping is built once and reused instead of being rebuilt per feature.
customer_groups = df.groupby('CustomerId')
customer_amounts = customer_groups['Amount']
customer_transactions = customer_groups['TransactionId']

df['Total_Transaction_Amount'] = customer_amounts.transform('sum')
df['Average_Transaction_Amount'] = customer_amounts.transform('mean')
# 'count' counts non-null TransactionId values within each customer group.
df['Transaction_Count'] = customer_transactions.transform('count')
# pandas computes the sample standard deviation (ddof=1) here, which is NaN
# for a customer with a single transaction.
# NOTE(review): those NaNs are filled by the mean imputation further down —
# confirm that is the intended value for single-transaction customers.
df['Transaction_Std'] = customer_amounts.transform('std')

## Extracting Date-Time Features


In [4]:
# Parse the timestamp column once and derive every calendar feature from the
# parsed series, rather than re-parsing the same strings for each feature.
transaction_time = pd.to_datetime(df['TransactionStartTime'])

df['Transaction_Hour'] = transaction_time.dt.hour
df['Transaction_Day'] = transaction_time.dt.day  # day of the month
df['Transaction_Month'] = transaction_time.dt.month
df['Transaction_Year'] = transaction_time.dt.year

## Encoding Categorical Variables using One-Hot Encoding


In [5]:
# Columns to expand into one indicator column per distinct value.
categorical_vars = [
    'CurrencyCode',
    'CountryCode',
    'ProviderId',
    'ProductCategory',
]

# get_dummies returns a new frame in which the listed columns are replaced by
# their `<column>_<value>` indicator columns; `df` itself is left untouched.
df_encoded = pd.get_dummies(data=df, columns=categorical_vars)

## Handling Missing Values


In [9]:
# Only columns stored as float64 or int64 are selected for imputation;
# columns of any other dtype are left as they are.
numeric_cols = df_encoded.select_dtypes(include=['float64', 'int64']).columns

# Fill missing values in the selected columns with each column's mean.
imputer = SimpleImputer(strategy='mean')
numeric_block = df_encoded[numeric_cols]
imputer.fit(numeric_block)
df_encoded[numeric_cols] = imputer.transform(numeric_block)

# Sanity check: number of missing values remaining in every column.
remaining_nulls = df_encoded.isnull().sum()
print(remaining_nulls)

TransactionId                         0
BatchId                               0
AccountId                             0
SubscriptionId                        0
CustomerId                            0
ProductId                             0
ChannelId                             0
Amount                                0
Value                                 0
TransactionStartTime                  0
PricingStrategy                       0
FraudResult                           0
Total_Transaction_Amount              0
Average_Transaction_Amount            0
Transaction_Count                     0
Transaction_Std                       0
Transaction_Hour                      0
Transaction_Day                       0
Transaction_Month                     0
Transaction_Year                      0
CurrencyCode_UGX                      0
CountryCode_256                       0
ProviderId_ProviderId_1               0
ProviderId_ProviderId_2               0
ProviderId_ProviderId_3               0
