# Importing required libraries


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.metrics import roc_auc_score

In [2]:
# Load the cleaned Xente transaction data from the project's data directory.
csv_path = '../data/cleaned_xente_data.csv'
df = pd.read_csv(csv_path)

In [3]:
# Data preparation: report the frame's dimensions, then each column's dtype.
for label, value in (("Dataset Shape:", df.shape), ("Data Types:\n", df.dtypes)):
    print(label, value)

Dataset Shape: (95662, 16)
Data Types:
 TransactionId            object
BatchId                  object
AccountId                object
SubscriptionId           object
CustomerId               object
CurrencyCode             object
CountryCode               int64
ProviderId               object
ProductId                object
ProductCategory          object
ChannelId                object
Amount                  float64
Value                     int64
TransactionStartTime     object
PricingStrategy           int64
FraudResult               int64
dtype: object


# Define RFMS calculation function


In [5]:
def calculate_rfms(df, reference_date=None):
    """
    Calculate a per-customer RFMS score from transaction-level data.

    The score is ``1 / Recency + Frequency + Monetary / 1000``, so a more
    recent last transaction, more transactions, and a larger summed amount
    all raise it.

    Parameters
    ----------
    df : pandas.DataFrame
        Transactions with at least the columns ``CustomerId``,
        ``TransactionId``, ``TransactionStartTime`` and ``Amount``.
        The frame is not modified.
    reference_date : datetime-like or str, optional
        Point in time that recency is measured from. Defaults to the current
        time. Pass a fixed value (for example the snapshot date of the data)
        to make the scores reproducible between runs.

    Returns
    -------
    pandas.DataFrame
        One row per customer with the columns ``CustomerId``, ``Recency``
        (whole days between the customer's latest transaction and the
        reference date), ``Frequency`` (count of ``TransactionId``),
        ``Monetary`` (sum of ``Amount``) and ``RFMS``.
    """
    # Parse before aggregating so "latest" is chronological rather than a
    # lexicographic max over strings. utc=True puts every timestamp and the
    # reference on the same tz-aware footing; mixing naive and aware values
    # would raise a TypeError on subtraction.
    txn_time = pd.to_datetime(df['TransactionStartTime'], utc=True)
    if reference_date is None:
        reference_ts = pd.Timestamp.now(tz='UTC')
    else:
        reference_ts = pd.to_datetime(reference_date, utc=True)

    last_seen = txn_time.groupby(df['CustomerId']).max()
    recency = (reference_ts - last_seen).dt.days.rename('Recency').reset_index()

    frequency = df.groupby('CustomerId')['TransactionId'].count().reset_index()
    frequency.columns = ['CustomerId', 'Frequency']

    monetary = df.groupby('CustomerId')['Amount'].sum().reset_index()
    monetary.columns = ['CustomerId', 'Monetary']

    rfms = recency.merge(frequency, on='CustomerId').merge(monetary, on='CustomerId')

    # Clamp recency to at least one day inside the reciprocal only: a last
    # transaction less than a day old (Recency == 0) would otherwise give inf,
    # and a future-dated one a negative term. The Recency column itself keeps
    # the raw day count.
    recency_term = 1 / rfms['Recency'].clip(lower=1)

    # Monetary is divided by 1000 to bring it closer to the scale of the
    # other terms; adjust the weights to suit your criteria.
    rfms['RFMS'] = recency_term + rfms['Frequency'] + (rfms['Monetary'] / 1000)
    return rfms