Data Preprocessing Module

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
import os
import numpy as np
import pickle
import random
import tensorflow as tf
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import mse
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense
from google.colab import files
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.optimizers import Adam


# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

uploaded = files.upload()

Saving Updated_GKONE_Synthetic_Dataset.csv to Updated_GKONE_Synthetic_Dataset.csv


In [2]:
def load_and_preprocess_data(file_name):
    # Load the dataset
    gkone_data = pd.read_csv(file_name)
    initial_count = gkone_data.shape[0]

    # Remove duplicate entries
    gkone_data.drop_duplicates(inplace=True)
    final_count = gkone_data.shape[0]
    duplicates_removed = initial_count - final_count
    missing_values = gkone_data.isnull().sum().sum()
    na_values = gkone_data.isna().sum().sum()

    # Trim white spaces from column names
    gkone_data.columns = gkone_data.columns.str.strip()

    # Trim white spaces from string values in each column
    for col in gkone_data.select_dtypes(['object']).columns:
        gkone_data[col] = gkone_data[col].str.strip()

    # Print results
    print(f"Number of duplicates removed: {duplicates_removed}")
    print(f"Total missing values: {missing_values}")
    print(f"Total NA values: {na_values}")

    return gkone_data

In [3]:
# Step 1: Classify Age Groups by Percentile

def classify_age_groups(data):
    percentiles = np.percentile(data['Age'], [25, 50, 75])
    def age_group(age):
        if age <= percentiles[0]:
            return '0-25th Percentile'
        elif age <= percentiles[1]:
            return '25th-50th Percentile'
        elif age <= percentiles[2]:
            return '50th-75th Percentile'
        else:
            return '75th-100th Percentile'
    data['AgeGroup'] = data['Age'].apply(age_group)
    return data

In [4]:
# Step 2: One-Hot Encoding and Normalization

def one_hot_encode_and_normalize(data):
    # One-hot encode age groups
    age_group_dummies = pd.get_dummies(data['AgeGroup'], prefix='AgeGroup').astype(int)
    data = pd.concat([data, age_group_dummies], axis=1)
    data.drop(columns=['AgeGroup', 'Age'], inplace=True)

    # One-hot encode product types
    product_types = data['ProductType'].apply(lambda x: x.strip('[]').replace("'", "").split(', '))
    product_type_dummies = product_types.str.join('|').str.get_dummies()
    data = pd.concat([data, product_type_dummies], axis=1)
    data.drop(columns=['ProductType'], inplace=True)

    # One-hot encode IncomeLevel column
    income_level_encoded = pd.get_dummies(data['IncomeLevel'], prefix='IncomeLevel').astype(int)
    data = pd.concat([data, income_level_encoded], axis=1)
    data.drop(columns=['IncomeLevel'], inplace=True)

    # Scale specified columns
    columns_to_scale = [
        'RemittancesFreq_Monthly', 'BillPaymentsFreq_Monthly', 'MarketPlaceFreq_Monthly',
        'PeerToPeerFreq_Monthly', 'CustomerTenure', 'ActivityLevel'
    ]
    scaler = MinMaxScaler()
    data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])

    # One-hot encode categorical columns and convert to int
    categorical_cols = ['Gender', 'EmploymentStatus', 'EducationLevel']
    for col in categorical_cols:
        if col in data.columns:
            dummies = pd.get_dummies(data[col], prefix=col)
            dummies = dummies.astype(int)  # Convert to int
            data = pd.concat([data, dummies], axis=1)
            data.drop(columns=[col], inplace=True)

    # Standardize numerical columns
    numerical_cols = ['Remittances_MonthlyTransValue', 'BillPayments_MonthlyTransValue', 'PeerToPeer_MonthlyTransValue', 'LinkedBankAccountMonthlyValue']
    scaler = StandardScaler()
    data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

    # Convert boolean to 0 and 1 for encoded columns
    encoded_cols = [col for col in data.columns if data[col].dtype == 'bool']
    data[encoded_cols] = data[encoded_cols].astype(int)

    return data

In [5]:
# Step 3: Location Classification

top_parishes = {
    'Remittances': ['Kingston', 'St. Andrew', 'St. Catherine'],
    'MarketPlace': ['Kingston', 'St. Andrew', 'St. Catherine'],
    'MotorInsurance': ['Kingston', 'St. Andrew', 'St. Catherine'],
    'PeerToPeer Sending': ['Kingston', 'St. Andrew', 'St. Catherine'],
    'BillPayments': ['St. James', 'St. Thomas', 'Westmoreland']
}

def count_categories_per_location(data, category):
    # Filter the data for the specified category
    category_data = data[data[category] == 1]

    # Count the occurrences per location
    location_counts = category_data['Location'].value_counts().sort_index()

    return location_counts

# Function to classify locations based on predefined top parishes and ranking
def classify_location(data, top_parishes):
    def classify_parish(parish, category, counts):
        if parish in top_parishes[category]:
            return 'Top'
        sorted_counts = counts.sort_values(ascending=False)
        mid_cutoff = int(len(sorted_counts) * 0.5)
        if parish in sorted_counts.index[:mid_cutoff]:
            return 'Mid'
        return 'Low'

    categories = list(top_parishes.keys())
    location_category_counts = {}
    for category in categories:
        counts = count_categories_per_location(data, category)
        classified_counts = counts.index.to_series().apply(lambda parish: classify_parish(parish, category, counts))
        location_category_counts[category] = classified_counts

    parish_classification_mapping = pd.DataFrame(index=data['Location'].unique())
    for category in categories:
        parish_classification_mapping[category] = parish_classification_mapping.index.map(location_category_counts[category].to_dict())

    def determine_overall_classification(row):
        if 'Top' in row.values:
            return 'Top'
        if 'Mid' in row.values:
            return 'Mid'
        return 'Low'

    parish_classification_mapping['Overall_Classification'] = parish_classification_mapping.apply(determine_overall_classification, axis=1)
    data['Overall_Classification'] = data['Location'].map(parish_classification_mapping['Overall_Classification'])

    overall_classification_encoded = pd.get_dummies(data['Overall_Classification'], prefix='Usage').astype(int)
    data = pd.concat([data, overall_classification_encoded], axis=1)
    data.drop(columns=['Overall_Classification', 'Location'], inplace=True)

    # Rename the columns
    data.rename(columns={
        'Usage_Top': 'High_Usage_Location',
        'Usage_Mid': 'Moderate_Usage_Location',
        'Usage_Low': 'Low_Usage_Location'
    }, inplace=True)

    return data

In [6]:
# Step 4: Preprocess Data
def preprocess_data(file_name):
    gkone_data = load_and_preprocess_data(file_name)
    gkone_data = classify_age_groups(gkone_data)
    gkone_data = one_hot_encode_and_normalize(gkone_data)
    gkone_data = classify_location(gkone_data, top_parishes)

    # Drop unnecessary columns and convert data types if needed
    if 'CustomerID' in gkone_data.columns:
        gkone_data.drop(columns=['CustomerID'], inplace=True)
    gkone_data = gkone_data.astype('float32')

    # Save preprocessed data
    save_preprocessed_data(gkone_data, 'gkone_preprocessed_data.pkl')

    return gkone_data

In [7]:
def save_preprocessed_data(data, file_name):
    with open(file_name, 'wb') as f:
        pickle.dump(data, f)

def load_preprocessed_data(file_name):
    with open(file_name, 'rb') as f:
        return pickle.load(f)

# Example usage
if __name__ == "__main__":
    file_name = '/content/Updated_GKONE_Synthetic_Dataset.csv'
    preprocessed_data = preprocess_data(file_name)

Number of duplicates removed: 0
Total missing values: 0
Total NA values: 0


Model Building Module

In [8]:
# Setting seeds for reproducibility
os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

# Ensuring TensorFlow uses a single thread (for reproducibility)
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

# Define the input shape
input_dim = preprocessed_data.shape[1]  # Number of features
latent_dims = [2, 5, 10, 20]
kl_weights = [0.5, 1.0, 2.0, 4.0]  # Different KL divergence weights to test

best_reconstruction_loss = float('inf')
best_kl_weight = None
best_latent_dim = None
best_vae = None

for latent_dim in latent_dims:
    for kl_weight in kl_weights:
        # Define the VAE model with current latent_dim
        inputs = Input(shape=(input_dim,))
        h = Dense(64, activation='relu')(inputs)
        h = Dense(32, activation='relu')(h)
        z_mean = Dense(latent_dim)(h)
        z_log_var = Dense(latent_dim)(h)

        def sampling(args):
            z_mean, z_log_var = args
            batch = tf.shape(z_mean)[0]
            dim = tf.shape(z_mean)[1]
            epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
            return z_mean + tf.exp(0.5 * z_log_var) * epsilon

        z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

        decoder_h1 = Dense(32, activation='relu')
        decoder_h2 = Dense(64, activation='relu')
        decoder_mean = Dense(input_dim, activation='sigmoid')
        h_decoded = decoder_h1(z)
        h_decoded = decoder_h2(h_decoded)
        x_decoded_mean = decoder_mean(h_decoded)

        vae = Model(inputs, x_decoded_mean)

        reconstruction_loss = mse(inputs, x_decoded_mean)
        reconstruction_loss *= input_dim
        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        vae_loss = K.mean(reconstruction_loss + kl_weight * kl_loss)  # Adjusted KL weight

        vae.add_loss(vae_loss)
        vae.compile(optimizer='adam')

        # Train the VAE
        vae.fit(preprocessed_data, preprocessed_data, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

        # Calculate reconstruction loss
        recon_loss = vae.evaluate(preprocessed_data, preprocessed_data, verbose=0)

        if recon_loss < best_reconstruction_loss:
            best_reconstruction_loss = recon_loss
            best_kl_weight = kl_weight
            best_latent_dim = latent_dim
            best_vae = vae

# Print the best KL weight and latent dimension
print(f'Best KL Weight: {best_kl_weight}, Best Latent Dimension: {best_latent_dim}')

# Train the final VAE with the optimal latent_dim and KL weight
inputs = Input(shape=(input_dim,))
h = Dense(64, activation='relu')(inputs)
h = Dense(32, activation='relu')(h)
z_mean = Dense(best_latent_dim)(h)
z_log_var = Dense(best_latent_dim)(h)

def sampling(args):
    z_mean, z_log_var = args
    batch = tf.shape(z_mean)[0]
    dim = tf.shape(z_mean)[1]
    epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = Lambda(sampling, output_shape=(best_latent_dim,))([z_mean, z_log_var])

decoder_h1 = Dense(32, activation='relu')
decoder_h2 = Dense(64, activation='relu')
decoder_mean = Dense(input_dim, activation='sigmoid')
h_decoded = decoder_h1(z)
h_decoded = decoder_h2(h_decoded)
x_decoded_mean = decoder_mean(h_decoded)

vae = Model(inputs, x_decoded_mean)

reconstruction_loss = mse(inputs, x_decoded_mean)
reconstruction_loss *= input_dim
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + best_kl_weight * kl_loss)  # Adjusted KL weight

vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

# Train the final VAE
vae.fit(preprocessed_data, preprocessed_data, epochs=50, batch_size=32, validation_split=0.2)

# Encoder model to get the latent space
encoder = Model(inputs, z_mean)

# Getting the latent representations
latent_representations = encoder.predict(preprocessed_data)

# Save the latent representations to a file
np.save('latent_representations.npy', latent_representations)

# Save the VAE model
vae.save('vae_model.h5')

# Save the encoder model
encoder.save('encoder_model.h5')

Best KL Weight: 0.5, Best Latent Dimension: 5
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




In [9]:
# Load latent representations from the encoder
latent_representations = encoder.predict(preprocessed_data)

# Apply PCA to the Latent Representations
pca = PCA(n_components=2)
latent_representations_pca = pca.fit_transform(latent_representations)

# Set the number of clusters to 4
optimal_k = 4

# Apply KMeans Clustering to the Latent Representations with the Optimal Number of Clusters
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
cluster_labels = kmeans.fit_predict(latent_representations)

# Adding cluster labels to preprocessed data
preprocessed_data['Cluster'] = cluster_labels

# Save preprocessed data with cluster labels
with open('/content/gkone_preprocessed_data.pkl', 'wb') as f:
    pickle.dump(preprocessed_data, f)

# Train the decision tree classifier
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(latent_representations, cluster_labels)

# Save the models using joblib
joblib.dump(decision_tree, 'decision_tree_model.pkl')
joblib.dump(kmeans, 'kmeans_model.pkl')

# Save the latent representations to a file
np.save('latent_representations.npy', latent_representations)

# Save the VAE model
vae.save('vae_model.h5')

# Save the encoder model
encoder.save('encoder_model.h5')





In [10]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

# Evaluate the Decision Tree Classifier
predicted_labels = decision_tree.predict(latent_representations)
accuracy = accuracy_score(cluster_labels, predicted_labels)
print(f"Accuracy of Decision Tree Classifier on training data: {accuracy:.2f}")

# Cross-Validation to evaluate Decision Tree Classifier
cv_scores = cross_val_score(decision_tree, latent_representations, cluster_labels, cv=5)
mean_accuracy = np.mean(cv_scores)
std_accuracy = np.std(cv_scores)
print(f"Cross-Validation Accuracy: {mean_accuracy:.2f} ± {std_accuracy:.2f}")

Accuracy of Decision Tree Classifier on training data: 1.00
Cross-Validation Accuracy: 0.99 ± 0.00


Prediction Module

In [11]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.models import load_model

# Load the pre-trained models and data
decision_tree = joblib.load('decision_tree_model.pkl')
kmeans = joblib.load('kmeans_model.pkl')
vae = load_model('vae_model.h5')
encoder = load_model('encoder_model.h5')

latent_representations = np.load('latent_representations.npy')

# Load preprocessed data for recommendation
with open('/content/gkone_preprocessed_data.pkl', 'rb') as f:
    preprocessed_data = pickle.load(f)

# Preprocessing functions
def preprocess_input(data):
    # Drop the CustomerID column if present
    if 'CustomerID' in data.columns:
        data = data.drop(columns=['CustomerID'])
    # Trim white spaces from column names
    data.columns = data.columns.str.strip()
    # Trim white spaces from string values in each column
    for col in data.select_dtypes(['object']).columns:
        data[col] = data[col].str.strip()
    data = classify_age_groups(data)
    data = one_hot_encode_and_normalize(data)
    data = classify_location(data, top_parishes)
    data = data.astype('float32')
    return data

# Function to classify new customer data
def classify_new_customer(new_customer_data):
    new_customer_data_preprocessed = preprocess_input(new_customer_data)
    new_customer_latent = encoder.predict(new_customer_data_preprocessed)
    cluster_label = decision_tree.predict(new_customer_latent)
    return cluster_label, new_customer_latent

# Function to recommend products/services based on cosine similarity with confidence scores
def recommend_products_with_confidence(new_customer_vector, data, top_n=5):
    product_columns = ['PeerToPeer Sending', 'MotorInsurance', 'Remittances', 'MarketPlace', 'BillPayments']

    # Load the latent representations and preprocessed data
    latent_representations = np.load('latent_representations.npy')
    with open('/content/gkone_preprocessed_data.pkl', 'rb') as f:
        preprocessed_data = pickle.load(f)

    # Get the cluster label of the new customer
    cluster_label = decision_tree.predict(new_customer_vector)

    # Filter latent vectors to get those in the same cluster
    same_cluster_indices = np.where(preprocessed_data['Cluster'] == cluster_label[0])[0]
    same_cluster_latents = latent_representations[same_cluster_indices]

    # Calculate cosine similarity between the new customer and customers in the same cluster
    similarity_scores = cosine_similarity(new_customer_vector, same_cluster_latents)

    # Get the top 5 most similar latent vectors
    top_similar_indices = np.argsort(similarity_scores[0])[::-1][:top_n]
    top_similar_customers = same_cluster_indices[top_similar_indices]

    # Retrieve the original data rows corresponding to the top similar customers
    top_similar_data = preprocessed_data.iloc[top_similar_customers]

    # Exclude products already used by the new customer
    new_customer_products = new_customer_vector[0][-len(product_columns):]
    new_customer_products_used = [product_columns[i] for i in range(len(product_columns)) if new_customer_products[i] == 1]

    # Calculate the weighted mean usage of each product among the top similar customers
    similar_customers_products = top_similar_data[product_columns]
    similar_customers_scores = similarity_scores[0][top_similar_indices]
    weighted_usage = similar_customers_products.T.dot(similar_customers_scores)
    weighted_usage /= similar_customers_scores.sum()

    # Convert to percentages
    confidence_scores = (weighted_usage / weighted_usage.sum()) * 100

    # Exclude products already used by the new customer
    recommended_products = confidence_scores.drop(new_customer_products_used)

    # Sort recommendations by confidence scores
    recommended_products = recommended_products.sort_values(ascending=False)

    return recommended_products



Main Function (Main.py)

In [15]:
def main():
    # Load the original dataset to validate input
    original_data = pd.read_csv('/content/Updated_GKONE_Synthetic_Dataset.csv')

    valid_product_types = set(original_data['ProductType'].apply(lambda x: x.strip('[]').replace("'", "").split(', ')).explode().unique())
    valid_income_levels = set(original_data['IncomeLevel'].unique())
    valid_locations = set(original_data['Location'].unique())
    valid_genders = set(original_data['Gender'].unique())
    valid_employment_statuses = set(original_data['EmploymentStatus'].unique())
    valid_education_levels = set(original_data['EducationLevel'].unique())

    # Prompt user for input
    print("Please enter the following details for the new customer:")

    age = int(input("Age: "))

    product_types = validate_multiple_inputs("Product Types (e.g., PeerToPeer_Sending, MotorInsurance): ", valid_product_types)

    income_level = validate_input("Income Level (Low/Medium/High): ", valid_income_levels)
    remittances_freq = int(input("Remittances Frequency (Monthly): "))
    bill_payments_freq = int(input("Bill Payments Frequency (Monthly): "))
    marketplace_freq = int(input("MarketPlace Frequency (Monthly): "))
    peer_to_peer_freq = int(input("Peer-to-Peer Frequency (Monthly): "))
    customer_tenure = int(input("Customer Tenure (in years): "))
    activity_level = float(input("Activity Level (Number of Logins/ Month): "))

    location = validate_input("Location: ", valid_locations)
    gender = validate_input("Gender (Male/Female): ", valid_genders)
    employment_status = validate_input("Employment Status (Employed/Unemployed/Student/Retired): ", valid_employment_statuses)
    education_level = validate_input("Education Level (High School/College/Graduate): ", valid_education_levels)
    remittances_value = float(input("Remittances Monthly Transaction Value: "))
    bill_payments_value = float(input("Bill Payments Monthly Transaction Value: "))
    peer_to_peer_value = float(input("Peer-to-Peer Monthly Transaction Value: "))
    linked_bank_account_value = float(input("Linked Bank Account Monthly Value: "))

    # Create a DataFrame with the input data
    new_customer_data = pd.DataFrame({
        'Age': [age],
        'ProductType': [product_types],
        'IncomeLevel': [income_level],
        'RemittancesFreq_Monthly': [remittances_freq],
        'BillPaymentsFreq_Monthly': [bill_payments_freq],
        'MarketPlaceFreq_Monthly': [marketplace_freq],
        'PeerToPeerFreq_Monthly': [peer_to_peer_freq],
        'CustomerTenure': [customer_tenure],
        'ActivityLevel': [activity_level],
        'Location': [location],
        'Gender': [gender],
        'EmploymentStatus': [employment_status],
        'EducationLevel': [education_level],
        'Remittances_MonthlyTransValue': [remittances_value],
        'BillPayments_MonthlyTransValue': [bill_payments_value],
        'PeerToPeer_MonthlyTransValue': [peer_to_peer_value],
        'LinkedBankAccountMonthlyValue': [linked_bank_account_value]
    })

    # Combine the new customer data with the original dataset
    combined_data = pd.concat([original_data, new_customer_data], ignore_index=True)

    # Preprocess the combined data
    preprocessed_combined_data = preprocess_input(combined_data)

    # Extract the new customer data after preprocessing
    new_customer_preprocessed = preprocessed_combined_data.iloc[-1:]

    # Classify new customer and get latent representation
    new_customer_latent = encoder.predict(new_customer_preprocessed)
    cluster_label = decision_tree.predict(new_customer_latent)

    # Manually assign the cluster label to the new customer data
    new_customer_preprocessed['Cluster'] = cluster_label[0]

    # Filter preprocessed data to include only customers from the same cluster
    cluster_data = preprocessed_data[preprocessed_data['Cluster'] == cluster_label[0]]

    # Recommend products/services for the new customer
    recommendations = recommend_products_with_confidence(new_customer_latent, cluster_data)

    print("Recommended Products with Confidence Scores:")
    print(recommendations)

def validate_input(prompt, valid_options):
    while True:
        value = input(prompt).strip()
        if value in valid_options:
            return value
        else:
            print(f"Invalid input. Valid options are: {valid_options}")

def validate_multiple_inputs(prompt, valid_options):
    while True:
        values = input(prompt).strip().split(',')
        values = [value.strip() for value in values]
        if all(value in valid_options for value in values):
            return f"['{', '.join(values)}']"
        else:
            print(f"Invalid input. Valid options are: {valid_options}")

if __name__ == "__main__":
    main()

Please enter the following details for the new customer:
Age: 32
Product Types (e.g., PeerToPeer_Sending, MotorInsurance): Remittances, PeerToPeer Sending
Income Level (Low/Medium/High): High
Remittances Frequency (Monthly): 20
Bill Payments Frequency (Monthly): 0
MarketPlace Frequency (Monthly): 0
Peer-to-Peer Frequency (Monthly): 17
Customer Tenure (in years): 3
Activity Level (Number of Logins/ Month): 15
Location: St. James
Gender (Male/Female): Female
Employment Status (Employed/Unemployed/Student/Retired): Student
Education Level (High School/College/Graduate): College
Remittances Monthly Transaction Value: 29000.72
Bill Payments Monthly Transaction Value: 0
Peer-to-Peer Monthly Transaction Value: 17888.43
Linked Bank Account Monthly Value: 13456.00
Recommended Products with Confidence Scores:
Remittances           50.000000
MarketPlace           29.910469
MotorInsurance        20.089533
PeerToPeer Sending     0.000000
BillPayments           0.000000
dtype: float32
