In [1]:
import pandas as pd
import argparse
from datetime import datetime, date, timedelta
from dateutil.relativedelta import relativedelta
import numpy as np
import boto3
import gc
import datetime as dt
import io
from io import StringIO
import os
import dask
from dask import delayed
pd.set_option('display.max_columns',100)

In [2]:
curr_dir = os.path.abspath(os.getcwd())
data_dir = os.path.join(curr_dir,'Data')
os.makedirs(data_dir, exist_ok=True)

In [3]:
def get_training_cut_off_month(df):
    
    last_day_of_prev_month = date.today().replace(day=1) - timedelta(days=2)
    cut_off_date_3_years = last_day_of_prev_month - timedelta(days=365*3)
    
    training_cut_off_date = cut_off_date_3_years - relativedelta(months=6)
    
    training_cut_off_date = str(training_cut_off_date).split('-')
    training_cut_off_month = training_cut_off_date[:2]
    
    training_cut_off_month = '-'.join(training_cut_off_month)
    
    print('Cut off month for training data: ',training_cut_off_month)

    return training_cut_off_month, cut_off_date_3_years


def preprocessing_common(df_KPI):

    #Removing account_group duplicates 
    df_KPI.drop_duplicates(subset=['accounts_group'], keep='last', ignore_index=True)
    
    # Get cohort details
    cohort_details = df_KPI["accounts_group"].str.split("_", n = 5, expand = True)
    df_KPI["reg_month"] = cohort_details[0]
    df_KPI["country"] = cohort_details[1]
    df_KPI["product_group"] = cohort_details[2]
    df_KPI["area"] = cohort_details[3]
    df_KPI["primary_product"] = cohort_details[4]
    
    
    df_KPI['avg_planned_repayment_days'] = df_KPI['avg_planned_repayment_days'].fillna(-1)
    df_KPI['avg_planned_repayment_days'] = df_KPI['avg_planned_repayment_days'].astype(int)
    df_KPI.loc[df_KPI['frr_3_years']>1, 'frr_3_years'] = 1
    print('Shape of cohorts having frr_3_years <=0 is {}'.format(df_KPI[df_KPI['frr_3_years']<=0].shape[0]))
    df_KPI = df_KPI.loc[df_KPI['frr_3_years']>0]
    
    # Removing records having null FRR at 30 days
    print('Shape of cohorts having frr_30 null is {}'.format(df_KPI[df_KPI['frr_30'].isna()==True].shape[0]))
    df_KPI = df_KPI.loc[df_KPI['frr_30'].notnull()].copy()
    df_KPI.reset_index(drop=True, inplace=True)

    for limit in [30,60,90,180,270,360,450,540,630,720]:
        df_KPI.loc[df_KPI['frr_'+str(limit)+'']>1, 'frr_'+str(limit)+''] = 1
        df_KPI.loc[(df_KPI['at_risk_rate_'+str(limit)+''].isnull()) &(df_KPI['frr_'+str(limit)+'']==1),'at_risk_rate_'+str(limit)+''] = 0
    
    df_KPI.reset_index(drop=True, inplace=True)
    
    return df_KPI


def backtesting_split(data, backtesting):

    df_backtesting = data.copy()
    df_backtesting['backtesting'] = backtesting

    for col_prefix in ['frr', 'repayment_speed', 'avg_cum_days_disabled',
                  'at_risk_rate', 'disabled_gt_two_week_rate', 'unlocked_rate', 'disabled_rate']:
    
        df_backtesting[col_prefix] = np.nan

        df_backtesting.loc[df_backtesting['backtesting']==30, col_prefix] = df_backtesting[col_prefix+'_'+str(30)]
        df_backtesting.loc[df_backtesting['backtesting']==60, col_prefix] = df_backtesting[col_prefix+'_'+str(60)]
        df_backtesting.loc[df_backtesting['backtesting']==90, col_prefix] = df_backtesting[col_prefix+'_'+str(90)]
        df_backtesting.loc[df_backtesting['backtesting']==180, col_prefix] = df_backtesting[col_prefix+'_'+str(180)]
        df_backtesting.loc[df_backtesting['backtesting']==270, col_prefix] = df_backtesting[col_prefix+'_'+str(270)]
        df_backtesting.loc[df_backtesting['backtesting']==360, col_prefix] = df_backtesting[col_prefix+'_'+str(360)]
        df_backtesting.loc[df_backtesting['backtesting']==450, col_prefix] = df_backtesting[col_prefix+'_'+str(450)]
        df_backtesting.loc[df_backtesting['backtesting']==540, col_prefix] = df_backtesting[col_prefix+'_'+str(540)]
        df_backtesting.loc[df_backtesting['backtesting']==630, col_prefix] = df_backtesting[col_prefix+'_'+str(630)]
        df_backtesting.loc[df_backtesting['backtesting']==720, col_prefix] = df_backtesting[col_prefix+'_'+str(720)]


    # Removing the old temporal features
    cols_to_remove = [col for col in df_backtesting.columns if '30' in col or '60' in col or '90' in col or '180' in col or '270' in col or '360' in col or '450' in col or '540' in col or '630' in col or '720' in col]
    df_backtesting.drop(cols_to_remove, axis=1, inplace=True)
    print('Shape of {} is {}'.format(backtesting, df_backtesting.shape))
    print(df_backtesting.head())

    return df_backtesting

def feature_engineering(data):
    
    # Creating column for Unlock price
    data['unlock_price_usd'] = data['upfront_price_usd'] + data['total_follow_on_revenue_usd']
    
    # Calculating averge unlock and upfront price

    data['avg_upfront_price_usd'] = np.round((data['upfront_price_usd']/data['count_units']),0)
    data['avg_unlock_price_usd'] = np.round((data['unlock_price_usd']/data['count_units']),0)
    
    # Removing unnecessary columns
    cols_to_remove = []
    cols_to_remove.append('upfront_price_usd')
    cols_to_remove.append('unlock_price_usd')

    print('columns to remove: ',cols_to_remove)
    
    data.drop(cols_to_remove, axis=1, inplace=True)
    
    print('Sample from data------------')
    print(data.sample(1))


    data = data.reset_index().drop(columns = 'index')
    
    return data
    

def create_train_test_validation_split(df_KPI):
    
    # Random shuffling
    df_KPI = df_KPI.sample(frac = 1)
    
    training_cut_off_date, cut_off_date_3_years = get_training_cut_off_month(df_KPI)
    training_cut_off_date = str(training_cut_off_date)
    cut_off_date_3_years = str(cut_off_date_3_years)
    df_train_test = df_KPI.loc[df_KPI['reg_month']<=training_cut_off_date]
    df_oot_validation = df_KPI.loc[(df_KPI['reg_month']>training_cut_off_date) & (df_KPI['reg_month']<=cut_off_date_3_years)]
    
    # Create train set using 85% rows randomly
    df_train = df_train_test.sample(frac = 0.85, random_state=100)

    # Create test set using remaining 15% rows
    df_test = df_train_test.drop(df_train.index)
    
    print('Num of rows in train subset: ',df_train.shape[0])
    print('Num of rows in test subset: ',df_test.shape[0])
    print('Num of rows in validation subset: ',df_oot_validation.shape[0])
    
    print('Last registration month in training: ',list(df_train.sort_values(by='reg_month', ascending=False)['reg_month'])[0])
    print('Last registration month in validation: ',list(df_oot_validation.sort_values(by='reg_month', ascending=False)['reg_month'])[0])
    
    df_train.reset_index(drop=True, inplace=True)
    df_test.reset_index(drop=True, inplace=True)
    df_oot_validation.reset_index(drop=True, inplace=True)
    
    return df_train, df_test, df_oot_validation

## Main flow

In [4]:
df_KPI = pd.read_csv(os.path.join(data_dir,'KPIs_data_modelling_2025-02-19.csv'))

In [5]:
print('Shape of the combined dataset: ',df_KPI.shape)

Shape of the combined dataset:  (62048, 77)


In [6]:
df_KPI.head()

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,frr_450,frr_540,frr_630,frr_720,repayment_speed_30,repayment_speed_60,repayment_speed_90,repayment_speed_180,repayment_speed_270,repayment_speed_360,repayment_speed_450,repayment_speed_540,repayment_speed_630,repayment_speed_720,avg_cum_days_disabled_30,avg_cum_days_disabled_60,avg_cum_days_disabled_90,avg_cum_days_disabled_180,avg_cum_days_disabled_270,avg_cum_days_disabled_360,avg_cum_days_disabled_450,avg_cum_days_disabled_540,avg_cum_days_disabled_630,avg_cum_days_disabled_720,at_risk_rate_30,at_risk_rate_60,at_risk_rate_90,at_risk_rate_180,at_risk_rate_270,at_risk_rate_360,at_risk_rate_450,at_risk_rate_540,at_risk_rate_630,at_risk_rate_720,disabled_gt_two_week_rate_30,disabled_gt_two_week_rate_60,disabled_gt_two_week_rate_90,disabled_gt_two_week_rate_180,disabled_gt_two_week_rate_270,disabled_gt_two_week_rate_360,disabled_gt_two_week_rate_450,disabled_gt_two_week_rate_540,disabled_gt_two_week_rate_630,disabled_gt_two_week_rate_720,unlocked_rate_30,unlocked_rate_60,unlocked_rate_90,unlocked_rate_180,unlocked_rate_270,unlocked_rate_360,unlocked_rate_450,unlocked_rate_540,unlocked_rate_630,unlocked_rate_720,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd
0,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,57,403.243821,77.0,0.35218,0.626566,0.785424,0.888827,0.924767,0.935544,0.945594,0.963138,0.964892,0.971333,1.0718,0.8275,0.7854,0.8888,0.9247,0.9355,0.9455,0.9631,0.9648,0.9713,1.0,5.0,11.0,25.0,33.0,42.0,43.0,51.0,57.0,69.0,0.181818,0.08,0.148148,0.125,0.222222,0.25,0.333333,0.166667,0.2,0.333333,0.017544,0.122807,0.175439,0.157895,0.157895,0.105263,0.105263,0.087719,0.087719,0.052632,0.035088,0.122807,0.526316,0.719298,0.842105,0.859649,0.894737,0.894737,0.912281,0.947368,0.263158,0.315789,0.368421,0.22807,0.157895,0.122807,0.105263,0.105263,0.087719,0.052632,0.977148,1386.031419,1418.445741
1,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,18,127.340154,77.0,0.425714,0.686587,0.807381,0.849048,0.856984,0.856984,0.856984,0.856984,0.892698,0.90381,1.2956,0.9068,0.8073,0.849,0.8569,0.8569,0.8569,0.8569,0.8926,0.9038,0.0,3.0,8.0,14.0,14.0,14.0,14.0,14.0,102.0,105.0,0.055556,0.0625,0.090909,0.166667,0.166667,0.166667,0.166667,0.166667,0.25,1.0,0.0,0.111111,0.277778,0.333333,0.333333,0.333333,0.333333,0.333333,0.166667,0.055556,0.0,0.111111,0.388889,0.666667,0.666667,0.666667,0.666667,0.666667,0.777778,0.833333,0.277778,0.388889,0.444444,0.333333,0.333333,0.333333,0.333333,0.333333,0.222222,0.055556,0.90381,404.843825,447.930234
2,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,287,2030.368011,77.0,0.365668,0.618819,0.763085,0.867551,0.897489,0.917509,0.924428,0.933954,0.938175,0.951517,1.1129,0.8173,0.763,0.8675,0.8974,0.9175,0.9244,0.9339,0.9381,0.9515,1.0,7.0,13.0,32.0,43.0,55.0,63.0,72.0,80.0,90.0,0.156364,0.113725,0.130435,0.166667,0.196721,0.22449,0.244444,0.263158,0.272727,0.2,0.045296,0.087108,0.198606,0.205575,0.163763,0.139373,0.132404,0.114983,0.101045,0.059233,0.041812,0.111498,0.439024,0.707317,0.787456,0.829268,0.843206,0.867596,0.878049,0.909408,0.229965,0.341463,0.38676,0.268293,0.188153,0.163763,0.15331,0.12892,0.108014,0.062718,0.958262,6843.905988,7141.998731
3,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,114,806.487642,77.0,0.341451,0.566825,0.706045,0.812436,0.847875,0.866647,0.876922,0.888125,0.903764,0.90998,1.0391,0.7486,0.706,0.8124,0.8478,0.8666,0.8769,0.8881,0.9037,0.9099,1.0,5.0,11.0,29.0,44.0,54.0,63.0,80.0,93.0,101.0,0.157407,0.153846,0.176471,0.243902,0.28125,0.346154,0.32,0.217391,0.2,0.181818,0.035088,0.263158,0.280702,0.254386,0.219298,0.210526,0.201754,0.192982,0.087719,0.096491,0.052632,0.087719,0.403509,0.640351,0.719298,0.77193,0.780702,0.798246,0.842105,0.850877,0.307018,0.482456,0.438596,0.307018,0.263158,0.219298,0.210526,0.201754,0.131579,0.096491,0.92503,2624.209728,2836.891482
4,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,142,1004.572326,77.0,0.358523,0.615414,0.759101,0.854278,0.878097,0.892322,0.905249,0.915813,0.925324,0.932185,1.0911,0.8128,0.7591,0.8542,0.878,0.8923,0.9052,0.9158,0.9253,0.9321,1.0,6.0,12.0,27.0,40.0,52.0,68.0,73.0,88.0,108.0,0.181159,0.141732,0.168831,0.225,0.147059,0.137931,0.153846,0.130435,0.157895,0.176471,0.035211,0.112676,0.211268,0.197183,0.183099,0.169014,0.133803,0.140845,0.098592,0.098592,0.028169,0.105634,0.457746,0.71831,0.753521,0.788732,0.809859,0.830986,0.852113,0.866197,0.225352,0.359155,0.380282,0.260563,0.225352,0.197183,0.169014,0.161972,0.133803,0.112676,0.945294,3340.358794,3533.671846


In [7]:
df_KPI = preprocessing_common(df_KPI)

Shape of cohorts having frr_3_years <=0 is 154
Shape of cohorts having frr_30 null is 0


In [8]:
df_KPI.head()

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,frr_450,frr_540,frr_630,frr_720,repayment_speed_30,repayment_speed_60,repayment_speed_90,repayment_speed_180,repayment_speed_270,repayment_speed_360,repayment_speed_450,repayment_speed_540,repayment_speed_630,repayment_speed_720,avg_cum_days_disabled_30,avg_cum_days_disabled_60,avg_cum_days_disabled_90,avg_cum_days_disabled_180,avg_cum_days_disabled_270,avg_cum_days_disabled_360,avg_cum_days_disabled_450,avg_cum_days_disabled_540,avg_cum_days_disabled_630,avg_cum_days_disabled_720,at_risk_rate_30,at_risk_rate_60,at_risk_rate_90,at_risk_rate_180,at_risk_rate_270,at_risk_rate_360,at_risk_rate_450,at_risk_rate_540,at_risk_rate_630,at_risk_rate_720,disabled_gt_two_week_rate_30,disabled_gt_two_week_rate_60,disabled_gt_two_week_rate_90,disabled_gt_two_week_rate_180,disabled_gt_two_week_rate_270,disabled_gt_two_week_rate_360,disabled_gt_two_week_rate_450,disabled_gt_two_week_rate_540,disabled_gt_two_week_rate_630,disabled_gt_two_week_rate_720,unlocked_rate_30,unlocked_rate_60,unlocked_rate_90,unlocked_rate_180,unlocked_rate_270,unlocked_rate_360,unlocked_rate_450,unlocked_rate_540,unlocked_rate_630,unlocked_rate_720,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd,reg_month,country,product_group,area,primary_product
0,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,57,403.243821,77,0.35218,0.626566,0.785424,0.888827,0.924767,0.935544,0.945594,0.963138,0.964892,0.971333,1.0718,0.8275,0.7854,0.8888,0.9247,0.9355,0.9455,0.9631,0.9648,0.9713,1.0,5.0,11.0,25.0,33.0,42.0,43.0,51.0,57.0,69.0,0.181818,0.08,0.148148,0.125,0.222222,0.25,0.333333,0.166667,0.2,0.333333,0.017544,0.122807,0.175439,0.157895,0.157895,0.105263,0.105263,0.087719,0.087719,0.052632,0.035088,0.122807,0.526316,0.719298,0.842105,0.859649,0.894737,0.894737,0.912281,0.947368,0.263158,0.315789,0.368421,0.22807,0.157895,0.122807,0.105263,0.105263,0.087719,0.052632,0.977148,1386.031419,1418.445741,2016-01,Kenya,Lanterns,Kakamega,Sun King Pro EasyBuy
1,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,18,127.340154,77,0.425714,0.686587,0.807381,0.849048,0.856984,0.856984,0.856984,0.856984,0.892698,0.90381,1.2956,0.9068,0.8073,0.849,0.8569,0.8569,0.8569,0.8569,0.8926,0.9038,0.0,3.0,8.0,14.0,14.0,14.0,14.0,14.0,102.0,105.0,0.055556,0.0625,0.090909,0.166667,0.166667,0.166667,0.166667,0.166667,0.25,1.0,0.0,0.111111,0.277778,0.333333,0.333333,0.333333,0.333333,0.333333,0.166667,0.055556,0.0,0.111111,0.388889,0.666667,0.666667,0.666667,0.666667,0.666667,0.777778,0.833333,0.277778,0.388889,0.444444,0.333333,0.333333,0.333333,0.333333,0.333333,0.222222,0.055556,0.90381,404.843825,447.930234,2016-02,Kenya,Lanterns,Bungoma,Sun King Pro EasyBuy
2,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,287,2030.368011,77,0.365668,0.618819,0.763085,0.867551,0.897489,0.917509,0.924428,0.933954,0.938175,0.951517,1.1129,0.8173,0.763,0.8675,0.8974,0.9175,0.9244,0.9339,0.9381,0.9515,1.0,7.0,13.0,32.0,43.0,55.0,63.0,72.0,80.0,90.0,0.156364,0.113725,0.130435,0.166667,0.196721,0.22449,0.244444,0.263158,0.272727,0.2,0.045296,0.087108,0.198606,0.205575,0.163763,0.139373,0.132404,0.114983,0.101045,0.059233,0.041812,0.111498,0.439024,0.707317,0.787456,0.829268,0.843206,0.867596,0.878049,0.909408,0.229965,0.341463,0.38676,0.268293,0.188153,0.163763,0.15331,0.12892,0.108014,0.062718,0.958262,6843.905988,7141.998731,2016-02,Kenya,Lanterns,Kakamega,Sun King Pro EasyBuy
3,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,114,806.487642,77,0.341451,0.566825,0.706045,0.812436,0.847875,0.866647,0.876922,0.888125,0.903764,0.90998,1.0391,0.7486,0.706,0.8124,0.8478,0.8666,0.8769,0.8881,0.9037,0.9099,1.0,5.0,11.0,29.0,44.0,54.0,63.0,80.0,93.0,101.0,0.157407,0.153846,0.176471,0.243902,0.28125,0.346154,0.32,0.217391,0.2,0.181818,0.035088,0.263158,0.280702,0.254386,0.219298,0.210526,0.201754,0.192982,0.087719,0.096491,0.052632,0.087719,0.403509,0.640351,0.719298,0.77193,0.780702,0.798246,0.842105,0.850877,0.307018,0.482456,0.438596,0.307018,0.263158,0.219298,0.210526,0.201754,0.131579,0.096491,0.92503,2624.209728,2836.891482,2016-03,Kenya,Lanterns,Bungoma,Sun King Pro EasyBuy
4,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,142,1004.572326,77,0.358523,0.615414,0.759101,0.854278,0.878097,0.892322,0.905249,0.915813,0.925324,0.932185,1.0911,0.8128,0.7591,0.8542,0.878,0.8923,0.9052,0.9158,0.9253,0.9321,1.0,6.0,12.0,27.0,40.0,52.0,68.0,73.0,88.0,108.0,0.181159,0.141732,0.168831,0.225,0.147059,0.137931,0.153846,0.130435,0.157895,0.176471,0.035211,0.112676,0.211268,0.197183,0.183099,0.169014,0.133803,0.140845,0.098592,0.098592,0.028169,0.105634,0.457746,0.71831,0.753521,0.788732,0.809859,0.830986,0.852113,0.866197,0.225352,0.359155,0.380282,0.260563,0.225352,0.197183,0.169014,0.161972,0.133803,0.112676,0.945294,3340.358794,3533.671846,2016-03,Kenya,Lanterns,Kakamega,Sun King Pro EasyBuy


In [9]:
df_features = df_KPI.copy()
# Backtesting-wise spliting features data
inputs = [30, 60, 90, 180, 270, 360, 450, 540, 630, 720]
tasks = [delayed(backtesting_split)(df_features, i) for i in inputs]
results = dask.compute(*tasks) # Compute the tasks in parallel

df_features = pd.concat(results, ignore_index=True) # Concatenate all DataFrames

Shape of 630 is (61894, 20)
                                      accounts_group  count_units  \
0  2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...           57   
1  2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...           18   
2  2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...          287   
3  2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...          114   
4  2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...          142   

   upfront_price_usd  avg_planned_repayment_days  frr_3_years    actual_fr  \
0         403.243821                          77     0.977148  1386.031419   
1         127.340154                          77     0.903810   404.843825   
2        2030.368011                          77     0.958262  6843.905988   
3         806.487642                          77     0.925030  2624.209728   
4        1004.572326                          77     0.945294  3340.358794   

   total_follow_on_revenue_usd reg_month country product_group      area  \
0           

In [10]:
df_features.columns

Index(['accounts_group', 'count_units', 'upfront_price_usd',
       'avg_planned_repayment_days', 'frr_3_years', 'actual_fr',
       'total_follow_on_revenue_usd', 'reg_month', 'country', 'product_group',
       'area', 'primary_product', 'backtesting', 'frr', 'repayment_speed',
       'avg_cum_days_disabled', 'at_risk_rate', 'disabled_gt_two_week_rate',
       'unlocked_rate', 'disabled_rate'],
      dtype='object')

In [11]:
df_features.shape

(618940, 20)

In [12]:
df_KPIs_final = feature_engineering(df_features)
df_train, df_test, df_oot_validation = create_train_test_validation_split(df_KPIs_final)

columns to remove:  ['upfront_price_usd', 'unlock_price_usd']
Sample from data------------
                                           accounts_group  count_units  \
431139  2022-02_Tanzania_SHS Entry-Level_Mbeya_Sun Kin...           18   

        avg_planned_repayment_days  frr_3_years    actual_fr  \
431139                         371     0.975945  3593.017042   

        total_follow_on_revenue_usd reg_month   country    product_group  \
431139                  3681.577386   2022-02  Tanzania  SHS Entry-Level   

         area                 primary_product  backtesting       frr  \
431139  Mbeya  Sun King Home 120 Plus EasyBuy          450  0.777616   

        repayment_speed  avg_cum_days_disabled  at_risk_rate  \
431139           0.7776                  137.0           0.0   

        disabled_gt_two_week_rate  unlocked_rate  disabled_rate  \
431139                   0.166667       0.388889       0.333333   

        avg_upfront_price_usd  avg_unlock_price_usd  
431139         

In [13]:
df_oot_validation['reg_month'].unique()

array(['2021-09', '2021-10', '2021-12', '2022-01', '2021-11', '2021-08'],
      dtype=object)

In [14]:
df_oot_validation

Unnamed: 0,accounts_group,count_units,avg_planned_repayment_days,frr_3_years,actual_fr,total_follow_on_revenue_usd,reg_month,country,product_group,area,primary_product,backtesting,frr,repayment_speed,avg_cum_days_disabled,at_risk_rate,disabled_gt_two_week_rate,unlocked_rate,disabled_rate,avg_upfront_price_usd,avg_unlock_price_usd
0,2021-09_Myanmar (Burma)_SHS with TV_Pantanaw_S...,1,560,0.459620,149.996647,326.349260,2021-09,Myanmar (Burma),SHS with TV,Pantanaw,Sun King Home 400 Easybuy GSM,30,0.054945,1.8750,0.0,0.000000,0.000000,0.000000,0.000000,28.0,355.0
1,2021-10_Kenya_Cash Loans_Narok_Cash Loans,1,147,1.000000,214.661797,214.661797,2021-10,Kenya,Cash Loans,Narok,Cash Loans,450,1.000000,1.0000,13.0,0.000000,0.000000,1.000000,0.000000,0.0,215.0
2,2021-12_Kenya_SHS with TV_Eldama Ravine_Sun Ki...,12,639,0.879487,6538.895988,7434.897830,2021-12,Kenya,SHS with TV,Eldama Ravine,Sun King Home 400 EasyBuy,90,0.101806,0.7445,22.0,0.000000,0.000000,0.000000,0.583333,54.0,674.0
3,2021-09_Uganda_Lanterns_Jinja_Sun King Boom Ea...,9,259,1.000000,458.920698,458.835813,2021-09,Uganda,Lanterns,Jinja,Sun King Boom EasyBuy,60,0.166667,0.7927,8.0,0.111111,0.000000,0.000000,0.222222,6.0,57.0
4,2021-09_Nigeria_SHS Entry-Level_Ogoja_Sun King...,68,259,1.000000,8334.838594,8329.008288,2021-09,Nigeria,SHS Entry-Level,Ogoja,Sun King Home 120 EasyBuy,720,0.997461,0.9974,62.0,0.000000,0.014706,0.985294,0.014706,15.0,137.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138465,2021-11_Nigeria_SHS Entry-Level_Benin_Sun King...,135,259,0.991115,16414.360870,16561.509885,2021-11,Nigeria,SHS Entry-Level,Benin,Sun King Home 120 EasyBuy,90,0.351381,1.0668,6.0,0.000000,0.014815,0.029630,0.155556,15.0,137.0
138466,2021-10_Zambia_SHS Entry-Level_Ndola_Sun King ...,105,371,0.840353,18836.746546,22415.278515,2021-10,Zambia,SHS Entry-Level,Ndola,Sun King Home 60 EasyBuy,60,0.130511,0.8963,5.0,0.038095,0.066667,0.000000,0.247619,18.0,231.0
138467,2022-01_Kenya_SHS with TV_Hindi_Sun King Home ...,1,634,1.000000,555.543313,555.543313,2022-01,Kenya,SHS with TV,Hindi,Sun King Home 600 EasyBuy,180,0.209524,0.7500,45.0,0.000000,0.000000,0.000000,0.000000,48.0,604.0
138468,2021-12_Kenya_SHS Entry-Level Upgrade_Kuria_Su...,17,371,0.898836,1723.241830,1917.192714,2021-12,Kenya,SHS Entry-Level Upgrade,Kuria,Sun King Home 60 EasyBuy,30,0.062102,0.9828,2.0,0.117647,0.000000,0.000000,0.176471,4.0,117.0


In [15]:
df_train.to_csv(os.path.join(data_dir,'KPIs_data_modelling_train_days.csv'), index = False)
df_test.to_csv(os.path.join(data_dir,'KPIs_data_modelling_test_days.csv'), index = False)
df_oot_validation.to_csv(os.path.join(data_dir,'KPIs_data_modelling_oot_validation_days.csv'), index = False)

In [16]:
df_features['backtesting'].value_counts()

backtesting
30     61894
60     61894
90     61894
180    61894
270    61894
360    61894
450    61894
540    61894
630    61894
720    61894
Name: count, dtype: int64

In [17]:
df_features['accounts_group'][618939]

'2022-03_Zambia_SHS with TV_Ndola_Sun King Home 400 Easybuy GSM'

In [18]:
df_features[df_features['accounts_group']=='2022-03_Zambia_SHS with TV_Ndola_Sun King Home 400 Easybuy GSM']

Unnamed: 0,accounts_group,count_units,avg_planned_repayment_days,frr_3_years,actual_fr,total_follow_on_revenue_usd,reg_month,country,product_group,area,primary_product,backtesting,frr,repayment_speed,avg_cum_days_disabled,at_risk_rate,disabled_gt_two_week_rate,unlocked_rate,disabled_rate,avg_upfront_price_usd,avg_unlock_price_usd
61893,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,30,0.048295,1.3709,0.0,0.0,0.0,0.0,0.0,67.0,1343.0
123787,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,60,0.086757,1.1434,0.0,0.0,0.0,0.0,0.0,67.0,1343.0
185681,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,90,0.125219,1.0746,0.0,0.0,0.0,0.0,0.0,67.0,1343.0
247575,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,180,0.250219,1.0493,0.0,0.0,0.0,0.0,0.0,67.0,1343.0
309469,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,270,0.375219,1.0411,0.0,0.0,0.0,0.0,0.0,67.0,1343.0
371363,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,360,0.490603,1.0171,1.0,0.0,0.0,0.0,0.0,67.0,1343.0
433257,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,450,0.615603,1.0187,3.0,0.0,0.0,0.0,0.0,67.0,1343.0
495151,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,540,0.644449,0.8873,3.0,0.0,1.0,0.0,1.0,67.0,1343.0
557045,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,630,0.644449,0.7598,3.0,0.0,1.0,0.0,1.0,67.0,1343.0
618939,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,732,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM,720,0.644449,0.6643,3.0,0.0,1.0,0.0,1.0,67.0,1343.0


In [19]:
df_KPI[df_KPI['accounts_group']=='2022-03_Zambia_SHS with TV_Ndola_Sun King Home 400 Easybuy GSM']

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,frr_450,frr_540,frr_630,frr_720,repayment_speed_30,repayment_speed_60,repayment_speed_90,repayment_speed_180,repayment_speed_270,repayment_speed_360,repayment_speed_450,repayment_speed_540,repayment_speed_630,repayment_speed_720,avg_cum_days_disabled_30,avg_cum_days_disabled_60,avg_cum_days_disabled_90,avg_cum_days_disabled_180,avg_cum_days_disabled_270,avg_cum_days_disabled_360,avg_cum_days_disabled_450,avg_cum_days_disabled_540,avg_cum_days_disabled_630,avg_cum_days_disabled_720,at_risk_rate_30,at_risk_rate_60,at_risk_rate_90,at_risk_rate_180,at_risk_rate_270,at_risk_rate_360,at_risk_rate_450,at_risk_rate_540,at_risk_rate_630,at_risk_rate_720,disabled_gt_two_week_rate_30,disabled_gt_two_week_rate_60,disabled_gt_two_week_rate_90,disabled_gt_two_week_rate_180,disabled_gt_two_week_rate_270,disabled_gt_two_week_rate_360,disabled_gt_two_week_rate_450,disabled_gt_two_week_rate_540,disabled_gt_two_week_rate_630,disabled_gt_two_week_rate_720,unlocked_rate_30,unlocked_rate_60,unlocked_rate_90,unlocked_rate_180,unlocked_rate_270,unlocked_rate_360,unlocked_rate_450,unlocked_rate_540,unlocked_rate_630,unlocked_rate_720,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd,reg_month,country,product_group,area,primary_product
61893,2022-03_Zambia_SHS with TV_Ndola_Sun King Home...,1,66.924022,732,0.048295,0.086757,0.125219,0.250219,0.375219,0.490603,0.615603,0.644449,0.644449,0.644449,1.3709,1.1434,1.0746,1.0493,1.0411,1.0171,1.0187,0.8873,0.7598,0.6643,0.0,0.0,0.0,0.0,0.0,1.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.644449,822.328549,1276.018038,2022-03,Zambia,SHS with TV,Ndola,Sun King Home 400 Easybuy GSM
