In [1]:
# Run the project's notebook bootstrap before importing anything from `src`.
# NOTE(review): presumably this puts the repository root on sys.path so that
# `from src.config import ...` in the next cell resolves — confirm in setup_notebook.py.
from setup_notebook import setup_project_path
setup_project_path()

In [2]:
from src.config import RAW_DATA_DIR, PROCESSED_DATA_DIR

In [3]:
import pandas as pd
import argparse
from datetime import datetime, date, timedelta
from dateutil.relativedelta import relativedelta
import numpy as np
import boto3
import gc
import datetime as dt
import io
from io import StringIO
import os
pd.set_option('display.max_columns',100)

In [4]:
def get_training_cut_off_month(df, today=None):
    """Derive the registration-month boundaries for the train / out-of-time split.

    Working back from the last complete calendar month (the month before
    ``today``):

    * the out-of-time validation window ends 36 months earlier, and
    * the training window ends a further 6 months before that.

    The months are computed with integer month arithmetic, so the result does
    not depend on leap years or on how many days the months involved have.

    Parameters
    ----------
    df : object
        Unused. Kept so that existing callers, which pass the KPI frame,
        keep working.
    today : datetime.date, optional
        Reference date. Defaults to ``date.today()``. Pass a fixed date to
        make a run reproducible.

    Returns
    -------
    tuple
        ``(training_cut_off_month, cut_off_date_3_years)``: the first item is
        a ``'YYYY-MM'`` string, the second is the ``datetime.date`` of the
        last day of the month that closes the validation window.
    """
    reference_day = date.today() if today is None else today

    # Number the months consecutively (year * 12 + zero-based month) so that
    # "N months earlier" is a plain subtraction.
    prev_month_index = reference_day.year * 12 + (reference_day.month - 1) - 1

    def _year_and_month(month_index):
        """Turn a consecutive month number back into (year, 1-based month)."""
        year, zero_based_month = divmod(month_index, 12)
        return year, zero_based_month + 1

    # Last day of the month 36 months before the last complete month:
    # take the first day of the month after it and step back one day.
    following_year, following_month = _year_and_month(prev_month_index - 36 + 1)
    cut_off_date_3_years = date(following_year, following_month, 1) - timedelta(days=1)

    # Training data stops six months before the validation window ends.
    train_year, train_month = _year_and_month(prev_month_index - 36 - 6)
    training_cut_off_month = '{:04d}-{:02d}'.format(train_year, train_month)

    print('Cut off month for training data: ',training_cut_off_month)

    return training_cut_off_month, cut_off_date_3_years


def get_cohort_age_in_months(row):
    """Return a cohort's age in months, measured at the end of last month.

    ``row['reg_month']`` is split on ``'-'``; its first two pieces are read as
    the registration year and month. The age is the ``years * 12 + months``
    part of the ``relativedelta`` between the last day of the calendar month
    preceding today and the last day of the registration month.
    """
    pieces = row['reg_month'].split('-')
    cohort_year, cohort_month = int(pieces[0]), int(pieces[1])

    # Day 28 exists in every month, and four days after it always falls in the
    # following month; stepping back by that date's day number lands on the
    # last day of the registration month.
    in_following_month = date(cohort_year, cohort_month, 28) + timedelta(days=4)
    cohort_month_end = in_following_month - timedelta(days=in_following_month.day)

    # Reference point: last day of the month before the current one.
    run_day = date.today()
    previous_month_end = date(run_day.year, run_day.month, 1) - timedelta(days=1)

    elapsed = relativedelta(previous_month_end, cohort_month_end)
    return elapsed.years * 12 + elapsed.months

def preprocessing_common(df_KPI):
    """Clean the cohort KPI table and derive the cohort descriptor columns.

    Steps, in order:

    1. Keep one row per ``accounts_group`` (the last occurrence wins).
    2. Split ``accounts_group`` on ``_`` into ``reg_month``, ``country``,
       ``product_group``, ``area`` and ``primary_product``.
    3. Add ``age_in_months`` via ``get_cohort_age_in_months``.
    4. Fill missing ``avg_planned_repayment_days`` with -1 and cast to int.
    5. Cap ``frr_3_years`` at 1 and keep only rows where it is > 0
       (rows with a null value are dropped as well).
    6. Drop rows whose ``frr_30`` is null.
    7. For every horizon, cap ``frr_<n>`` at 1 and set a null
       ``at_risk_rate_<n>`` to 0 where ``frr_<n>`` equals 1.

    Parameters
    ----------
    df_KPI : pandas.DataFrame
        Raw KPI table. It is not modified; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        The cleaned table with a fresh 0..n-1 index.
    """
    horizons = [30, 60, 90, 180, 270, 360, 450, 540, 630, 720]

    # Removing account_group duplicates. drop_duplicates returns a new frame,
    # so the result has to be kept; the copy gives us a private frame to add
    # the derived columns to without touching the caller's data.
    df_KPI = df_KPI.drop_duplicates(subset=['accounts_group'], keep='last', ignore_index=True).copy()

    # Get cohort details
    # NOTE(review): n=5 yields up to six pieces but only the first five are
    # used, so a primary product containing "_" is cut at its first
    # underscore — confirm whether n=4 is what is wanted.
    cohort_details = df_KPI["accounts_group"].str.split("_", n = 5, expand = True)
    df_KPI["reg_month"] = cohort_details[0]
    df_KPI["country"] = cohort_details[1]
    df_KPI["product_group"] = cohort_details[2]
    df_KPI["area"] = cohort_details[3]
    df_KPI["primary_product"] = cohort_details[4]

    # Get age_in_months of cohorts. The age only depends on reg_month, so it
    # is computed once per distinct month instead of once per row.
    age_by_reg_month = {
        reg_month: get_cohort_age_in_months({'reg_month': reg_month})
        for reg_month in df_KPI['reg_month'].unique()
    }
    df_KPI['age_in_months'] = df_KPI['reg_month'].map(age_by_reg_month)

    df_KPI['avg_planned_repayment_days'] = df_KPI['avg_planned_repayment_days'].fillna(-1).astype(int)

    # The target is a repayment rate: cap at 1, then drop non-positive cohorts.
    df_KPI['frr_3_years'] = df_KPI['frr_3_years'].clip(upper=1)
    non_positive_target = df_KPI['frr_3_years'] <= 0
    print('Shape of cohorts having frr_3_years <=0 is {}'.format(int(non_positive_target.sum())))
    df_KPI = df_KPI.loc[df_KPI['frr_3_years'] > 0]

    # Removing records having null FRR at 30 days
    missing_frr_30 = df_KPI['frr_30'].isna()
    print('Shape of cohorts having frr_30 null is {}'.format(int(missing_frr_30.sum())))
    df_KPI = df_KPI.loc[~missing_frr_30].reset_index(drop=True)

    for limit in horizons:
        frr_col = 'frr_{}'.format(limit)
        at_risk_col = 'at_risk_rate_{}'.format(limit)

        df_KPI[frr_col] = df_KPI[frr_col].clip(upper=1)

        # A missing at-risk rate on a cohort whose frr is 1 is recorded as 0.
        repaid_without_rate = df_KPI[at_risk_col].isnull() & (df_KPI[frr_col] == 1)
        df_KPI.loc[repaid_without_rate, at_risk_col] = 0

    return df_KPI

def preprocessing_backtesting(df_KPI, limit):
    """Restrict the KPI table to what is observable ``limit`` days after registration.

    Parameters
    ----------
    df_KPI : pandas.DataFrame
        Preprocessed KPI table (must contain the cohort descriptor columns
        listed below plus the per-horizon KPI columns named ``<kpi>_<days>``).
    limit : int
        Backtesting horizon in days.

    Returns
    -------
    pandas.DataFrame
        For ``limit == 360`` the input frame itself, with every column. For
        any other value, the fixed descriptor/target columns followed by the
        KPI columns of every backtesting horizon ``<= limit``, horizon by
        horizon, in the frame's column order.

    Raises
    ------
    KeyError
        If one of the fixed descriptor/target columns is missing.
    """
    backtesting_limits = [30, 60, 90, 180, 270, 360]

    # At 360 days the full table is passed through untouched.
    if limit == 360:
        return df_KPI

    base_cols = ['accounts_group', 'count_units', 'upfront_price_usd','avg_planned_repayment_days','frr_3_years', 'total_follow_on_revenue_usd','reg_month','country','product_group','area',
                 'primary_product','age_in_months']

    horizon_cols = []
    for horizon in backtesting_limits:
        if horizon > limit:
            continue
        # Match on the column suffix rather than on a substring, so that
        # "_30" can never pick up an unrelated column such as "x_3000".
        suffix = '_' + str(horizon)
        horizon_cols.extend(col for col in df_KPI.columns if str(col).endswith(suffix))

    return df_KPI[base_cols + horizon_cols]

def compare_KPIs_accounts(df_KPI, df_accounts):
    """Check that the KPI table covers enough of the accounts table.

    Two ratios, rounded to four decimals, are printed and tested:

    * rows in ``df_KPI`` over rows in ``df_accounts`` (must be >= 0.98), and
    * the sum of ``df_KPI['count_units']`` over the sum of
      ``df_accounts['num_accounts']`` (must be >= 0.99).

    Returns ``True`` when both thresholds are met, ``False`` otherwise.
    """
    cohort_coverage = np.round(len(df_KPI.index) / len(df_accounts.index), 4)

    kpi_units = df_KPI['count_units'].sum()
    account_units = df_accounts['num_accounts'].sum()
    unit_coverage = np.round(kpi_units / account_units, 4)

    # The labels say "Percent", but the printed values are ratios (1.0 == 100%).
    print('Percent of total cohorts present in training data: ',cohort_coverage)
    print('Percent of total accounts present in training data: ',unit_coverage)

    return bool(cohort_coverage >= 0.98 and unit_coverage >= 0.99)


def split_cohorts_by_age(df_KPI, random_state=100):
    """Randomly assign each cohort a simulated age and hide the KPIs beyond it.

    The rows are shuffled and cut into five consecutive buckets labelled
    360, 450, 540, 630 and 720 days (10% / 10% / 10% / 10% / 60% of the
    rows). The label is stored in a new ``cohort_age`` column, and for every
    row all KPI columns belonging to a later horizon (columns whose name ends
    in ``_<days>`` with ``days > cohort_age``) are overwritten with ``-99``.

    Parameters
    ----------
    df_KPI : pandas.DataFrame
        KPI table; it is not modified.
    random_state : int or None, optional
        Seed for the shuffle. Defaults to 100 so that repeated runs give the
        same assignment; pass ``None`` for a different shuffle on every call.

    Returns
    -------
    pandas.DataFrame
        Every input row, in shuffled order, with the ``cohort_age`` column
        appended and the later-horizon columns masked with ``-99``.
    """
    # Define Unit Age Days Limits and the share of rows each one receives
    list_unit_age_days_limit = [360, 450, 540, 630, 720]
    subset_share = {360: 0.1, 450: 0.1, 540: 0.1, 630: 0.1, 720: 0.6}

    # Reshuffle the data (copy: the frame is extended and edited below)
    df_accounts_comb = df_KPI.sample(frac = 1, random_state = random_state).copy()
    total_rows = df_accounts_comb.shape[0]

    subset_size = {
        limit: int(subset_share[limit] * total_rows)
        for limit in list_unit_age_days_limit
    }
    # Truncating each share can leave a few rows unassigned; give them to the
    # largest bucket instead of silently dropping them.
    subset_size[720] += total_rows - sum(subset_size.values())

    # Creating subsets by Cohorts
    subset_start = 0
    for unit_age_days_limit in list_unit_age_days_limit:

        subset_end = subset_start + subset_size[unit_age_days_limit]
        print('unit age days limit: ',unit_age_days_limit)
        print('subset start: {0}. subset end: {1}'.format(subset_start,subset_end ))
        print('Number of accounts in subset: ',subset_end - subset_start)

        subset_start = subset_end

    # Buckets are consecutive runs of the shuffled rows, so the labels can be
    # laid out in one go: each limit repeated as many times as its bucket size.
    df_accounts_comb['cohort_age'] = np.repeat(
        np.array(list_unit_age_days_limit, dtype='int64'),
        [subset_size[limit] for limit in list_unit_age_days_limit],
    )

    # Removing the insignificant columns based on Cohort age
    for unit_age_days_limit in list_unit_age_days_limit:

        later_suffixes = tuple(
            '_' + str(limit)
            for limit in list_unit_age_days_limit
            if limit > unit_age_days_limit
        )
        if not later_suffixes:
            continue

        # Identify the columns to nullify: match on the "_<days>" suffix so
        # that unrelated columns merely containing the digits are left alone.
        cols_to_null = [
            col for col in df_accounts_comb.columns
            if str(col).endswith(later_suffixes)
        ]

        # Nullify the identified columns (-99 is the "not yet observed" marker)
        df_accounts_comb.loc[df_accounts_comb['cohort_age']==unit_age_days_limit, cols_to_null] = -99

    return df_accounts_comb

def create_train_test_validation_split(df_KPI, random_state=100):
    """Split cohorts into train, test and out-of-time validation sets by ``reg_month``.

    The boundaries come from ``get_training_cut_off_month``:

    * cohorts registered up to and including the training cut-off month are
      split 85% / 15% at random into train and test;
    * cohorts registered after that month, up to and including the month of
      the second boundary, form the out-of-time validation set.

    Parameters
    ----------
    df_KPI : pandas.DataFrame
        Cohort table with a ``reg_month`` column of ``'YYYY-MM'`` strings.
        NOTE(review): the test set is built with ``drop(df_train.index)``, so
        index labels are assumed to be unique — confirm for new callers.
    random_state : int or None, optional
        Seed used for both the shuffle and the train sample. Defaults to 100
        (the seed the train sample has always used) so that the same input
        always yields the same split.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_train, df_test, df_oot_validation)``.
    """
    # Random shuffling, seeded: an unseeded shuffle here would make the seeded
    # train sample below pick different rows on every run.
    df_KPI = df_KPI.sample(frac = 1, random_state = random_state)

    training_cut_off_month, cut_off_date_3_years = get_training_cut_off_month(df_KPI)
    training_cut_off_month = str(training_cut_off_month)
    # reg_month holds 'YYYY-MM', so reduce the boundary date to its month and
    # compare like with like.
    validation_cut_off_month = str(cut_off_date_3_years)[:7]

    reg_month = df_KPI['reg_month']
    df_train_test = df_KPI.loc[reg_month <= training_cut_off_month]
    df_oot_validation = df_KPI.loc[
        (reg_month > training_cut_off_month) & (reg_month <= validation_cut_off_month)
    ]

    # Create train set using 85% rows randomly
    df_train = df_train_test.sample(frac = 0.85, random_state = random_state)

    # Create test set using remaining 15% rows
    df_test = df_train_test.drop(df_train.index)

    print('Num of rows in train subset: ',df_train.shape[0])
    print('Num of rows in test subset: ',df_test.shape[0])
    print('Num of rows in validation subset: ',df_oot_validation.shape[0])

    def _latest_reg_month(subset):
        """Most recent registration month in ``subset`` (None when it is empty)."""
        return subset['reg_month'].max() if len(subset) else None

    print('Last registration month in training: ',_latest_reg_month(df_train))
    print('Last registration month in validation: ',_latest_reg_month(df_oot_validation))

    return df_train, df_test, df_oot_validation

## Main flow

In [5]:
# Load the two inputs from PROCESSED_DATA_DIR: the cohort KPI table (a dated
# file — presumably the extract date, confirm) and the accounts table that
# compare_KPIs_accounts uses as the coverage baseline.
df_KPI = pd.read_csv(os.path.join(PROCESSED_DATA_DIR,'KPIs_data_modelling_2025-09-18.csv'))
df_accounts = pd.read_csv(os.path.join(PROCESSED_DATA_DIR,'accounts_data_modelling.csv'))

In [6]:
# Spot-check: show the row(s) of one cohort, looked up by its accounts_group key.
df_KPI[df_KPI.accounts_group=="2022-09_Kenya_Lanterns_Busia_Sun King Boom EasyBuy"]

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,frr_450,frr_540,frr_630,frr_720,repayment_speed_30,repayment_speed_60,repayment_speed_90,repayment_speed_180,repayment_speed_270,repayment_speed_360,repayment_speed_450,repayment_speed_540,repayment_speed_630,repayment_speed_720,avg_cum_days_disabled_30,avg_cum_days_disabled_60,avg_cum_days_disabled_90,avg_cum_days_disabled_180,avg_cum_days_disabled_270,avg_cum_days_disabled_360,avg_cum_days_disabled_450,avg_cum_days_disabled_540,avg_cum_days_disabled_630,avg_cum_days_disabled_720,at_risk_rate_30,at_risk_rate_60,at_risk_rate_90,at_risk_rate_180,at_risk_rate_270,at_risk_rate_360,at_risk_rate_450,at_risk_rate_540,at_risk_rate_630,at_risk_rate_720,disabled_gt_two_week_rate_30,disabled_gt_two_week_rate_60,disabled_gt_two_week_rate_90,disabled_gt_two_week_rate_180,disabled_gt_two_week_rate_270,disabled_gt_two_week_rate_360,disabled_gt_two_week_rate_450,disabled_gt_two_week_rate_540,disabled_gt_two_week_rate_630,disabled_gt_two_week_rate_720,unlocked_rate_30,unlocked_rate_60,unlocked_rate_90,unlocked_rate_180,unlocked_rate_270,unlocked_rate_360,unlocked_rate_450,unlocked_rate_540,unlocked_rate_630,unlocked_rate_720,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd
80270,2022-09_Kenya_Lanterns_Busia_Sun King Boom Eas...,602,2498.973638,368.0,0.071324,0.131652,0.185374,0.340388,0.481296,0.625501,0.728918,0.790225,0.829753,0.852279,0.9641,0.843,0.7777,0.7019,0.6579,0.6395,0.7289,0.7902,0.8297,0.8522,3.0,9.0,18.0,47.0,79.0,113.0,144.0,170.0,193.0,210.0,0.119601,0.0299,0.019967,0.008333,0.008375,0.00692,0.009685,0.0125,0.016,0.02439,0.009967,0.028239,0.033223,0.081395,0.126246,0.094684,0.147841,0.136213,0.146179,0.083056,0.0,0.0,0.001661,0.003322,0.006645,0.036545,0.310631,0.463455,0.579734,0.651163,0.247508,0.345515,0.360465,0.390365,0.460133,0.398671,0.405316,0.320598,0.292359,0.199336,0.89678,32719.039197,36485.023302


In [7]:
# Sanity check: number of rows and columns of the KPI table as loaded.
print('Shape of the combined dataset: ',df_KPI.shape)

Shape of the combined dataset:  (85712, 77)


In [8]:
# Display the KPI table as loaded, before any preprocessing.
df_KPI

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,frr_450,frr_540,frr_630,frr_720,repayment_speed_30,repayment_speed_60,repayment_speed_90,repayment_speed_180,repayment_speed_270,repayment_speed_360,repayment_speed_450,repayment_speed_540,repayment_speed_630,repayment_speed_720,avg_cum_days_disabled_30,avg_cum_days_disabled_60,avg_cum_days_disabled_90,avg_cum_days_disabled_180,avg_cum_days_disabled_270,avg_cum_days_disabled_360,avg_cum_days_disabled_450,avg_cum_days_disabled_540,avg_cum_days_disabled_630,avg_cum_days_disabled_720,at_risk_rate_30,at_risk_rate_60,at_risk_rate_90,at_risk_rate_180,at_risk_rate_270,at_risk_rate_360,at_risk_rate_450,at_risk_rate_540,at_risk_rate_630,at_risk_rate_720,disabled_gt_two_week_rate_30,disabled_gt_two_week_rate_60,disabled_gt_two_week_rate_90,disabled_gt_two_week_rate_180,disabled_gt_two_week_rate_270,disabled_gt_two_week_rate_360,disabled_gt_two_week_rate_450,disabled_gt_two_week_rate_540,disabled_gt_two_week_rate_630,disabled_gt_two_week_rate_720,unlocked_rate_30,unlocked_rate_60,unlocked_rate_90,unlocked_rate_180,unlocked_rate_270,unlocked_rate_360,unlocked_rate_450,unlocked_rate_540,unlocked_rate_630,unlocked_rate_720,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd
0,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,57,403.243821,77.0,0.352180,0.626566,0.785424,0.888827,0.924767,0.935544,0.945594,0.963138,0.964892,0.971333,1.0718,0.8275,0.7854,0.8888,0.9247,0.9355,0.9455,0.9631,0.9648,0.9713,1.0,5.0,11.0,25.0,33.0,42.0,43.0,51.0,57.0,69.0,0.181818,0.080000,0.148148,0.125000,0.222222,0.250000,0.333333,0.166667,0.200000,0.333333,0.017544,0.122807,0.175439,0.157895,0.157895,0.105263,0.105263,0.087719,0.087719,0.052632,0.035088,0.122807,0.526316,0.719298,0.842105,0.859649,0.894737,0.894737,0.912281,0.947368,0.263158,0.315789,0.368421,0.228070,0.157895,0.122807,0.105263,0.105263,0.087719,0.052632,0.977148,1386.031419,1418.445741
1,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,18,127.340154,77.0,0.425714,0.686587,0.807381,0.849048,0.856984,0.856984,0.856984,0.856984,0.892698,0.903810,1.2956,0.9068,0.8073,0.8490,0.8569,0.8569,0.8569,0.8569,0.8926,0.9038,0.0,3.0,8.0,14.0,14.0,14.0,14.0,14.0,102.0,105.0,0.055556,0.062500,0.090909,0.166667,0.166667,0.166667,0.166667,0.166667,0.250000,1.000000,0.000000,0.111111,0.277778,0.333333,0.333333,0.333333,0.333333,0.333333,0.166667,0.055556,0.000000,0.111111,0.388889,0.666667,0.666667,0.666667,0.666667,0.666667,0.777778,0.833333,0.277778,0.388889,0.444444,0.333333,0.333333,0.333333,0.333333,0.333333,0.222222,0.055556,0.903810,404.843825,447.930234
2,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,287,2030.368011,77.0,0.365668,0.618819,0.763085,0.867551,0.897489,0.917509,0.924428,0.933954,0.938175,0.951517,1.1129,0.8173,0.7630,0.8675,0.8974,0.9175,0.9244,0.9339,0.9381,0.9515,1.0,7.0,13.0,32.0,43.0,55.0,63.0,72.0,80.0,90.0,0.156364,0.113725,0.130435,0.166667,0.196721,0.224490,0.244444,0.263158,0.272727,0.200000,0.045296,0.087108,0.198606,0.205575,0.163763,0.139373,0.132404,0.114983,0.101045,0.059233,0.041812,0.111498,0.439024,0.707317,0.787456,0.829268,0.843206,0.867596,0.878049,0.909408,0.229965,0.341463,0.386760,0.268293,0.188153,0.163763,0.153310,0.128920,0.108014,0.062718,0.958262,6843.905988,7141.998731
3,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,114,806.487642,77.0,0.341451,0.566825,0.706045,0.812436,0.847875,0.866647,0.876922,0.888125,0.903764,0.909980,1.0391,0.7486,0.7060,0.8124,0.8478,0.8666,0.8769,0.8881,0.9037,0.9099,1.0,5.0,11.0,29.0,44.0,54.0,63.0,80.0,93.0,101.0,0.157407,0.153846,0.176471,0.243902,0.281250,0.346154,0.320000,0.217391,0.200000,0.181818,0.035088,0.263158,0.280702,0.254386,0.219298,0.210526,0.201754,0.192982,0.087719,0.096491,0.052632,0.087719,0.403509,0.640351,0.719298,0.771930,0.780702,0.798246,0.842105,0.850877,0.307018,0.482456,0.438596,0.307018,0.263158,0.219298,0.210526,0.201754,0.131579,0.096491,0.925030,2624.209728,2836.891482
4,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,142,1004.572326,77.0,0.358523,0.615414,0.759101,0.854278,0.878097,0.892322,0.905249,0.915813,0.925324,0.932185,1.0911,0.8128,0.7591,0.8542,0.8780,0.8923,0.9052,0.9158,0.9253,0.9321,1.0,6.0,12.0,27.0,40.0,52.0,68.0,73.0,88.0,108.0,0.181159,0.141732,0.168831,0.225000,0.147059,0.137931,0.153846,0.130435,0.157895,0.176471,0.035211,0.112676,0.211268,0.197183,0.183099,0.169014,0.133803,0.140845,0.098592,0.098592,0.028169,0.105634,0.457746,0.718310,0.753521,0.788732,0.809859,0.830986,0.852113,0.866197,0.225352,0.359155,0.380282,0.260563,0.225352,0.197183,0.169014,0.161972,0.133803,0.112676,0.945294,3340.358794,3533.671846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85707,2022-10_Zambia_SHS with TV_Chibombo_Sun King H...,1,78.490057,731.0,0.050101,0.099190,0.138158,0.255567,0.381073,0.478239,0.584008,0.709008,0.805162,0.920547,1.3580,1.2735,1.1621,1.0567,1.0445,0.9803,0.9561,0.9662,0.9398,0.9396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1240.770825,1240.770825
85708,2022-10_Zambia_SHS with TV_Chingola_Sun King H...,1,62.792045,731.0,0.048077,0.086538,0.134615,0.250000,0.375000,0.500000,0.625000,0.711538,0.759615,0.826923,1.2820,1.0931,1.1140,1.0169,1.0112,1.0084,1.0067,0.9540,0.8722,0.8303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.0,114.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.000000,1.000000,1.000000,1175.467097,1175.467097
85709,2022-10_Zambia_SHS with TV_Kabwe_Sun King Home...,1,78.490057,731.0,0.076923,0.115385,0.153846,0.279099,0.404858,0.492156,0.579200,0.694585,0.819585,0.944838,2.0850,1.4814,1.2941,1.1540,1.1097,1.0089,0.9482,0.9466,0.9566,0.9644,0.0,0.0,1.0,1.0,1.0,1.0,1.0,25.0,25.0,25.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,1240.770825,1240.770825
85710,2022-10_Zambia_SHS with TV_Kitwe_Sun King Home...,3,235.470171,731.0,0.044990,0.080263,0.134767,0.211741,0.301501,0.407524,0.494113,0.584177,0.673954,0.748178,1.2194,1.0305,1.1336,0.8754,0.8264,0.8354,0.8089,0.7961,0.7866,0.7636,0.0,0.0,1.0,24.0,41.0,63.0,82.0,96.0,121.0,147.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.333333,0.333333,0.000000,0.333333,0.666667,0.000000,0.333333,0.000000,0.000000,0.841675,2088.651568,2481.541650


In [9]:
# Clean the KPI table and add the cohort descriptor columns.
# Note: df_KPI is rebound to the cleaned result, so the table as loaded is no
# longer reachable under this name after the cell has run.
df_KPI = preprocessing_common(df_KPI)

Shape of cohorts having frr_3_years <=0 is 177
Shape of cohorts having frr_30 null is 0


In [10]:
# Display the cleaned table; it now carries reg_month, country, product_group,
# area, primary_product and age_in_months.
df_KPI

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,frr_450,frr_540,frr_630,frr_720,repayment_speed_30,repayment_speed_60,repayment_speed_90,repayment_speed_180,repayment_speed_270,repayment_speed_360,repayment_speed_450,repayment_speed_540,repayment_speed_630,repayment_speed_720,avg_cum_days_disabled_30,avg_cum_days_disabled_60,avg_cum_days_disabled_90,avg_cum_days_disabled_180,avg_cum_days_disabled_270,avg_cum_days_disabled_360,avg_cum_days_disabled_450,avg_cum_days_disabled_540,avg_cum_days_disabled_630,avg_cum_days_disabled_720,at_risk_rate_30,at_risk_rate_60,at_risk_rate_90,at_risk_rate_180,at_risk_rate_270,at_risk_rate_360,at_risk_rate_450,at_risk_rate_540,at_risk_rate_630,at_risk_rate_720,disabled_gt_two_week_rate_30,disabled_gt_two_week_rate_60,disabled_gt_two_week_rate_90,disabled_gt_two_week_rate_180,disabled_gt_two_week_rate_270,disabled_gt_two_week_rate_360,disabled_gt_two_week_rate_450,disabled_gt_two_week_rate_540,disabled_gt_two_week_rate_630,disabled_gt_two_week_rate_720,unlocked_rate_30,unlocked_rate_60,unlocked_rate_90,unlocked_rate_180,unlocked_rate_270,unlocked_rate_360,unlocked_rate_450,unlocked_rate_540,unlocked_rate_630,unlocked_rate_720,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd,reg_month,country,product_group,area,primary_product,age_in_months
0,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,57,403.243821,77,0.352180,0.626566,0.785424,0.888827,0.924767,0.935544,0.945594,0.963138,0.964892,0.971333,1.0718,0.8275,0.7854,0.8888,0.9247,0.9355,0.9455,0.9631,0.9648,0.9713,1.0,5.0,11.0,25.0,33.0,42.0,43.0,51.0,57.0,69.0,0.181818,0.080000,0.148148,0.125000,0.222222,0.250000,0.333333,0.166667,0.200000,0.333333,0.017544,0.122807,0.175439,0.157895,0.157895,0.105263,0.105263,0.087719,0.087719,0.052632,0.035088,0.122807,0.526316,0.719298,0.842105,0.859649,0.894737,0.894737,0.912281,0.947368,0.263158,0.315789,0.368421,0.228070,0.157895,0.122807,0.105263,0.105263,0.087719,0.052632,0.977148,1386.031419,1418.445741,2016-01,Kenya,Lanterns,Kakamega,Sun King Pro EasyBuy,115
1,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,18,127.340154,77,0.425714,0.686587,0.807381,0.849048,0.856984,0.856984,0.856984,0.856984,0.892698,0.903810,1.2956,0.9068,0.8073,0.8490,0.8569,0.8569,0.8569,0.8569,0.8926,0.9038,0.0,3.0,8.0,14.0,14.0,14.0,14.0,14.0,102.0,105.0,0.055556,0.062500,0.090909,0.166667,0.166667,0.166667,0.166667,0.166667,0.250000,1.000000,0.000000,0.111111,0.277778,0.333333,0.333333,0.333333,0.333333,0.333333,0.166667,0.055556,0.000000,0.111111,0.388889,0.666667,0.666667,0.666667,0.666667,0.666667,0.777778,0.833333,0.277778,0.388889,0.444444,0.333333,0.333333,0.333333,0.333333,0.333333,0.222222,0.055556,0.903810,404.843825,447.930234,2016-02,Kenya,Lanterns,Bungoma,Sun King Pro EasyBuy,114
2,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,287,2030.368011,77,0.365668,0.618819,0.763085,0.867551,0.897489,0.917509,0.924428,0.933954,0.938175,0.951517,1.1129,0.8173,0.7630,0.8675,0.8974,0.9175,0.9244,0.9339,0.9381,0.9515,1.0,7.0,13.0,32.0,43.0,55.0,63.0,72.0,80.0,90.0,0.156364,0.113725,0.130435,0.166667,0.196721,0.224490,0.244444,0.263158,0.272727,0.200000,0.045296,0.087108,0.198606,0.205575,0.163763,0.139373,0.132404,0.114983,0.101045,0.059233,0.041812,0.111498,0.439024,0.707317,0.787456,0.829268,0.843206,0.867596,0.878049,0.909408,0.229965,0.341463,0.386760,0.268293,0.188153,0.163763,0.153310,0.128920,0.108014,0.062718,0.958262,6843.905988,7141.998731,2016-02,Kenya,Lanterns,Kakamega,Sun King Pro EasyBuy,114
3,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,114,806.487642,77,0.341451,0.566825,0.706045,0.812436,0.847875,0.866647,0.876922,0.888125,0.903764,0.909980,1.0391,0.7486,0.7060,0.8124,0.8478,0.8666,0.8769,0.8881,0.9037,0.9099,1.0,5.0,11.0,29.0,44.0,54.0,63.0,80.0,93.0,101.0,0.157407,0.153846,0.176471,0.243902,0.281250,0.346154,0.320000,0.217391,0.200000,0.181818,0.035088,0.263158,0.280702,0.254386,0.219298,0.210526,0.201754,0.192982,0.087719,0.096491,0.052632,0.087719,0.403509,0.640351,0.719298,0.771930,0.780702,0.798246,0.842105,0.850877,0.307018,0.482456,0.438596,0.307018,0.263158,0.219298,0.210526,0.201754,0.131579,0.096491,0.925030,2624.209728,2836.891482,2016-03,Kenya,Lanterns,Bungoma,Sun King Pro EasyBuy,113
4,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,142,1004.572326,77,0.358523,0.615414,0.759101,0.854278,0.878097,0.892322,0.905249,0.915813,0.925324,0.932185,1.0911,0.8128,0.7591,0.8542,0.8780,0.8923,0.9052,0.9158,0.9253,0.9321,1.0,6.0,12.0,27.0,40.0,52.0,68.0,73.0,88.0,108.0,0.181159,0.141732,0.168831,0.225000,0.147059,0.137931,0.153846,0.130435,0.157895,0.176471,0.035211,0.112676,0.211268,0.197183,0.183099,0.169014,0.133803,0.140845,0.098592,0.098592,0.028169,0.105634,0.457746,0.718310,0.753521,0.788732,0.809859,0.830986,0.852113,0.866197,0.225352,0.359155,0.380282,0.260563,0.225352,0.197183,0.169014,0.161972,0.133803,0.112676,0.945294,3340.358794,3533.671846,2016-03,Kenya,Lanterns,Kakamega,Sun King Pro EasyBuy,113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85529,2022-10_Zambia_SHS with TV_Chibombo_Sun King H...,1,78.490057,731,0.050101,0.099190,0.138158,0.255567,0.381073,0.478239,0.584008,0.709008,0.805162,0.920547,1.3580,1.2735,1.1621,1.0567,1.0445,0.9803,0.9561,0.9662,0.9398,0.9396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1240.770825,1240.770825,2022-10,Zambia,SHS with TV,Chibombo,Sun King Home 600 EasyBuy,34
85530,2022-10_Zambia_SHS with TV_Chingola_Sun King H...,1,62.792045,731,0.048077,0.086538,0.134615,0.250000,0.375000,0.500000,0.625000,0.711538,0.759615,0.826923,1.2820,1.0931,1.1140,1.0169,1.0112,1.0084,1.0067,0.9540,0.8722,0.8303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.0,114.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.000000,1.000000,1.000000,1175.467097,1175.467097,2022-10,Zambia,SHS with TV,Chingola,Sun King Home 600 EasyBuy,34
85531,2022-10_Zambia_SHS with TV_Kabwe_Sun King Home...,1,78.490057,731,0.076923,0.115385,0.153846,0.279099,0.404858,0.492156,0.579200,0.694585,0.819585,0.944838,2.0850,1.4814,1.2941,1.1540,1.1097,1.0089,0.9482,0.9466,0.9566,0.9644,0.0,0.0,1.0,1.0,1.0,1.0,1.0,25.0,25.0,25.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,1240.770825,1240.770825,2022-10,Zambia,SHS with TV,Kabwe,Sun King Home 600 EasyBuy,34
85532,2022-10_Zambia_SHS with TV_Kitwe_Sun King Home...,3,235.470171,731,0.044990,0.080263,0.134767,0.211741,0.301501,0.407524,0.494113,0.584177,0.673954,0.748178,1.2194,1.0305,1.1336,0.8754,0.8264,0.8354,0.8089,0.7961,0.7866,0.7636,0.0,0.0,1.0,24.0,41.0,63.0,82.0,96.0,121.0,147.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.333333,0.333333,0.000000,0.333333,0.666667,0.000000,0.333333,0.000000,0.000000,0.841675,2088.651568,2481.541650,2022-10,Zambia,SHS with TV,Kitwe,Sun King Home 600 EasyBuy,34


In [11]:
# Random shuffling, with a fixed seed so that re-running the notebook
# reproduces the same row order (and therefore the same splits).
df_KPI = df_KPI.sample(frac = 1, random_state = 100)
# get_training_cut_off_month returns a (cut-off month, validation cut-off date)
# pair; unpack it rather than turning the whole tuple into one string.
training_cut_off_month, validation_cut_off_date = get_training_cut_off_month(df_KPI)

Cut off month for training data:  2022-02


In [12]:
# Build and save one train / test / out-of-time validation triple per
# backtesting horizon (in days).
for btl in [30, 60, 90, 180, 270, 360]:

    # Keep only the features observable at this horizon.
    df = preprocessing_backtesting(df_KPI, btl)

    # Stop right away when the KPI data does not cover enough cohorts/units.
    if not compare_KPIs_accounts(df, df_accounts):
        raise Exception("Exception: Either number of cohorts or number of units not sufficient in KPIs data")

    print('Sufficient number of cohorts and units in KPIs data. Continuing with preprocessing.')
    print("Splitting data between train, test and validation...")

    if btl != 360:
        df_train, df_test, df_oot_validation = create_train_test_validation_split(df)
    else:
        # At 360 days the cohorts are first given a simulated age and the
        # KPIs beyond that age are masked.
        df_accounts_comb = split_cohorts_by_age(df_KPI)
        df_train, df_test, df_oot_validation = create_train_test_validation_split(df_accounts_comb)

    print("Saving preprocessed data")
    train_path = os.path.join(PROCESSED_DATA_DIR,'KPIs_data_modelling_train_'+str(btl)+'_days.csv')
    test_path = os.path.join(PROCESSED_DATA_DIR,'KPIs_data_modelling_test_'+str(btl)+'_days.csv')
    oot_validation_path = os.path.join(PROCESSED_DATA_DIR,'KPIs_data_modelling_oot_validation_'+str(btl)+'_days.csv')

    df_train.to_csv(train_path, index = False)
    df_test.to_csv(test_path, index = False)
    df_oot_validation.to_csv(oot_validation_path, index = False)

    print("Processing completed")

Percent of total cohorts present in training data:  1.0162
Percent of total accounts present in training data:  1.0028
Sufficient number of cohorts and units in KPIs data. Continuing with preprocessing.
Splitting data between train, test and validation...
Cut off month for training data:  2022-02
Num of rows in train subset:  50928
Num of rows in test subset:  8987
Num of rows in validation subset:  20151
Last registration month in training:  2022-02
Last registration month in validation:  2022-08
Saving preprocessed data
Processing completed
Percent of total cohorts present in training data:  1.0162
Percent of total accounts present in training data:  1.0028
Sufficient number of cohorts and units in KPIs data. Continuing with preprocessing.
Splitting data between train, test and validation...
Cut off month for training data:  2022-02
Num of rows in train subset:  50928
Num of rows in test subset:  8987
Num of rows in validation subset:  20151
Last registration month in training:  2022

In [13]:
# Show the training rows whose 720-day disabled rate carries the -99 marker
# written by split_cohorts_by_age. df_train here is whatever the last
# completed iteration of the loop above produced.
df_train[df_train.disabled_rate_720==-99]

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,frr_450,frr_540,frr_630,frr_720,repayment_speed_30,repayment_speed_60,repayment_speed_90,repayment_speed_180,repayment_speed_270,repayment_speed_360,repayment_speed_450,repayment_speed_540,repayment_speed_630,repayment_speed_720,avg_cum_days_disabled_30,avg_cum_days_disabled_60,avg_cum_days_disabled_90,avg_cum_days_disabled_180,avg_cum_days_disabled_270,avg_cum_days_disabled_360,avg_cum_days_disabled_450,avg_cum_days_disabled_540,avg_cum_days_disabled_630,avg_cum_days_disabled_720,at_risk_rate_30,at_risk_rate_60,at_risk_rate_90,at_risk_rate_180,at_risk_rate_270,at_risk_rate_360,at_risk_rate_450,at_risk_rate_540,at_risk_rate_630,at_risk_rate_720,disabled_gt_two_week_rate_30,disabled_gt_two_week_rate_60,disabled_gt_two_week_rate_90,disabled_gt_two_week_rate_180,disabled_gt_two_week_rate_270,disabled_gt_two_week_rate_360,disabled_gt_two_week_rate_450,disabled_gt_two_week_rate_540,disabled_gt_two_week_rate_630,disabled_gt_two_week_rate_720,unlocked_rate_30,unlocked_rate_60,unlocked_rate_90,unlocked_rate_180,unlocked_rate_270,unlocked_rate_360,unlocked_rate_450,unlocked_rate_540,unlocked_rate_630,unlocked_rate_720,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd,reg_month,country,product_group,area,primary_product,age_in_months,cohort_age
29659,2020-11_Kenya_SHS Entry-Level Upgrade_Nyamira_...,1,4.573711,371,0.065934,0.143171,0.221664,0.385008,0.466641,0.631868,0.816327,1.000000,-99.000000,-99.0,1.0434,0.9832,0.9721,0.8100,0.6458,0.6515,0.8163,1.0000,-99.0000,-99.0,1.0,3.0,5.0,32.0,94.0,123.0,149.0,152.0,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,-99.000000,-99.0,0.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,0.000000,-99.0,-99.0,1.000000,116.538171,116.538171,2020-11,Kenya,SHS Entry-Level Upgrade,Nyamira,Sun King Home 60 EasyBuy,57,540
57842,2022-02_Kenya_SHS Entry-Level Upgrade_Port Vic...,9,47.498193,371,0.056685,0.113919,0.186264,0.339438,0.484707,0.643565,0.713242,-99.000000,-99.000000,-99.0,0.8971,0.7823,0.8168,0.7141,0.6708,0.6636,0.7132,-99.0000,-99.0000,-99.0,3.0,10.0,19.0,48.0,82.0,118.0,140.0,-99.0,-99.0,-99.0,0.222222,0.111111,0.000000,0.000000,0.000000,0.000000,0.000000,-99.0,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.222222,-99.000000,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.111111,0.444444,-99.000000,-99.000000,-99.0,0.333333,0.444444,0.444444,0.444444,0.555556,0.444444,0.333333,-99.000000,-99.0,-99.0,0.881068,1269.424022,1440.778716,2022-02,Kenya,SHS Entry-Level Upgrade,Port Victoria,Sun King Home 120 Plus EasyBuy,42,450
28985,2020-10_Myanmar (Burma)_SHS with TV_Minbu_Sun ...,1,54.053265,588,0.052265,0.104530,0.156794,0.362950,0.473287,0.669280,0.773810,-99.000000,-99.000000,-99.0,1.8750,1.3043,1.1842,1.2550,1.0611,1.1103,1.0187,-99.0000,-99.0000,-99.0,,0.0,0.0,0.0,0.0,0.0,0.0,-99.0,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-99.0,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-99.000000,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-99.000000,-99.000000,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-99.000000,-99.0,-99.0,1.000000,531.884132,531.884132,2020-10,Myanmar (Burma),SHS with TV,Minbu,Sun King Home 400 Easybuy GSM,58,450
48222,2021-10_Kenya_SHS Entry-Level_Wajir_Sun King H...,41,591.672845,338,0.062003,0.119929,0.169320,0.281453,0.395513,0.528443,0.621283,-99.000000,-99.000000,-99.0,0.8713,0.7313,0.6593,0.5258,0.4860,0.5406,0.6310,-99.0000,-99.0000,-99.0,3.0,13.0,23.0,64.0,111.0,147.0,181.0,-99.0,-99.0,-99.0,0.292683,0.097561,0.073171,0.048780,0.048780,0.027778,0.030303,-99.0,-99.0,-99.0,0.097561,0.048780,0.170732,0.243902,0.195122,0.170732,0.243902,-99.000000,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.121951,0.195122,-99.000000,-99.000000,-99.0,0.390244,0.439024,0.560976,0.634146,0.512195,0.463415,0.487805,-99.000000,-99.0,-99.0,0.836203,5850.671448,6996.711861,2021-10,Kenya,SHS Entry-Level,Wajir,Sun King Home 120 EasyBuy,46,450
17887,2019-11_Tanzania_SHS Entry-Level_Morogoro_Sun ...,197,1788.318548,370,0.064160,0.130659,0.188802,0.353367,0.507255,0.660613,0.771753,0.829342,-99.000000,-99.0,1.0144,0.8965,0.8272,0.7427,0.7013,0.6805,0.7717,0.8293,-99.0000,-99.0,1.0,6.0,11.0,32.0,56.0,83.0,107.0,130.0,-99.0,-99.0,0.086294,0.035714,0.020408,0.005128,0.000000,0.000000,0.000000,0.0,-99.0,-99.0,0.010152,0.020305,0.045685,0.096447,0.081218,0.071066,0.131980,0.131980,-99.0,-99.0,0.000000,0.005076,0.005076,0.005076,0.005076,0.035533,0.314721,0.497462,-99.000000,-99.0,0.142132,0.238579,0.263959,0.304569,0.340102,0.324873,0.335025,0.304569,-99.0,-99.0,0.923171,20356.933505,22051.097256,2019-11,Tanzania,SHS Entry-Level,Morogoro,Sun King Home 60 EasyBuy,69,540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9445,2018-12_Kenya_SHS Entry-Level_Busia_Sun King H...,239,3508.054189,279,0.083525,0.149706,0.215368,0.384636,0.530004,0.630605,0.686458,-99.000000,-99.000000,-99.0,0.9912,0.7710,0.7082,0.6068,0.5500,0.6306,0.6864,-99.0000,-99.0000,-99.0,2.0,8.0,16.0,47.0,80.0,114.0,148.0,-99.0,-99.0,-99.0,0.234310,0.121339,0.054393,0.033898,0.030303,0.027624,0.020134,-99.0,-99.0,-99.0,0.025105,0.079498,0.096234,0.179916,0.230126,0.255230,0.263598,-99.000000,-99.0,-99.0,0.000000,0.000000,0.000000,0.012552,0.033473,0.242678,0.376569,-99.000000,-99.000000,-99.0,0.267782,0.380753,0.401674,0.468619,0.523013,0.493724,0.464435,-99.000000,-99.0,-99.0,0.835984,24014.945726,28726.561425,2018-12,Kenya,SHS Entry-Level,Busia,Sun King Home 60 EasyBuy,80,450
54713,2022-01_Kenya_Phones_Eldama Ravine_Infinix HOT...,81,2499.944877,368,0.069245,0.139042,0.206856,0.382084,0.548399,0.715447,-99.000000,-99.000000,-99.000000,-99.0,1.0894,0.9493,0.9018,0.7991,0.7509,0.7331,-99.0000,-99.0000,-99.0000,-99.0,1.0,4.0,8.0,25.0,50.0,73.0,-99.0,-99.0,-99.0,-99.0,0.024691,0.012346,0.012346,0.012500,0.012658,0.013333,-99.000000,-99.0,-99.0,-99.0,0.012346,0.024691,0.037037,0.098765,0.098765,0.111111,-99.000000,-99.000000,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.012346,0.061728,-99.000000,-99.000000,-99.000000,-99.0,0.148148,0.148148,0.172840,0.320988,0.358025,0.271605,-99.000000,-99.000000,-99.0,-99.0,0.914938,13006.275088,14215.471527,2022-01,Kenya,Phones,Eldama Ravine,Infinix HOT 10i,43,360
46934,2021-09_Nigeria_SHS without TV_Ore_Sun King Ho...,24,583.263888,371,0.096154,0.157051,0.235310,0.460203,0.690759,0.886004,0.950855,0.968750,0.982372,-99.0,1.5237,1.0800,1.0333,0.9695,0.9573,0.9148,0.9508,0.9687,0.9823,-99.0,0.0,2.0,5.0,14.0,26.0,38.0,44.0,47.0,52.0,-99.0,0.041667,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,-99.0,0.000000,0.000000,0.041667,0.000000,0.000000,0.000000,0.000000,0.041667,0.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.041667,0.333333,0.875000,0.916667,0.916667,-99.0,0.125000,0.041667,0.125000,0.083333,0.083333,0.041667,0.041667,0.041667,0.0,-99.0,1.000000,4549.458312,4549.458312,2021-09,Nigeria,SHS without TV,Ore,Sun King Home 400 EasyBuy,47,630
42721,2021-07_Kenya_SHS with TV_Nyadorera_Sun King H...,5,406.838432,581,0.051318,0.098509,0.130888,0.256408,0.366672,0.463613,0.568697,-99.000000,-99.000000,-99.0,1.2818,1.0678,0.9060,0.8515,0.8010,0.7545,0.7375,-99.0000,-99.0000,-99.0,1.0,4.0,9.0,29.0,47.0,79.0,109.0,-99.0,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-99.0,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.200000,-99.000000,-99.0,-99.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-99.000000,-99.000000,-99.0,0.200000,0.000000,0.200000,0.400000,0.600000,0.400000,0.400000,-99.000000,-99.0,-99.0,0.861811,3456.526126,4010.770489,2021-07,Kenya,SHS with TV,Nyadorera,Sun King Home 600 Easybuy GSM,49,450


In [14]:
# Most recent registration month present in df_train.
# reg_month values are zero-padded 'YYYY-MM' strings, so the lexicographic
# maximum is also the chronologically latest month.
df_train["reg_month"].max()

'2022-02'

In [15]:
# Earliest registration month present in df_train.
# reg_month values are zero-padded 'YYYY-MM' strings, so the lexicographic
# minimum is also the chronologically earliest month.
df_train["reg_month"].min()

'2016-02'