In [1]:
from setup_notebook import setup_project_path
setup_project_path()

In [2]:
from src.config import RAW_DATA_DIR, PROCESSED_DATA_DIR

In [3]:
import subprocess
import sys
import numpy as np
import os
import pandas as pd
import datetime as dt
from datetime import datetime, date, timedelta
# subprocess.check_call([sys.executable,"-m","pip", "install","psycopg2-binary"])
# subprocess.check_call([sys.executable,"-m","pip", "install","pandasql"])
# import psycopg2

## Important: Save Data to S3
Run the queries in the `queries` folder of this project on Redshift to unload the Redshift tables into S3.
At the moment, this step assumes that the KPIs and the targets already exist on Redshift.

In [4]:
# Make sure both local data folders exist before anything is downloaded or written.
for data_folder in (RAW_DATA_DIR, PROCESSED_DATA_DIR):
    os.makedirs(data_folder, exist_ok=True)

# Select the named AWS profile for the `aws` CLI calls made later in the notebook.
os.environ["AWS_PROFILE"] = 'sunking'

In [5]:
import os
import subprocess
import pandas as pd

def fetch_data_from_s3(filename, bucket_name, prefix=None, data_dir=RAW_DATA_DIR):
    """Load a CSV from the local cache, downloading it from S3 first if it is missing.

    The file is looked up at ``data_dir/filename``. When it is not there, it is
    copied from ``s3://bucket_name[/prefix]/filename`` with the ``aws s3 cp``
    command line tool; an existing local copy is never refreshed.

    Parameters
    ----------
    filename : str
        Object name inside the bucket; also used as the local file name.
    bucket_name : str
        Name of the S3 bucket holding the file.
    prefix : str, optional
        Key prefix placed between the bucket and the file name. Leading and
        trailing slashes are ignored so the resulting key never contains ``//``.
    data_dir : str or path-like, optional
        Local directory used as the download cache. Created if it does not exist.

    Returns
    -------
    pandas.DataFrame
        The file parsed with ``pd.read_csv`` using default options.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``aws s3 cp`` command exits with a non-zero status.
    FileNotFoundError
        If the ``aws`` executable cannot be found.
    """
    local_path = os.path.join(data_dir, filename)

    if not os.path.exists(local_path):
        # The copy target directory must exist before the CLI writes into it.
        os.makedirs(data_dir, exist_ok=True)

        clean_prefix = prefix.strip("/") if prefix else ""
        object_key = f"{clean_prefix}/{filename}" if clean_prefix else filename
        s3_path = f"s3://{bucket_name}/{object_key}"

        # Arguments are passed as a list (no shell), so names are not shell-interpreted.
        subprocess.run(["aws", "s3", "cp", s3_path, local_path], check=True)

    return pd.read_csv(local_path)


In [6]:
# All three input files are read from the same temporary bucket.
unload_bucket = "temp-2719-570b8c26-44ce-4092-a0a5-52a6a367d660"

df_KPI_raw = fetch_data_from_s3(filename="cbr_kpis.csv000", bucket_name=unload_bucket)
df_target = fetch_data_from_s3(filename="cbr_targets.csv000", bucket_name=unload_bucket)
df_accounts = fetch_data_from_s3(filename="accounts_moddelling_data.csv000", bucket_name=unload_bucket)

# Keep a copy of the accounts data stamped with today's date.
accounts_snapshot_path = os.path.join(RAW_DATA_DIR, f"accounts_moddelling_{dt.date.today()}.csv")
df_accounts.to_csv(accounts_snapshot_path, index=False)

## CBR inputs

In [7]:
df_KPI_raw.columns

Index(['registration_date_month', 'country', 'product_group', 'area',
       'unit_age_days', 'primary_product', 'follow_on_revenue_realization',
       'at_risk_rate', 'average_cumulative_days_disabled',
       'disabled_greater_than_two_week_rate', 'count_units', 'unlocked_rate',
       'disabled_rate', 'repayment_speed_2', 'total_upfront_price_usd',
       'average_planned_repayment_days'],
      dtype='object')

In [8]:
df_KPI_raw

Unnamed: 0,registration_date_month,country,product_group,area,unit_age_days,primary_product,follow_on_revenue_realization,at_risk_rate,average_cumulative_days_disabled,disabled_greater_than_two_week_rate,count_units,unlocked_rate,disabled_rate,repayment_speed_2,total_upfront_price_usd,average_planned_repayment_days
0,2021-09,Tanzania,SHS Entry-Level,Arusha,630,Sun King Home 40Z EasyBuy,1.000916,,57.0,0.000000,4,1.000000,0.000000,1.0009,25.890248,280.000000
1,2022-07,Kenya,Phones,Chebunyo,60,Samsung A03 Core,0.140369,0.062500,7.0,0.062500,48,0.000000,0.208333,0.8894,1216.812768,364.187500
2,2018-02,Nigeria,Lanterns,Kano,360,Sun King Pro EasyBuy,0.849991,0.500000,27.0,0.285714,7,0.714286,0.285714,0.8499,25.410000,189.000000
3,2022-08,Nigeria,SHS with TV,Kano North,630,Sun King Home 600 Mini EasyBuy,1.000000,,5.0,0.000000,1,1.000000,0.000000,1.0000,53.614264,378.000000
4,2022-09,Kenya,SHS Entry-Level Upgrade,Bomet,450,Sun King Home 200X EasyBuy,0.704311,0.000000,168.0,0.045455,22,0.272727,0.318182,0.7043,97.136186,359.136364
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
873024,2021-12,Tanzania,SHS Entry-Level,Mkuranga,30,Sun King Home 60 EasyBuy,0.066497,0.019048,1.0,0.000000,105,0.000000,0.114286,1.0523,929.015948,371.000000
873025,2021-04,Tanzania,SHS Entry-Level,Mwanza,60,Sun King Home 40Z EasyBuy,0.183669,0.000000,3.0,0.000000,22,0.000000,0.181818,0.9446,142.307066,279.590909
873026,2022-03,Kenya,SHS Entry-Level,Embu,180,Sun King Home 120 Plus EasyBuy,0.043386,1.000000,32.0,1.000000,1,0.000000,1.000000,0.0789,13.987676,322.000000
873027,2019-05,Nigeria,SHS Entry-Level,Kaduna,180,Sun King Home 120 EasyBuy,0.643040,0.000000,18.0,0.000000,4,0.000000,0.000000,1.0147,130.453726,280.000000


In [9]:
df_KPI_raw['product_group'].value_counts(normalize=True)

product_group
SHS Entry-Level            0.331636
Lanterns                   0.200571
SHS with TV                0.138820
SHS with TV Upgrade        0.094051
SHS Entry-Level Upgrade    0.069467
Phones                     0.055881
SHS without TV             0.053739
SHS with Stove             0.021227
SHS with Fan (w/o TV)      0.013495
Fans                       0.013059
Cash Loans                 0.002108
OOW Replacements           0.002062
SHS without TV Upgrade     0.001272
Water Tanks                0.000871
Water Pumps                0.000836
Inverter Large             0.000516
Water Filters              0.000206
Cook Stoves                0.000115
SHS with Fan and TV        0.000046
E-Bikes                    0.000023
Name: proportion, dtype: float64

In [10]:
def clean_data(df):
    """Standardise the raw KPI frame and add the ``accounts_group`` key.

    Steps performed on a copy of ``df`` (the caller's frame is left untouched):

    1. shorten the long KPI column names (e.g. ``follow_on_revenue_realization``
       becomes ``frr``, ``repayment_speed_2`` becomes ``repayment_speed``);
    2. cast ``unit_age_days`` to ``str``;
    3. replace missing ``product_group`` values with the literal ``'NA'``;
    4. build ``accounts_group`` by joining registration month, country,
       product group, area and primary product with ``'_'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw KPI rows with the original column names.

    Returns
    -------
    pandas.DataFrame
        A new frame with renamed columns plus ``accounts_group``.
    """
    column_renames = {
        'registration_date_month': 'registration_month',
        'average_cumulative_days_disabled': 'avg_cum_days_disabled',
        'average_planned_repayment_days': 'avg_planned_repayment_days',
        'disabled_greater_than_two_week_rate': 'disabled_gt_two_week_rate',
        'follow_on_revenue_realization': 'frr',
        'repayment_speed_2': 'repayment_speed',
        'total_upfront_price_usd': 'upfront_price_usd',
    }

    # rename() without inplace returns a new frame, so the input is not mutated.
    df = df.rename(columns=column_renames)

    # Converting unit age days from int to string
    df = df.astype({"unit_age_days": str})

    # Handling missing values in product group.
    # Assign the result back: a chained `df[col].fillna(..., inplace=True)` acts on
    # a temporary object and no longer updates `df` under pandas copy-on-write.
    df['product_group'] = df['product_group'].fillna('NA')

    # Creating index column for accounts group
    df['accounts_group'] = (
        df['registration_month'] + '_' + df['country'] + '_' + df['product_group']
        + '_' + df['area'] + '_' + df['primary_product']
    )

    print('Head of the cleaned data')
    print(df.head(1))

    return df


def pivot_data(df_accounts):
    """Reshape long-format KPI rows into one wide row per ``accounts_group``.

    Each KPI is spread into one column per unit age (``frr_30``, ``frr_60``, ...)
    using ``pd.pivot_table`` with its default aggregation. The per-group features
    ``count_units``, ``upfront_price_usd`` and ``avg_planned_repayment_days`` are
    taken from the rows whose ``unit_age_days`` equals ``'30'`` and inner-joined
    back, so groups without a 30-day row are dropped.

    Parameters
    ----------
    df_accounts : pandas.DataFrame
        Cleaned KPI rows. ``unit_age_days`` must hold strings: it is compared
        with ``'30'`` and joined into the column names with ``'_'``.

    Returns
    -------
    pandas.DataFrame
        ``accounts_group``, the three per-group features, then every KPI for
        unit ages 30, 60, 90, 180, 270, 360, 450, 540, 630 and 720, in that order.

    Raises
    ------
    KeyError
        If any expected ``<kpi>_<age>`` column is absent after pivoting.
    """
    unit_ages = ['30', '60', '90', '180', '270', '360', '450', '540', '630', '720']
    kpi_names = ['frr', 'repayment_speed', 'avg_cum_days_disabled', 'at_risk_rate',
                 'disabled_gt_two_week_rate', 'unlocked_rate', 'disabled_rate']
    group_features = ['count_units', 'upfront_price_usd', 'avg_planned_repayment_days']

    # Per-group features are read from the 30-day rows only.
    is_30_day_row = df_accounts['unit_age_days'] == '30'
    df_other_features = (
        df_accounts.loc[is_30_day_row, ['accounts_group'] + group_features]
        .reset_index(drop=True)
    )

    df_pivot = pd.pivot_table(df_accounts, index=['accounts_group'],
                              columns='unit_age_days',
                              values=kpi_names)

    # Formatting column headings: flatten (kpi, age) into "kpi_age"
    df_pivot.columns = df_pivot.columns.map('_'.join)
    df_pivot = df_pivot.reset_index()

    df_pivot = pd.merge(df_pivot, df_other_features, on='accounts_group',
                        how="inner")

    # sample(5) raises ValueError on frames with fewer than 5 rows, so cap the size.
    print('Sample of the pivoted data')
    print(df_pivot.sample(min(5, len(df_pivot))))

    print('rearranging the columns')
    # Rearranging the column names: key, per-group features, then KPI-major / age-minor
    ordered_columns = (
        ['accounts_group']
        + group_features
        + [f'{kpi}_{age}' for kpi in kpi_names for age in unit_ages]
    )
    df_pivot = df_pivot[ordered_columns]

    # Removing the records that have no unit count
    df_pivot = df_pivot.loc[df_pivot['count_units'].notnull()]

    print('Sample of the pivoted data')
    print(df_pivot.sample(min(5, len(df_pivot))))

    return df_pivot

In [11]:
# Rename the KPI columns, fill missing product groups and add the accounts_group key.
df_KPI = clean_data(df_KPI_raw)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['product_group'].fillna('NA', inplace=True)


Head of the cleaned data
  registration_month   country    product_group    area unit_age_days  \
0            2021-09  Tanzania  SHS Entry-Level  Arusha           630   

             primary_product       frr  at_risk_rate  avg_cum_days_disabled  \
0  Sun King Home 40Z EasyBuy  1.000916           NaN                   57.0   

   disabled_gt_two_week_rate  count_units  unlocked_rate  disabled_rate  \
0                        0.0            4            1.0            0.0   

   repayment_speed  upfront_price_usd  avg_planned_repayment_days  \
0           1.0009          25.890248                       280.0   

                                      accounts_group  
0  2021-09_Tanzania_SHS Entry-Level_Arusha_Sun Ki...  


In [None]:
# df_KPI.to_csv(os.path.join(PROCESSED_DATA_DIR, "KPIs_data_modelling.csv"), index=False)

In [13]:
# Spot check: show every unit-age row of a single accounts group.
df_KPI[df_KPI['accounts_group']=='2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i']

Unnamed: 0,registration_month,country,product_group,area,unit_age_days,primary_product,frr,at_risk_rate,avg_cum_days_disabled,disabled_gt_two_week_rate,count_units,unlocked_rate,disabled_rate,repayment_speed,upfront_price_usd,avg_planned_repayment_days,accounts_group
19542,2021-11,Kenya,Phones,Oyugis,720,Infinix HOT 10i,0.01998,,54.0,0.0,1,0.0,0.0,0.0199,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
22674,2021-11,Kenya,Phones,Oyugis,540,Infinix HOT 10i,0.01998,1.0,54.0,1.0,1,0.0,1.0,0.0199,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
46842,2021-11,Kenya,Phones,Oyugis,360,Infinix HOT 10i,0.01998,1.0,54.0,1.0,1,0.0,1.0,0.0206,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
60200,2021-11,Kenya,Phones,Oyugis,30,Infinix HOT 10i,0.004995,1.0,0.0,1.0,1,0.0,1.0,0.079,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
60768,2021-11,Kenya,Phones,Oyugis,450,Infinix HOT 10i,0.01998,1.0,54.0,1.0,1,0.0,1.0,0.0199,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
76628,2021-11,Kenya,Phones,Oyugis,90,Infinix HOT 10i,0.01998,1.0,54.0,1.0,1,0.0,1.0,0.0876,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
105074,2021-11,Kenya,Phones,Oyugis,60,Infinix HOT 10i,0.00999,1.0,1.0,0.0,1,0.0,1.0,0.0686,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
114328,2021-11,Kenya,Phones,Oyugis,180,Infinix HOT 10i,0.01998,1.0,54.0,1.0,1,0.0,1.0,0.042,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
211628,2021-11,Kenya,Phones,Oyugis,270,Infinix HOT 10i,0.01998,1.0,54.0,1.0,1,0.0,1.0,0.0276,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i
250362,2021-11,Kenya,Phones,Oyugis,630,Infinix HOT 10i,0.01998,1.0,54.0,1.0,1,0.0,1.0,0.0199,31.252311,371.0,2021-11_Kenya_Phones_Oyugis_Infinix HOT 10i


In [14]:
# Reshape to one row per accounts_group with one column per KPI and unit age.
df_pivot = pivot_data(df_KPI)

Sample of the pivoted data
                                          accounts_group  at_risk_rate_180  \
16813  2019-10_Kenya_SHS with TV_Webuye_Sun King Home...               0.0   
37069  2021-04_Kenya_SHS Entry-Level_Kwale_Sun King H...               0.0   
56087  2022-01_Kenya_SHS with TV_Port Victoria_Sun Ki...               0.0   
45367  2021-08_Tanzania_SHS with TV Upgrade_Mlandizi_...               0.0   
7344   2018-08_Tanzania_SHS Entry-Level_Pangani_Sun K...               0.0   

       at_risk_rate_270  at_risk_rate_30  at_risk_rate_360  at_risk_rate_450  \
16813               0.0         0.000000               0.0               0.0   
37069               0.0         0.266667               0.0               0.0   
56087               0.0         0.000000               0.0               0.0   
45367               0.0         0.500000               0.0               0.0   
7344                0.0         1.000000               0.0               0.0   

       at_risk_rate_540

In [15]:
df_pivot.columns

Index(['accounts_group', 'count_units', 'upfront_price_usd',
       'avg_planned_repayment_days', 'frr_30', 'frr_60', 'frr_90', 'frr_180',
       'frr_270', 'frr_360', 'frr_450', 'frr_540', 'frr_630', 'frr_720',
       'repayment_speed_30', 'repayment_speed_60', 'repayment_speed_90',
       'repayment_speed_180', 'repayment_speed_270', 'repayment_speed_360',
       'repayment_speed_450', 'repayment_speed_540', 'repayment_speed_630',
       'repayment_speed_720', 'avg_cum_days_disabled_30',
       'avg_cum_days_disabled_60', 'avg_cum_days_disabled_90',
       'avg_cum_days_disabled_180', 'avg_cum_days_disabled_270',
       'avg_cum_days_disabled_360', 'avg_cum_days_disabled_450',
       'avg_cum_days_disabled_540', 'avg_cum_days_disabled_630',
       'avg_cum_days_disabled_720', 'at_risk_rate_30', 'at_risk_rate_60',
       'at_risk_rate_90', 'at_risk_rate_180', 'at_risk_rate_270',
       'at_risk_rate_360', 'at_risk_rate_450', 'at_risk_rate_540',
       'at_risk_rate_630', 'at_risk_rate

In [16]:
# df_pivot.to_csv(os.path.join(PROCESSED_DATA_DIR,'KPIs_data_predictions.csv'),index = False)

In [17]:
df_pivot

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,...,disabled_rate_30,disabled_rate_60,disabled_rate_90,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720
0,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,57,403.243821,77.0,0.352180,0.626566,0.785424,0.888827,0.924767,0.935544,...,0.263158,0.315789,0.368421,0.228070,0.157895,0.122807,0.105263,0.105263,0.087719,0.052632
1,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,18,127.340154,77.0,0.425714,0.686587,0.807381,0.849048,0.856984,0.856984,...,0.277778,0.388889,0.444444,0.333333,0.333333,0.333333,0.333333,0.333333,0.222222,0.055556
2,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,287,2030.368011,77.0,0.365668,0.618819,0.763085,0.867551,0.897489,0.917509,...,0.229965,0.341463,0.386760,0.268293,0.188153,0.163763,0.153310,0.128920,0.108014,0.062718
3,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,114,806.487642,77.0,0.341451,0.566825,0.706045,0.812436,0.847875,0.866647,...,0.307018,0.482456,0.438596,0.307018,0.263158,0.219298,0.210526,0.201754,0.131579,0.096491
4,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,142,1004.572326,77.0,0.358523,0.615414,0.759101,0.854278,0.878097,0.892322,...,0.225352,0.359155,0.380282,0.260563,0.225352,0.197183,0.169014,0.161972,0.133803,0.112676
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87291,2022-10_Zambia_SHS with TV_Luanshya_Sun King H...,1,62.792045,731.0,0.038462,0.067415,0.096368,0.144498,0.182959,0.221421,...,0.000000,0.000000,0.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
87292,2022-10_Zambia_SHS with TV_Ndola_Sun King Home...,1,62.792045,549.0,0.134615,0.160256,0.166667,0.300641,0.633974,0.691667,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
87293,2022-10_Zambia_SHS with TV_Ndola_Sun King Home...,2,125.584090,731.0,0.030048,0.049012,0.069311,0.116052,0.179621,0.241319,...,0.000000,0.500000,0.500000,0.500000,0.500000,0.000000,0.500000,0.000000,0.500000,0.500000
87294,2022-10_Zambia_SHS without TV_Kapiri Mposhi_Su...,1,31.396022,549.0,0.064103,0.141026,0.192308,0.333333,0.500000,0.653846,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


## Target data

In [18]:
def clean_data_target(df):
    """Prepare the target frame: build ``accounts_group`` and the actual revenue.

    ``unit_age_days`` is dropped and the remaining seven columns are renamed
    *by position* to ``registration_month, country, product_group, area,
    primary_product, frr_3_years, total_follow_on_revenue_usd``, so the input
    must supply them in that order. Missing ``product_group`` values become the
    literal ``'NA'``, and ``actual_fr`` is computed as
    ``frr_3_years * total_follow_on_revenue_usd``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw target rows including a ``unit_age_days`` column. Not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``accounts_group, frr_3_years, actual_fr,
        total_follow_on_revenue_usd``.

    Raises
    ------
    ValueError
        If the frame does not have exactly seven columns after the drop.
    """
    print(df.head(1))

    # Renaming the columns (positional: relies on the input column order)
    df = df.drop(columns='unit_age_days')
    df.columns = ['registration_month', 'country', 'product_group', 'area',
                  'primary_product', 'frr_3_years', 'total_follow_on_revenue_usd']

    print(df.head(1))

    # Fill missing values with NA.
    # Assign the result back: a chained `df[col].fillna(..., inplace=True)` acts on
    # a temporary object and no longer updates `df` under pandas copy-on-write.
    df['product_group'] = df['product_group'].fillna('NA')

    # Creating index column for accounts group
    df['accounts_group'] = (
        df['registration_month'] + '_' + df['country'] + '_' + df['product_group']
        + '_' + df['area'] + '_' + df['primary_product']
    )

    # Calculating actual FR
    df['actual_fr'] = df['frr_3_years'] * df['total_follow_on_revenue_usd']

    # Keeping only required columns
    df = df[['accounts_group', 'frr_3_years', 'actual_fr', 'total_follow_on_revenue_usd']]

    print("Shape of the cleaned dataset: {}".format(df.shape))

    print('Head of the cleaned data')
    print(df.head(1))

    return df

In [19]:
# Build the accounts_group key and the actual follow-on revenue for the targets.
df_target_final = clean_data_target(df_target)

  registration_date_month country    product_group     area  unit_age_days  \
0                 2020-09   Kenya  SHS Entry-Level  Bungoma           1080   

            primary_product  follow_on_revenue_realization  \
0  Sun King Home 60 EasyBuy                        0.82977   

   total_follow_on_revenue_usd  
0                 10588.627719  
  registration_month country    product_group     area  \
0            2020-09   Kenya  SHS Entry-Level  Bungoma   

            primary_product  frr_3_years  total_follow_on_revenue_usd  
0  Sun King Home 60 EasyBuy      0.82977                 10588.627719  
Shape of the cleaned dataset: (87187, 4)
Head of the cleaned data
                                      accounts_group  frr_3_years  \
0  2020-09_Kenya_SHS Entry-Level_Bungoma_Sun King...      0.82977   

     actual_fr  total_follow_on_revenue_usd  
0  8786.125622                 10588.627719  


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['product_group'].fillna('NA', inplace=True)


In [20]:
df_target_final

Unnamed: 0,accounts_group,frr_3_years,actual_fr,total_follow_on_revenue_usd
0,2020-09_Kenya_SHS Entry-Level_Bungoma_Sun King...,0.829770,8786.125622,10588.627719
1,2020-09_Kenya_SHS Entry-Level_Kibwezi_Sun King...,0.917080,17622.974693,19216.398453
2,2020-09_Kenya_Lanterns_Kabarnet_Sun King Boom ...,0.897000,47272.269162,52700.411552
3,2020-10_Kenya_SHS with TV_Kakamega_Sun King Ho...,0.690572,88627.366152,128339.066965
4,2020-10_Kenya_SHS Entry-Level_Oyugis_Sun King ...,0.849122,11203.548114,13194.273749
...,...,...,...,...
87182,2021-09_Kenya_SHS with TV_Ganze_Sun King Home ...,0.374603,267.814570,714.929059
87183,2021-09_Kenya_SHS with TV_Port Victoria_Sun Ki...,0.733333,503.309829,686.331897
87184,2021-09_Kenya_SHS with TV Upgrade_Malindi_Sun ...,1.000000,686.331897,686.331897
87185,2021-09_Kenya_SHS with TV Upgrade_Bondo_Sun Ki...,0.346215,257.419931,743.526222


## Merge KPIs and target data

In [21]:
# Inner join on accounts_group: only groups present in both the pivoted KPIs
# and the targets are kept.
df_final = pd.merge(df_pivot, df_target_final, on='accounts_group', how='inner')

In [22]:
df_final

Unnamed: 0,accounts_group,count_units,upfront_price_usd,avg_planned_repayment_days,frr_30,frr_60,frr_90,frr_180,frr_270,frr_360,...,disabled_rate_180,disabled_rate_270,disabled_rate_360,disabled_rate_450,disabled_rate_540,disabled_rate_630,disabled_rate_720,frr_3_years,actual_fr,total_follow_on_revenue_usd
0,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,57,403.243821,77.0,0.352180,0.626566,0.785424,0.888827,0.924767,0.935544,...,0.228070,0.157895,0.122807,0.105263,0.105263,0.087719,0.052632,0.977148,1386.031419,1418.445741
1,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,18,127.340154,77.0,0.425714,0.686587,0.807381,0.849048,0.856984,0.856984,...,0.333333,0.333333,0.333333,0.333333,0.333333,0.222222,0.055556,0.903810,404.843825,447.930234
2,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,287,2030.368011,77.0,0.365668,0.618819,0.763085,0.867551,0.897489,0.917509,...,0.268293,0.188153,0.163763,0.153310,0.128920,0.108014,0.062718,0.958262,6843.905988,7141.998731
3,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,114,806.487642,77.0,0.341451,0.566825,0.706045,0.812436,0.847875,0.866647,...,0.307018,0.263158,0.219298,0.210526,0.201754,0.131579,0.096491,0.925030,2624.209728,2836.891482
4,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,142,1004.572326,77.0,0.358523,0.615414,0.759101,0.854278,0.878097,0.892322,...,0.260563,0.225352,0.197183,0.169014,0.161972,0.133803,0.112676,0.945294,3340.358794,3533.671846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87173,2022-10_Zambia_SHS with TV_Luanshya_Sun King H...,1,62.792045,731.0,0.038462,0.067415,0.096368,0.144498,0.182959,0.221421,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.463568,544.908931,1175.467097
87174,2022-10_Zambia_SHS with TV_Ndola_Sun King Home...,1,62.792045,549.0,0.134615,0.160256,0.166667,0.300641,0.633974,0.691667,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,979.555914,979.555914
87175,2022-10_Zambia_SHS with TV_Ndola_Sun King Home...,2,125.584090,731.0,0.030048,0.049012,0.069311,0.116052,0.179621,0.241319,...,0.500000,0.500000,0.000000,0.500000,0.000000,0.500000,0.500000,0.873130,1026.335586,1175.467097
87176,2022-10_Zambia_SHS without TV_Kapiri Mposhi_Su...,1,31.396022,549.0,0.064103,0.141026,0.192308,0.333333,0.500000,0.653846,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,538.755753,538.755753


In [23]:
# Persist the merged modelling table under a file name stamped with today's date.
modelling_csv_path = os.path.join(PROCESSED_DATA_DIR, f"KPIs_data_modelling_{dt.date.today()}.csv")
df_final.to_csv(modelling_csv_path, index=False)

In [24]:
# df_final = pd.read_csv(os.path.join(data_dir,'KPIs_data_modelling_2024-03-15.csv'))

In [25]:
df_final.columns

Index(['accounts_group', 'count_units', 'upfront_price_usd',
       'avg_planned_repayment_days', 'frr_30', 'frr_60', 'frr_90', 'frr_180',
       'frr_270', 'frr_360', 'frr_450', 'frr_540', 'frr_630', 'frr_720',
       'repayment_speed_30', 'repayment_speed_60', 'repayment_speed_90',
       'repayment_speed_180', 'repayment_speed_270', 'repayment_speed_360',
       'repayment_speed_450', 'repayment_speed_540', 'repayment_speed_630',
       'repayment_speed_720', 'avg_cum_days_disabled_30',
       'avg_cum_days_disabled_60', 'avg_cum_days_disabled_90',
       'avg_cum_days_disabled_180', 'avg_cum_days_disabled_270',
       'avg_cum_days_disabled_360', 'avg_cum_days_disabled_450',
       'avg_cum_days_disabled_540', 'avg_cum_days_disabled_630',
       'avg_cum_days_disabled_720', 'at_risk_rate_30', 'at_risk_rate_60',
       'at_risk_rate_90', 'at_risk_rate_180', 'at_risk_rate_270',
       'at_risk_rate_360', 'at_risk_rate_450', 'at_risk_rate_540',
       'at_risk_rate_630', 'at_risk_rate