## Step Functions

Step 1 KPI Collection: s3://sagemaker-us-west-2-983511196003/cbr_cohort3/cbr_input_kpis_data/code/kpis_data_collection_predictions.py

Step 2 Data preprocessing: s3://sagemaker-us-west-2-983511196003/cbr_cohort3/cbr_input_processing/code/data_preprocessing_predictions.py

Step 3a Prediction: s3://sagemaker-us-west-2-983511196003/cbr_cohort3/cbr_input_inference/code/model_inference_predictions.py

Step 3b KPIs Redshift table: s3://sagemaker-us-west-2-983511196003/cbr_cohort3/cbr_input_processing/code/KPIs_redshift_table.py

Step 4 Predictions Adjustment: s3://sagemaker-us-west-2-983511196003/cbr_input_adjustment/code/model_predictions_adjustment.py

In [1]:
import pandas as pd

In [None]:
import os
# Select the named AWS profile through the environment so that child
# processes started from this notebook inherit it.
os.environ.update({"AWS_PROFILE": "sunking"})

# Model files are kept in a "models" folder under the directory the notebook
# was started from; the path is made absolute once, here.
curr_dir = os.path.abspath(os.curdir)
model_dir = os.path.join(curr_dir, "models")

def fetch_models(model_filename):
    """Return the local path of a model file, downloading it from S3 if absent.

    The file is looked up inside the module-level ``model_dir``. When it is
    not there, it is copied from ``s3://cbr-cohort-experiments/Models/`` with
    the AWS CLI (``aws s3 cp``).

    Args:
        model_filename: Base name of the model file to fetch.

    Returns:
        The path of the model file inside ``model_dir``.

    Raises:
        subprocess.CalledProcessError: If ``aws s3 cp`` exits with a
            non-zero status (``check=True``).
    """
    local_path = os.path.join(model_dir, model_filename)

    if os.path.exists(local_path):
        print(f"{model_filename} already exist. skipping.")
        return local_path

    import subprocess

    # Make sure the destination folder exists before the CLI writes into it.
    os.makedirs(model_dir, exist_ok=True)

    command = [
        "aws", "s3", "cp",
        f"s3://cbr-cohort-experiments/Models/{model_filename}",
        # Download to the exact path that is checked above and returned
        # below, rather than to a folder relative to the current working
        # directory, so the three can never disagree.
        local_path,
    ]
    subprocess.run(command, check=True)
    return local_path

# Flat list that alternates an option name ("--model_file_<age>_<q>") with the
# model file name that goes with it.
# NOTE(review): the "--" entries look like command-line flags, presumably
# copied from the inference step's arguments - confirm.
models = [
    "--model_file_30_1", "quantile_regression_30_0.1_days_2023-10-16.txt",
    "--model_file_30_5", "quantile_regression_30_0.5_days_2023-10-16.txt",
    "--model_file_30_8", "quantile_regression_30_0.8_days_2023-10-16.txt",
    "--model_file_60_1", "quantile_regression_60_0.1_days_2023-10-16.txt",
    "--model_file_60_5", "quantile_regression_60_0.5_days_2023-10-16.txt",
    "--model_file_60_8", "quantile_regression_60_0.8_days_2023-10-16.txt",
    "--model_file_90_1", "quantile_regression_90_0.1_days_2024-03-20.txt",
    "--model_file_90_5", "quantile_regression_90_0.5_days_2025-02-23.txt",
    "--model_file_90_8", "quantile_regression_90_0.8_days_2024-03-20.txt",
    "--model_file_180_1", "quantile_regression_180_0.1_days_2023-09-28.txt",
    "--model_file_180_5", "quantile_regression_180_0.5_days_2025-02-23.txt",
    "--model_file_180_8", "quantile_regression_180_0.8_days_2023-09-28.txt",
    "--model_file_270_1", "quantile_regression_270_0.1_days_2024-03-19.txt",
    "--model_file_270_5", "quantile_regression_270_0.5_days_2025-02-23.txt",
    "--model_file_270_8", "quantile_regression_270_0.8_days_2024-03-19.txt",
    "--model_file_360_1", "quantile_regression_360+_0.1_days_2024-03-19.txt",
    "--model_file_360_5", "quantile_regression_360_0.5_days_2025-02-23.txt",
    "--model_file_360_8", "quantile_regression_360+_0.8_days_2024-03-19.txt",
]

# Keep only the file names (entries starting with "quantile_"); the option
# names are dropped.
models_clean = [entry for entry in models if entry.startswith("quantile_")]
models_clean

['quantile_regression_30_0.1_days_2023-10-16.txt',
 'quantile_regression_30_0.5_days_2023-10-16.txt',
 'quantile_regression_30_0.8_days_2023-10-16.txt',
 'quantile_regression_60_0.1_days_2023-10-16.txt',
 'quantile_regression_60_0.5_days_2023-10-16.txt',
 'quantile_regression_60_0.8_days_2023-10-16.txt',
 'quantile_regression_90_0.1_days_2024-03-20.txt',
 'quantile_regression_90_0.5_days_2025-02-23.txt',
 'quantile_regression_90_0.8_days_2024-03-20.txt',
 'quantile_regression_180_0.1_days_2023-09-28.txt',
 'quantile_regression_180_0.5_days_2025-02-23.txt',
 'quantile_regression_180_0.8_days_2023-09-28.txt',
 'quantile_regression_270_0.1_days_2024-03-19.txt',
 'quantile_regression_270_0.5_days_2025-02-23.txt',
 'quantile_regression_270_0.8_days_2024-03-19.txt',
 'quantile_regression_360+_0.1_days_2024-03-19.txt',
 'quantile_regression_360_0.5_days_2025-02-23.txt',
 'quantile_regression_360+_0.8_days_2024-03-19.txt']

In [3]:
# Make sure every model file is available locally; fetch_models downloads
# the ones that are missing and skips the ones already present.
for model_file in models_clean:
    # model_path is overwritten on each pass, so after the loop it holds
    # only the path of the last model in models_clean.
    model_path = fetch_models(model_file)

download: s3://cbr-cohort-experiments/Models/quantile_regression_30_0.1_days_2023-10-16.txt to Models/quantile_regression_30_0.1_days_2023-10-16.txt
download: s3://cbr-cohort-experiments/Models/quantile_regression_30_0.5_days_2023-10-16.txt to Models/quantile_regression_30_0.5_days_2023-10-16.txt
download: s3://cbr-cohort-experiments/Models/quantile_regression_30_0.8_days_2023-10-16.txt to Models/quantile_regression_30_0.8_days_2023-10-16.txt
download: s3://cbr-cohort-experiments/Models/quantile_regression_60_0.1_days_2023-10-16.txt to Models/quantile_regression_60_0.1_days_2023-10-16.txt
download: s3://cbr-cohort-experiments/Models/quantile_regression_60_0.5_days_2023-10-16.txt to Models/quantile_regression_60_0.5_days_2023-10-16.txt
download: s3://cbr-cohort-experiments/Models/quantile_regression_60_0.8_days_2023-10-16.txt to Models/quantile_regression_60_0.8_days_2023-10-16.txt
download: s3://cbr-cohort-experiments/Models/quantile_regression_90_0.1_days_2024-03-20.txt to Models/quan

In [4]:
# Load the predictions CSV from the local "output" folder (path is relative
# to the current working directory).
predictions_csv = "output/predictions_360_days_2025-09-17.csv"
preds = pd.read_csv(predictions_csv)

In [5]:
# Bare expression: the notebook renders the loaded DataFrame as the cell output.
preds

Unnamed: 0,pred_prim_key,accounts_group,count_units,reg_month,country,area,primary_product,product_group,backtesting_unit_age_days,frr_prediction_10,frr_prediction_50,frr_prediction_80,predicted_revenue_3_years_10,predicted_revenue_3_years_50,predicted_revenue_3_years_80,total_follow_on_revenue_current_usd,date_uploaded
0,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,2016-01_Kenya_Lanterns_Kakamega_Sun King Pro E...,57,2016-01,Kenya,Kakamega,Sun King Pro EasyBuy,Lanterns,360,0.971373,0.977491,0.973307,1499.22,1508.66,1502.20,1543.400856,2025-09-17
1,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,2016-02_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,18,2016-02,Kenya,Bungoma,Sun King Pro EasyBuy,Lanterns,360,0.892442,0.912855,0.928462,434.97,444.92,452.52,487.389744,2025-09-17
2,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,2016-02_Kenya_Lanterns_Kakamega_Sun King Pro E...,287,2016-02,Kenya,Kakamega,Sun King Pro EasyBuy,Lanterns,360,0.950737,0.958276,0.961045,7388.33,7446.91,7468.43,7771.158696,2025-09-17
3,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,2016-03_Kenya_Lanterns_Bungoma_Sun King Pro Ea...,114,2016-03,Kenya,Bungoma,Sun King Pro EasyBuy,Lanterns,360,0.913800,0.928347,0.948667,2820.72,2865.62,2928.35,3086.801712,2025-09-17
4,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,2016-03_Kenya_Lanterns_Kakamega_Sun King Pro E...,142,2016-03,Kenya,Kakamega,Sun King Pro EasyBuy,Lanterns,360,0.940154,0.945235,0.952192,3614.86,3634.39,3661.14,3844.963536,2025-09-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168589,2024-08_Zambia_SHS without TV_Mumbwa_Sun King ...,2024-08_Zambia_SHS without TV_Mumbwa_Sun King ...,16,2024-08,Zambia,Mumbwa,Sun King Home 500x EasyBuy,SHS without TV,360,0.962380,0.990955,1.000000,3141.54,3234.82,3264.34,3264.341409,2025-09-17
168590,2024-08_Zambia_SHS without TV_Ndola_Sun King H...,2024-08_Zambia_SHS without TV_Ndola_Sun King H...,97,2024-08,Zambia,Ndola,Sun King Home 500x EasyBuy,SHS without TV,360,0.959043,0.990156,1.000000,18839.33,19450.51,19643.89,19643.890743,2025-09-17
168591,2024-08_Zambia_SHS without TV_Serenje_Sun King...,2024-08_Zambia_SHS without TV_Serenje_Sun King...,7,2024-08,Zambia,Serenje,Sun King Home 500x EasyBuy,SHS without TV,360,0.933058,0.987748,1.000000,1273.86,1348.52,1365.25,1365.251295,2025-09-17
168592,2024-08_Zambia_SHS without TV_Situmbeko_Sun Ki...,2024-08_Zambia_SHS without TV_Situmbeko_Sun Ki...,19,2024-08,Zambia,Situmbeko,Sun King Home 500x EasyBuy,SHS without TV,360,0.959320,0.990619,1.000000,3614.08,3731.99,3767.33,3767.331009,2025-09-17


In [6]:
# NOTE(review): uad is not used in this cell; presumably the unit age in days,
# matching the 360-day predictions file - confirm before relying on it.
uad = 360
accounts_group = '2024-05_Kenya_Lanterns_' #Keep the underscore at the end.

# Keep the rows whose accounts_group contains the key above.
# regex=False matches the key as literal text, so characters such as "+",
# "." or "(" in a key are not interpreted as regular-expression syntax.
# na=False treats rows with a missing accounts_group as non-matching instead
# of producing NaN in the mask, which boolean indexing rejects.
preds2 = preds[preds['accounts_group'].str.contains(accounts_group, regex=False, na=False)]
print(preds2.shape)

(223, 17)


In [7]:
# Unweighted mean of the frr_prediction_50 column over the selected rows
# (each row counts equally, whatever its count_units).
preds2["frr_prediction_50"].mean()

np.float64(0.9754330997461169)

In [8]:
# Ratio of the summed predicted_revenue_3_years_50 to the summed
# total_follow_on_revenue_current_usd over the selected rows.
(
    preds2["predicted_revenue_3_years_50"].sum()
    / preds2["total_follow_on_revenue_current_usd"].sum()
)

np.float64(0.9727124648220243)