In [38]:
import numpy as np
import pandas as pd
import psycopg2
import cx_Oracle
import datetime
import os
%matplotlib inline
from IPython.display import display
from scipy import stats
from datetime import timedelta
from collections import Counter
from openpyxl import load_workbook

## Existing customers per site

In [2]:
# Per-customer rows from customer_order_profile (customer id, site id, order
# count, net total, CM2); rows without a CM2 value are excluded.
query = """SELECT cop_c_id, cop_pref_o_ext_s_id, cop_g_order_count, cop_g_net_total, cop_g_cm2
FROM public.customer_order_profile
where cop_g_cm2 is not null
"""

In [3]:
# Load the result of `query` from the savings-plan Postgres database into `data`.
# Connection settings are read from the environment so that no password is
# stored in the notebook; only the non-secret settings fall back to the values
# that were previously hardcoded here.
conn = psycopg2.connect(
    dbname=os.environ.get('SAVINGS_PLAN_DB_NAME', 'savings_plan_db'),
    user=os.environ.get('SAVINGS_PLAN_DB_USER', 'dba'),
    host=os.environ.get(
        'SAVINGS_PLAN_DB_HOST',
        'savings-plan-prod-c-savings-plan-db-readonly.ccsod6gcmvcs.eu-central-1.rds.amazonaws.com',
    ),
    # Deliberately no default: a missing password fails fast with a KeyError.
    password=os.environ['SAVINGS_PLAN_DB_PASSWORD'],
)
try:
    data = pd.read_sql(query, con=conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

In [4]:
# Summarise the loaded frame: row count, column dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9404913 entries, 0 to 9404912
Data columns (total 5 columns):
cop_c_id               int64
cop_pref_o_ext_s_id    int64
cop_g_order_count      int64
cop_g_net_total        float64
cop_g_cm2              float64
dtypes: float64(2), int64(3)
memory usage: 358.8 MB


In [5]:
# CM2 expressed as a percentage of the net total, per customer.
# NOTE(review): a net total of 0 yields inf/NaN here — confirm such rows are acceptable.
cm2_share = data['cop_g_cm2'].div(data['cop_g_net_total'])
data['CM2%'] = cm2_share.mul(100)

In [6]:
# Eyeball ten random rows, including the new CM2% column.
data.sample(10)

Unnamed: 0,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%
3823702,16006642,21,1,32.546567,8.430117,25.901709
4491846,16851440,1,2,127.311772,22.714525,17.841653
7208413,20566446,18,1,162.0348,14.8646,9.173708
4459969,16811520,107,1,70.239669,13.188219,18.776027
4178474,16456412,24,1,30.16251,8.96359,29.717653
5782906,18590355,1,1,116.487395,33.421395,28.690997
6701532,19840202,15,1,33.345455,-0.946421,-2.838231
4892125,17378792,12,6,158.372429,7.502094,4.736995
6067798,18976877,4,4,201.841666,26.572635,13.165089
2954900,14850547,16,2,105.404522,27.813054,26.386965


In [7]:
# Site id (values of cop_pref_o_ext_s_id) -> site name; the names are also used
# as Excel sheet names further down.
site_dict = dict([
    (1, 'Deutschland'),
    (2, 'International'),
    (3, 'United Kingdom'),
    (4, 'France'),
    (7, 'Netherlands'),
    (8, 'Poland'),
    (11, 'Eire'),
    (12, 'Italy'),
    (14, 'Belgium'),
    (15, 'Spain'),
    (16, 'Czech'),
    (18, 'Finland'),
    (19, 'Slovakia'),
    (20, 'Russia'),
    (21, 'Denmark'),
    (22, 'Hungary'),
    (23, 'Slovenia'),
    (24, 'Romania'),
    (25, 'Switzerland'),
    (26, 'Sweden'),
    (28, 'Portugal'),
    (29, 'Croatia'),
    (30, 'Bulgaria'),
    (31, 'Norway'),
    (32, 'Greece'),
])

In [8]:
# The 25 smallest distinct site ids present in the data, for comparison with the keys of site_dict.
np.sort(data['cop_pref_o_ext_s_id'].unique())[:25]

array([ 1,  2,  3,  4,  7,  8, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23,
       24, 25, 26, 28, 29, 30, 31, 32], dtype=int64)

In [9]:
# Keep only customers whose site id (cop_pref_o_ext_s_id) is one of the sites
# listed in site_dict; the notebook refers to these as the Z+ active customers.
# Filtering on the site_dict keys, rather than on "the 25 smallest ids found in
# the data", keeps the selection stable if other site ids appear and guarantees
# that every remaining id can be looked up in site_dict later on.
data = data[data['cop_pref_o_ext_s_id'].isin(list(site_dict))]

In [25]:
# Number of rows left after restricting to the known sites.
print('Z+ Active customers: ', len(data))

Z+ Active customers:  8193374


In [10]:
# Get all the existing customers having 2 or more orders finished
#data = data[data['cop_g_order_count'] >= 2]

Let's get from `customers_zooprime_plans` every customer ID with an SP that started in the last three years. This lets us separate, among the existing customers, those considered renewals from the others, who would need to meet the Tier 4 or 5 threshold criteria.

In [10]:
# Distinct customer ids (czp_k_id) with a plan whose start date lies within the
# last 365 * 3 days and whose czp_cancel_u_id is NULL.
query = """select distinct czp_k_id
from customers_zooprime_plans
where czp_start_date >= trunc(sysdate - (365 * 3))
and czp_cancel_u_id is NULL
"""

In [11]:
# Load the result of `query` from the Oracle database into `o_data`.
# The password is read from the environment so that it is not stored in the
# notebook; user and DSN fall back to the values previously hardcoded here.
conn = cx_Oracle.connect(
    os.environ.get('ZPMSTB_DB_USER', 'readonly'),
    # Deliberately no default: a missing password fails fast with a KeyError.
    os.environ['ZPMSTB_DB_PASSWORD'],
    os.environ.get('ZPMSTB_DB_DSN', 'db-zpmstb-01:1521/zpmstb.web.zooplus.de'),
    threaded=True,
    encoding="UTF-8",
    nencoding="UTF-8",
)
try:
    o_data = pd.read_sql(query, con=conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

In [13]:
# Customer ids returned by the Oracle query, as a plain array for the isin() lookup below.
sp_customers = o_data['CZP_K_ID'].values

In [26]:
# How many customer ids the Oracle query returned.
len(sp_customers)

1648974

In [14]:
# Flag (1/0) customers whose id appears among the SP customers fetched from Oracle.
in_sp_customers = data['cop_c_id'].isin(sp_customers)
data['is_renewal'] = in_sp_customers * 1

In [16]:
# Eyeball ten random rows, including the new is_renewal flag.
data.sample(10)

Unnamed: 0,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%,is_renewal
2569485,14336530,1,3,65.859813,14.956042,22.708904,0
7047369,20338082,12,2,60.213115,9.674452,16.067018,0
3083583,15020243,4,3,117.433333,1.123532,0.95674,0
8224741,2300149,1,1,18.495327,1.413327,7.641536,0
230809,9051285,25,6,1384.709113,235.61628,17.015579,1
8456622,3653066,4,1,41.633333,12.621058,30.314791,0
4549614,16927689,26,1,30.732154,4.339279,14.119671,0
1144031,12000242,4,1,49.8,16.462,33.056225,0
7631355,12095240,4,12,1093.166599,190.264644,17.404908,1
2034518,13515087,1,6,264.569811,56.444256,21.334352,1


Customers with the NC SP need to be treated as existing customers, because their CM2% will in future be evaluated against the renewal threshold. Therefore, we define 'existing customers' as those with at least one SP in the last three years (is_renewal = 1) plus those with at least two finished orders (Tier 4 & 5 thresholds for accessing their 1st SP).

In [28]:
# Existing customers: flagged as renewal, or with an order count of at least 2.
flagged_renewal = data['is_renewal'] == 1
at_least_two_orders = data['cop_g_order_count'] >= 2
data = data[flagged_renewal | at_least_two_orders]

In [29]:
# Number of existing customers after the filter above.
len(data)

4966487

In [30]:
# Split of existing customers by evaluation path: more than 1.6M are evaluated
# with the renewal thresholds (low, high) and around 3.3M with the Tier 4 & 5
# thresholds for their 1st SP.
data['is_renewal'].value_counts()

0    3321473
1    1645014
Name: is_renewal, dtype: int64

In [66]:
# Output workbook for the per-site threshold tables, created in the current
# working directory.
file = 'New_routine_parameters.xlsx'
# os.path.join picks the separator for the running platform; the previous
# '\\' concatenation only produced a valid path on Windows.
out_path = os.path.join(os.getcwd(), file)
writer = pd.ExcelWriter(out_path, engine='xlsxwriter')
workbook = writer.book
# Two-decimal number format registered on the workbook (not applied to any
# cells in this cell).
format1 = workbook.add_format({'num_format': '0.00'})

In [67]:
# Show where the workbook will be written.
out_path

'C:\\Users\\albertoma\\Desktop\\Savings_plan\\postgres_conn\\New_routine_parameters.xlsx'

In [68]:
def _eligibility_table(customers, total_customers, share_column, thresholds=range(1, 31)):
    """Count the customers at or above each CM2% threshold.

    Parameters
    ----------
    customers : pandas.DataFrame
        One row per customer, with a 'CM2%' column.
    total_customers : int
        Size of the whole site population; denominator of '%_TOTAL'.
    share_column : str
        Name of the column holding the share within `customers`.
    thresholds : iterable of numbers
        CM2% thresholds to evaluate; defaults to 1..30.

    Returns
    -------
    pandas.DataFrame
        Columns THRESHOLD_%, ELIG_CUS, <share_column>, %_TOTAL, one row per
        threshold. A share is NaN when its denominator is zero.
    """
    group_size = len(customers)
    records = []
    for threshold in thresholds:
        # Rows with a NaN CM2% compare False and are therefore not eligible.
        eligible = int((customers['CM2%'] >= float(threshold)).sum())
        records.append({
            'THRESHOLD_%': float(threshold),
            'ELIG_CUS': eligible,
            share_column: (eligible / group_size * 100) if group_size else np.nan,
            '%_TOTAL': (eligible / total_customers * 100) if total_customers else np.nan,
        })
    # Build the frame once instead of appending row by row (DataFrame.append is
    # quadratic in a loop and no longer exists in recent pandas).
    return pd.DataFrame(records, columns=['THRESHOLD_%', 'ELIG_CUS', share_column, '%_TOTAL'])


# One report (printed summary, two displayed tables, one Excel sheet) per site.
# Only sites that are both named in site_dict and present in the data are
# reported, so every id can be resolved to a sheet name.
present_sites = set(data['cop_pref_o_ext_s_id'].unique())
for i in sorted(site_dict):
    if i not in present_sites:
        continue
    cus_data = data[data['cop_pref_o_ext_s_id'] == i]
    # cop_c_id is a non-null integer column, so row counts equal the former
    # cop_c_id.count() values.
    cus_amount = len(cus_data)
    cus_renewal = cus_data[cus_data['is_renewal'] == 1]
    cus_first_sp = cus_data[cus_data['is_renewal'] == 0]
    print('SITE ' + str(i) + ' [' + site_dict[i] + ']')
    print('')
    print('Total amount of existing customers: ', cus_amount)
    print('Renewals: ', len(cus_renewal))
    print('First SP: ', len(cus_first_sp))
    print('')
    print('Renewals Table')
    df_renewals = _eligibility_table(cus_renewal, cus_amount, '%_RENEW')
    display(df_renewals.round(2))
    print('')
    print('First SP Table')
    df_first_sp = _eligibility_table(cus_first_sp, cus_amount, '%_FIRST_SP')
    display(df_first_sp.round(2))
    print('_______________________________________')
    print('')
    # Both tables share one sheet: renewals from column 0, first-SP from column 5.
    # Row 0 is left free for the titles added in a later cell.
    sheet_name = site_dict[i]
    df_renewals.round(2).to_excel(writer, sheet_name=sheet_name, index=False, startrow=1, startcol=0)
    df_first_sp.round(2).to_excel(writer, sheet_name=sheet_name, index=False, startrow=1, startcol=5)

# close() writes the file and releases it; ExcelWriter.save() is not available
# in recent pandas releases.
writer.close()

SITE 1 [Deutschland]

Total amount of existing customers:  1241242
Renewals:  437426
First SP:  803816

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,423139,96.73,34.09
1,2.0,419456,95.89,33.79
2,3.0,414899,94.85,33.43
3,4.0,409191,93.55,32.97
4,5.0,402035,91.91,32.39
5,6.0,393194,89.89,31.68
6,7.0,382715,87.49,30.83
7,8.0,370191,84.63,29.82
8,9.0,355435,81.26,28.64
9,10.0,338548,77.4,27.27



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,769169,95.69,61.97
1,2.0,762740,94.89,61.45
2,3.0,754936,93.92,60.82
3,4.0,745555,92.75,60.07
4,5.0,734361,91.36,59.16
5,6.0,721054,89.7,58.09
6,7.0,705745,87.8,56.86
7,8.0,687917,85.58,55.42
8,9.0,667798,83.08,53.8
9,10.0,644830,80.22,51.95


_______________________________________

SITE 2 [International]

Total amount of existing customers:  4559
Renewals:  2127
First SP:  2432

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,2064,97.04,45.27
1,2.0,2050,96.38,44.97
2,3.0,2034,95.63,44.62
3,4.0,2010,94.5,44.09
4,5.0,1987,93.42,43.58
5,6.0,1957,92.01,42.93
6,7.0,1932,90.83,42.38
7,8.0,1889,88.81,41.43
8,9.0,1836,86.32,40.27
9,10.0,1768,83.12,38.78



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,2346,96.46,51.46
1,2.0,2327,95.68,51.04
2,3.0,2312,95.07,50.71
3,4.0,2291,94.2,50.25
4,5.0,2269,93.3,49.77
5,6.0,2234,91.86,49.0
6,7.0,2197,90.34,48.19
7,8.0,2146,88.24,47.07
8,9.0,2082,85.61,45.67
9,10.0,2027,83.35,44.46


_______________________________________

SITE 3 [United Kingdom]

Total amount of existing customers:  454674
Renewals:  153435
First SP:  301239

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,92699,60.42,20.39
1,2.0,84771,55.25,18.64
2,3.0,76821,50.07,16.9
3,4.0,69144,45.06,15.21
4,5.0,61787,40.27,13.59
5,6.0,54723,35.67,12.04
6,7.0,48310,31.49,10.63
7,8.0,42402,27.64,9.33
8,9.0,37043,24.14,8.15
9,10.0,32263,21.03,7.1



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,189038,62.75,41.58
1,2.0,173730,57.67,38.21
2,3.0,158447,52.6,34.85
3,4.0,143734,47.71,31.61
4,5.0,129353,42.94,28.45
5,6.0,115701,38.41,25.45
6,7.0,103293,34.29,22.72
7,8.0,91742,30.45,20.18
8,9.0,81544,27.07,17.93
9,10.0,72347,24.02,15.91


_______________________________________

SITE 4 [France]

Total amount of existing customers:  853230
Renewals:  271035
First SP:  582195

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,254525,93.91,29.83
1,2.0,248908,91.84,29.17
2,3.0,242163,89.35,28.38
3,4.0,234034,86.35,27.43
4,5.0,224445,82.81,26.31
5,6.0,213479,78.76,25.02
6,7.0,201363,74.29,23.6
7,8.0,188330,69.49,22.07
8,9.0,174890,64.53,20.5
9,10.0,161018,59.41,18.87



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,542745,93.22,63.61
1,2.0,531300,91.26,62.27
2,3.0,517210,88.84,60.62
3,4.0,500107,85.9,58.61
4,5.0,480239,82.49,56.28
5,6.0,457374,78.56,53.61
6,7.0,432427,74.28,50.68
7,8.0,405514,69.65,47.53
8,9.0,377664,64.87,44.26
9,10.0,349194,59.98,40.93


_______________________________________

SITE 7 [Netherlands]

Total amount of existing customers:  494203
Renewals:  188628
First SP:  305575

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,177307,94.0,35.88
1,2.0,173661,92.07,35.14
2,3.0,169286,89.75,34.25
3,4.0,164046,86.97,33.19
4,5.0,157595,83.55,31.89
5,6.0,150323,79.69,30.42
6,7.0,142082,75.32,28.75
7,8.0,133143,70.58,26.94
8,9.0,123540,65.49,25.0
9,10.0,113577,60.21,22.98



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,288482,94.41,58.37
1,2.0,283925,92.91,57.45
2,3.0,278269,91.06,56.31
3,4.0,271529,88.86,54.94
4,5.0,263883,86.36,53.4
5,6.0,255053,83.47,51.61
6,7.0,245017,80.18,49.58
7,8.0,233787,76.51,47.31
8,9.0,221743,72.57,44.87
9,10.0,208713,68.3,42.23


_______________________________________

SITE 8 [Poland]

Total amount of existing customers:  419138
Renewals:  94958
First SP:  324180

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,86019,90.59,20.52
1,2.0,83729,88.17,19.98
2,3.0,80922,85.22,19.31
3,4.0,77795,81.93,18.56
4,5.0,74130,78.07,17.69
5,6.0,70010,73.73,16.7
6,7.0,65417,68.89,15.61
7,8.0,60483,63.69,14.43
8,9.0,55426,58.37,13.22
9,10.0,50145,52.81,11.96



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,279702,86.28,66.73
1,2.0,269179,83.03,64.22
2,3.0,257653,79.48,61.47
3,4.0,245198,75.64,58.5
4,5.0,231832,71.51,55.31
5,6.0,217236,67.01,51.83
6,7.0,202020,62.32,48.2
7,8.0,186279,57.46,44.44
8,9.0,170017,52.45,40.56
9,10.0,153900,47.47,36.72


_______________________________________

SITE 11 [Eire]

Total amount of existing customers:  20190
Renewals:  6101
First SP:  14089

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,5515,90.4,27.32
1,2.0,5339,87.51,26.44
2,3.0,5131,84.1,25.41
3,4.0,4911,80.5,24.32
4,5.0,4626,75.82,22.91
5,6.0,4340,71.14,21.5
6,7.0,4065,66.63,20.13
7,8.0,3722,61.01,18.43
8,9.0,3359,55.06,16.64
9,10.0,2981,48.86,14.76



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,12615,89.54,62.48
1,2.0,12220,86.73,60.53
2,3.0,11791,83.69,58.4
3,4.0,11271,80.0,55.82
4,5.0,10680,75.8,52.9
5,6.0,10033,71.21,49.69
6,7.0,9322,66.17,46.17
7,8.0,8585,60.93,42.52
8,9.0,7862,55.8,38.94
9,10.0,7141,50.68,35.37


_______________________________________

SITE 12 [Italy]

Total amount of existing customers:  393145
Renewals:  148000
First SP:  245145

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,140139,94.69,35.65
1,2.0,138152,93.35,35.14
2,3.0,135583,91.61,34.49
3,4.0,132507,89.53,33.7
4,5.0,128782,87.01,32.76
5,6.0,124346,84.02,31.63
6,7.0,119287,80.6,30.34
7,8.0,113528,76.71,28.88
8,9.0,107088,72.36,27.24
9,10.0,100151,67.67,25.47



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,228426,93.18,58.1
1,2.0,224916,91.75,57.21
2,3.0,220631,90.0,56.12
3,4.0,215666,87.97,54.86
4,5.0,209767,85.57,53.36
5,6.0,203046,82.83,51.65
6,7.0,195459,79.73,49.72
7,8.0,186971,76.27,47.56
8,9.0,177747,72.51,45.21
9,10.0,167801,68.45,42.68


_______________________________________

SITE 14 [Belgium]

Total amount of existing customers:  86566
Renewals:  39148
First SP:  47418

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,38368,98.01,44.32
1,2.0,38070,97.25,43.98
2,3.0,37593,96.03,43.43
3,4.0,36966,94.43,42.7
4,5.0,36120,92.27,41.73
5,6.0,35010,89.43,40.44
6,7.0,33747,86.2,38.98
7,8.0,32186,82.22,37.18
8,9.0,30415,77.69,35.14
9,10.0,28422,72.6,32.83



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,46440,97.94,53.65
1,2.0,46051,97.12,53.2
2,3.0,45591,96.15,52.67
3,4.0,44973,94.84,51.95
4,5.0,44163,93.14,51.02
5,6.0,43091,90.87,49.78
6,7.0,41775,88.1,48.26
7,8.0,40162,84.7,46.39
8,9.0,38323,80.82,44.27
9,10.0,36256,76.46,41.88


_______________________________________

SITE 15 [Spain]

Total amount of existing customers:  288862
Renewals:  83334
First SP:  205528

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,73049,87.66,25.29
1,2.0,70753,84.9,24.49
2,3.0,68048,81.66,23.56
3,4.0,65011,78.01,22.51
4,5.0,61585,73.9,21.32
5,6.0,57988,69.59,20.07
6,7.0,54200,65.04,18.76
7,8.0,50031,60.04,17.32
8,9.0,45845,55.01,15.87
9,10.0,41654,49.98,14.42



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,173374,84.36,60.02
1,2.0,167282,81.39,57.91
2,3.0,160335,78.01,55.51
3,4.0,152824,74.36,52.91
4,5.0,144537,70.32,50.04
5,6.0,136088,66.21,47.11
6,7.0,127280,61.93,44.06
7,8.0,118101,57.46,40.88
8,9.0,108636,52.86,37.61
9,10.0,99351,48.34,34.39


_______________________________________

SITE 16 [Czech]

Total amount of existing customers:  134716
Renewals:  38307
First SP:  96409

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,37146,96.97,27.57
1,2.0,36761,95.96,27.29
2,3.0,36246,94.62,26.91
3,4.0,35595,92.92,26.42
4,5.0,34718,90.63,25.77
5,6.0,33632,87.8,24.97
6,7.0,32338,84.42,24.0
7,8.0,30766,80.31,22.84
8,9.0,28903,75.45,21.45
9,10.0,26863,70.13,19.94



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,91279,94.68,67.76
1,2.0,89979,93.33,66.79
2,3.0,88400,91.69,65.62
3,4.0,86393,89.61,64.13
4,5.0,83957,87.08,62.32
5,6.0,81077,84.1,60.18
6,7.0,77593,80.48,57.6
7,8.0,73683,76.43,54.7
8,9.0,69221,71.8,51.38
9,10.0,64522,66.93,47.89


_______________________________________

SITE 18 [Finland]

Total amount of existing customers:  82775
Renewals:  28884
First SP:  53891

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,27999,96.94,33.83
1,2.0,27724,95.98,33.49
2,3.0,27398,94.86,33.1
3,4.0,26958,93.33,32.57
4,5.0,26363,91.27,31.85
5,6.0,25645,88.79,30.98
6,7.0,24864,86.08,30.04
7,8.0,23907,82.77,28.88
8,9.0,22792,78.91,27.53
9,10.0,21508,74.46,25.98



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,51746,96.02,62.51
1,2.0,51204,95.01,61.86
2,3.0,50462,93.64,60.96
3,4.0,49559,91.96,59.87
4,5.0,48461,89.92,58.55
5,6.0,47224,87.63,57.05
6,7.0,45829,85.04,55.37
7,8.0,44061,81.76,53.23
8,9.0,42168,78.25,50.94
9,10.0,40013,74.25,48.34


_______________________________________

SITE 19 [Slovakia]

Total amount of existing customers:  31419
Renewals:  8353
First SP:  23066

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,7332,87.78,23.34
1,2.0,7084,84.81,22.55
2,3.0,6804,81.46,21.66
3,4.0,6448,77.19,20.52
4,5.0,6053,72.46,19.27
5,6.0,5677,67.96,18.07
6,7.0,5297,63.41,16.86
7,8.0,4892,58.57,15.57
8,9.0,4514,54.04,14.37
9,10.0,4085,48.9,13.0



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,20351,88.23,64.77
1,2.0,19716,85.48,62.75
2,3.0,18941,82.12,60.29
3,4.0,18111,78.52,57.64
4,5.0,17194,74.54,54.72
5,6.0,16220,70.32,51.62
6,7.0,15225,66.01,48.46
7,8.0,14251,61.78,45.36
8,9.0,13309,57.7,42.36
9,10.0,12370,53.63,39.37


_______________________________________

SITE 20 [Russia]

Total amount of existing customers:  4010
Renewals:  1635
First SP:  2375

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,1429,87.4,35.64
1,2.0,1377,84.22,34.34
2,3.0,1307,79.94,32.59
3,4.0,1233,75.41,30.75
4,5.0,1164,71.19,29.03
5,6.0,1084,66.3,27.03
6,7.0,1007,61.59,25.11
7,8.0,910,55.66,22.69
8,9.0,819,50.09,20.42
9,10.0,723,44.22,18.03



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,2080,87.58,51.87
1,2.0,2030,85.47,50.62
2,3.0,1949,82.06,48.6
3,4.0,1868,78.65,46.58
4,5.0,1775,74.74,44.26
5,6.0,1665,70.11,41.52
6,7.0,1548,65.18,38.6
7,8.0,1410,59.37,35.16
8,9.0,1270,53.47,31.67
9,10.0,1146,48.25,28.58


_______________________________________

SITE 21 [Denmark]

Total amount of existing customers:  113792
Renewals:  36274
First SP:  77518

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,34858,96.1,30.63
1,2.0,34502,95.11,30.32
2,3.0,34069,93.92,29.94
3,4.0,33534,92.45,29.47
4,5.0,32951,90.84,28.96
5,6.0,32248,88.9,28.34
6,7.0,31463,86.74,27.65
7,8.0,30523,84.15,26.82
8,9.0,29448,81.18,25.88
9,10.0,28295,78.0,24.87



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,73397,94.68,64.5
1,2.0,72547,93.59,63.75
2,3.0,71540,92.29,62.87
3,4.0,70358,90.76,61.83
4,5.0,69056,89.08,60.69
5,6.0,67646,87.26,59.45
6,7.0,65989,85.13,57.99
7,8.0,64223,82.85,56.44
8,9.0,62270,80.33,54.72
9,10.0,60162,77.61,52.87


_______________________________________

SITE 22 [Hungary]

Total amount of existing customers:  47304
Renewals:  10989
First SP:  36315

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,8874,80.75,18.76
1,2.0,8501,77.36,17.97
2,3.0,8053,73.28,17.02
3,4.0,7545,68.66,15.95
4,5.0,7016,63.85,14.83
5,6.0,6493,59.09,13.73
6,7.0,5970,54.33,12.62
7,8.0,5437,49.48,11.49
8,9.0,4868,44.3,10.29
9,10.0,4301,39.14,9.09



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,28508,78.5,60.27
1,2.0,27111,74.66,57.31
2,3.0,25615,70.54,54.15
3,4.0,24038,66.19,50.82
4,5.0,22417,61.73,47.39
5,6.0,20724,57.07,43.81
6,7.0,19023,52.38,40.21
7,8.0,17250,47.5,36.47
8,9.0,15628,43.03,33.04
9,10.0,14084,38.78,29.77


_______________________________________

SITE 23 [Slovenia]

Total amount of existing customers:  27666
Renewals:  10964
First SP:  16702

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,10745,98.0,38.84
1,2.0,10656,97.19,38.52
2,3.0,10568,96.39,38.2
3,4.0,10453,95.34,37.78
4,5.0,10250,93.49,37.05
5,6.0,10007,91.27,36.17
6,7.0,9719,88.64,35.13
7,8.0,9342,85.21,33.77
8,9.0,8927,81.42,32.27
9,10.0,8452,77.09,30.55



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,16297,97.58,58.91
1,2.0,16178,96.86,58.48
2,3.0,16025,95.95,57.92
3,4.0,15839,94.83,57.25
4,5.0,15581,93.29,56.32
5,6.0,15244,91.27,55.1
6,7.0,14846,88.89,53.66
7,8.0,14397,86.2,52.04
8,9.0,13901,83.23,50.25
9,10.0,13233,79.23,47.83


_______________________________________

SITE 24 [Romania]

Total amount of existing customers:  28185
Renewals:  7500
First SP:  20685

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,6743,89.91,23.92
1,2.0,6534,87.12,23.18
2,3.0,6296,83.95,22.34
3,4.0,6030,80.4,21.39
4,5.0,5750,76.67,20.4
5,6.0,5453,72.71,19.35
6,7.0,5144,68.59,18.25
7,8.0,4795,63.93,17.01
8,9.0,4386,58.48,15.56
9,10.0,3980,53.07,14.12



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,17975,86.9,63.78
1,2.0,17334,83.8,61.5
2,3.0,16569,80.1,58.79
3,4.0,15790,76.34,56.02
4,5.0,14932,72.19,52.98
5,6.0,14042,67.88,49.82
6,7.0,13134,63.5,46.6
7,8.0,12226,59.11,43.38
8,9.0,11278,54.52,40.01
9,10.0,10352,50.05,36.73


_______________________________________

SITE 25 [Switzerland]

Total amount of existing customers:  86495
Renewals:  36519
First SP:  49976

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,36495,99.93,42.19
1,2.0,36488,99.92,42.19
2,3.0,36480,99.89,42.18
3,4.0,36467,99.86,42.16
4,5.0,36453,99.82,42.14
5,6.0,36431,99.76,42.12
6,7.0,36413,99.71,42.1
7,8.0,36371,99.59,42.05
8,9.0,36313,99.44,41.98
9,10.0,36244,99.25,41.9



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,49920,99.89,57.71
1,2.0,49914,99.88,57.71
2,3.0,49909,99.87,57.7
3,4.0,49893,99.83,57.68
4,5.0,49879,99.81,57.67
5,6.0,49856,99.76,57.64
6,7.0,49823,99.69,57.6
7,8.0,49787,99.62,57.56
8,9.0,49742,99.53,57.51
9,10.0,49685,99.42,57.44


_______________________________________

SITE 26 [Sweden]

Total amount of existing customers:  97457
Renewals:  26483
First SP:  70974

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,24555,92.72,25.2
1,2.0,24044,90.79,24.67
2,3.0,23361,88.21,23.97
3,4.0,22675,85.62,23.27
4,5.0,21865,82.56,22.44
5,6.0,20869,78.8,21.41
6,7.0,19796,74.75,20.31
7,8.0,18644,70.4,19.13
8,9.0,17403,65.71,17.86
9,10.0,16142,60.95,16.56



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,64205,90.46,65.88
1,2.0,62879,88.59,64.52
2,3.0,61352,86.44,62.95
3,4.0,59511,83.85,61.06
4,5.0,57465,80.97,58.96
5,6.0,55234,77.82,56.68
6,7.0,52812,74.41,54.19
7,8.0,50117,70.61,51.42
8,9.0,47312,66.66,48.55
9,10.0,44374,62.52,45.53


_______________________________________

SITE 28 [Portugal]

Total amount of existing customers:  17655
Renewals:  4048
First SP:  13607

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,3100,76.58,17.56
1,2.0,2939,72.6,16.65
2,3.0,2756,68.08,15.61
3,4.0,2548,62.94,14.43
4,5.0,2325,57.44,13.17
5,6.0,2123,52.45,12.02
6,7.0,1901,46.96,10.77
7,8.0,1654,40.86,9.37
8,9.0,1436,35.47,8.13
9,10.0,1284,31.72,7.27



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,11142,81.88,63.11
1,2.0,10691,78.57,60.56
2,3.0,10140,74.52,57.43
3,4.0,9522,69.98,53.93
4,5.0,8890,65.33,50.35
5,6.0,8238,60.54,46.66
6,7.0,7550,55.49,42.76
7,8.0,6834,50.22,38.71
8,9.0,6195,45.53,35.09
9,10.0,5576,40.98,31.58


_______________________________________

SITE 29 [Croatia]

Total amount of existing customers:  13549
Renewals:  4436
First SP:  9113

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,4166,93.91,30.75
1,2.0,4088,92.16,30.17
2,3.0,4000,90.17,29.52
3,4.0,3879,87.44,28.63
4,5.0,3765,84.87,27.79
5,6.0,3628,81.79,26.78
6,7.0,3440,77.55,25.39
7,8.0,3252,73.31,24.0
8,9.0,3064,69.07,22.61
9,10.0,2871,64.72,21.19



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,8432,92.53,62.23
1,2.0,8261,90.65,60.97
2,3.0,8054,88.38,59.44
3,4.0,7767,85.23,57.33
4,5.0,7470,81.97,55.13
5,6.0,7126,78.2,52.59
6,7.0,6762,74.2,49.91
7,8.0,6390,70.12,47.16
8,9.0,5985,65.68,44.17
9,10.0,5530,60.68,40.81


_______________________________________

SITE 30 [Bulgaria]

Total amount of existing customers:  8245
Renewals:  2746
First SP:  5499

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,2562,93.3,31.07
1,2.0,2514,91.55,30.49
2,3.0,2456,89.44,29.79
3,4.0,2381,86.71,28.88
4,5.0,2305,83.94,27.96
5,6.0,2211,80.52,26.82
6,7.0,2110,76.84,25.59
7,8.0,1980,72.1,24.01
8,9.0,1844,67.15,22.37
9,10.0,1704,62.05,20.67



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,4975,90.47,60.34
1,2.0,4850,88.2,58.82
2,3.0,4714,85.72,57.17
3,4.0,4566,83.03,55.38
4,5.0,4386,79.76,53.2
5,6.0,4166,75.76,50.53
6,7.0,3914,71.18,47.47
7,8.0,3645,66.28,44.21
8,9.0,3364,61.17,40.8
9,10.0,3111,56.57,37.73


_______________________________________

SITE 31 [Norway]

Total amount of existing customers:  13938
Renewals:  3105
First SP:  10833

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,2641,85.06,18.95
1,2.0,2590,83.41,18.58
2,3.0,2531,81.51,18.16
3,4.0,2463,79.32,17.67
4,5.0,2389,76.94,17.14
5,6.0,2314,74.52,16.6
6,7.0,2232,71.88,16.01
7,8.0,2146,69.11,15.4
8,9.0,2059,66.31,14.77
9,10.0,1944,62.61,13.95



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,8505,78.51,61.02
1,2.0,8294,76.56,59.51
2,3.0,8049,74.3,57.75
3,4.0,7815,72.14,56.07
4,5.0,7519,69.41,53.95
5,6.0,7271,67.12,52.17
6,7.0,6979,64.42,50.07
7,8.0,6702,61.87,48.08
8,9.0,6391,59.0,45.85
9,10.0,6048,55.83,43.39


_______________________________________

SITE 32 [Greece]

Total amount of existing customers:  3472
Renewals:  579
First SP:  2893

Renewals Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_RENEW,%_TOTAL
0,1.0,207,35.75,5.96
1,2.0,192,33.16,5.53
2,3.0,177,30.57,5.1
3,4.0,167,28.84,4.81
4,5.0,152,26.25,4.38
5,6.0,140,24.18,4.03
6,7.0,132,22.8,3.8
7,8.0,123,21.24,3.54
8,9.0,104,17.96,3.0
9,10.0,92,15.89,2.65



First SP Table


Unnamed: 0,THRESHOLD_%,ELIG_CUS,%_FIRST_SP,%_TOTAL
0,1.0,922,31.87,26.56
1,2.0,841,29.07,24.22
2,3.0,764,26.41,22.0
3,4.0,715,24.71,20.59
4,5.0,642,22.19,18.49
5,6.0,586,20.26,16.88
6,7.0,515,17.8,14.83
7,8.0,452,15.62,13.02
8,9.0,412,14.24,11.87
9,10.0,364,12.58,10.48


_______________________________________



In [69]:
# Re-open the workbook written above with openpyxl so that cells can be edited.
book = load_workbook(out_path)
writer = pd.ExcelWriter(out_path, engine='openpyxl') 
# Hand the loaded workbook and its sheets to the writer, presumably so that
# saving the writer writes the existing sheets back to out_path.
# NOTE(review): newer pandas releases do not allow assigning writer.book /
# writer.sheets — confirm against the installed pandas version.
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

In [74]:
# Write a title above each of the two tables on every site sheet: the renewals
# table starts at column 1 and the first-SP table at column 6 (1-based).
# Only sites named in site_dict and present in the data have a sheet.
present_sites = set(data['cop_pref_o_ext_s_id'].unique())
for i in sorted(site_dict):
    if i not in present_sites:
        continue
    sheet = book[site_dict[i]]
    sheet.cell(row=1, column=1).value = "Renewals Table"
    sheet.cell(row=1, column=6).value = "First SP Table"
# close() saves the workbook and releases the file; ExcelWriter.save() is not
# available in recent pandas releases.
writer.close()

In [None]:
# Calculations above don't correspond to control groups (2)

## Control Groups

Let's get each customer's latest eligibility status:

In [26]:
# Latest row per customer from ELIGIBILITY_CHANGES: rows are numbered per
# EC_C_ID by EC_TIMESTAMP descending and only row number 1 is kept.
query = """
  SELECT *
        FROM(
            SELECT
            EC_C_ID,
            EC_EXT_CTI_ID,
            EC_BOX,
            EC_RULES,
            EC_TIMESTAMP,
            ROW_NUMBER() OVER ( PARTITION BY EC_C_ID ORDER BY EC_TIMESTAMP DESC ) LAST_ELIG
            FROM ELIGIBILITY_CHANGES
        ) q1
        WHERE LAST_ELIG = 1
"""

In [27]:
# Load the result of `query` from the savings-plan Postgres database into `data`.
# Connection settings are read from the environment so that no password is
# stored in the notebook; only the non-secret settings fall back to the values
# that were previously hardcoded here.
conn = psycopg2.connect(
    dbname=os.environ.get('SAVINGS_PLAN_DB_NAME', 'savings_plan_db'),
    user=os.environ.get('SAVINGS_PLAN_DB_USER', 'dba'),
    host=os.environ.get(
        'SAVINGS_PLAN_DB_HOST',
        'savings-plan-prod-c-savings-plan-db-readonly.ccsod6gcmvcs.eu-central-1.rds.amazonaws.com',
    ),
    # Deliberately no default: a missing password fails fast with a KeyError.
    password=os.environ['SAVINGS_PLAN_DB_PASSWORD'],
)
try:
    data = pd.read_sql(query, con=conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

In [28]:
# Summarise the eligibility frame: row count, column dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15871228 entries, 0 to 15871227
Data columns (total 6 columns):
ec_c_id          int64
ec_ext_cti_id    int64
ec_box           object
ec_rules         object
ec_timestamp     datetime64[ns]
last_elig        int64
dtypes: datetime64[ns](1), int64(3), object(2)
memory usage: 726.5+ MB


In [29]:
# First rows of the eligibility frame.
data.head()

Unnamed: 0,ec_c_id,ec_ext_cti_id,ec_box,ec_rules,ec_timestamp,last_elig
0,1,0,Z,A|A|A,2018-07-31,1
1,6,0,Z,A|A|A,2018-07-31,1
2,10,0,Z,A|A|A,2018-07-31,1
3,12,0,Z,A|A|A,2018-07-31,1
4,14,0,Z,A|A|A,2018-07-31,1


In [36]:
# Distinct ec_ext_cti_id values among the latest eligibility rows.
data['ec_ext_cti_id'].unique()

array([   0,    6,    4,    5,   25,   24, 1005, 1004, 1000], dtype=int64)

In [37]:
# Rows with a non-zero ec_ext_cti_id, highest customer id first.
cti_is_nonzero = data['ec_ext_cti_id'] != 0
data[cti_is_nonzero].sort_values(by='ec_c_id', ascending=False)

Unnamed: 0,ec_c_id,ec_ext_cti_id,ec_box,ec_rules,ec_timestamp,last_elig
15871225,20917236,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 09:33:04.092002,1
15871215,20917138,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 09:22:02.394286,1
15871172,20916944,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 08:55:20.495212,1
15871123,20916873,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 08:51:43.639807,1
15871031,20916761,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 08:38:12.516530,1
15871002,20916722,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 09:03:29.363444,1
15870977,20916691,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 08:48:59.604806,1
15870968,20916679,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 08:29:21.801476,1
15870829,20916503,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 08:01:06.829379,1
15870778,20916428,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-07 07:43:55.699754,1


And now, let's see customer_order_profile

In [30]:
# Per-customer rows from customer_order_profile (customer id, site id, order
# count, net total, CM2); rows without a CM2 value are excluded.
query = """SELECT cop_c_id, cop_pref_o_ext_s_id, cop_g_order_count, cop_g_net_total, cop_g_cm2
FROM public.customer_order_profile
where cop_g_cm2 is not null
"""

In [31]:
# Load the result of `query` from the savings-plan Postgres database into `data_c`.
# Connection settings are read from the environment so that no password is
# stored in the notebook; only the non-secret settings fall back to the values
# that were previously hardcoded here.
conn = psycopg2.connect(
    dbname=os.environ.get('SAVINGS_PLAN_DB_NAME', 'savings_plan_db'),
    user=os.environ.get('SAVINGS_PLAN_DB_USER', 'dba'),
    host=os.environ.get(
        'SAVINGS_PLAN_DB_HOST',
        'savings-plan-prod-c-savings-plan-db-readonly.ccsod6gcmvcs.eu-central-1.rds.amazonaws.com',
    ),
    # Deliberately no default: a missing password fails fast with a KeyError.
    password=os.environ['SAVINGS_PLAN_DB_PASSWORD'],
)
try:
    data_c = pd.read_sql(query, con=conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

In [32]:
# Summarise the order-profile frame: row count, column dtypes and memory usage.
data_c.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9233523 entries, 0 to 9233522
Data columns (total 5 columns):
cop_c_id               int64
cop_pref_o_ext_s_id    int64
cop_g_order_count      int64
cop_g_net_total        float64
cop_g_cm2              float64
dtypes: float64(2), int64(3)
memory usage: 352.2 MB


In [33]:
# CM2 expressed as a percentage of the net total, per customer.
# NOTE(review): a net total of 0 yields inf/NaN here — confirm such rows are acceptable.
cm2_share_c = data_c['cop_g_cm2'].div(data_c['cop_g_net_total'])
data_c['CM2%'] = cm2_share_c.mul(100)

In [34]:
# Eyeball ten random rows, including the new CM2% column.
data_c.sample(10)

Unnamed: 0,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%
8475288,14565333,1,2,36.731092,7.20303,19.610171
7317935,12739031,18,1,33.048387,5.906387,17.871937
7765414,13505572,12,2,32.729508,2.921908,8.927442
6451037,10713243,26,2,118.183587,9.722387,8.226512
2862303,19358309,3,3,119.405511,-13.871054,-11.616762
9174577,15487398,12,1,36.778689,12.752686,34.674118
5852281,8644073,1,7,418.970247,57.873476,13.813266
7889954,13712737,21,7,213.385855,45.269045,21.214642
4118647,20731045,3,1,58.76737,3.10437,5.282472
1323702,17275642,4,3,382.216667,22.208669,5.810492


In [38]:
# Inner join on customer id: keeps customers that have both an order-profile
# row and a latest-eligibility row.
cus_data = pd.merge(
    data_c,
    data,
    how='inner',
    left_on='cop_c_id',
    right_on='ec_c_id',
)

In [39]:
# Summarise the joined frame: row count, column dtypes and memory usage.
cus_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9233523 entries, 0 to 9233522
Data columns (total 12 columns):
cop_c_id               int64
cop_pref_o_ext_s_id    int64
cop_g_order_count      int64
cop_g_net_total        float64
cop_g_cm2              float64
CM2%                   float64
ec_c_id                int64
ec_ext_cti_id          int64
ec_box                 object
ec_rules               object
ec_timestamp           datetime64[ns]
last_elig              int64
dtypes: datetime64[ns](1), float64(3), int64(6), object(2)
memory usage: 915.8+ MB


In [40]:
# Eyeball five random rows of the joined frame.
cus_data.sample(5)

Unnamed: 0,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%,ec_c_id,ec_ext_cti_id,ec_box,ec_rules,ec_timestamp,last_elig
6925230,12079910,16,2,62.431726,5.919134,9.480971,12079910,4,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P-|A-,B-,C...",2018-07-31 20:43:44.494025,1
8208484,14195084,4,1,25.816667,6.053424,23.447736,14195084,0,Z,A|A|A,2018-07-31 00:00:00.000000,1
8503934,14602702,7,6,394.371901,9.555551,2.42298,14602702,0,Z,A|A|A,2018-07-31 00:00:00.000000,1
84546,15678463,12,1,34.278689,8.213868,23.962025,15678463,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 21:30:57.278006,1
8769420,14951214,4,3,129.133333,33.229875,25.732996,14951214,5,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P+|A-,B-,C...",2018-07-31 21:18:58.353015,1


In [42]:
# Keep the customers whose `ec_ext_cti_id` is not 0, ordered by descending `ec_c_id`.
sp_eligible = (
    cus_data
    .loc[cus_data['ec_ext_cti_id'].ne(0)]
    .sort_values(by='ec_c_id', ascending=False)
)

In [44]:
# Spot-check 10 random rows of the filtered frame; unseeded, so the rows differ between runs.
sp_eligible.sample(10)

Unnamed: 0,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%,ec_c_id,ec_ext_cti_id,ec_box,ec_rules,ec_timestamp,last_elig
8464733,14551518,16,3,81.083583,22.893852,28.23488,14551518,5,B3,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P+|A-,B-,C...",2018-07-31 21:08:40.905820,1
9015891,15276630,1,1,48.200581,7.919581,16.430468,15276630,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 21:23:23.074515,1
6488088,10836674,4,2,80.466667,20.51563,25.495812,10836674,5,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P+|A-,B-,C...",2018-07-31 20:35:16.154565,1
609958,16360111,1,4,252.273332,51.907829,20.576027,16360111,4,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P-|A-,B-,C...",2018-07-31 21:42:16.372069,1
3681312,20510841,1,2,148.3899,18.0669,12.17529,20510841,4,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P-|A-,B-,C...",2018-08-03 21:59:29.695811,1
736710,16522974,12,8,643.672112,134.445031,20.887192,16522974,5,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P+|A-,B-,C...",2018-07-31 21:45:16.770597,1
5593467,7644160,7,2,175.46281,36.549643,20.830422,7644160,5,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P+|A-,B-,C...",2018-07-31 20:06:08.522422,1
7163151,12478591,7,7,355.024794,34.021544,9.582864,12478591,4,B4,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P-|A-,B-,C...",2018-07-31 20:51:08.094775,1
4920318,4442284,1,1,22.680672,8.847353,39.008337,4442284,6,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 19:34:10.747477,1
4672019,3028850,4,3,179.933367,16.633407,9.244204,3028850,4,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P-|A-,B-,C...",2018-07-31 19:22:34.531016,1


In [46]:
# Number of `sp_eligible` rows per calendar date of `ec_timestamp`,
# restricted to timestamps from 2018-07-01 onwards.
(
    sp_eligible
    .loc[sp_eligible['ec_timestamp'] >= '2018-07-01']
    .pipe(lambda recent: recent.groupby(recent['ec_timestamp'].dt.date)['cop_c_id'].count())
)

ec_timestamp
2018-07-31    4012120
2018-08-01      13039
2018-08-02      17352
2018-08-03      13726
2018-08-04       5532
2018-08-05       7875
2018-08-06      13552
2018-08-07       6655
Name: cop_c_id, dtype: int64

Finally, let's locate the customer accounts that were registered during the last month (from 1 July 2018 onwards).

In [63]:
# SQL for registered customer accounts (c_registered_account = True) whose
# registration date is on or after 1 July 2018.
# NOTE: this rebinds the name `query`, replacing the SQL string used by earlier cells.
query = """SELECT c_id, c_registered_account, c_registration_ext_s_id, c_registration_date
FROM public.customers
where c_registered_account = True
and c_registration_date >= to_date('01/07/18', 'DD/MM/YY')
"""

In [64]:
# Run `query` against the savings-plan read-only database and load the result
# into `data_r`.
# The password is read from the environment instead of being stored in the
# notebook: export SAVINGS_PLAN_DB_PASSWORD before running this cell.
conn = psycopg2.connect(
    dbname='savings_plan_db',
    user='dba',
    host='savings-plan-prod-c-savings-plan-db-readonly.ccsod6gcmvcs.eu-central-1.rds.amazonaws.com',
    password=os.environ['SAVINGS_PLAN_DB_PASSWORD'],
)
try:
    # pd.read_sql drives the connection itself, so no explicit cursor is needed.
    data_r = pd.read_sql(query, con=conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

In [65]:
# Inspect row count, null counts and dtypes of the registration data.
data_r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259791 entries, 0 to 259790
Data columns (total 4 columns):
c_id                       259791 non-null int64
c_registered_account       259791 non-null bool
c_registration_ext_s_id    259791 non-null int64
c_registration_date        259791 non-null object
dtypes: bool(1), int64(2), object(1)
memory usage: 6.2+ MB


In [66]:
# Spot-check 5 random registrations; unseeded, so the rows differ between runs.
data_r.sample(5)

Unnamed: 0,c_id,c_registered_account,c_registration_ext_s_id,c_registration_date
41679,20859924,True,4,2018-07-31
152118,20640522,True,8,2018-07-04
127180,20683945,True,1,2018-07-09
207927,20667556,True,12,2018-07-08
81347,20822283,True,25,2018-07-26


In [67]:
# Sanity check of the date filter: earliest registration date in the result.
data_r['c_registration_date'].min()

datetime.date(2018, 7, 1)

In [68]:
# Sanity check of the account filter: distinct values of `c_registered_account`.
data_r['c_registered_account'].unique()

array([ True])

In [69]:
# Left-join so every registration is kept; registrations with no match in
# `cus_data` get missing values in the `cop_*` / `ec_*` columns.
reg_cus_data = pd.merge(
    data_r,
    cus_data,
    how='left',
    left_on='c_id',
    right_on='cop_c_id',
)

In [70]:
# The non-null counts show how many registrations found a match in `cus_data`.
reg_cus_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 259791 entries, 0 to 259790
Data columns (total 16 columns):
c_id                       259791 non-null int64
c_registered_account       259791 non-null bool
c_registration_ext_s_id    259791 non-null int64
c_registration_date        259791 non-null object
cop_c_id                   201621 non-null float64
cop_pref_o_ext_s_id        201621 non-null float64
cop_g_order_count          201621 non-null float64
cop_g_net_total            201621 non-null float64
cop_g_cm2                  201621 non-null float64
CM2%                       201621 non-null float64
ec_c_id                    201621 non-null float64
ec_ext_cti_id              201621 non-null float64
ec_box                     201621 non-null object
ec_rules                   201621 non-null object
ec_timestamp               201621 non-null datetime64[ns]
last_elig                  201621 non-null float64
dtypes: bool(1), datetime64[ns](1), float64(9), int64(2), object(3)
memory u

In [72]:
# Spot-check 10 random rows, rounded to 2 decimals; unseeded, so the rows differ between runs.
reg_cus_data.sample(10).round(2)

Unnamed: 0,c_id,c_registered_account,c_registration_ext_s_id,c_registration_date,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%,ec_c_id,ec_ext_cti_id,ec_box,ec_rules,ec_timestamp,last_elig
105508,20897845,True,23,2018-08-05,,,,,,,,,,,NaT,
94208,20618551,True,3,2018-07-02,20618551.0,3.0,1.0,27.93,-0.49,-1.76,20618551.0,0.0,Z,A|A|A,2018-07-31 00:00:00.000000,1.0
248987,20767736,True,21,2018-07-20,,,,,,,,,,,NaT,
167921,20901166,True,1,2018-08-05,20901166.0,1.0,1.0,42.44,6.52,15.36,20901166.0,0.0,Z,"A+,B-,C-,D+,E-,G-,H-,I-,J-,K-|A+|A+",2018-08-05 16:02:04.696712,1.0
85668,20833066,True,4,2018-07-28,,,,,,,,,,,NaT,
197002,20885596,True,1,2018-08-03,20885596.0,1.0,1.0,50.46,9.05,17.95,20885596.0,6.0,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-08-06 09:10:47.385894,1.0
63920,20822013,True,3,2018-07-26,20822013.0,3.0,1.0,29.0,-2.24,-7.72,20822013.0,0.0,Z,A|A|A,2018-07-31 00:00:00.000000,1.0
128959,20637632,True,12,2018-07-04,20637632.0,12.0,1.0,33.08,0.75,2.28,20637632.0,0.0,Z,A|A|A,2018-07-31 00:00:00.000000,1.0
222351,20845696,True,1,2018-07-30,,,,,,,,,,,NaT,
30215,20914125,True,15,2018-08-06,,,,,,,,,,,NaT,


In [86]:
# The 25 lowest registration site ids, in ascending order.
zp_countries = np.unique(reg_cus_data['c_registration_ext_s_id'].values)[:25]

In [92]:
# Show the selected site ids.
zp_countries

array([ 1,  2,  3,  4,  7,  8, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23,
       24, 25, 26, 28, 29, 30, 31, 32], dtype=int64)

In [87]:
# Restrict the registrations to the site ids listed in `zp_countries`.
zp_reg_cus_data = reg_cus_data.loc[
    reg_cus_data['c_registration_ext_s_id'].isin(zp_countries)
]

In [83]:
import matplotlib.pyplot as plt
from IPython.display import display

In [160]:
# Print how many rows of `reg_cus_data` belong to each of the 25 lowest
# registration site ids.
for site_id in np.unique(reg_cus_data['c_registration_ext_s_id'].values)[:25]:
    rows_for_site = reg_cus_data['c_registration_ext_s_id'].eq(site_id).sum()
    print('SITE {}: {}'.format(site_id, rows_for_site))

SITE 1: 47068
SITE 2: 418
SITE 3: 15353
SITE 4: 33693
SITE 7: 19711
SITE 8: 21647
SITE 11: 1249
SITE 12: 15027
SITE 14: 3774
SITE 15: 10428
SITE 16: 5633
SITE 18: 4317
SITE 19: 1840
SITE 20: 292
SITE 21: 6150
SITE 22: 4152
SITE 23: 1367
SITE 24: 2979
SITE 25: 5598
SITE 26: 9013
SITE 28: 2413
SITE 29: 858
SITE 30: 997
SITE 31: 1727
SITE 32: 1107


In [97]:
# Column labels for the per-day tables: 'DATE' followed by the first 25 sorted
# values of `cop_pref_o_ext_s_id` (np.sort places NaN last, so the slice keeps
# the lowest ids).
pref_site_ids = np.sort(zp_reg_cus_data['cop_pref_o_ext_s_id'].unique())[:25]
cols = ['DATE'] + list(pref_site_ids)

In [100]:
# Empty table with one column per label in `cols`; rows are added by later cells.
daily_reg_per_site = pd.DataFrame(columns = cols)

Unnamed: 0,DATE,1.0,2.0,3.0,4.0,7.0,8.0,11.0,12.0,14.0,...,22.0,23.0,24.0,25.0,26.0,28.0,29.0,30.0,31.0,32.0


In [101]:
# One row per distinct registration date, in ascending order.
daily_reg_per_site['DATE'] = np.sort(zp_reg_cus_data['c_registration_date'].unique())

In [106]:
# Use the date as the row index. Not re-runnable on its own: once 'DATE' has
# become the index it is no longer a column, so a second run raises KeyError.
daily_reg_per_site = daily_reg_per_site.set_index('DATE')

In [117]:
# Fill the date x site table with the number of distinct customers per day.
# NOTE(review): the site ids iterated over come from `c_registration_ext_s_id`,
# but the rows are selected on `cop_pref_o_ext_s_id` — confirm this is intended.
for site_id in np.sort(zp_reg_cus_data['c_registration_ext_s_id'].unique()):
    site_rows = zp_reg_cus_data[zp_reg_cus_data['cop_pref_o_ext_s_id'] == site_id]
    daily_customers = site_rows.groupby('c_registration_date')['c_id'].nunique()
    for reg_date, n_customers in zip(daily_customers.index, daily_customers.values):
        daily_reg_per_site.loc[reg_date, site_id] = n_customers

In [214]:
# Remove the last date row (presumably an incomplete day — TODO confirm).
# Not idempotent: every re-run of this cell removes one more day.
daily_reg_per_site = daily_reg_per_site.drop(index=daily_reg_per_site.index[-1:])

In [248]:
# Display the filled table (rows: registration date, columns: site id).
daily_reg_per_site

Unnamed: 0_level_0,1.0,2.0,3.0,4.0,7.0,8.0,11.0,12.0,14.0,15.0,...,22.0,23.0,24.0,25.0,26.0,28.0,29.0,30.0,31.0,32.0
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-07-01,1396,12.0,389,892,540,540,36,339,92,247,...,89,40.0,42,164,193,51,18,8.0,42,20
2018-07-02,1355,5.0,387,964,580,726,49,527,88,345,...,130,45.0,68,171,230,70,29,16.0,44,18
2018-07-03,1260,5.0,352,918,562,636,28,457,111,337,...,138,33.0,70,138,200,46,33,22.0,56,20
2018-07-04,1137,3.0,336,860,525,649,29,460,114,315,...,122,39.0,56,124,194,64,27,19.0,43,24
2018-07-05,1062,5.0,326,813,495,532,35,397,101,268,...,130,29.0,48,133,164,34,22,19.0,51,22
2018-07-06,895,9.0,282,637,374,405,27,326,60,206,...,103,24.0,31,115,156,47,19,10.0,32,17
2018-07-07,824,7.0,240,647,370,303,20,275,86,185,...,69,28.0,39,99,132,45,15,16.0,38,15
2018-07-08,1155,6.0,320,818,505,549,24,336,100,243,...,123,38.0,44,164,182,41,20,11.0,35,12
2018-07-09,1311,7.0,344,982,662,798,38,474,98,317,...,152,29.0,70,141,245,66,21,21.0,46,35
2018-07-10,1250,10.0,335,825,566,804,32,445,94,323,...,153,36.0,69,156,163,55,22,27.0,42,18


In [215]:
# Total customers counted per day across all site columns; empty cells count as 0.
(
    daily_reg_per_site
    .fillna(0)
    .apply(sum, axis='columns')
)

DATE
2018-07-01    5610
2018-07-02    6342
2018-07-03    5896
2018-07-04    5542
2018-07-05    5048
2018-07-06    4128
2018-07-07    3742
2018-07-08    5204
2018-07-09    6407
2018-07-10    5955
2018-07-11    5775
2018-07-12    5017
2018-07-13    4169
2018-07-14    3626
2018-07-15    4811
2018-07-16    5932
2018-07-17    5761
2018-07-18    5217
2018-07-19    4776
2018-07-20    4072
2018-07-21    3661
2018-07-22    5043
2018-07-23    5930
2018-07-24    5486
2018-07-25    5095
2018-07-26    4479
2018-07-27    1006
2018-07-28     434
2018-07-29     821
2018-07-30    1804
2018-07-31    5702
2018-08-01    5850
2018-08-02    5142
2018-08-03    4274
2018-08-04    3589
2018-08-05    4196
2018-08-06    3022
dtype: int64

In [216]:
# Share (%) of each day's customers per site: every row is divided by its own
# total, scaled to percent and rounded to 2 decimals.
reg_rel_values_per_site = (
    daily_reg_per_site
    .apply(lambda day_counts: day_counts / day_counts.sum() * 100, axis='columns')
    .round(2)
)

In [217]:
# Daily share (%) per site for the first 12 site columns.
# Reuses `reg_rel_values_per_site`, which already holds this exact table,
# instead of recomputing the row-wise percentages a second time.
reg_rel_values_per_site[daily_reg_per_site.columns[:12]]

Unnamed: 0_level_0,1.0,2.0,3.0,4.0,7.0,8.0,11.0,12.0,14.0,15.0,16.0,18.0
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-07-01,24.88,0.21,6.93,15.9,9.63,9.63,0.64,6.04,1.64,4.4,2.6,1.89
2018-07-02,21.37,0.08,6.1,15.2,9.15,11.45,0.77,8.31,1.39,5.44,2.33,1.92
2018-07-03,21.37,0.08,5.97,15.57,9.53,10.79,0.47,7.75,1.88,5.72,1.95,2.14
2018-07-04,20.52,0.05,6.06,15.52,9.47,11.71,0.52,8.3,2.06,5.68,1.79,1.88
2018-07-05,21.04,0.1,6.46,16.11,9.81,10.54,0.69,7.86,2.0,5.31,1.66,1.6
2018-07-06,21.68,0.22,6.83,15.43,9.06,9.81,0.65,7.9,1.45,4.99,1.79,2.52
2018-07-07,22.02,0.19,6.41,17.29,9.89,8.1,0.53,7.35,2.3,4.94,2.19,1.87
2018-07-08,22.19,0.12,6.15,15.72,9.7,10.55,0.46,6.46,1.92,4.67,3.19,1.98
2018-07-09,20.46,0.11,5.37,15.33,10.33,12.46,0.59,7.4,1.53,4.95,3.48,1.51
2018-07-10,20.99,0.17,5.63,13.85,9.5,13.5,0.54,7.47,1.58,5.42,3.19,1.78


In [218]:
# Daily share (%) per site for the site columns after the first 12.
# Reuses `reg_rel_values_per_site`, which already holds this exact table,
# instead of recomputing the row-wise percentages a third time.
reg_rel_values_per_site[daily_reg_per_site.columns[12:]]

Unnamed: 0_level_0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,28.0,29.0,30.0,31.0,32.0
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-07-01,0.59,0.05,3.07,1.59,0.71,0.75,2.92,3.44,0.91,0.32,0.14,0.75,0.36
2018-07-02,0.6,0.08,2.87,2.05,0.71,1.07,2.7,3.63,1.1,0.46,0.25,0.69,0.28
2018-07-03,0.71,0.12,3.12,2.34,0.56,1.19,2.34,3.39,0.78,0.56,0.37,0.95,0.34
2018-07-04,0.65,0.13,2.81,2.2,0.7,1.01,2.24,3.5,1.15,0.49,0.34,0.78,0.43
2018-07-05,0.61,0.34,2.95,2.58,0.57,0.95,2.63,3.25,0.67,0.44,0.38,1.01,0.44
2018-07-06,0.97,0.29,2.98,2.5,0.58,0.75,2.79,3.78,1.14,0.46,0.24,0.78,0.41
2018-07-07,0.75,0.08,2.83,1.84,0.75,1.04,2.65,3.53,1.2,0.4,0.43,1.02,0.4
2018-07-08,0.94,0.15,2.92,2.36,0.73,0.85,3.15,3.5,0.79,0.38,0.21,0.67,0.23
2018-07-09,0.83,0.09,2.67,2.37,0.45,1.09,2.2,3.82,1.03,0.33,0.33,0.72,0.55
2018-07-10,1.02,0.13,2.77,2.57,0.6,1.16,2.62,2.74,0.92,0.37,0.45,0.71,0.3


In [247]:
# Median daily share (%) of registered customers per site (the '50%' row of describe()).
reg_rel_values_per_site.describe().T['50%'].round(2)

1.0     21.37
2.0      0.12
3.0      6.93
4.0     15.24
7.0      9.63
8.0     10.87
11.0     0.61
12.0     7.14
14.0     1.65
15.0     4.74
16.0     2.95
18.0     1.92
19.0     0.82
20.0     0.13
21.0     2.95
22.0     1.96
23.0     0.70
24.0     1.04
25.0     2.62
26.0     3.53
28.0     1.03
29.0     0.44
30.0     0.32
31.0     0.78
32.0     0.40
Name: 50%, dtype: float64

In [220]:
# First 10 rows of the site-filtered registrations; rows without a match in
# `cus_data` show NaN / NaT in the joined columns.
zp_reg_cus_data.head(10)

Unnamed: 0,c_id,c_registered_account,c_registration_ext_s_id,c_registration_date,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%,ec_c_id,ec_ext_cti_id,ec_box,ec_rules,ec_timestamp,last_elig
1,20687498,True,8,2018-07-10,20687498.0,8.0,2.0,32.837283,5.535283,16.856703,20687498.0,5.0,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P+|A-,B-,C...",2018-07-31 22:35:14.730085,1.0
2,20718112,True,3,2018-07-14,,,,,,,,,,,NaT,
6,20767855,True,22,2018-07-20,,,,,,,,,,,NaT,
7,20768144,True,15,2018-07-20,,,,,,,,,,,NaT,
8,20785910,True,1,2018-07-22,20785910.0,1.0,1.0,27.9041,7.7709,27.848596,20785910.0,6.0,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 22:30:56.744814,1.0
9,20785857,True,4,2018-07-22,20785857.0,4.0,1.0,48.5917,-1.8417,-3.790153,20785857.0,0.0,Z,A|A|A,2018-07-31 00:00:00.000000,1.0
10,20785978,True,8,2018-07-22,20785978.0,8.0,1.0,53.498857,2.970357,5.552188,20785978.0,6.0,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 22:31:17.756786,1.0
11,20786040,True,4,2018-07-22,20786040.0,4.0,1.0,49.075,1.7064,3.477127,20786040.0,6.0,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 22:31:36.575233,1.0
12,20786134,True,1,2018-07-22,20786134.0,1.0,1.0,17.3271,5.0813,29.325738,20786134.0,6.0,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 22:32:23.467107,1.0
13,20786133,True,14,2018-07-22,20786133.0,14.0,1.0,33.7521,-0.1879,-0.556706,20786133.0,6.0,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 22:34:16.884711,1.0


In [221]:
# List the available column names.
zp_reg_cus_data.columns

Index(['c_id', 'c_registered_account', 'c_registration_ext_s_id',
       'c_registration_date', 'cop_c_id', 'cop_pref_o_ext_s_id',
       'cop_g_order_count', 'cop_g_net_total', 'cop_g_cm2', 'CM2%', 'ec_c_id',
       'ec_ext_cti_id', 'ec_box', 'ec_rules', 'ec_timestamp', 'last_elig'],
      dtype='object')

In [222]:
# Distinct `ec_ext_cti_id` values, including NaN for rows without a match in `cus_data`.
zp_reg_cus_data['ec_ext_cti_id'].unique()

array([   5.,   nan,    6.,    0.,    4., 1000.,   24.,   25., 1004.,
       1005.])

In [223]:
# Number of registrations kept after the site filter.
len(zp_reg_cus_data)

216811

In [224]:
#len 168601 with eligibility status
# Keep only the rows that have no missing value in any column.
# dropna() (defaults: axis=0, how='any') selects the same rows as the previous
# row-wise `apply(lambda x: x.isnull().any(), axis=1)` mask, without calling a
# Python function once per row.
zp_reg_cus_data_not_null = zp_reg_cus_data.dropna()

In [225]:
# Number of registrations with complete data in every column.
len(zp_reg_cus_data_not_null)

168601

In [226]:
# Complete rows whose `ec_ext_cti_id` is different from 0.
zp_reg_cus_sp_elig = zp_reg_cus_data_not_null.loc[
    zp_reg_cus_data_not_null['ec_ext_cti_id'].ne(0)
]

In [227]:
# Number of rows per non-zero `ec_ext_cti_id` value.
zp_reg_cus_sp_elig['ec_ext_cti_id'].value_counts()

6.0       67657
5.0        5573
4.0        5479
24.0        208
1000.0      196
25.0         74
1004.0       63
1005.0       10
Name: ec_ext_cti_id, dtype: int64

In [228]:
# First 5 rows of the site-filtered registrations.
zp_reg_cus_data.head()

Unnamed: 0,c_id,c_registered_account,c_registration_ext_s_id,c_registration_date,cop_c_id,cop_pref_o_ext_s_id,cop_g_order_count,cop_g_net_total,cop_g_cm2,CM2%,ec_c_id,ec_ext_cti_id,ec_box,ec_rules,ec_timestamp,last_elig
1,20687498,True,8,2018-07-10,20687498.0,8.0,2.0,32.837283,5.535283,16.856703,20687498.0,5.0,A,"A+,B-,C-,D+,E-,G-,H-,I+,L-,M-,N-,O+,P+|A-,B-,C...",2018-07-31 22:35:14.730085,1.0
2,20718112,True,3,2018-07-14,,,,,,,,,,,NaT,
6,20767855,True,22,2018-07-20,,,,,,,,,,,NaT,
7,20768144,True,15,2018-07-20,,,,,,,,,,,NaT,
8,20785910,True,1,2018-07-22,20785910.0,1.0,1.0,27.9041,7.7709,27.848596,20785910.0,6.0,N,"A+,B-,C-,D+,E-,G-,H-,I-,J+|A-,B-,C-,D+,E+|A-,B...",2018-07-31 22:30:56.744814,1.0


In [229]:
# `ec_ext_cti_id` values counted as eligible in the per-site loop below
# (presumably the savings-plan tier ids — TODO confirm); 0 is not included.
sp_tiers = [4, 5, 6, 24, 25, 1000, 1004, 1005]

In [237]:
# Empty table with one column per label in `cols`; rows are added by later cells.
daily_elig_per_site = pd.DataFrame(columns = cols)

In [238]:
# One row per distinct registration date, in ascending order.
daily_elig_per_site['DATE'] = np.sort(zp_reg_cus_data['c_registration_date'].unique())

In [239]:
# Use the date as the row index. Not re-runnable on its own: once 'DATE' has
# become the index it is no longer a column, so a second run raises KeyError.
daily_elig_per_site = daily_elig_per_site.set_index('DATE')

In [240]:
# For every registration site and day, store the percentage of distinct
# registered customers whose `ec_ext_cti_id` is one of `sp_tiers`.
for site_id in np.sort(zp_reg_cus_data['c_registration_ext_s_id'].unique()):
    site_rows = zp_reg_cus_data[zp_reg_cus_data['c_registration_ext_s_id'] == site_id]
    eligible_rows = site_rows[site_rows['ec_ext_cti_id'].isin(sp_tiers)]
    eligible_per_day = eligible_rows.groupby('c_registration_date')['c_id'].nunique()
    registered_per_day = site_rows.groupby('c_registration_date')['c_id'].nunique()
    # Days without any eligible customer are missing from the numerator and
    # therefore come out as NaN after the index-aligned division.
    eligible_share = eligible_per_day / registered_per_day * 100
    for reg_date, share in zip(eligible_share.index, eligible_share.values):
        daily_elig_per_site.loc[reg_date, site_id] = float(share)

In [241]:
# Remove the last date row (presumably an incomplete day — TODO confirm).
# Not idempotent: every re-run of this cell removes one more day.
daily_elig_per_site = daily_elig_per_site.drop(index=daily_elig_per_site.index[-1:])

In [242]:
# Display the eligible share (%) per day and site; missing cells are shown as 0.
daily_elig_per_site.fillna(0).round(2)

Unnamed: 0_level_0,1.0,2.0,3.0,4.0,7.0,8.0,11.0,12.0,14.0,15.0,...,22.0,23.0,24.0,25.0,26.0,28.0,29.0,30.0,31.0,32.0
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-07-01,64.92,35.0,24.0,34.53,40.11,36.93,38.46,36.51,51.0,38.06,...,17.53,38.1,29.03,77.84,20.76,43.55,42.11,31.25,31.25,47.83
2018-07-02,65.69,55.56,23.06,37.66,43.08,39.37,46.15,38.95,37.96,45.58,...,17.86,39.22,27.27,81.38,16.96,45.78,41.94,26.67,34.0,44.0
2018-07-03,63.96,27.27,23.92,40.27,43.78,38.91,48.28,38.31,45.08,36.66,...,20.13,52.78,25.0,74.03,15.21,36.67,21.62,17.65,26.67,35.71
2018-07-04,65.41,0.0,16.84,34.93,40.6,36.83,43.33,35.88,40.88,41.38,...,26.62,43.9,24.14,73.29,21.22,44.0,25.93,18.18,30.61,35.14
2018-07-05,66.72,44.44,20.45,36.51,42.4,35.48,43.59,34.92,45.54,34.55,...,13.61,40.62,20.0,79.47,17.89,30.0,41.67,9.68,44.07,48.39
2018-07-06,61.38,31.25,17.67,36.23,39.56,37.62,45.16,34.08,48.53,39.48,...,21.1,46.15,8.0,68.18,17.95,40.32,45.45,9.09,27.27,50.0
2018-07-07,67.34,60.0,17.28,35.97,45.14,39.06,59.09,33.33,35.92,37.44,...,14.81,48.28,21.67,69.57,12.64,50.0,35.29,17.39,28.26,47.83
2018-07-08,65.41,25.0,18.52,37.68,42.81,42.36,42.31,38.78,40.74,38.06,...,22.05,35.9,18.18,75.28,19.75,39.58,23.81,22.22,31.71,30.43
2018-07-09,64.21,44.44,21.22,36.84,45.26,39.29,48.72,40.65,51.28,38.31,...,19.64,22.58,21.1,80.77,19.43,57.69,23.81,9.68,49.06,62.5
2018-07-10,64.62,45.45,22.83,38.37,44.72,42.0,37.14,40.41,52.88,38.23,...,16.46,47.37,20.0,74.86,22.17,52.86,44.0,31.43,30.77,40.0


### Registered customers per day

In [249]:
# Total customers counted per day across all site columns; empty cells count as 0.
(
    daily_reg_per_site
    .fillna(0)
    .apply(sum, axis='columns')
)

DATE
2018-07-01    5610
2018-07-02    6342
2018-07-03    5896
2018-07-04    5542
2018-07-05    5048
2018-07-06    4128
2018-07-07    3742
2018-07-08    5204
2018-07-09    6407
2018-07-10    5955
2018-07-11    5775
2018-07-12    5017
2018-07-13    4169
2018-07-14    3626
2018-07-15    4811
2018-07-16    5932
2018-07-17    5761
2018-07-18    5217
2018-07-19    4776
2018-07-20    4072
2018-07-21    3661
2018-07-22    5043
2018-07-23    5930
2018-07-24    5486
2018-07-25    5095
2018-07-26    4479
2018-07-27    1006
2018-07-28     434
2018-07-29     821
2018-07-30    1804
2018-07-31    5702
2018-08-01    5850
2018-08-02    5142
2018-08-03    4274
2018-08-04    3589
2018-08-05    4196
2018-08-06    3022
dtype: int64

### Median values of how registered customers are distributed per site

In [250]:
# The index of the result holds the shop ids; the values are the medians
# (the '50%' row of describe()) of the daily share per shop.
reg_rel_values_per_site.describe().T['50%'].round(2)

1.0     21.37
2.0      0.12
3.0      6.93
4.0     15.24
7.0      9.63
8.0     10.87
11.0     0.61
12.0     7.14
14.0     1.65
15.0     4.74
16.0     2.95
18.0     1.92
19.0     0.82
20.0     0.13
21.0     2.95
22.0     1.96
23.0     0.70
24.0     1.04
25.0     2.62
26.0     3.53
28.0     1.03
29.0     0.44
30.0     0.32
31.0     0.78
32.0     0.40
Name: 50%, dtype: float64

### Median values per site in terms of eligible customers

In [244]:
# The index of the result holds the shop ids; the values are the medians
# (the '50%' row of describe()) of the daily eligible share, with missing
# cells counted as 0.
daily_elig_per_site.fillna(0).round(2).describe().T['50%'].round(2)

1.0     64.65
2.0     36.84
3.0     17.90
4.0     35.21
7.0     40.60
8.0     36.29
11.0    45.00
12.0    34.92
14.0    39.53
15.0    36.84
16.0    40.00
18.0    32.00
19.0    16.22
20.0    60.00
21.0    25.37
22.0    17.14
23.0    34.69
24.0    16.51
25.0    71.72
26.0    16.25
28.0    43.68
29.0    27.27
30.0    16.00
31.0    31.25
32.0    45.71
Name: 50%, dtype: float64

In [284]:
# Empty result table with one column per site label (`cols` without 'DATE').
df = pd.DataFrame(columns=cols[1:])

In [285]:
# Approximate the eligible customers per day and site as
#   daily total * median site share (%) / 100 * median eligible share (%) / 100.
# The three inputs are the same for every day, so they are computed once and
# combined by broadcasting instead of recomputing the medians and appending
# one row per day with pd.concat.
# `df` is rebuilt from scratch here, so re-running the cell no longer appends
# a second copy of the rows.
daily_totals = np.asarray(
    daily_reg_per_site.fillna(0).apply(sum, axis=1).values, dtype=float
)
median_site_share = reg_rel_values_per_site.describe().loc['50%'].round(2).values
median_elig_share = (
    daily_elig_per_site.fillna(0).round(2).describe().loc['50%'].round(2).values
)
# Same order of operations as the original per-row arithmetic.
approx_elig = (median_site_share / 100)[np.newaxis, :] * daily_totals[:, np.newaxis]
approx_elig = approx_elig * median_elig_share / 100
df = pd.DataFrame(approx_elig, columns=cols[1:])

In [293]:
# Label the rows with the dates of `daily_reg_per_site`; the row-wise sum used
# before only served to obtain this same index.
df = df.set_index([daily_reg_per_site.index])

In [295]:
# Append a 'SUM_ELIG_CUS' row holding the column totals over all dates.
# An existing 'SUM_ELIG_CUS' row is excluded first, so re-running the cell does
# not fold the previous total into the new one.
df.loc['SUM_ELIG_CUS'] = (
    df.drop(index='SUM_ELIG_CUS', errors='ignore').apply(sum, axis=0)
)

### Approximation of the number of customers being eligible for any SP per day and site (column: shop_id, index: date)

In [297]:
# Display the approximation table rounded to 2 decimals.
df.round(2)

Unnamed: 0_level_0,1.0,2.0,3.0,4.0,7.0,8.0,11.0,12.0,14.0,15.0,...,22.0,23.0,24.0,25.0,26.0,28.0,29.0,30.0,31.0,32.0
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-07-01,775.06,2.48,69.59,301.03,219.34,221.3,15.4,139.87,36.59,97.96,...,18.85,13.62,9.63,105.42,32.18,25.24,6.73,2.87,13.67,10.26
2018-07-02,876.19,2.8,78.67,340.31,247.96,250.17,17.41,158.12,41.37,110.75,...,21.31,15.4,10.89,119.17,36.38,28.53,7.61,3.25,15.46,11.6
2018-07-03,814.57,2.61,73.14,316.38,230.52,232.58,16.18,147.0,38.46,102.96,...,19.81,14.32,10.12,110.79,33.82,26.53,7.07,3.02,14.37,10.78
2018-07-04,765.67,2.45,68.75,297.38,216.68,218.62,15.21,138.18,36.15,96.78,...,18.62,13.46,9.52,104.14,31.79,24.93,6.65,2.84,13.51,10.13
2018-07-05,697.42,2.23,62.62,270.88,197.37,199.13,13.86,125.86,32.93,88.15,...,16.96,12.26,8.67,94.86,28.96,22.71,6.06,2.58,12.3,9.23
2018-07-06,570.31,1.82,51.21,221.51,161.4,162.84,11.33,102.92,26.92,72.08,...,13.87,10.02,7.09,77.57,23.68,18.57,4.95,2.11,10.06,7.55
2018-07-07,516.98,1.65,46.42,200.8,146.3,147.61,10.27,93.3,24.41,65.34,...,12.57,9.09,6.43,70.31,21.47,16.84,4.49,1.92,9.12,6.84
2018-07-08,718.97,2.3,64.55,279.25,203.46,205.28,14.28,129.75,33.94,90.87,...,17.48,12.64,8.94,97.79,29.85,23.41,6.24,2.66,12.68,9.51
2018-07-09,885.17,2.83,79.48,343.8,250.5,252.74,17.59,159.74,41.79,111.88,...,21.52,15.56,11.0,120.39,36.75,28.83,7.69,3.28,15.62,11.71
2018-07-10,822.73,2.63,73.87,319.55,232.83,234.91,16.35,148.48,38.84,103.99,...,20.01,14.46,10.22,111.9,34.16,26.79,7.15,3.05,14.52,10.89


In [299]:
# Export the rounded table as a semicolon-separated, UTF-8 encoded CSV file
# in the current working directory.
df.round(2).to_csv(
    'approx_elig_customers_per_shop_and_day.csv',
    sep=';',
    encoding='utf-8',
)

The last row of the table above approximates the number of eligible customers per shop after 37 days. It is used to define the matching patterns that allow some customer IDs to enter the control group.

In [303]:
# TOP 7 shops: 1% (factor 0.01) of the summed eligible customers per shop.
df.loc['SUM_ELIG_CUS', [1, 3, 4, 7, 8, 12, 15]] * 0.01

1.0     232.883050
3.0      20.909859
4.0      90.451510
7.0      65.904816
8.0      66.493829
12.0     42.027860
15.0     29.434915
Name: SUM_ELIG_CUS, dtype: float64

The figures above approximate the number of customers from the top 7 shops that would belong to the control group after 37 days, assuming that the matching pattern allows 1% of the eligible customers to take part in the control group.

Another option would be to enable more than one matching pattern (customer IDs ending in '03', '06' or '09', for example) and then stop adding customers to the control group once a suitable number has been reached.