In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# Let wide DataFrames display up to 300 columns before pandas truncates them.
pd.set_option("display.max_columns", 300)

# The project root is taken to be the parent of the current working directory
# (presumably the notebook is run from a sub-folder of the project).
_parent_of_cwd = os.path.join("..", os.path.curdir)
PROJECT_DIR = os.path.abspath(_parent_of_cwd)

# All input and output files live under <project>/data.
PROJECT_DATA_DIR = os.path.join(PROJECT_DIR, "data")

In [3]:
# Reference month (YYYYMM) of the CVM input extracts.
# The prediction list is produced for this month + 2: for example,
# the December list is built from October CVM data.
month = "201804"

In [4]:
# Both monthly extracts are pipe-delimited text files encoded as ISO-8859-1,
# stored under <project>/data/input and suffixed with the reference month.
# NOTE(review): the saved output under this cell looks like the tail of a pandas
# warning raised while reading -- check whether explicit dtypes should be pinned.
input_dir = os.path.join(PROJECT_DATA_DIR, "input")
read_options = dict(sep="|", encoding="ISO-8859-1")

tarificador_path = os.path.join(input_dir, "EXTR_INFO_TARIF_PRE_" + month + ".TXT")
tarificador_pre = pd.read_csv(tarificador_path, **read_options)

ac_final_path = os.path.join(input_dir, "AC_FINAL_PREPAGO_" + month + ".TXT")
ac_final_prepago = pd.read_csv(ac_final_path, **read_options)

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
# Derived feature: ActualVolume divided by 1024**2
# (presumably bytes -> megabytes; confirm the unit of the raw column).
volume_divisor = float(1024**2)
tarificador_pre["ActualVolume_MB"] = tarificador_pre["ActualVolume"] / volume_divisor

In [6]:
# Keep only the MSISDNs present in both extracts (inner join). Columns that exist
# in both frames are disambiguated with the "_ac" / "_tarificador" suffixes.
join_ac_tarificador = pd.merge(
    ac_final_prepago,
    tarificador_pre,
    how="inner",
    on="MSISDN",
    suffixes=["_ac", "_tarificador"],
)

In [7]:
# Numeric model inputs, assembled from three consecutive groups. The resulting order
# is significant: a later cell selects the model features by column position.
_base_columns = [
    "NUM_PREPAGO",
    "NUM_POSPAGO",
    "MIN_LLAM_ULTMES",
    "ULT3MESES_TOTAL",
    "DIASDESDEULTRECARGA",
    "NUMRECARGASULT3MESES_TOTAL",
    "COBERTURA_4G",
    "LORTAD",
    "MOU",
    "TOTAL_LLAMADAS",
    "TOTAL_SMS",
]

# MOU / LLAM / SMS triplets, one triplet per suffix.
_segment_usage_columns = [
    "MOU_Week", "LLAM_Week", "SMS_Week",
    "MOU_Weekend", "LLAM_Weekend", "SMS_Weekend",
    "MOU_VF", "LLAM_VF", "SMS_VF",
    "MOU_Fijo", "LLAM_Fijo", "SMS_Fijo",
    "MOU_OOM", "LLAM_OOM", "SMS_OOM",
    "MOU_Internacional", "LLAM_Internacional", "SMS_Internacional",
]

# Deliberately left out of the feature set: "ActualVolume" (only the derived
# "ActualVolume_MB" is used) and "Flag_Uso_Etnica".
_remaining_columns = [
    "Num_accesos",
    "Num_Cambio_Planes",
    "LLAM_COMUNIDAD_SMART",
    "MOU_COMUNIDAD_SMART",
    "LLAM_SMS_COMUNIDAD_SMART",
    "cuota_SMART8",
    "cuota_SMART12",
    "cuota_SMART16",
    "ActualVolume_MB",
]

feature_numeric_columns = _base_columns + _segment_usage_columns + _remaining_columns

# Categorical model inputs; these are one-hot encoded before scoring.
feature_categorical_columns = [
    "Tipo_Documento_Comprador",
    "Codigo_Plan_Precios",
]

In [8]:
# Work on an independent copy holding only the model inputs:
# numeric features first, categorical features after them.
selected_columns = feature_numeric_columns + feature_categorical_columns
ml_dataset = join_ac_tarificador.loc[:, selected_columns].copy()

In [9]:
# One-hot encode every categorical feature. Values are lower-cased and prefixed with
# the feature name, nulls become the "<feature>_missing" level, and the original
# categorical column is dropped once its dummy columns have been added.
for categorical_name in feature_categorical_columns:
    raw_values = ml_dataset[categorical_name].fillna("MISSING")
    labelled_values = categorical_name + "_" + raw_values.str.lower()
    dummy_frame = pd.get_dummies(labelled_values)
    ml_dataset[dummy_frame.columns] = dummy_frame
    del ml_dataset[categorical_name]

# Numeric features: nulls are replaced by 0.0 and each column is cast to float64.
# The cast is applied column by column so every column is replaced outright.
for numeric_name in feature_numeric_columns:
    filled_values = ml_dataset[numeric_name].fillna(0.0)
    ml_dataset[numeric_name] = filled_values.astype(np.float64)

In [10]:
# Patch: force an all-zero "Codigo_Plan_Precios_ppjmi" dummy column.
# NOTE(review): presumably the model expects this column and it is absent from this
# month's data -- confirm. If the data does contain that plan code, this assignment
# overwrites the real dummy values with zeros.
patch_column = 'Codigo_Plan_Precios_ppjmi'
ml_dataset[patch_column] = 0.0

In [11]:
# Snapshot of the prepared column names, in their current order.
feature_columns = list(ml_dataset.columns)

In [13]:
import pickle

# Load the trained classifier from <project>/models.
# NOTE(review): unpickling can execute arbitrary code contained in the file --
# only load model files that come from a trusted source.
model_path = os.path.join(PROJECT_DIR, "models", "model_identificator_upsellers_v01.pkl")
with open(model_path, "rb") as model_file:
    model = pickle.load(model_file)



In [20]:
# Score every joined row using the first 72 prepared columns, in their current order.
# NOTE(review): the positional [:72] slice assumes the column order matches what the
# model was trained on -- confirm, since the dummy columns depend on this month's data.
model_input = ml_dataset[feature_columns[:72]]

# Column 1 of predict_proba is stored as the upsell score
# (presumably the positive class -- confirm against model.classes_).
class_probabilities = model.predict_proba(model_input)
join_ac_tarificador["prob_upsell_prepago"] = class_probabilities[:,1]

In [21]:
# Reduce to identifier + score and rank from the highest score to the lowest.
id_and_score = join_ac_tarificador[["MSISDN", "prob_upsell_prepago"]]
join_ac_tarificador_sorted = id_and_score.sort_values(
    by="prob_upsell_prepago",
    ascending=False,
)

In [22]:
# Score threshold for the list; rows must score strictly above it.
# This value is not expected to need changing.
cutoff_score = 0.6

scores_above_cutoff = join_ac_tarificador_sorted["prob_upsell_prepago"] > cutoff_score
join_ac_tarificador_sorted_filtered = join_ac_tarificador_sorted.loc[scores_above_cutoff].copy()

In [23]:
# Non-null count per column of the filtered list: a quick check of how many rows
# scored above the cutoff.
join_ac_tarificador_sorted_filtered.count()

MSISDN                 647316
prob_upsell_prepago    647316
dtype: int64

In [24]:
# Assign every row of the filtered list to a score decile:
# 1 holds the lowest scores in the list, 10 the highest.
quantile_levels = (0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

join_ac_tarificador_sorted_filtered['decil'] = 0.0
list_scores = join_ac_tarificador_sorted_filtered['prob_upsell_prepago']

for q in quantile_levels:
    # Rows at or above the q-th score quantile are relabelled with q. Because the
    # levels ascend, each row ends up with the largest level whose quantile it reaches.
    reaches_level = list_scores >= list_scores.quantile(q)
    join_ac_tarificador_sorted_filtered.loc[reaches_level, 'decil'] = np.round(q, decimals=1)

# Convert the 0.0-0.9 label into an integer decile: (label + 0.1) * 10 -> 1..10.
scaled_label = (join_ac_tarificador_sorted_filtered["decil"] + 0.1) * 10.0
join_ac_tarificador_sorted_filtered["decil"] = np.round(scaled_label, decimals=0).astype(np.int64)

In [25]:
# Show the first ten rows of the list (the frame is ordered by descending score).
# NOTE(review): the rendered output includes raw MSISDN values -- consider clearing
# it before sharing the notebook.
print("Preview of the list")
join_ac_tarificador_sorted_filtered.head(10)

Preview of the list


Unnamed: 0,MSISDN,prob_upsell_prepago,decil
1064209,617712307,0.823648,10
492736,637107020,0.823193,10
154612,634927978,0.82229,10
1834173,678233592,0.821811,10
465543,671628099,0.821811,10
1286887,711763129,0.82156,10
1351004,603769253,0.821208,10
641501,678116590,0.821077,10
1894626,600067843,0.821039,10
1826352,667354528,0.820886,10


In [27]:
def _shift_month(yyyymm, months_ahead):
    """Return the YYYYMM string that lies `months_ahead` months after `yyyymm`.

    Uses calendar arithmetic so the result rolls over year boundaries,
    e.g. "201811" + 2 -> "201901" (plain integer addition would give "201813").
    """
    year, month_number = divmod(int(yyyymm), 100)
    # Count months from year 0 with January as index 0, shift, then split back.
    month_index = year * 12 + (month_number - 1) + months_ahead
    return "%04d%02d" % (month_index // 12, month_index % 12 + 1)


# The list is produced for two months after the input month.
target_month = _shift_month(month, 2)

# Build the destination once so the file written and the path reported cannot diverge.
output_path = os.path.join(PROJECT_DIR, "data", "output",
                           "lista_trade_up_prepago_" + target_month + ".csv")

# Semicolon-separated export; the DataFrame index is not written.
join_ac_tarificador_sorted_filtered.to_csv(output_path,
                                           sep=";",
                                           index=False)

print("Written prediction list to %s" % output_path)

Written prediction list to /Users/adesant3/Documents/Development/src/upsell_prepago/data/output/lista_trade_up_prepago_201806.csv
