## Package imports & settings

In [1]:
import os
import pandas as pd
from fastai.tabular.all import *


# Widen pandas' console output so wide frames are not wrapped too early.
pd.options.display.width = 140

----
## Reading in the data

In [2]:
# Locations of the two versions of the dataset (relative paths).
raw_data_path = "../data/raw/Potential Customers.csv"
transformed_data_path = "../data/processed/transformed/Potential Customers.csv"

# Both files are semicolon-delimited.
# df_r: the raw records; df_t: the transformed version of the same dataset.
df_r = pd.read_csv(raw_data_path, sep=";")
df_t = pd.read_csv(transformed_data_path, sep=";")

In [3]:
# Summarise the transformed dataset: column names, non-null counts and dtypes.
df_t.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2747 entries, 0 to 2746
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   AGE                     2747 non-null   float64
 1   YEARS_WITH_BANK         2747 non-null   float64
 2   INCOME                  2747 non-null   float64
 3   BALANCE                 2747 non-null   float64
 4   ADDRESS_MISSING         2747 non-null   float64
 5   YEARS_WITH_ADDRESS      2747 non-null   float64
 6   JOB_MISSING             2747 non-null   float64
 7   YEARS_WITH_JOB          2747 non-null   float64
 8   MARITAL_STATUS_MISSING  2747 non-null   float64
 9   MARITAL_STATUS_D        2747 non-null   float64
 10  MARITAL_STATUS_M        2747 non-null   float64
 11  MARITAL_STATUS_S        2747 non-null   float64
 12  MARITAL_STATUS_W        2747 non-null   float64
 13  EDUCATION_BCR           2747 non-null   float64
 14  EDUCATION_HGH           2747 non-null   

In [4]:
# Summarise the raw dataset: column names, non-null counts and dtypes.
df_r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2747 entries, 0 to 2746
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Cocunut                 2747 non-null   int64 
 1   AGE                     2747 non-null   int64 
 2   YEARS_WITH_BANK         2747 non-null   int64 
 3   MARTIAL_STATUS          2747 non-null   object
 4   EDUCATION               2747 non-null   object
 5   EMPLOYMENT              2747 non-null   object
 6   GENDER                  2747 non-null   object
 7   CUST_INCOME             2747 non-null   object
 8   CURRENT_ADDRESS_DATE    2747 non-null   object
 9   CURRENT_JOB_DATE        2747 non-null   object
 10  CURRENT_WITH_BANK_DATE  2747 non-null   object
 11  CURRENT_BALANCE_EUR     2747 non-null   object
dtypes: int64(3), object(9)
memory usage: 257.7+ KB


----
## Loading in the model

In [5]:
# Relative path of the exported model file.
model_path = "../models/model.pkl"

# NOTE(review): per the fastai docs, load_learner unpickles the file, which can
# execute arbitrary code -- only load model files from a trusted source.
learn = load_learner(model_path)

----
## Performing predictions

In [6]:
def predict_with_model(row, learn):
    """Predict the mortgage label for a single row of features.

    Parameters
    ----------
    row : pandas.Series
        One row of the transformed data, as handed over by
        ``DataFrame.apply(..., axis=1)``.
    learn : fastai Learner
        The loaded model; must provide ``predict``, ``no_bar`` and
        ``no_logging``.

    Returns
    -------
    pandas.Series
        Two positional entries: the label (``"Y"`` when the predicted class
        index is 1, otherwise ``"N"``) and, as a plain Python ``float``, the
        probability the model assigned to that predicted class. Note that this
        is the confidence in the returned label, not the probability of
        ``"Y"``.
    """
    # Silence the progress bar and logging that predict would otherwise emit
    # for every single row.
    with learn.no_bar(), learn.no_logging():
        _, index, probs = learn.predict(row)

    pred = "Y" if index == 1 else "N"
    # float() unwraps the 0-d tensor into a native number, so the result holds
    # a plain scalar instead of a 0-d array object.
    return pd.Series([pred, float(probs[index])])

# Predictions are computed from the transformed rows but stored on the raw
# frame. pandas aligns the assigned values on the index, so frames that are not
# row-aligned would silently end up with NaN / mismatched predictions.
assert df_t.index.equals(df_r.index), "transformed and raw data are not row-aligned"

# NOTE(review): this calls the model once per row, which may be slow on larger
# datasets -- consider a batched prediction if runtime becomes an issue.
df_r[["MORTGAGE_PRED", "MORTGAGE_PROB"]] = df_t.apply(lambda row: predict_with_model(row, learn), axis=1)

In [7]:
# Preview the first rows of the raw frame, now carrying the two prediction columns.
df_r.head()

Unnamed: 0,Cocunut,AGE,YEARS_WITH_BANK,MARTIAL_STATUS,EDUCATION,EMPLOYMENT,GENDER,CUST_INCOME,CURRENT_ADDRESS_DATE,CURRENT_JOB_DATE,CURRENT_WITH_BANK_DATE,CURRENT_BALANCE_EUR,MORTGAGE_PRED,MORTGAGE_PROB
0,80001,32,3,M,HGH,PVE,M,4230769231,1985-01-21,2005-01-01,2014-07-02,143,N,0.99944216
1,80002,51,10,W,SEC,RET,F,1406102308,2010-10-23,9999-10-01,2007-02-21,2288700154,N,0.99931157
2,80003,36,7,M,BCR,STE,F,3269230769,1981-10-16,2009-09-10,2009-10-26,2268491692,N,0.9993444
3,80004,46,11,D,BCR,TEA,F,73882,2008-09-01,2005-11-01,2005-11-30,4536983462,N,0.9905455
4,80005,39,10,M,HGH,PVE,M,4839282308,1993-09-01,2013-06-01,2006-12-05,3076923077,N,0.99957436


----
## Saving predicted values

In [8]:
# Derive the output location from the input one by swapping the "transformed"
# path component for "predicted" (the file name stays the same).
save_path = transformed_data_path.replace("transformed", "predicted")
save_dir = os.path.dirname(save_path)

# makedirs creates missing parent directories too and, with exist_ok=True, is a
# no-op when the directory already exists (no separate existence check needed).
# An empty dirname means "current directory", which needs no creation.
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

# Write the raw records plus predictions, semicolon-delimited like the inputs.
df_r.to_csv(save_path, index=False, sep=";")