In [1]:
# Base library
import numpy as np
import pandas as pd

# Deployment
import pickle
import warnings
warnings.filterwarnings('ignore')

After modeling, we try to predict the unseen data with Prediction label and Probability provided. Probability less than 0.5 will be classified as 0 and above 0.5 will be classified as 1.

In [3]:
# Load the saved model
with open('xgboost-model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

In [4]:
# Sample data for prediction

test_data = pd.DataFrame({
    'age':[30,30],
    'job':['student','blue-collar'],
    'marital':['single','married'],
    'education':['professional.course','illiterate'],
    'default':['no','no'],
    'housing':['yes','no'],
    'loan':['no','no'],
    'contact':['cellular','telephone'],
    'day_of_week':['sep','mon'],
    'month':['wed','may'],
    'duration':[1616,360],
    'campaign':[4,8],
    'pdays':[19,999],
    'previous':[1,0],
    'poutcome':['success','nonexistent'],
    'emp.var.rate':[-1.1,1.4],
    'cons.price.idx':[94.199,93.994],
    'cons.conf.idx':[-37.5,-36.1],
    'euribor3m':[0.879,4.856],
    'nr.employed':[4963.6,4963],
})
test_data

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,day_of_week,month,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,30,student,single,professional.course,no,yes,no,cellular,sep,wed,1616,4,19,1,success,-1.1,94.199,-37.5,0.879,4963.6
1,30,blue-collar,married,illiterate,no,no,no,telephone,mon,may,360,8,999,0,nonexistent,1.4,93.994,-36.1,4.856,4963.0


In [5]:
predictions = loaded_model.predict(test_data)

predictions

array([1, 0])

In [6]:
# Bulk prediction

data_unseen = pd.read_csv('data_unseen.csv')
data_unseen.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,35,services,married,high.school,unknown,no,no,telephone,jun,wed,173,14,999,0,nonexistent,1.4,94.465,-41.8,4.864,5228.1
1,30,student,single,professional.course,no,yes,no,cellular,sep,tue,282,2,6,1,success,-1.1,94.199,-37.5,0.88,4963.6
2,37,self-employed,single,basic.4y,no,yes,no,cellular,may,mon,180,3,999,1,failure,-1.8,92.893,-46.2,1.244,5099.1
3,31,blue-collar,single,professional.course,no,no,no,cellular,may,tue,314,2,999,0,nonexistent,-1.8,92.893,-46.2,1.266,5099.1
4,31,technician,married,university.degree,no,unknown,unknown,cellular,aug,wed,88,2,999,0,nonexistent,1.4,93.444,-36.1,4.964,5228.1


In [7]:
# Predict using the loaded model
y_pred_label = loaded_model.predict(data_unseen)
y_pred_proba = loaded_model.predict_proba(data_unseen)[:, 1]

# Add predictions to the DataFrame
data_unseen['Prediction'] = y_pred_label
data_unseen['Probability'] = y_pred_proba
data_unseen

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,Prediction,Probability
0,35,services,married,high.school,unknown,no,no,telephone,jun,wed,...,999,0,nonexistent,1.4,94.465,-41.8,4.864,5228.1,0,0.070517
1,30,student,single,professional.course,no,yes,no,cellular,sep,tue,...,6,1,success,-1.1,94.199,-37.5,0.880,4963.6,1,0.566494
2,37,self-employed,single,basic.4y,no,yes,no,cellular,may,mon,...,999,1,failure,-1.8,92.893,-46.2,1.244,5099.1,0,0.072730
3,31,blue-collar,single,professional.course,no,no,no,cellular,may,tue,...,999,0,nonexistent,-1.8,92.893,-46.2,1.266,5099.1,0,0.103590
4,31,technician,married,university.degree,no,unknown,unknown,cellular,aug,wed,...,999,0,nonexistent,1.4,93.444,-36.1,4.964,5228.1,0,0.070517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8231,30,blue-collar,divorced,basic.9y,no,yes,no,cellular,jul,tue,...,999,0,nonexistent,1.4,93.918,-42.7,4.961,5228.1,0,0.070517
8232,60,admin.,married,basic.9y,no,yes,no,cellular,aug,thu,...,999,0,nonexistent,1.4,93.444,-36.1,4.962,5228.1,0,0.072419
8233,21,student,single,high.school,no,no,no,cellular,may,wed,...,999,1,failure,-1.8,92.893,-46.2,1.281,5099.1,0,0.287686
8234,38,technician,single,professional.course,no,yes,no,telephone,may,fri,...,999,0,nonexistent,1.1,93.994,-36.4,4.855,5191.0,0,0.070517


In [8]:
# Save Prediction
data_unseen.to_csv('data_unseen_prediction.csv', index=False)