In [9]:
import pandas as pd
import numpy as np
import pickle

In [10]:
# Read the raw loan applications that will be scored, then preview the first rows.
data_test = pd.read_csv(filepath_or_buffer="../data/raw/loan_sanction_test.csv")
data_test.head(5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,LP001015,Male,Yes,0,Graduate,No,5720,0,110.0,360.0,1.0,Urban
1,LP001022,Male,Yes,1,Graduate,No,3076,1500,126.0,360.0,1.0,Urban
2,LP001031,Male,Yes,2,Graduate,No,5000,1800,208.0,360.0,1.0,Urban
3,LP001035,Male,Yes,2,Graduate,No,2340,2546,100.0,360.0,,Urban
4,LP001051,Male,No,0,Not Graduate,No,3276,0,78.0,360.0,1.0,Urban


Procesos de preparacion de datos para predecir

In [11]:
# Restore the saved feature-engineering settings (imputation values and encoders).
# pickle can execute arbitrary code while loading: only open trusted artifacts.
with open("../artifacts/feature_eng_configs.pkl", mode="rb") as config_file:
    feature_eng_configs = pickle.load(config_file)

# Show the loaded configuration.
feature_eng_configs

{'codificador_property_area': Property_Area
 Semiurban    233
 Urban        202
 Rural        179
 Name: count, dtype: int64,
 'codificador_dependents': Dependents
 0     360
 1     102
 2     101
 3+     51
 Name: count, dtype: int64,
 'media_credit_history': 0.8421985815602837,
 'media_loan_amount_term': 342.0,
 'media_loanamount': 146.41216216216216,
 'mode_self_employed': 'No',
 'mode_dependents': '0',
 'mode_married': 'Yes',
 'mode_gender': 'Male'}

In [12]:
# Restore the saved grouping of column names by variable type.
# pickle can execute arbitrary code while loading: only open trusted artifacts.
with open("../artifacts/variables_escaling.pkl", mode="rb") as variables_file:
    variables_escaling = pickle.load(variables_file)

# Show the loaded column groups.
variables_escaling

{'categoricas': ['Gender',
  'Married',
  'Dependents',
  'Education',
  'Self_Employed',
  'Property_Area',
  'Loan_Status'],
 'continuas': ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount'],
 'discretas': ['Loan_Amount_Term', 'Credit_History']}

In [13]:
# Categorical feature columns, excluding 'Loan_Status' (it is not a column of
# the test frame prepared below).
# A new list is built instead of calling .remove() on the loaded one, so
# `variables_escaling` is left untouched and this cell can be re-run without
# raising ValueError once 'Loan_Status' has already been removed.
categorias = [
    columna
    for columna in variables_escaling["categoricas"]
    if columna != 'Loan_Status'
]
categorias


['Gender',
 'Married',
 'Dependents',
 'Education',
 'Self_Employed',
 'Property_Area']

In [14]:
# Prepare the test frame with the imputation values and encoders stored in
# `feature_eng_configs` (presumably computed on the training set -- confirm),
# so that the scaler and the model receive the features they expect.

# Drop the applicant identifier; it is not used as a feature.
# NOTE: this deliberately still raises KeyError if the cell is re-run on an
# already-processed frame, because the encoding steps below are not idempotent.
data_test = data_test.drop(columns='Loan_ID')

# Column -> key in `feature_eng_configs` holding the value used to fill NaNs.
# Dependents is imputed here, before it is frequency-encoded further down.
imputation_keys = {
    'Gender': 'mode_gender',
    'Married': 'mode_married',
    'Dependents': 'mode_dependents',
    'Self_Employed': 'mode_self_employed',
    'LoanAmount': 'media_loanamount',
    'Loan_Amount_Term': 'media_loan_amount_term',
    'Credit_History': 'media_credit_history',
}
for column, config_key in imputation_keys.items():
    data_test[column] = data_test[column].fillna(feature_eng_configs[config_key])

# Frequency encoding: replace each category with the count stored for it.
# Categories missing from the stored encoder become NaN.
frequency_encoders = {
    'Dependents': 'codificador_dependents',
    'Property_Area': 'codificador_property_area',
}
for column, config_key in frequency_encoders.items():
    data_test[column] = data_test[column].map(feature_eng_configs[config_key])

# Binary (0/1) encoding of the remaining categorical columns that have fewer
# than 3 distinct values. With drop_first=True the first category in sorted
# order is dropped, so the remaining one is encoded as 1.
for colName in categorias:
    # Frequency-encoded columns must keep their counts even when this
    # particular batch happens to contain fewer than 3 distinct values.
    if colName in frequency_encoders:
        continue
    if len(data_test[colName].unique()) < 3:
        dummies = pd.get_dummies(data_test[colName], drop_first=True)
        if dummies.shape[1] != 1:
            # Happens when the column has a single category in this batch (or
            # one category plus NaN): the 0/1 value cannot be derived from the
            # test data alone.
            raise ValueError(
                f"Cannot binary-encode column {colName!r}: expected exactly "
                f"2 categories, got values {list(data_test[colName].unique())}"
            )
        data_test[colName] = dummies.iloc[:, 0].astype(int)


data_test.head()


Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,1,1,360,0,0,5720,0,110.0,360.0,1.0,202
1,1,1,102,0,0,3076,1500,126.0,360.0,1.0,202
2,1,1,101,0,0,5000,1800,208.0,360.0,1.0,202
3,1,1,101,0,0,2340,2546,100.0,360.0,0.842199,202
4,1,0,360,1,0,3276,0,78.0,360.0,1.0,202


In [15]:
# Share of missing values per column; a non-zero entry means a value was not
# imputed or a category was not found in its encoder.
data_test.isna().mean()

Gender               0.0
Married              0.0
Dependents           0.0
Education            0.0
Self_Employed        0.0
ApplicantIncome      0.0
CoapplicantIncome    0.0
LoanAmount           0.0
Loan_Amount_Term     0.0
Credit_History       0.0
Property_Area        0.0
dtype: float64

Estandarizacion con objetos scaler de train

In [16]:
# Restore the saved scaler object.
# pickle can execute arbitrary code while loading: only open trusted artifacts.
with open("../artifacts/std_scaler.pkl", mode="rb") as scaler_file:
    std_scaler = pickle.load(scaler_file)

# Show the loaded scaler.
std_scaler

Cargamos modelo ya entrenado

In [17]:
# Restore the saved model object.
# pickle can execute arbitrary code while loading: only open trusted artifacts.
with open("../models/model_svm_v1.pkl", mode="rb") as model_file:
    model_svm = pickle.load(model_file)

# Show the loaded model.
model_svm

In [18]:
# Scale the prepared test features with the loaded scaler, then predict with
# the loaded model.
# NOTE(review): assumes `data_test` has the same columns, in the same order,
# that the scaler and model were fitted with -- confirm against training.
X_data_test_std = std_scaler.transform(data_test)
data_test_predicts = model_svm.predict(X_data_test_std)

# Display the predictions.
data_test_predicts

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,