## <b> Carga de librerias </b>

In [1]:
import pandas as pd 
import numpy as np 
import time
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# red neuronal
import keras 
from keras.models import Sequential
from keras.layers import Dense

# K-folds
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

# Grid Search 
from sklearn.model_selection import GridSearchCV

## <b> Carga de datos </b>
0 Standard,
1 Poor,
2 Good

In [2]:
dataset = pd.read_csv(r'dataset procesados/modelado.txt', sep=';')
dict_cod = {'Standard':0, 'Poor':1, 'Good':2}
dataset['Credit_Score'] = dataset['Credit_Score'].map(dict_cod)
dataset.head()

Unnamed: 0,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,...,Num_Credit_Inquiries,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Credit_Score
0,23,6292,19114.12,1824.843333,3,4,3,4,3,7.0,...,4.0,56617,809.98,26.82262,35636,49.574949,80.415295,11333,312.494089,2
1,23,6292,19114.12,1824.843333,3,4,3,4,-1,31.0,...,4.0,24313,809.98,31.94496,35636,49.574949,118.280222,10418,284.629162,2
2,-500,6292,19114.12,1824.843333,3,4,3,4,3,7.0,...,4.0,24313,809.98,28.609352,35636,49.574949,81.699521,13844,331.209863,2
3,23,6292,19114.12,1824.843333,3,4,3,4,5,4.0,...,4.0,24313,809.98,31.377862,35636,49.574949,199.458074,25471,223.45131,2
4,23,6292,19114.12,1824.843333,3,4,3,4,6,31.0,...,4.0,24313,809.98,24.797347,35636,49.574949,41.420153,17527,341.489231,2


## <b> Balance de datos </b>

In [3]:
dataset['Credit_Score'].value_counts()

0    53111
1    28965
2    17814
Name: Credit_Score, dtype: int64

In [4]:
nGood = len(dataset[dataset['Credit_Score']==2])
standard = dataset[dataset['Credit_Score']==0]
poor = dataset[dataset['Credit_Score']==1]
good = dataset[dataset['Credit_Score']==2]


standard = standard.sample(2*nGood)
dataset = pd.concat([standard, poor, good])
dataset = dataset.sample(frac=1)

In [5]:
dataset['Credit_Score'].value_counts()

0    35628
1    28965
2    17814
Name: Credit_Score, dtype: int64

## <b> Verificación de Nulos </b>

In [6]:
dataset.isnull().values.any()

False

## <b> Definición de datos para modelado </b>

### Variables

In [7]:
X = dataset.iloc[:, dataset.columns != 'Credit_Score']
y = dataset.loc[:, 'Credit_Score']

In [8]:
X_train, x_test, y_train, y_test = train_test_split(X,y, test_size=0.30, shuffle=True)

## <b> PCA </b>

In [9]:
from sklearn.decomposition import PCA

In [10]:
pca = PCA(n_components=19)

In [11]:
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(x_test)

In [12]:
np.sum(pca.explained_variance_ratio_)

0.9999999999884737

In [13]:
np.round(pca.components_,2)

array([[ 0.  ,  0.  ,  1.  ,  0.  , -0.  , -0.  , -0.  ,  0.  , -0.  ,
         0.  , -0.  ,  0.  , -0.  , -0.  ,  0.  , -0.  ,  0.  , -0.  ,
        -0.  ,  0.  ],
       [-0.  , -0.  ,  0.  , -0.02,  0.  , -0.  , -0.  , -0.  ,  0.  ,
         0.  ,  0.  , -0.  ,  0.94, -0.01, -0.  ,  0.34, -0.  , -0.  ,
         0.  , -0.  ],
       [-0.  ,  0.  , -0.  ,  0.07, -0.  , -0.  , -0.  , -0.  , -0.  ,
        -0.  , -0.  , -0.  ,  0.34, -0.05,  0.  , -0.94,  0.  ,  0.  ,
        -0.02,  0.01],
       [ 0.  ,  0.  , -0.  ,  0.  ,  0.  , -0.  ,  0.  , -0.  , -0.  ,
         0.  ,  0.  , -0.  , -0.  , -0.  ,  0.  ,  0.  ,  1.  ,  0.  ,
         0.  ,  0.  ],
       [ 0.  ,  0.  , -0.  ,  0.08, -0.  , -0.  ,  0.  , -0.  , -0.  ,
        -0.  , -0.  , -0.  , -0.  , -0.01,  0.  ,  0.02, -0.  ,  0.  ,
        -1.  ,  0.  ],
       [-0.  ,  0.01, -0.  ,  0.99, -0.  , -0.  , -0.  , -0.  , -0.  ,
         0.  , -0.  , -0.  , -0.01, -0.05,  0.  ,  0.07, -0.  ,  0.06,
         0.08,  0.04],
       [ 0

## <b> Red neuronal </b>

In [14]:
parametros = {
# Tamaño de la muestra
    'batch_size': [5,10], 

# Cantidad de epocas
    'nb_epoch': [100,500],

# Optimizador 
    'optimizer':['adam', 'SGD']
}

In [15]:
def red_grid_search_v1(optimizer):
    clasificador = Sequential()
    clasificador.add(Dense(input_dim=19, units=64, activation = 'softmax', kernel_initializer='uniform') )
    clasificador.add(Dense(units=128, activation = 'softmax', kernel_initializer='uniform') )
    clasificador.add(Dense(units=128, activation = 'softmax', kernel_initializer='uniform') )
    clasificador.add(Dense(units=128, activation = 'softmax', kernel_initializer='uniform') )
    clasificador.add(Dense(units=1, activation='softmax', kernel_initializer='uniform'))
    clasificador.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return clasificador

In [16]:
clasificador_grid_search = KerasClassifier(build_fn=red_grid_search_v1)

  clasificador_grid_search = KerasClassifier(build_fn=red_grid_search_v1)


In [17]:
grid_out = GridSearchCV(estimator=clasificador_grid_search,
                        param_grid=parametros,
                        cv=20,
                        scoring='accuracy')

In [18]:
train_gs_out = grid_out.fit(X_train_pca, y_train, verbose=1)



In [19]:
grid_out.best_params_

{'batch_size': 5, 'nb_epoch': 100, 'optimizer': 'adam'}

In [20]:
grid_out.best_score_

0.34931690892439493

In [21]:
from sklearn.metrics import classification_report
print(classification_report(y_train, grid_out.predict(X_train_pca) ) )

              precision    recall  f1-score   support

           0       0.00      0.00      0.00     25006
           1       0.35      1.00      0.52     20150
           2       0.00      0.00      0.00     12528

    accuracy                           0.35     57684
   macro avg       0.12      0.33      0.17     57684
weighted avg       0.12      0.35      0.18     57684



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
