### **Parte 1 - Red Neuronal Feed Forward**

**Base de datos bancarios**: El objetivo será predecir el record crediticio de un cliente

In [3]:
import sklearn.model_selection
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Carga de dataset**

In [5]:
data  = pd.read_csv('/content/drive/My Drive/Universidad Galileo/Tercer Trimestre/Statistical Learning II/Proyecto/1-MLP/data.csv')

**Visualización de datos**

In [6]:
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


**Normalización y Encoding**

In [7]:
LE = LabelEncoder()
data['credit_score'] = data['CreditScore']/100
data['age'] = data['Age']/10
data['balance'] = data['Balance']/10000
data['estimated_salary'] = data['EstimatedSalary']/10000
data['has_credit_card'] = data['HasCrCard']
data['gender_code'] = LE.fit_transform(data['Gender'])
data['geography_code'] = LE.fit_transform(data['Geography'])
data['is_active_member'] = data['IsActiveMember']

In [8]:
data=data.drop(['RowNumber','CustomerId','Surname','CreditScore','Geography','Gender','Age','Tenure','Balance',
            'NumOfProducts','HasCrCard','IsActiveMember','EstimatedSalary','Exited']
             ,axis=1)

In [9]:
data.head()

Unnamed: 0,credit_score,age,balance,estimated_salary,has_credit_card,gender_code,geography_code,is_active_member
0,6.19,4.2,0.0,10.134888,1,0,0,1
1,6.08,4.1,8.380786,11.254258,0,0,2,1
2,5.02,4.2,15.96608,11.393157,1,0,0,0
3,6.99,3.9,0.0,9.382663,0,0,0,0
4,8.5,4.3,12.551082,7.90841,1,0,2,1


**Dimensiones del dataset**

In [10]:
data.shape

(10000, 8)

**Split train/test dataset**

##### x = age, balance, estimated_salary, has_credit_card, gender_code, geography_code, is_active_member
##### y = credit_score

In [19]:
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(np.array(data.iloc[:,1:6]), np.array(data.iloc[:,0]), 
                                                                            test_size=0.3, random_state=0)
y_train = y_train.reshape(7000,1)

In [23]:
def relu(x):
    return np.maximum(x, 0)

def relu_derivative(x):
    x[x<=0] = 0
    x[x>0] = 1
    return x

def sigmoid(x):
    s = 1/(1+np.exp(-x)) 
    return s

def sigmoid_derivative(x):
    s = sigmoid(x)
    ds = s*(1-s)
    return ds

**Definición de pesos**

In [12]:
w1 = np.random.normal(0.0, 0.1, (5,5))
w2 = np.random.normal(0.0, 0.1, (5,5))
w3 = np.random.normal(0.0, 0.15, (5,5))
w4 = np.random.normal(0.0, 0.15, (5,1))

**Funciones de forward-propagation y back-propagation**

In [13]:
def forward_propagation(x, w1, w2, w3, w4):
    
    input_H1 = np.matmul(x,w1)
    output_H1 = relu(input_H1)
    
    input_H2 = np.matmul(output_H1, w2)
    output_H2 = relu(input_H2)
    
    input_H3 = np.matmul(output_H2, w3)
    output_H3 = relu(input_H3)
    
    output = np.matmul(output_H3, w4)    
    
    return output_H1, output_H2, output_H3, output

def back_propagation(output_H1, output_H2, output_H3, output, y):
    
    error = y - output
    
    delta = error * relu_derivative(output)
    
    error_H3 = np.matmul(delta,w4.T)
    delta_H3 = error_H3 * relu_derivative(output_H3)
    
    error_H2 = np.matmul(delta_H3,w3.T)
    delta_H2 = error_H2 * relu_derivative(output_H2)
    
    error_H1 = np.matmul(delta_H2,w2.T)
    delta_H1 = error_H1 * relu_derivative(output_H1)
    
    return error, delta, delta_H3, delta_H2, delta_H1

**Función de entreno**

In [None]:
def fit (lr, epochs):
    costs = []
    global w1
    global w2
    global w3
    global w4
    
    for i in range(epochs):
        output_H1, output_H2, output_H3, output = forward_propagation(x_train, w1, w2, w3, w4)
        error, delta, delta_H3, delta_H2, delta_H1 = back_propagation(output_H1, output_H2, output_H3, output, y_train)
    
        w4 += (np.matmul(output.T,delta)*lr)
        w3 += (np.matmul(output_H3.T,delta_H3)*lr)
        w2 += (np.matmul(output_H2.T,delta_H2)*lr)
        w1 += (np.matmul(output_H1.T,delta_H1)*lr)

        c = np.mean(error)
        costs.append(c)
    
        if i % 1000 == 0:
            print(f"Iteración: {i}. Error: {c}")

    print("Entreno completo!!")

**Entrenamiento**

In [None]:
fit(0.00000001,15000)

Iteración: 0. Error: 6.504998804005659
Iteración: 1000. Error: 6.485586675103829
Iteración: 2000. Error: 6.327725355225311
Iteración: 3000. Error: 2.18326045838197
Iteración: 4000. Error: 0.1910262228506942
Iteración: 5000. Error: 0.1593026106152336
Iteración: 6000. Error: 0.14021024663970022
Iteración: 7000. Error: 0.11764302136890264
Iteración: 8000. Error: 0.09659205482772329
Iteración: 9000. Error: 0.07685546520484336
Iteración: 10000. Error: 0.061626801090084164
Iteración: 11000. Error: 0.04988725153422113
Iteración: 12000. Error: 0.043529590782010685
Iteración: 13000. Error: 0.037446905819373634
Iteración: 14000. Error: 0.03298244039531779
Entreno completo!!


**Experimento**

In [34]:
def predict(x):
    prediction = forward_propagation(x, w1, w2, w3, w4)
    return prediction[0]

In [38]:
x = np.array([4.2, 0, 10.1, 1, 0])
x = x.reshape(1,5)
print("Credit score: "+str(predict(x)[0][0]))

Credit score: 0.756019799633828
