In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

# Exploracion y breve descripcion del dataset:

Contexto: El concreto es el material más importante en la ingeniería civil. La resistencia a la compresión del concreto es una función altamente no lineal de la edad y los ingredientes que lo componen. Estos ingredientes incluyen cemento, escoria de alto horno, cenizas volantes,
agua, superplastificante, agregado grueso y agregado fino.

Por tanto, se buscará encontrar un modelo para la resistencia a la compresion del concreto, en funcion de sus ingredientes (6) y la edad. Por tanto tendremos un dataset con 7 variables de entrada y una de salida. El problema que el modelo busca predecir es básicamente un modelo de regresión.

In [3]:
dataset = pd.read_excel('/content/drive/My Drive/MLII/MLP/Concrete_Data.xls', index_col=0)
dataset.shape
list(dataset.columns)

['Blast_Furnace_Slag',
 'Fly_Ash',
 'Water',
 'Superplasticizer',
 'Coarse_Aggregate',
 'Fine_Aggregate',
 'Age',
 'Concrete_compressive_strength']

In [4]:
dataset.head(10)

Unnamed: 0_level_0,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age,Concrete_compressive_strength
Cement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075
266.0,114.0,0.0,228.0,0.0,932.0,670.0,90,47.029847
380.0,95.0,0.0,228.0,0.0,932.0,594.0,365,43.698299
380.0,95.0,0.0,228.0,0.0,932.0,594.0,28,36.44777
266.0,114.0,0.0,228.0,0.0,932.0,670.0,28,45.854291
475.0,0.0,0.0,228.0,0.0,932.0,594.0,28,39.28979


# Preprocesamiento de la data.

Dado que se esta trabajando con un modelo profundo, no se tiene necesidad de Feature Engeneering, por lo que no se realizara una combinacion de las mismas.

In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 1030 entries, 540.0 to 260.9
Data columns (total 8 columns):
Blast_Furnace_Slag               1030 non-null float64
Fly_Ash                          1030 non-null float64
Water                            1030 non-null float64
Superplasticizer                 1030 non-null float64
Coarse_Aggregate                 1030 non-null float64
Fine_Aggregate                   1030 non-null float64
Age                              1030 non-null int64
Concrete_compressive_strength    1030 non-null float64
dtypes: float64(7), int64(1)
memory usage: 72.4 KB


* Dado que es un modelo profundo no existe la necesidad feature engineering a las variables de entrada.
* No se cuentan con valores nulos en el dataset.
* No  se tienen variables categoricas en el dataset por lo que no hay que aplicar one-hot encoding. 



**Escalamiento/normalizacion de las features: Aplicacion de la formula: x' = (x - xmin)/(xmax - xmin)**



In [6]:
scaling = preprocessing.MinMaxScaler()
scaledMatrix = scaling.fit_transform(dataset)
df_normalized = pd.DataFrame(scaledMatrix,columns=dataset.columns)
df_normalized.head(5)

Unnamed: 0,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age,Concrete_compressive_strength
0,0.0,0.0,0.321357,0.07764,0.694767,0.20572,0.074176,0.967445
1,0.0,0.0,0.321357,0.07764,0.738372,0.20572,0.074176,0.741964
2,0.396494,0.0,0.848303,0.0,0.380814,0.0,0.739011,0.472642
3,0.396494,0.0,0.848303,0.0,0.380814,0.0,1.0,0.4824
4,0.368392,0.0,0.560878,0.0,0.515698,0.580783,0.986264,0.522806


**Separacion del dataset entre los X y Y del modelo**

In [0]:
X = df_normalized[['Blast_Furnace_Slag', 'Fly_Ash', 'Water', 'Superplasticizer', 'Coarse_Aggregate', 'Fine_Aggregate', 'Age',]]
y = df_normalized[['Concrete_compressive_strength']]

In [8]:
X[10:20]

Unnamed: 0,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age
10,0.368392,0.0,0.560878,0.0,0.515698,0.580783,0.244505
11,0.368392,0.0,0.560878,0.0,0.515698,0.580783,0.074176
12,0.132165,0.0,0.848303,0.0,0.380814,0.0,0.739011
13,0.528659,0.0,0.848303,0.0,0.380814,0.190667,0.244505
14,0.211464,0.0,0.848303,0.0,0.380814,0.190667,0.074176
15,0.0,0.0,0.848303,0.0,0.380814,0.190667,0.244505
16,0.582638,0.0,0.560878,0.0,0.715116,0.534119,0.244505
17,0.105732,0.0,0.848303,0.0,0.380814,0.190667,1.0
18,0.264329,0.0,0.848303,0.0,0.380814,0.0,0.244505
19,0.0,0.0,0.848303,0.0,0.380814,0.0,0.491758


In [9]:
y[10:20]

Unnamed: 0,Concrete_compressive_strength
10,0.445292
11,0.320054
12,0.50682
13,0.498273
14,0.566631
15,0.6301
16,0.461286
17,0.670386
18,0.476301
19,0.501933


**Cross validations and k-folds**

Procedemos a realizar el split de la data en Train/Test, se tomaran un 15% de datos para el Test, y un 85% de datos para el Train. Tambien utilizamos crossValidation k folds.

Generamos tres conjuntos e training sets

In [0]:
def crossValidKfolds(X,y,k=3,itTest = 3):
    if itTest>k:
        return None
    elif itTest == k:
        return (X[(X.shape[0]//k)*(itTest-1):],   #xTest             
                y[(y.shape[0]//k)*(itTest-1):],   #yTest
                X[:(X.shape[0]//k)*(itTest-1)],   #xTrain
                y[:(y.shape[0]//k)*(itTest-1)])   #yTrain
    elif itTest == 1:
        return (X[(X.shape[0]//k)*(itTest-1): (X.shape[0]//k)*(itTest)],                
                y[(y.shape[0]//k)*(itTest-1): (y.shape[0]//k)*(itTest)],
                X[(X.shape[0]//k)*(itTest):],
                y[(y.shape[0]//k)*(itTest):])
    else:
        firstX = X[:(X.shape[0]//k)*(itTest-1)]
        secndX = X[(X.shape[0]//k)*(itTest):]
        firstY = y[:(y.shape[0]//k)*(itTest-1)]
        secndY = y[(y.shape[0]//k)*(itTest):]
        return (X[(X.shape[0]//k)*(itTest-1): (X.shape[0]//k)*(itTest)],                
                y[(y.shape[0]//k)*(itTest-1): (y.shape[0]//k)*(itTest)],
                pd.concat([firstX, secndX]),
                pd.concat([firstY, secndY])) 

In [0]:
porcentajeTest = 0.15

In [0]:
#Devolvemos el segmento 2 de 6 (int(1/0.15))
xTest01,yTest01,xTrain01,yTrain01 = crossValidKfolds(X,y,k=int(1/porcentajeTest),itTest = 2)
#Devolvemos el segmento 4 de 6 (int(1/0.15))
xTest02,yTest02,xTrain02,yTrain02 = crossValidKfolds(X,y,k=int(1/porcentajeTest),itTest = 4)
#Devolvemos el segmento 6 de 6 (int(1/0.15))
xTest03,yTest03,xTrain03,yTrain03 = crossValidKfolds(X,y,k=int(1/porcentajeTest),itTest = 6)

**Agregando el Bias a cada set de entrenamiento**

In [0]:
#xTrain01['Bias'] = 1
#xTrain02['Bias'] = 1
#xTrain03['Bias'] = 1

In [14]:
print(xTest01.shape)
xTest01.head()

(171, 7)


Unnamed: 0,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age
171,0.0,0.0,0.16008,0.875776,0.148547,0.751882,0.247253
172,0.295771,0.0,0.253493,0.512422,0.148547,0.735324,0.247253
173,0.591263,0.0,0.271058,0.444099,0.148547,0.718515,0.247253
174,0.263495,0.0,0.20479,0.354037,0.423837,0.647516,0.247253
175,0.525876,0.0,0.344511,0.360248,0.417733,0.405921,0.247253


In [15]:
print(yTest01.shape)
yTest01.head()

(171, 1)


Unnamed: 0,Concrete_compressive_strength
171,0.708464
172,0.783195
173,0.819358
174,0.911526
175,0.958855


In [16]:
print(xTrain01.shape)
xTrain01.head()

(859, 8)


Unnamed: 0,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age,Bias
0,0.0,0.0,0.321357,0.07764,0.694767,0.20572,0.074176,1
1,0.0,0.0,0.321357,0.07764,0.738372,0.20572,0.074176,1
2,0.396494,0.0,0.848303,0.0,0.380814,0.0,0.739011,1
3,0.396494,0.0,0.848303,0.0,0.380814,0.0,1.0,1
4,0.368392,0.0,0.560878,0.0,0.515698,0.580783,0.986264,1


In [17]:
print(yTrain01.shape)
yTrain01.head()

(859, 1)


Unnamed: 0,Concrete_compressive_strength
0,0.967445
1,0.741964
2,0.472642
3,0.4824
4,0.522806


# Arquitectura de la red

* Funciones de activacion:

Para la neurona de salida, como funcion de activacion, se utilizará la funcion ReLu, dada que la data ha sido normalizada, tendrá unicamente valores entre 0 y 1.

* Funcion de costo:

Dada la regresion, se utilizará la clásica funcion de costo MSE.

* Input y Output layer:

Tenemos 7 features, por lo que tendremos 7 parametros de entrada. Dado que buscamos resolver un problema de regresion, utilizaremos unicamente una capa de salida. 

* Hidden Layer(s):

Se partira de un modelo de dos capas ocultas, cada capa con cinco neuronas. A partir de esto, se iniciará la experimentación. Como función de activación para cada capa, se utilizará le función ReLu, aunque tambien se experimentará con la función Leaky ReLu.

* Optimizadores:  Se utilizara el optimizador Nesterov y el Adam.

* Se utilizará early stopping, deteniendo el entrenamiento cuando el error comience a aumentar.

* Se utilizará dropout en caso de ser necesario, para combatir el overfitting de la red.

* Como numero de neuronas en cada capa interna, se utilizaran 5 neuronas.

* Como numero de capas internas, el modelo utilizará dos capas.

*Los hiper parametros de numero de capas y numero de neuronas en cada capa oculta, podrán variar dependiendo de si tenemos overfitting o underfitting.

Se aplicara mini batch gradient descent con un batch de 64, y a partir de ahi se harán pruebas.

### Creacion del grafo:

Semantica de las matrices de pesos:
* Filas: Representan la cantidad e parametros entrenables (incluyendo el bias)
* Columnas: Representan la cantidad de neuronas.

In [0]:
def batchsizeDivision(x=xTrain01,y=yTrain01,batchSize=128):
  batchesListX = []
  batchesListY = []
  for i in range((x.shape[0]//batchSize)+1):
    if i>=(x.shape[0]/batchSize):
      batchesListX.append(x[i*batchSize:])
      batchesListY.append(y[i*batchSize:])
    else:
      batchesListX.append(x[i*batchSize:(i+1)*batchSize])
      batchesListY.append(y[i*batchSize:(i+1)*batchSize])
  return (batchesListX,batchesListY)

In [0]:
MLPgraph = tf.Graph()
with MLPgraph.as_default():
  #Carga de la data:
  with tf.name_scope('pureData') as scope:

    #Inicializando matrices de pesos (Xavier inicialization)
    w01 = tf.get_variable(name= "W1", shape=[8, 5],
           initializer=tf.contrib.layers.xavier_initializer())
    w02 = tf.get_variable(name= "W2", shape=[6, 5],
           initializer=tf.contrib.layers.xavier_initializer())
    w03 = tf.get_variable(name= "W3", shape=[6, 1],
            initializer=tf.contrib.layers.xavier_initializer())
    #Aplicando matriz de pesos a capa:
    preX = tf.placeholder(dtype=tf.float32,name='X', shape = [None,7])
    Y = tf.placeholder(dtype=tf.float32,name='Y', shape= [None,1])
    learningRate = tf.placeholder(dtype=tf.float32,name='LR')

    X = tf.pad(tensor=preX,paddings= tf.constant([[0,0],[1,0]]),
               mode='CONSTANT',constant_values=1.0000) #vector de 8x128


  with tf.name_scope('feedFoward') as scope:
    #Aplicando feedFoward - Primera capa oculta
    a01 = tf.nn.relu(tf.matmul(X,w01))
    A01 = tf.pad(tensor=a01,paddings= tf.constant([[0,0],[1,0]]),
               mode='CONSTANT',constant_values=1.0000) 
    #Aplicando feedFoward - Segunda capa oculta
    a02 = tf.nn.relu(tf.matmul(A01,w02))
    A02 = tf.pad(tensor=a02,paddings= tf.constant([[0,0],[1,0]]),
                 mode='CONSTANT',constant_values=1.0000) 
    #Aplicando feedForward - Tercera capa oculta
    a03 = tf.nn.relu(tf.matmul(A02,w03))   
  
  with tf.name_scope('costFunctionMSE') as scope:
    costFunction = 0.5*tf.reduce_mean(tf.pow((Y-a03),2))

  with tf.name_scope('gradient_optimizer') as scope:
    optimizador =  tf.train.AdamOptimizer(learningRate).minimize(costFunction)
    #optimizer = tf.train.AdamOptimizer(learning_rate).minimize(costFunction,var_list=[w01,w02,w03])

In [0]:
def trainingFunction(x,y,lr,epoch,showError):
  tf.reset_default_graph()
  with tf.Session(graph = MLPgraph) as session:
    init=tf.global_variables_initializer().run() 
    for i in range(epoch):
      total,W1,W2, W3,_ = session.run([costFunction, w01,w02,w03,optimizador],feed_dict = {X:x,Y:y,learningRate:lr})
      if i%showError == 0:
        print('Epoch: '+str(i)+' error: '+str(total))

In [79]:
trainingFunction(x=xTrain01,y=yTrain01,lr=0.00000001,epoch=10000,showError=1000)

Epoch: 0 error: 0.09569114
Epoch: 1000 error: 0.0956856
Epoch: 2000 error: 0.095680065
Epoch: 3000 error: 0.09567452
Epoch: 4000 error: 0.09566899
Epoch: 5000 error: 0.09566344
Epoch: 6000 error: 0.095657915
Epoch: 7000 error: 0.09565235
Epoch: 8000 error: 0.095646836
Epoch: 9000 error: 0.09564131
