# **Criando um modelo de regressão no Keras**




## Instalação e Importações

In [1]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error


## Baixar e Avaliar Conjunto de Dados


In [2]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
concrete_data.shape

(1030, 9)

In [4]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [5]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

# Separação de Preditores e Alvo.

Os preditores contêm todas as características ou variáveis independentes do conjunto de dados, exceto a variável que estamos interessados em prever (Força). O alvo contém apenas a variável que queremos prever (Força). Essa separação é essencial para treinar um modelo de aprendizado de máquina, onde os preditores são usados para aprender padrões que ajudam a prever a variável alvo.

In [6]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [7]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [8]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

# Divisão de Treinamento e Teste

In [9]:
from sklearn.model_selection import train_test_split

# Splitting the data into training and testing, with 30% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

# Checking the size of datasets
print(f'Training set size: {X_train.shape[0]}')
print(f'Test set size: {X_test.shape[0]}')


Training set size: 721
Test set size: 309


## Importando Keras


In [10]:
import keras

In [11]:
from keras.models import Sequential
from keras.layers import Dense

<a id='item33'></a>


## A. Construindo um Modelo Básico


In [1]:
def regression_model():
    #define model type
    model = Sequential()
    
     #define layers
    model.add(Dense(10, activation='relu', input_shape=(predictors.shape[1],)))
    model.add(Dense(1))
    
    #compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model







# Treinando a Rede Neural e Fazendo Previsões

In [13]:
#build the model
model_a = regression_model()

#empty list for mean squared errors
mses = np.array([])

#fit the model (50 times)
for i in range(1, 50):
    model_a.fit(X_train, y_train, epochs=50, verbose=0)
    y_prediction = model_a.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction))  
#show array of mean_squared_errors
mses

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

array([87.08731074, 72.43760341, 57.01759305, 56.692045  , 50.17074653,
       48.33100754, 49.6020875 , 47.48835568, 45.69357929, 45.58869506,
       45.24641059, 46.39132095, 44.67391515, 43.92879307, 47.75771962,
       45.37913358, 46.82390845, 46.90638259, 43.37429693, 41.9794084 ,
       42.18320174, 41.42085776, 44.17950841, 41.56103368, 49.0568413 ,
       44.80011376, 41.73415156, 41.63830381, 41.13996141, 40.88068131,
       41.56207627, 42.59243247, 40.7708029 , 41.46848542, 40.93489386,
       41.1735887 , 40.89482792, 42.16700043, 42.24432186, 42.17058033,
       42.89055912, 42.51600706, 42.48425367, 41.61327854, 44.44317002,
       41.22174237, 46.83611713, 41.5467352 , 41.93030906])

## Média do Erro e Desvio Padrão

In [14]:
a_std = mses.std()
a_mean = mses.mean()
print("Error Mean: " + str(a_mean) + "\n"
     + "Standard Deviation: " + str(a_std))

Error Mean: 45.767880615413155
Standard Deviation: 8.034459588693606


# B. Normalizar os Dados

In [15]:
#normalize predictors
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


## Divisão de Treinamento e Teste dos Dados Normalizados

In [16]:
from sklearn.model_selection import train_test_split

# Splitting the data into training and testing, with 30% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

# Checking the size of datasets
print(f'Training set size: {X_train.shape[0]}')
print(f'Test set size: {X_test.shape[0]}')

Training set size: 721
Test set size: 309


In [17]:
n_cols = predictors_norm.shape[1] # number of predictors

##  Construindo a Rede Neural

In [18]:
model_b = regression_model()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Treinando a Rede Neural e Fazendo Previsões

In [19]:
#empty list for mean squared errors
mses = np.array([])

#fit the model (50 times)
for i in range(1, 50):
    model_b.fit(X_train, y_train, epochs=50, verbose=0)
    y_prediction = model_b.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction)) #add the mean_squared_error on the mses list
    
#show array of mean_squared_errors
mses

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

array([161.76882358, 111.69762636, 108.80492518, 123.96041488,
       116.32244155, 109.30146542, 112.75034201, 107.40800867,
       107.84196636, 108.20871333, 110.21111076, 117.04937022,
       113.86127731, 107.73708767, 109.93455103, 135.4604344 ,
       111.54559269, 126.13854823, 109.05778846, 108.45418843,
       109.54716751, 109.12441409, 116.76312038, 111.71115395,
       108.13152731, 116.02111733, 108.95658658, 113.065497  ,
       109.94426662, 124.46773068, 115.08428346, 117.93786238,
       107.78552219, 110.27440036, 109.16903813, 107.01137889,
       125.84054902, 108.27489127, 128.22909507, 110.58707693,
       108.4913129 , 122.41287677, 108.34297239, 111.95782799,
       108.86250365, 116.97232109, 122.52718428, 116.14965352,
       121.99945261])

## Média do Erro e Desvio Padrão

In [20]:
b_std = mses.std()
b_mean = mses.mean()
print("Error Mean: " + str(b_mean) + "\n"
     + "Standard Deviation: " + str(b_std))

Error Mean: 114.75831552872192
Standard Deviation: 9.440775137566636


## Média dos Erros Quadráticos Médios Comparados aos do Passo A.

In [21]:
print("Mean of Step A: " + str(a_mean) + "\n" 
      + "Mean of Step B: " + str(b_mean) + "\n" 
      + "Difference: " + str(a_mean - b_mean))

Mean of Step A: 45.767880615413155
Mean of Step B: 114.75831552872192
Difference: -68.99043491330877


**Normalizing the data, in this case, is more accurate**

# C. Aumentar o Número de Épocas para 100


In [22]:
model_c = regression_model()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Treinando e Fazendo Previsões com o Modelo


In [None]:
#empty list for mean squared errors
mses = np.array([])

#fit the model (50 times)
for i in range(1, 50):
    model_c.fit(X_train, y_train, epochs=100, verbose=0)
    y_prediction = model_c.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction)) #add the mean_squared_error on the mses list
    
#show array of mean_squared_errors
mses

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━

## Média do Erro e Desvio Padrão

In [None]:
c_std = mses.std()
c_mean = mses.mean()
print("Error Mean: " + str(c_mean) + "\n"
     + "Standard Deviation: " + str(c_std))

## Média dos Erros Quadráticos Médios Comparados aos do Passo B

In [None]:
print("Error Mean of Step B: " + str(b_mean) + "\n" 
      + "Error Mean of Step C: " + str(c_mean) + "\n" 
      + "Difference: " + str(b_mean - c_mean))

**Aumentar o número de épocas, neste caso, é mais preciso**

# D. Aumentar o Número de Camadas Ocultas (3 Camadas)

In [None]:
def regression_model_d():
    # Define model type
    model = Sequential()
    
    # Define layers
    model.add(Dense(10, activation='relu', input_shape=(predictors_norm.shape[1],)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # Compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model






## Construir o Modelo


In [None]:
model_d = regression_model_d()

## Treinando a Rede Neural e Fazendo Previsões

In [None]:
# Empty list for mean squared errors
mses = np.array([])

# Fit the model (50 times)
for i in range(1, 50):
    model_d.fit(X_train, y_train, epochs=100, verbose=0)
    y_prediction = model_d.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction)) # Add the mean_squared_error to the mses list
    
# Show array of mean_squared_errors
print(mses)# Create an instance of the model


## Média do Erro e Desvio Padrão

In [None]:
d_std = mses.std()
d_mean = mses.mean()
print("Error Mean: " + str(d_mean) + "\n"
     + "Standard Deviation: " + str(d_std))

##  Média dos Erros Quadráticos Médios Comparados aos do Passo C

In [None]:
print("Error Mean of Step C: " + str(c_mean) + "\n" 
      + "Error Mean of Step D: " + str(d_mean) + "\n" 
      + "Difference: " + str(c_mean - d_mean))

**Aumentar o número de camadas ocultas, neste caso, é mais preciso**