In [1]:
import numpy as np
import matplotlib.pyplot as plt 

import pandas as pd  
import seaborn as sns 

from sklearn.model_selection import train_test_split

%matplotlib inline

# Cargando los datos de entrada

In [2]:
#Se carga el conjunto de datos California Housing, cuyo objetivo es predecir el valor promedio de una casa 
#con base en 8 características que tienen que ver con el número de habitaciones promedios de la casa, la antiguedad, 
#el ingreso promedio de los habitantes de la casa, la ubicación, etc.

from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

X = pd.DataFrame(data=housing.data, columns=housing.feature_names)
print(X.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
dtypes: float64(8)
memory usage: 1.3 MB
None


In [3]:
y = housing.target
print(y)

[4.526 3.585 3.521 ... 0.923 0.847 0.894]


In [4]:
X.isnull().sum()

MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
dtype: int64

# Escalamiento de los datos de entrada

In [5]:
#Escalamiento de los datos de entrada a 1 y 0

from sklearn.preprocessing import MinMaxScaler

col_names = housing.feature_names
X = MinMaxScaler().fit_transform(X)
X = pd.DataFrame(X, columns = col_names )
X.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,0.539668,0.784314,0.043512,0.020469,0.008941,0.001499,0.567481,0.211155
1,0.538027,0.392157,0.038224,0.018929,0.06721,0.001141,0.565356,0.212151
2,0.466028,1.0,0.052756,0.02194,0.013818,0.001698,0.564293,0.210159
3,0.354699,1.0,0.035241,0.021929,0.015555,0.001493,0.564293,0.209163
4,0.230776,1.0,0.038534,0.022166,0.015752,0.001198,0.564293,0.209163


# Partición de los datos de entrada en entrenamiento y prueba

In [6]:
#Partición del conjunto de datos en entrenamiento y prueba. Un 30 por ciento de los datos totales se reserva para prueba
Xtrain,Xtest,Ytrain,Ytest = train_test_split(X,y,test_size=0.3,random_state=420)

In [7]:
print("Los datos de entrenamiento contienen %d registros " % (len(Xtrain)))
print("Los datos de prueba contienen %d registros " % (len(Xtest)))

Los datos de entrenamiento contienen 14448 registros 
Los datos de prueba contienen 6192 registros 


# Perceptron multicapa

In [8]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

model_MPL = MLPRegressor()

#### Prueba 1

In [9]:
#Prueba 1: modelos con una sola capa oculta
MPL_parameters = [{'hidden_layer_sizes' : [(10), (20), (30), (50)], 'max_iter':[750, 1000], 
                   'alpha': [0.001, 0.01, 0.1]}]

model_MPL = MLPRegressor()
grid_MPL1 = GridSearchCV(model_MPL, MPL_parameters, cv=5, scoring='neg_mean_squared_error')
grid_result_MPL1 = grid_MPL1.fit(Xtrain, Ytrain)
print(grid_result_MPL1.best_params_)

{'alpha': 0.001, 'hidden_layer_sizes': 50, 'max_iter': 1000}


In [14]:
Ytest_MLP1_predict = grid_result_MPL1.predict(Xtest)
mse = mean_squared_error(Ytest, Ytest_MLP1_predict)
r2 = r2_score(Ytest, Ytest_MLP1_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('MSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

Evaluacion sobre el conjunto de prueba:  0.3535635227761741
The model performance for testing set
--------------------------------------
MSE is 0.5946120775565983
R2 score is 0.7365207586439875


#### Prueba 2

In [16]:
#prueba 2
MPL_parameters = [{'hidden_layer_sizes' : [(50), (25, 25), (50,50)], 
                   'max_iter':[750, 1000, 1500], 
                   'alpha': [0.001, 0.01]}]

grid_MPL2 = GridSearchCV(model_MPL, MPL_parameters, cv=5, scoring='neg_mean_squared_error')
grid_result_MPL2 = grid_MPL2.fit(Xtrain, Ytrain) 
print(grid_result_MPL2.best_params_)

{'alpha': 0.001, 'hidden_layer_sizes': (50, 50), 'max_iter': 1500}


In [17]:
Ytest_MLP2_predict = grid_result_MPL2.predict(Xtest)
rmse = (np.sqrt(mean_squared_error(Ytest, Ytest_MLP2_predict)))
r2 = r2_score(Ytest, Ytest_MLP2_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
RMSE is 0.5623543610855011
R2 score is 0.7643328298969219


#### Prueba 3

In [13]:
#prueba 3
MPL_parameters = [{'hidden_layer_sizes' : [(50, 50, 50), (75, 75, 50), (50,50,50,50)], 
                   'max_iter':[1000, 1500, 2000], 
                   'alpha': [0.001, 0.01]}]

grid_MPL3 = GridSearchCV(model_MPL, MPL_parameters, cv=5, scoring='neg_mean_squared_error')
grid_result_MPL3 = grid_MPL3.fit(Xtrain, Ytrain)

print('best prarams:', grid_MPL3.best_params_)

best prarams: {'alpha': 0.001, 'hidden_layer_sizes': (75, 75, 50), 'max_iter': 1000}


In [16]:
Ytest_MLP3_predict = grid_result_MPL3.predict(Xtest)
rmse = (np.sqrt(mean_squared_error(Ytest, Ytest_MLP3_predict)))
r2 = r2_score(Ytest, Ytest_MLP3_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
RMSE is 0.5404329033739587
R2 score is 0.7823480728743076


# Keras

In [10]:
import tensorflow as tf
from tensorflow import keras

np.random.seed(42)
tf.random.set_seed(42)

In [11]:
Xtrain2, Xvalid, Ytrain2, Yvalid = train_test_split(Xtrain, Ytrain, random_state=42)

print("Los datos de entrenamiento contienen %d registros " % (len(Xtrain2)))
print("Los datos de prueba contienen %d registros " % (len(Xvalid)))

Los datos de entrenamiento contienen 10836 registros 
Los datos de prueba contienen 3612 registros 


In [15]:

model = keras.models.Sequential([
    keras.layers.Dense(75, activation="relu", input_shape = Xtrain.shape[1:]),
    keras.layers.Dense(75, activation="relu"),
    keras.layers.Dense(50, activation="relu"),
    keras.layers.Dense(1)
])
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit(Xtrain2, Ytrain2, epochs=100, validation_data=(Xvalid, Yvalid))

mse_test = model.evaluate(Xtest, Ytest)
print('MSE con datos de prueba is {}'.format(mse_test))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
MSE con datos de prueba is 0.4786602854728699


In [21]:
model2 = keras.models.Sequential([
    keras.layers.Dense(80, activation="relu", input_shape = Xtrain.shape[1:]),
    keras.layers.Dense(80, activation="relu"),
    keras.layers.Dense(80, activation="relu"),
    keras.layers.Dense(80, activation="sigmoid"),
    keras.layers.Dense(1)
])
model2.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model2.fit(Xtrain2, Ytrain2, epochs=100, validation_data=(Xvalid, Yvalid))

mse_test = model2.evaluate(Xtest, Ytest)
print('MSE con datos de prueba is {}'.format(mse_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
MSE con datos de prueba is 0.4928724765777588
