In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error

<h1>Carga de datos</h1>

In [2]:
dataframe = pd.read_csv("./dataset/housing.csv",header=0,delimiter=",")

In [3]:
print(dataframe.dtypes)

RM         float64
LSTAT      float64
PTRATIO    float64
MEDV       float64
dtype: object


In [4]:
x=dataframe.iloc[:,0:3]
y=dataframe.iloc[:,3]
print(y)

0      504000.0
1      453600.0
2      728700.0
3      701400.0
4      760200.0
         ...   
484    470400.0
485    432600.0
486    501900.0
487    462000.0
488    249900.0
Name: MEDV, Length: 489, dtype: float64


In [5]:
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [6]:
desviacion_tipica = dataframe['RM'].std()
print(desviacion_tipica)

0.6436497627572433


In [7]:
media = dataframe['RM'].mean()
print(media)

6.240288343558283


In [8]:
desviacion_tipica = dataframe['MEDV'].std()
print(desviacion_tipica)

165340.27765266784


In [9]:
media = dataframe['MEDV'].mean()
print(media)

454342.9447852761


In [10]:
dataframe_resultados=pd.DataFrame(columns=["Modelo","MSE","R^2","EMA"])

In [11]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


<h1>Hold out con regresion lineal</h1>

In [12]:
model = LinearRegression()

In [13]:
model.fit(X_train, y_train)

In [14]:
y_pred = model.predict(X_test)

In [15]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ema = mean_absolute_error(y_test, y_pred)
print("Error cuadrático medio:", mse)
print("Coeficiente de determinación (R2):", r2)
dataframe_resultados.loc[len(dataframe_resultados)]=["Regresion lineal",mse,r2,ema]

Error cuadrático medio: 6789025559.265889
Coeficiente de determinación (R2): 0.6910934003098512


<h1>Hold out con regresion Ridge</h1>

In [16]:
from sklearn.linear_model import Ridge

In [17]:
ridge_regressor = Ridge(alpha=1.0)  


In [18]:
ridge_regressor.fit(X_train, y_train)

In [19]:
y_pred = ridge_regressor.predict(X_test)

In [20]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ema = mean_absolute_error(y_test, y_pred)
print("Error cuadrático medio:", mse)
print("Coeficiente de determinación (R2):", r2)
dataframe_resultados.loc[len(dataframe_resultados)]=["Regresion ridge",mse,r2,ema]

Error cuadrático medio: 6791921929.524185
Coeficiente de determinación (R2): 0.6909616129303335


<h1>Hold out con SGD Regresor</h1>

In [21]:
from sklearn.linear_model import SGDRegressor

In [22]:
sgd_regressor = SGDRegressor(max_iter=1000, random_state=42)


In [23]:
sgd_regressor.fit(X_train, y_train)

In [24]:
y_pred = sgd_regressor.predict(X_test)

In [25]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ema = mean_absolute_error(y_test, y_pred)
print("Error cuadrático medio:", mse)
print("Coeficiente de determinación (R2):", r2)
dataframe_resultados.loc[len(dataframe_resultados)]=["SGD Regresor",mse,r2,ema]

Error cuadrático medio: 6784427659.198819
Coeficiente de determinación (R2): 0.6913026087835908


<h1>Hold out con SVR</h1>

In [26]:
from sklearn.svm import SVR

In [27]:
model = SVR(kernel='poly')

In [28]:
model.fit(X_train, y_train)

In [29]:
y_pred = model.predict(X_test)

In [30]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ema = mean_absolute_error(y_test, y_pred)
print("Error cuadrático medio:", mse)
print("Coeficiente de determinación (R2):", r2)
dataframe_resultados.loc[len(dataframe_resultados)]=["SVR",mse,r2,ema]

Error cuadrático medio: 22309953707.180016
Coeficiente de determinación (R2): -0.01512240287909905


<h1>Hold out con arbol de decision</h1>

In [31]:
from sklearn.tree import DecisionTreeRegressor

In [32]:
model = DecisionTreeRegressor(random_state=42)

In [33]:
model.fit(X_train, y_train)

In [34]:
y_pred = model.predict(X_test)

In [35]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ema = mean_absolute_error(y_test, y_pred)
print("Error cuadrático medio:", mse)
print("Coeficiente de determinación (R2):", r2)
dataframe_resultados.loc[len(dataframe_resultados)]=["Arbol de decision",mse,r2,ema]

Error cuadrático medio: 6396165000.0
Coeficiente de determinación (R2): 0.7089688992980032


<h1>Hold out con red neuronal</h1>

In [36]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, r2_score

In [37]:
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

In [38]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [39]:
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x22f9cda8590>

In [40]:
y_pred = model.predict(X_test)



In [41]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ema = mean_absolute_error(y_test, y_pred)
print("Error cuadrático medio:", mse)
print("Coeficiente de determinación (R2):", r2)
dataframe_resultados.loc[len(dataframe_resultados)]=["Red neuronal",mse,r2,ema]

Error cuadrático medio: 183882863772.8977
Coeficiente de determinación (R2): -7.366831100207975


<h1>Hold out con MLP Regresor</h1>

In [42]:
from sklearn.neural_network import MLPRegressor

In [43]:
model = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)


In [44]:
model.fit(X_train, y_train)



In [45]:
y_pred = model.predict(X_test)

In [46]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ema = mean_absolute_error(y_test, y_pred)
print("Error cuadrático medio:", mse)
print("Coeficiente de determinación (R2):", r2)
dataframe_resultados.loc[len(dataframe_resultados)]=["MLP Regresor",mse,r2,ema]

Error cuadrático medio: 99532507185.61554
Coeficiente de determinación (R2): -3.5288161143214865


<h1>Resultado final</h1>

In [47]:
print(dataframe_resultados)

              Modelo           MSE       R^2            EMA
0   Regresion lineal  6.789026e+09  0.691093   64277.288657
1    Regresion ridge  6.791922e+09  0.690962   64274.423031
2       SGD Regresor  6.784428e+09  0.691303   64031.613524
3                SVR  2.230995e+10 -0.015122  108953.717021
4  Arbol de decision  6.396165e+09  0.708969   61650.000000
5       Red neuronal  1.838829e+11 -7.366831  404991.427665
6       MLP Regresor  9.953251e+10 -3.528816  299236.798187


In [48]:
dataframe_resultados=dataframe_resultados[dataframe_resultados["R^2"]>0]
print(dataframe_resultados)

              Modelo           MSE       R^2           EMA
0   Regresion lineal  6.789026e+09  0.691093  64277.288657
1    Regresion ridge  6.791922e+09  0.690962  64274.423031
2       SGD Regresor  6.784428e+09  0.691303  64031.613524
4  Arbol de decision  6.396165e+09  0.708969  61650.000000


In [49]:
fila_con_mayor_valor = dataframe_resultados.loc[dataframe_resultados['R^2'].idxmax()]
print(fila_con_mayor_valor["Modelo"])

Arbol de decision
