# Reto 01

**Dr. Antonio Arista Jalife.**

En este reto debes modificar los datos de entrada para mejorar el desempeño. Trata de reducir el MSE lo más que puedas y mantén un reporte de tus resultados para saber qué funcionó mejor.

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# Fetch the California housing dataset and assemble one DataFrame that holds
# the feature columns plus the target values under the "MedHouseVal" column.
housing = fetch_california_housing()
housingDataFrame = pd.DataFrame(
    data=housing.data,
    columns=housing.feature_names,
).assign(MedHouseVal=housing.target)

In [None]:
# Choose the columns used as model inputs (X) and the column to predict (Y).
# Edit the list below to try other feature combinations.
input_columns = ['MedInc', 'AveBedrms', 'Population']
X = housingDataFrame[input_columns]
Y = housingDataFrame['MedHouseVal']

In [None]:
# Split into training and test partitions: 40% of the rows are held out for
# testing, and the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.4, random_state=42
)

# Print the shape of every partition as a quick sanity check.
for partition in (X_train, X_test, Y_train, Y_test):
    print(partition.shape)

(12384, 3)
(8256, 3)
(12384,)
(8256,)


**Entrenamiento y predicciones:**

Trataremos de comprobar qué tal funciona el regresor lineal.

In [None]:
# Create the linear regression model and fit it on the training partition.
lin_model = LinearRegression()
lin_model.fit(X=X_train, y=Y_train)

In [None]:
# Error on the training partition.
y_train_predict = lin_model.predict(X_train)
MSE = mean_squared_error(y_true=Y_train, y_pred=y_train_predict)
print("Entrenamiento: MSE =", MSE, sep="")

# Error on the held-out test partition.
y_test_predict = lin_model.predict(X_test)
MSE = mean_squared_error(y_true=Y_test, y_pred=y_test_predict)
print("Pruebas: MSE =", MSE, sep="")

# Side-by-side table of real vs. predicted values; the index is reset so the
# rows are numbered from 0 instead of keeping the original dataset positions.
df_predicciones = pd.DataFrame(
    {'valor_real': Y_test, 'prediccion': y_test_predict}
).reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE =0.6952041220007095
Pruebas: MSE =0.7074635191484535


Unnamed: 0,valor_real,prediccion
0,0.477,1.146625
1,0.458,1.497797
2,5.00001,1.902644
3,2.186,2.838498
4,2.78,2.012543
5,1.587,2.390518
6,1.982,2.592959
7,1.575,1.999598
8,3.4,2.46506
9,4.466,3.855552


# Reto 02

**Dr. Antonio Arista Jalife.**

En este reto deberás modificar el grado del polinomio y revisar cómo afecta eso al MSE.

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

In [None]:
# Load the California housing dataset into a DataFrame and append the target
# values as the "MedHouseVal" column.
housing = fetch_california_housing()
housingDataFrame = pd.DataFrame(
    data=housing.data,
    columns=housing.feature_names,
).assign(MedHouseVal=housing.target)

# Two input features; "MedHouseVal" is the value to predict.
X = housingDataFrame[['MedInc', 'AveBedrms']]
Y = housingDataFrame['MedHouseVal']

# Hold out 40% of the rows for testing, with a fixed seed for reproducibility.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.4, random_state=5
)

**Ejemplo de polinomio de grado 2**

Ahora probaremos con un polinomio de grado 2

In [None]:
# Polynomial regression of degree 2: expand the inputs into polynomial terms
# and fit a linear model on the expanded features.
poly_model = LinearRegression()
poly = PolynomialFeatures(degree=2)

# Fit the feature expansion on the training partition only, then reuse the
# already-fitted transformer on the test partition (a preprocessor should not
# be re-fitted on test data).
Xpolytrain = poly.fit_transform(X_train)
Xpolytest = poly.transform(X_test)

poly_model.fit(Xpolytrain, Y_train)

# Training error. The predictions are stored under the same name that is passed
# to mean_squared_error, so the metric is computed on THIS model's output and
# not on a stale variable left in the kernel by a previous cell.
y_train_predict = poly_model.predict(Xpolytrain)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE= "+str(MSE))

# Test error on the held-out partition.
y_test_predict = poly_model.predict(Xpolytest)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE= "+str(MSE))

# Real vs. predicted values and their difference; the index is reset so the
# rows are numbered from 0.
df_predicciones = pd.DataFrame({'Valor_real': Y_test, 'prediccion': y_test_predict, 'diferencia':Y_test - y_test_predict})
df_predicciones = df_predicciones.reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE= 1.9577493060513906
Pruebas: MSE= 0.6982901767917583


Unnamed: 0,Valor_real,prediccion,diferencia
0,0.936,1.576446,-0.640446
1,1.536,1.977706,-0.441706
2,1.325,1.523437,-0.198437
3,1.479,1.540762,-0.061762
4,1.207,1.494667,-0.287667
5,1.155,2.509804,-1.354804
6,1.633,1.63368,-0.00068
7,1.603,1.726007,-0.123007
8,3.406,3.082113,0.323887
9,5.00001,3.688743,1.311267


In [None]:
# Polynomial regression of degree 3: expand the inputs into polynomial terms
# and fit a linear model on the expanded features.
poly_model = LinearRegression()
poly = PolynomialFeatures(degree=3)

# Fit the feature expansion on the training partition only, then reuse the
# already-fitted transformer on the test partition (a preprocessor should not
# be re-fitted on test data).
Xpolytrain = poly.fit_transform(X_train)
Xpolytest = poly.transform(X_test)

poly_model.fit(Xpolytrain, Y_train)

# Training error. The predictions are stored under the same name that is passed
# to mean_squared_error, so the metric is computed on THIS model's output and
# not on a stale variable left in the kernel by a previous cell.
y_train_predict = poly_model.predict(Xpolytrain)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE= "+str(MSE))

# Test error on the held-out partition.
y_test_predict = poly_model.predict(Xpolytest)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE= "+str(MSE))

# Real vs. predicted values and their difference; the index is reset so the
# rows are numbered from 0.
df_predicciones = pd.DataFrame({'Valor_real': Y_test, 'prediccion': y_test_predict, 'diferencia':Y_test - y_test_predict})
df_predicciones = df_predicciones.reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE= 1.9577493060513906
Pruebas: MSE= 0.6876549545668629


Unnamed: 0,Valor_real,prediccion,diferencia
0,0.936,1.54251,-0.60651
1,1.536,1.904081,-0.368081
2,1.325,1.019615,0.305385
3,1.479,1.513915,-0.034915
4,1.207,1.476283,-0.269283
5,1.155,2.467328,-1.312328
6,1.633,1.590204,0.042796
7,1.603,1.670807,-0.067807
8,3.406,3.142898,0.263102
9,5.00001,3.892193,1.107817


In [None]:
# Polynomial regression of degree 4: expand the inputs into polynomial terms
# and fit a linear model on the expanded features.
poly_model = LinearRegression()
poly = PolynomialFeatures(degree=4)

# Fit the feature expansion on the training partition only, then reuse the
# already-fitted transformer on the test partition (a preprocessor should not
# be re-fitted on test data).
Xpolytrain = poly.fit_transform(X_train)
Xpolytest = poly.transform(X_test)

poly_model.fit(Xpolytrain, Y_train)

# Training error. The predictions are stored under the same name that is passed
# to mean_squared_error, so the metric is computed on THIS model's output and
# not on a stale variable left in the kernel by a previous cell.
y_train_predict = poly_model.predict(Xpolytrain)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE= "+str(MSE))

# Test error on the held-out partition.
y_test_predict = poly_model.predict(Xpolytest)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE= "+str(MSE))

# Real vs. predicted values and their difference; the index is reset so the
# rows are numbered from 0.
df_predicciones = pd.DataFrame({'Valor_real': Y_test, 'prediccion': y_test_predict, 'diferencia':Y_test - y_test_predict})
df_predicciones = df_predicciones.reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE= 1.9577493060513906
Pruebas: MSE= 0.7008601334810598


Unnamed: 0,Valor_real,prediccion,diferencia
0,0.936,1.52854,-0.59254
1,1.536,1.897501,-0.361501
2,1.325,0.66351,0.66149
3,1.479,1.507006,-0.028006
4,1.207,1.470119,-0.263119
5,1.155,2.47377,-1.31877
6,1.633,1.577534,0.055466
7,1.603,1.661098,-0.058098
8,3.406,3.144228,0.261772
9,5.00001,3.875875,1.124135


In [None]:
# Polynomial regression of degree 5: expand the inputs into polynomial terms
# and fit a linear model on the expanded features.
poly_model = LinearRegression()
poly = PolynomialFeatures(degree=5)

# Fit the feature expansion on the training partition only, then reuse the
# already-fitted transformer on the test partition (a preprocessor should not
# be re-fitted on test data).
Xpolytrain = poly.fit_transform(X_train)
Xpolytest = poly.transform(X_test)

poly_model.fit(Xpolytrain, Y_train)

# Training error. The predictions are stored under the same name that is passed
# to mean_squared_error, so the metric is computed on THIS model's output and
# not on a stale variable left in the kernel by a previous cell.
y_train_predict = poly_model.predict(Xpolytrain)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE= "+str(MSE))

# Test error on the held-out partition.
y_test_predict = poly_model.predict(Xpolytest)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE= "+str(MSE))

# Real vs. predicted values and their difference; the index is reset so the
# rows are numbered from 0.
df_predicciones = pd.DataFrame({'Valor_real': Y_test, 'prediccion': y_test_predict, 'diferencia':Y_test - y_test_predict})
df_predicciones = df_predicciones.reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE= 1.9577493060513906
Pruebas: MSE= 0.6927922956027325


Unnamed: 0,Valor_real,prediccion,diferencia
0,0.936,1.528889,-0.592889
1,1.536,1.892781,-0.356781
2,1.325,2.301252,-0.976252
3,1.479,1.509427,-0.030427
4,1.207,1.470623,-0.263623
5,1.155,2.470173,-1.315173
6,1.633,1.576515,0.056485
7,1.603,1.661484,-0.058484
8,3.406,3.134368,0.271632
9,5.00001,3.879676,1.120334
