# Ejemplo 02
**_Dr. Antonio Arista Jalife._**

En este ejemplo crearemos un sistema de regresión polinomial: utilizaremos los grados de un polinomio para curvar la línea (o el plano) y evaluar qué tanto mejora la predicción.

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

In [2]:
# Fetch the California housing dataset through scikit-learn's loader.
housing = fetch_california_housing()

# Feature table as a DataFrame, with the regression target appended
# as the "MedHouseVal" column.
housingDataFrame = pd.DataFrame(
    data=housing.data,
    columns=housing.feature_names,
).assign(MedHouseVal=housing.target)

# Only two predictors are used in this example; the target is MedHouseVal.
X = housingDataFrame.loc[:, ['MedInc', 'AveBedrms']]
Y = housingDataFrame.loc[:, 'MedHouseVal']

# Hold out 40% of the rows for testing; the fixed seed keeps the split
# identical across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=5,
)

### Ejemplo lineal:
Primero probaremos con predicciones lineales

In [3]:
# Baseline: linear regression fitted on the raw (untransformed) features.
lin_model = LinearRegression().fit(X_train, Y_train)

# Error measured on the same rows the model was fitted on.
y_train_predict = lin_model.predict(X_train)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE =", MSE, sep="")

# Error measured on the held-out rows.
y_test_predict = lin_model.predict(X_test)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE =", MSE, sep="")

# Actual vs. predicted values for the test split, plus their difference.
# The index inherited from Y_test is dropped so rows are numbered from 0.
df_predicciones = pd.DataFrame(
    {
        'valor_real': Y_test,
        'prediccion': y_test_predict,
        'diferencia': Y_test - y_test_predict,
    }
).reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE =0.697532017688797
Pruebas: MSE =0.7068301704412728


Unnamed: 0,valor_real,prediccion,diferencia
0,0.936,1.586953,-0.650953
1,1.536,1.95222,-0.41622
2,1.325,1.394057,-0.069057
3,1.479,1.555578,-0.076578
4,1.207,1.514277,-0.307277
5,1.155,2.457804,-1.302804
6,1.633,1.638246,-0.005246
7,1.603,1.72192,-0.11892
8,3.406,3.030692,0.375308
9,5.00001,3.682019,1.317991


### Ejemplo de polinomio de grado 2
Ahora probaremos con un polinomio de grado 2

In [4]:
# Degree-2 polynomial regression: expand the features into polynomial terms,
# then fit an ordinary linear model on the expanded features.
poly_model = LinearRegression()
poly = PolynomialFeatures(degree=2)

# Fit the feature expansion on the training split ONLY, then reuse that same
# fitted transformer for the test split. Calling fit_transform on the test
# data would re-fit the transformer on held-out rows; fit-on-train /
# transform-on-test is the pattern that stays correct if this step is ever
# swapped for a transformer that learns statistics from the data.
Xpolytrain = poly.fit_transform(X_train)
Xpolytest = poly.transform(X_test)

poly_model.fit(Xpolytrain, Y_train)

# Error measured on the same rows the model was fitted on.
y_train_predict = poly_model.predict(Xpolytrain)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE ="+str(MSE))

# Error measured on the held-out rows.
y_test_predict = poly_model.predict(Xpolytest)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE ="+str(MSE))

# Actual vs. predicted values for the test split, plus their difference.
# The index inherited from Y_test is dropped so rows are numbered from 0.
df_predicciones = pd.DataFrame({'valor_real':Y_test, 'prediccion':y_test_predict, 'diferencia':Y_test-y_test_predict})
df_predicciones = df_predicciones.reset_index(drop = True)
df_predicciones.head(10)

Entrenamiento: MSE =0.6926995352961527
Pruebas: MSE =0.6982901767917583


Unnamed: 0,valor_real,prediccion,diferencia
0,0.936,1.576446,-0.640446
1,1.536,1.977706,-0.441706
2,1.325,1.523437,-0.198437
3,1.479,1.540762,-0.061762
4,1.207,1.494667,-0.287667
5,1.155,2.509804,-1.354804
6,1.633,1.63368,-0.00068
7,1.603,1.726007,-0.123007
8,3.406,3.082113,0.323887
9,5.00001,3.688743,1.311267
