# Ejemplo 02
**_Dr. Antonio Arista Jalife._**

En este ejemplo crearemos un sistema de regresión polinomial: utilizaremos los grados de un polinomio para curvar la línea (o el plano) y evaluar qué tanto mejora la predicción.

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

In [8]:
# Load the California housing data and attach the target as an extra column.
housing = fetch_california_housing()
housingDataFrame = pd.DataFrame(
    housing.data,
    columns=housing.feature_names,
).assign(MedHouseVal=housing.target)

# Separate the target column from the predictors.
Y = housingDataFrame['MedHouseVal']
X = housingDataFrame.drop(columns='MedHouseVal')

# Hold out 40% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=5,
)

### Ejemplo lineal:
Primero probaremos con predicciones lineales

In [9]:
# Fit an ordinary least-squares model on the raw (untransformed) features.
lin_model = LinearRegression().fit(X_train, Y_train)

# Error on the data the model was trained on.
y_train_predict = lin_model.predict(X_train)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE =" + str(MSE))

# Error on the held-out split.
y_test_predict = lin_model.predict(X_test)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE =" + str(MSE))

# Side-by-side table of real value, prediction and residual for the test rows,
# re-indexed from 0 so the first ten rows are easy to read.
df_predicciones = pd.DataFrame(
    {
        'valor_real': Y_test,
        'prediccion': y_test_predict,
        'diferencia': Y_test - y_test_predict,
    }
).reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE =0.5184098069585754
Pruebas: MSE =0.5337602496187848


Unnamed: 0,valor_real,prediccion,diferencia
0,0.936,1.69647,-0.76047
1,1.536,1.940313,-0.404313
2,1.325,0.958007,0.366993
3,1.479,1.734499,-0.255499
4,1.207,1.750087,-0.543087
5,1.155,1.856828,-0.701828
6,1.633,1.468177,0.164823
7,1.603,1.959325,-0.356325
8,3.406,3.18984,0.21616
9,5.00001,3.938889,1.061121


### Ejemplo de polinomio de grado 2
Ahora probaremos con un polinomio de grado 2

In [10]:
# Regressor that will be trained on the polynomially expanded features.
poly_model = LinearRegression()
poly = PolynomialFeatures(degree=2)

# Fit the transformer on the training split only and reuse that fitted
# transformer for the test split, so test data never takes part in fitting.
Xpolytrain = poly.fit_transform(X_train)
Xpolytest = poly.transform(X_test)

# The expansion contains a bias column of ones, every original feature, every
# squared feature and every pairwise product. With the 8 California-housing
# features that is 1 + 8 + 8 + 28 = 45 columns (check with Xpolytrain.shape).
Xpolytrain

array([[ 1.00000000e+00,  5.52280000e+00,  1.40000000e+01, ...,
         1.42581760e+03, -4.56103040e+03,  1.45902241e+04],
       [ 1.00000000e+00,  1.96500000e+00,  2.30000000e+01, ...,
         1.73056000e+03, -5.13385600e+03,  1.52300281e+04],
       [ 1.00000000e+00,  4.12500000e+00,  1.80000000e+01, ...,
         1.22290090e+03, -4.21108740e+03,  1.45009764e+04],
       ...,
       [ 1.00000000e+00,  5.80960000e+00,  2.00000000e+01, ...,
         1.17443290e+03, -4.06956250e+03,  1.41015625e+04],
       [ 1.00000000e+00,  3.59430000e+00,  1.40000000e+01, ...,
         1.36826010e+03, -4.51204020e+03,  1.48791204e+04],
       [ 1.00000000e+00,  4.34820000e+00,  9.00000000e+00, ...,
         1.25032960e+03, -4.20996160e+03,  1.41752836e+04]])

In [11]:

# Train the linear regressor on the degree-2 expanded training features.
poly_model.fit(Xpolytrain, Y_train)

# Error on the data the model was trained on.
y_train_predict = poly_model.predict(Xpolytrain)
MSE = mean_squared_error(Y_train, y_train_predict)
print("Entrenamiento: MSE =" + str(MSE))

# Error on the held-out split (expanded with the same polynomial transformer).
y_test_predict = poly_model.predict(Xpolytest)
MSE = mean_squared_error(Y_test, y_test_predict)
print("Pruebas: MSE =" + str(MSE))

# Side-by-side table of real value, prediction and residual for the test rows,
# re-indexed from 0 so the first ten rows are easy to read.
df_predicciones = pd.DataFrame(
    {
        'valor_real': Y_test,
        'prediccion': y_test_predict,
        'diferencia': Y_test - y_test_predict,
    }
).reset_index(drop=True)
df_predicciones.head(10)

Entrenamiento: MSE =0.414578560127964
Pruebas: MSE =5.150029728111247


Unnamed: 0,valor_real,prediccion,diferencia
0,0.936,1.527609,-0.591609
1,1.536,1.972667,-0.436667
2,1.325,1.482197,-0.157197
3,1.479,1.417488,0.061512
4,1.207,1.778217,-0.571217
5,1.155,1.460064,-0.305064
6,1.633,1.465058,0.167942
7,1.603,2.082916,-0.479916
8,3.406,3.474848,-0.068848
9,5.00001,4.463894,0.536116


In [6]:
from sklearn.metrics import mean_absolute_percentage_error as mape

In [12]:
# Mean absolute percentage error on the test split. y_test_predict holds the
# predictions of whichever model cell ran last (the degree-2 model when the
# notebook is executed top to bottom).
mape(y_true=Y_test, y_pred=y_test_predict)

0.2911313043996396