# In [None]:  (Jupyter cell prompt left over from the notebook export)
import matplotlib
import numpy as np
import seaborn as sns
import pandas as pd
import sklearn as sk
import sys
import matplotlib.pyplot as plt
from jedi.api.refactoring import inline

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import PolynomialFeatures

sns.set_style('whitegrid')
# %matplotlib inline  (notebook magic; only meaningful inside Jupyter)


# Modelling: build a synthetic data set with a cubic trend plus uniform noise.
np.random.seed(42)      # fixed seed so the generated noise is reproducible
n_samples = 100

# n_samples evenly spaced points in [0, 10]. The count is tied to n_samples
# (instead of a second hard-coded 100) so X and the noise vector below always
# have the same length.
X = np.linspace(0, 10, n_samples)
rng = np.random.rand(n_samples) * 100   # model error term: uniform noise in [0, 100)

y = X ** 3 + rng + 100
plt.figure(figsize=(10, 8))
plt.scatter(X, y)
plt.show()


# 1. Linear regression on the synthetic data
# The estimator is given the 1-D X as a single-column matrix.
X_col = X.reshape(-1, 1)
lr = LinearRegression().fit(X_col, y)
model_pred = lr.predict(X_col)
linear_r2 = r2_score(y, model_pred)

# Scatter of the data with the fitted straight line on top.
plt.figure(figsize=(10, 8))
plt.scatter(X, y)
plt.plot(X, model_pred)
plt.show()
print(linear_r2)


# 2. Second-degree polynomial (non-linear) regression.
#    All the specification tests of linear regression still apply.
poly_reg = PolynomialFeatures(degree=2)
# reshape(-1, 1) turns the 1-D array into a (rows, columns) single-column matrix
X_poly = poly_reg.fit_transform(X.reshape(-1, 1))

lin_reg_2 = LinearRegression().fit(X_poly, y.reshape(-1, 1))
y_pred = lin_reg_2.predict(X_poly)
quadratic_r2 = r2_score(y, y_pred)

# Scatter of the data with the fitted quadratic curve on top.
plt.figure(figsize=(10, 8))
plt.scatter(X, y)
plt.plot(X, y_pred)
plt.show()
print(quadratic_r2)

# 3. Real data: Boston Housing

# Load the Boston Housing data from a local CSV file with pandas
filePath = (
    "C:/Users/Armando/Documents/Ciencia_Datos/Udemy/Datos"
    "/BostonHousingDatasetCSV.csv"
)

# headers = ['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'b', 'lstat', 'medv']
pd.set_option('display.float_format', '{:,.3f}'.format)  # display floats with 3 decimals only
df = pd.read_csv(filePath)

# Pairwise correlation matrix of the columns
print(df.corr())

# 'dis' is used as the feature and 'nox' as the target in the fits that follow
X_boston = df['dis'].to_numpy()
Y_boston = df['nox'].to_numpy()

plt.figure(figsize=(12, 8))
plt.scatter(X_boston, Y_boston)
plt.show()

# 3 a) Linear regression of Y_boston on X_boston
X_boston_col = X_boston.reshape(-1, 1)   # single-column feature matrix
lr = LinearRegression()
lr.fit(X_boston_col, Y_boston)
model_pred = lr.predict(X_boston_col)

plt.figure(figsize=(12, 8))
plt.scatter(X_boston, Y_boston)
plt.plot(X_boston, model_pred)
# Show the figure here, like every other section does; without this call the
# figure stays open and only appears when a later section calls plt.show().
plt.show()
print("R2 score = {:.3f}".format(r2_score(Y_boston, model_pred)))


# 3 b) Non-linear regression: degree-2 polynomial of X_boston

poly_reg = PolynomialFeatures(degree=2)
X_poly_b = poly_reg.fit_transform(X_boston.reshape(-1, 1))
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly_b, Y_boston)

# NOTE: a stray `LinearRegression(..., normalize=False)` expression (a pasted
# notebook repr) used to sit here. It built an unused estimator and fails on
# scikit-learn versions where the `normalize` argument no longer exists.

# Grid used to draw the fitted curve: step 1 from the minimum up to (but not
# including) the maximum of X_boston, shaped as a single column.
X_fit = np.arange(X_boston.min(), X_boston.max(), 1)[:, np.newaxis]
print(X_fit)

# Only transform the grid: the polynomial expansion was already fitted on the
# training data, and X_fit is already a column so no extra reshape is needed.
y_pred = lin_reg_2.predict(poly_reg.transform(X_fit))

plt.figure(figsize=(10, 8))
plt.scatter(X_boston, Y_boston)
plt.plot(X_fit, y_pred)
plt.show()
# R2 is computed on the training points, not on the plotting grid.
print("R2 score = {:.3f}".format(r2_score(Y_boston, lin_reg_2.predict(X_poly_b))))



# 3 c) Cubic: degree-3 polynomial of X_boston
poly_reg = PolynomialFeatures(degree=3)
X_poly_b = poly_reg.fit_transform(X_boston.reshape(-1, 1))
lin_reg_3 = LinearRegression()
lin_reg_3.fit(X_poly_b, Y_boston)

# NOTE: a stray `LinearRegression(..., normalize=False)` expression (a pasted
# notebook repr) used to sit here. It built an unused estimator and fails on
# scikit-learn versions where the `normalize` argument no longer exists.

# Grid used to draw the fitted curve: step 1 from the minimum up to (but not
# including) the maximum of X_boston, shaped as a single column.
X_fit = np.arange(X_boston.min(), X_boston.max(), 1)[:, np.newaxis]
print(X_fit)

# Only transform the grid: the polynomial expansion was already fitted on the
# training data, and X_fit is already a column so no extra reshape is needed.
y_pred = lin_reg_3.predict(poly_reg.transform(X_fit))

plt.figure(figsize=(10, 8))
plt.scatter(X_boston, Y_boston)
plt.plot(X_fit, y_pred)
plt.show()
# R2 is computed on the training points, not on the plotting grid.
print("R2 score = {:.3f}".format(r2_score(Y_boston, lin_reg_3.predict(X_poly_b))))
