# Regresión

Datos de [renta de bicis](http://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the hourly bike-sharing CSV into a DataFrame.
bici_hora = pd.read_csv(filepath_or_buffer='datos/hour.csv')

In [None]:
# Display the (rows, columns) dimensions of the loaded table.
bici_hora.shape

In [None]:
# Preview the first rows to inspect the available columns.
bici_hora.head()

In [None]:
# Keep only the target ``cnt`` and the columns used as predictors.
# NOTE(review): ``casual`` and ``registered`` are presumably dropped because
# they are components of ``cnt`` -- confirm against the dataset description.
bici_pred = bici_hora.drop(
    columns=['instant', 'dteday', 'casual', 'registered'],
)

In [None]:
from sklearn.model_selection import GridSearchCV, train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import BaggingRegressor

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    bici_pred.drop(columns='cnt'),  # feature matrix X
    bici_pred['cnt'],               # target vector y
    test_size=0.2,
    random_state=42,
)

In [None]:
# Candidate neighbourhood sizes k = 1..9 explored by the grid search.
knn_grid = {'n_neighbors': [*range(1, 10)]}

# Cross-validated search over the grid, scored with the negated mean
# squared error.
knn_cv = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=knn_grid,
    scoring='neg_mean_squared_error',
)

knn_cv.fit(X_train, y_train)

In [None]:
# Display the parameter combination selected by the grid search.
knn_cv.best_params_

In [None]:
# Display the best cross-validated score. The search was configured with
# scoring='neg_mean_squared_error', so this is a negated MSE.
knn_cv.best_score_

In [None]:
# Predicted (x) versus observed (y) counts on the held-out set; the low
# alpha keeps overlapping points readable.
plt.scatter(x=knn_cv.predict(X_test), y=y_test, alpha=0.2)
plt.xlabel('Pred')
plt.ylabel('Real')

# Red identity line: points on it are perfect predictions.
linea_identidad = [0, y_test.max()]
plt.plot(linea_identidad, linea_identidad, 'r')
plt.show()

## Autoregresión

In [None]:
# Load the daily bike-sharing CSV into a DataFrame.
bici_diario = pd.read_csv(filepath_or_buffer='datos/day.csv')

In [None]:
# Line plot of the ``cnt`` column against the row index.
bici_diario['cnt'].plot()
plt.show()

In [None]:
from pandas.plotting import lag_plot

# Scatter of each value against the next one (lag of 1 row).
lag_plot(bici_diario['cnt'], lag=1)
plt.show()

In [None]:
# Same lag scatter, with values two rows apart.
lag_plot(series=bici_diario['cnt'], lag=2)
plt.show()

In [None]:
# Same lag scatter, with values fifteen rows apart.
lag_plot(series=bici_diario['cnt'], lag=15)
plt.show()

In [None]:
from pandas.plotting import autocorrelation_plot

# Autocorrelation of the ``cnt`` column across lags.
autocorrelation_plot(bici_diario['cnt'])
plt.show()

In [None]:
from statsmodels.tsa.ar_model import AR

In [None]:
ts = AR(bici_diario.cnt.values, bici_diario.dteday.values)

In [None]:
plt.figure(figsize=(20,8))

max_lag = 100

ts_bici = ts.fit(maxlag=max_lag)

bici_diario.cnt.plot()
plt.plot(range(max_lag, bici_diario.cnt.shape[0]), ts_bici.predict(), 'r')
plt.show()