In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

%matplotlib inline

In [None]:
data = pd.read_csv("../../data/bikes_rent.csv")

In [None]:
data.head()

In [None]:
data["cnt"].hist();

In [None]:
X_df, y_series = data.drop("cnt", axis=1), data["cnt"]

In [None]:
X_df.shape

In [None]:
# Scatter every feature against the target, one panel per feature.
# The number of grid rows is derived from the number of features, so the cell
# keeps working when columns are added or removed (a fixed 4x3 grid would only
# fit 12 panels). With 12 features this yields a 4x3 grid on a 12x8 figure.
n_grid_cols = 3
n_grid_rows = max(-(-X_df.shape[1] // n_grid_cols), 1)  # ceiling division

plt.figure(figsize=(12, 2 * n_grid_rows))
for i, col in enumerate(X_df.columns):
    plt.subplot(n_grid_rows, n_grid_cols, i + 1)
    plt.scatter(X_df[col], y_series)
    plt.title(col)
# Keep panel titles from overlapping the axes of the row above.
plt.tight_layout()

In [None]:
# Pairwise correlations between all columns (target included), drawn as a heatmap.
corr_matrix = data.corr()
sns.heatmap(corr_matrix);

In [None]:
from sklearn.linear_model import Lasso, LassoCV, LinearRegression, Ridge, RidgeCV

In [None]:
# Plain least squares. The `normalize` flag was deprecated in scikit-learn 1.0
# and removed in 1.2, so passing it raises TypeError on current releases.
# It is dropped rather than replaced: ordinary least squares has no penalty
# term, so rescaling the columns does not change the fitted predictions.
ols = LinearRegression()

In [None]:
# Ordered hold-out split (no shuffling): the leading `train_share` of the rows
# is used for training and the remaining rows for validation.
train_share = 0.7
train_part_len = int(train_share * len(data))

# Cut both arrays at the same row index so features and targets stay aligned.
X_train, X_valid = np.split(X_df.values, [train_part_len])
y_train, y_valid = np.split(y_series.values, [train_part_len])

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Fit the least-squares model on the training part.
ols.fit(X_train, y_train)

In [None]:
# Predictions for the held-out rows.
y_ols_pred = ols.predict(X_valid)

In [None]:
def rmse(y, p):
    """Return the root mean squared error between targets ``y`` and predictions ``p``."""
    mse = mean_squared_error(y, p)
    return np.sqrt(mse)

In [None]:
def beatiful_coef(coefs, feature_names=None):
    """Tabulate per-feature values as a one-column frame sorted from largest to smallest.

    Parameters
    ----------
    coefs : array-like
        One value per feature (e.g. ``coef_`` or ``feature_importances_``).
    feature_names : sequence of labels, optional
        Index labels for the rows. When omitted, the current columns of the
        notebook-level ``X_df`` are used.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by feature name with a single ``"coef"`` column, in
        descending order of value.
    """
    if feature_names is None:
        # Resolved at call time rather than in the signature: a default of
        # ``X_df.columns`` is evaluated once when the defining cell runs, so it
        # requires ``X_df`` to exist already and goes stale if ``X_df`` is
        # rebuilt later.
        feature_names = X_df.columns
    table = pd.DataFrame(coefs, index=feature_names, columns=["coef"])
    return table.sort_values("coef", ascending=False)

In [None]:
# Validation RMSE of the least-squares fit.
rmse(y_valid, y_ols_pred)

In [None]:
# Fitted weight per feature, largest first.
beatiful_coef(ols.coef_, feature_names=X_df.columns)

In [None]:
ridge = Ridge(random_state=17, normalize=True)

In [None]:
ridge.fit(X_train, y_train)

In [None]:
beatiful_coef(ridge.coef_)

In [None]:
ridge_valid_pred = ridge.predict(X_valid)

In [None]:
rmse(y_valid, ridge_valid_pred)

In [None]:
alphas = np.logspace(-4, 4, 100)

In [None]:
ridge_cv = RidgeCV(alphas=alphas, cv=5, normalize=True).fit(X_train, y_train)

In [None]:
ridge_cv.alpha_

In [None]:
best_ridge_valid_pred = ridge_cv.predict(X_valid)

In [None]:
rmse(y_valid, best_ridge_valid_pred)

In [None]:
lasso = LassoCV(alphas=alphas, cv=5, normalize=True).fit(X_train, y_train)

In [None]:
lasso.alpha_

In [None]:
best_lasso_valid_pred = lasso.predict(X_valid)

In [None]:
rmse(y_valid, best_lasso_valid_pred)

In [None]:
beatiful_coef(lasso.coef_)

In [None]:
beatiful_coef(ridge_cv.coef_)

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Random forest with 100 trees; random_state is fixed so reruns build the same model.
forest = RandomForestRegressor(n_estimators=100, random_state=17)

In [None]:
forest.fit(X_train, y_train)

In [None]:
forest_valid_pred = forest.predict(X_valid)

In [None]:
# Validation RMSE of the forest, computed with the same helper as the linear models.
rmse(y_valid, forest_valid_pred)

In [None]:
# Feature importances, tabulated with the helper used for the linear weights
# (the resulting column is still labelled "coef").
beatiful_coef(forest.feature_importances_)