https://scikit-learn.org/stable/modules/neural_networks_supervised.html

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): sns.set() applies seaborn's theme, but plt.style.use("default")
# on the next line runs afterwards and presumably resets it -- confirm which
# plotting style is intended (no plots are produced in the visible cells).
import seaborn as sns; sns.set()
plt.style.use("default")

In [2]:
# Load the raw dataset; "hour.csv" is a relative path, so it is resolved against
# the notebook's working directory.
df = pd.read_csv("hour.csv")
# Show the first two rows to check the columns.
df.head(2)

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40


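`instant` is just a row counter and `dteday` is largely covered by `yr`, `mnth` and `weekday`. `casual` and `registered` add up to `cnt` (the target), so keeping them would leak the answer into the features. Dropping these four columns.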

In [3]:
# Modelling frame: the raw data without the row id, the date string and the
# casual/registered breakdown of the count.
my_df = df.drop(
    columns=["instant", "dteday", "casual", "registered"],
)
my_df.head(2)

Unnamed: 0,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,16
1,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,40


In [4]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split

In [5]:
# Features are every column except `cnt`; `cnt` itself is the regression target.
X = my_df.drop(columns=["cnt"])
y = my_df["cnt"]

In [6]:
# Convert features/target to NumPy arrays before splitting and training.
# np.asarray is used instead of `.values` so this cell can be re-run safely:
# on a second run X and y are already ndarrays (which have no `.values`
# attribute), and np.asarray returns an ndarray input unchanged.
X = np.asarray(X)
y = np.asarray(y)
type(X), X.shape, y.shape

(numpy.ndarray, (17379, 12), (17379,))

In [7]:
# Hold out 20% of the rows as a test split; shuffling uses a fixed seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)

In [38]:
# Baseline network: library defaults apart from the iteration limit and a fixed seed.
# NOTE(review): the features are fed in unscaled; scikit-learn's MLP user guide
# recommends standardising inputs -- consider adding a StandardScaler step.
regr = MLPRegressor(max_iter=1000, random_state=1)

In [39]:
# Train the baseline network on the training split.
regr.fit(X_train, y_train)



MLPRegressor(max_iter=1000, random_state=1)

In [40]:
# Score the baseline on the held-out test split (for scikit-learn regressors
# `score` is the R^2 coefficient of determination).
regr.score(X_test, y_test)

0.791774580336376

### Let's see how we can improve the score

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
# Tune the MLP over initial learning rate, activation and solver using 4-fold
# cross-validation on the training split only.
estimator = MLPRegressor(max_iter=400, random_state=1)
param_grid = {
    "learning_rate_init": [0.01, 0.001],
    "activation": ["relu", "tanh"],
    "solver": ["sgd", "adam"],
}

# 8 candidates x 4 folds = 32 independent fits (plus the final refit).
# n_jobs=-1 runs them on all available cores instead of one after another.
# Every fit is seeded through random_state, so the scores do not depend on
# how the work is parallelised.
model = GridSearchCV(estimator, param_grid, cv=4, n_jobs=-1)
model.fit(X_train, y_train)



GridSearchCV(cv=4, estimator=MLPRegressor(max_iter=400, random_state=1),
             param_grid={'activation': ['relu', 'tanh'],
                         'learning_rate_init': [0.01, 0.001],
                         'solver': ['sgd', 'adam']})

In [13]:
# Tabulate the grid-search results: one row per parameter combination, with
# fit/score timings and the per-fold and mean test scores.
pd.DataFrame(model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_activation,param_learning_rate_init,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,9.45657,14.827886,0.005497,0.001658,relu,0.01,sgd,"{'activation': 'relu', 'learning_rate_init': 0...",-0.001481,-0.00015,-0.001213,-0.000743,-0.000897,0.000506,7
1,47.572759,6.673533,0.009246,0.001918,relu,0.01,adam,"{'activation': 'relu', 'learning_rate_init': 0...",0.766781,0.821511,0.691939,0.79105,0.76782,0.04791,2
2,1.8356,0.098491,0.004996,0.000706,relu,0.001,sgd,"{'activation': 'relu', 'learning_rate_init': 0...",-0.002133,0.138262,-0.018269,0.194311,0.078043,0.090621,6
3,57.832357,2.78371,0.01612,0.001144,relu,0.001,adam,"{'activation': 'relu', 'learning_rate_init': 0...",0.603508,0.616643,0.604354,0.607498,0.608001,0.005207,4
4,2.41303,1.329893,0.007627,0.001984,tanh,0.01,sgd,"{'activation': 'tanh', 'learning_rate_init': 0...",-0.127645,-0.170766,0.024305,-0.17056,-0.111166,0.080162,8
5,47.136148,10.635573,0.006496,0.001117,tanh,0.01,adam,"{'activation': 'tanh', 'learning_rate_init': 0...",0.915628,0.922453,0.923143,0.916035,0.919315,0.003495,1
6,2.099044,0.318688,0.006746,0.001296,tanh,0.001,sgd,"{'activation': 'tanh', 'learning_rate_init': 0...",0.241241,0.259,0.259334,0.253355,0.253233,0.007319,5
7,41.22735,8.997705,0.009495,0.003199,tanh,0.001,adam,"{'activation': 'tanh', 'learning_rate_init': 0...",0.749433,0.760212,0.75905,0.756597,0.756323,0.004187,3


In [14]:
# Parameter combination ranked first by mean cross-validated score.
model.best_params_

{'activation': 'tanh', 'learning_rate_init': 0.01, 'solver': 'adam'}

In [44]:
# Rebuild the network with the best grid-search combination
# (tanh / learning_rate_init=0.01 / adam) and a higher iteration limit than the
# 400 used during the search. solver="adam" is the library default and is only
# spelled out to mirror best_params_.
# Note: this rebinds `regr`, replacing the baseline model created earlier.
regr = MLPRegressor(
    activation="tanh",
    learning_rate_init=0.01,
    solver="adam",
    max_iter=2000,
    random_state=1,
)

In [45]:
# Train the tuned (tanh) network on the training split.
regr.fit(X_train, y_train)

MLPRegressor(activation='tanh', learning_rate_init=0.01, max_iter=2000,
             random_state=1)

In [46]:
# R^2 of the tuned network on the held-out test split.
regr.score(X_test, y_test)

0.9271352633333602

In [27]:
# Predicted `cnt` for every row of the test split.
regr.predict(X_test)

array([416.970918  , 113.26504647, 132.96948659, ...,  14.08480529,
       345.35793518,  71.98820831])

### Predict for the next hour

In [19]:
# Last rows of the modelling frame, i.e. the final hours recorded in the data.
my_df.tail()

Unnamed: 0,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
17374,1,1,12,19,0,1,1,2,0.26,0.2576,0.6,0.1642,119
17375,1,1,12,20,0,1,1,2,0.26,0.2576,0.6,0.1642,89
17376,1,1,12,21,0,1,1,1,0.26,0.2576,0.6,0.1642,90
17377,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,61
17378,1,1,12,23,0,1,1,1,0.26,0.2727,0.65,0.1343,49


In [29]:
# Same final rows from the raw frame, to see the calendar date (`dteday`) they belong to.
df.tail(2)

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
17377,17378,2012-12-31,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,13,48,61
17378,17379,2012-12-31,1,1,12,23,0,1,1,1,0.26,0.2727,0.65,0.1343,12,37,49


In [24]:
# Check the input layout the model was trained on: number of feature columns
# and one example row.
X_test.shape, X_test[:1]

((3476, 12),
 array([[ 4.    ,  1.    , 10.    ,  9.    ,  0.    ,  5.    ,  1.    ,
          2.    ,  0.54  ,  0.5152,  0.94  ,  0.1045]]))

In [22]:
# Column order of the modelling frame; a hand-built feature row has to follow
# this order (without `cnt`).
my_df.columns

Index(['season', 'yr', 'mnth', 'hr', 'holiday', 'weekday', 'workingday',
       'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'cnt'],
      dtype='object')

In [54]:
# Summary statistics, used to check the value range of each feature.
my_df.describe()

Unnamed: 0,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
count,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0,17379.0
mean,2.50164,0.502561,6.537775,11.546752,0.02877,3.003683,0.682721,1.425283,0.496987,0.475775,0.627229,0.190098,189.463088
std,1.106918,0.500008,3.438776,6.914405,0.167165,2.005771,0.465431,0.639357,0.192556,0.17185,0.19293,0.12234,181.387599
min,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.02,0.0,0.0,0.0,1.0
25%,2.0,0.0,4.0,6.0,0.0,1.0,0.0,1.0,0.34,0.3333,0.48,0.1045,40.0
50%,3.0,1.0,7.0,12.0,0.0,3.0,1.0,1.0,0.5,0.4848,0.63,0.194,142.0
75%,3.0,1.0,10.0,18.0,0.0,5.0,1.0,2.0,0.66,0.6212,0.78,0.2537,281.0
max,4.0,1.0,12.0,23.0,1.0,6.0,1.0,4.0,1.0,1.0,1.0,0.8507,977.0


In [60]:
# Hand-built feature row for the hour after the last record (2012-12-31 23:00),
# listed in the same column order as the training features. temp, atemp, hum
# and windspeed are carried over from that last record.
# NOTE(review): yr=2 lies outside the 0-1 range present in the training data,
# so the model has to extrapolate on that feature -- confirm this encoding.
# NOTE(review): the target hour falls on 2013-01-01; confirm that holiday=0 and
# workingday=1 are the right flags for that date.
next_hr_features = np.array([[
    1.0,     # season
    2,       # yr
    1,       # mnth
    0,       # hr
    0,       # holiday
    2,       # weekday
    1,       # workingday
    1,       # weathersit
    0.26,    # temp
    0.2727,  # atemp
    0.65,    # hum
    0.1343,  # windspeed
]])
next_hr_features.shape, next_hr_features

((1, 12),
 array([[1.    , 2.    , 1.    , 0.    , 0.    , 2.    , 1.    , 1.    ,
         0.26  , 0.2727, 0.65  , 0.1343]]))

In [49]:
# Predict `cnt` for the hand-built next-hour row with the tuned (tanh) network.
regr.predict(next_hr_features)

array([-64.66220562])

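<p style="color:red">Negative value. This may be overfitting, but note that <code>yr=2</code> lies outside the range seen in training (0–1), so the network is extrapolating on that feature. Try with the second-best parameters.</p>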

In [51]:
# Second-ranked grid-search combination: relu / learning_rate_init=0.01 / adam.
# solver="adam" is the library default and is written out only for clarity.
regr2 = MLPRegressor(
    activation="relu",
    solver="adam",
    learning_rate_init=0.01,
    max_iter=2000,
    random_state=1,
)
regr2.fit(X_train, y_train)

MLPRegressor(learning_rate_init=0.01, max_iter=2000, random_state=1)

In [52]:
# R^2 of the relu network on the held-out test split.
regr2.score(X_test, y_test)

0.8644912184680409

In [62]:
# Next-hour prediction from the relu network; the result is an array with one
# element because a single feature row is passed in.
value = regr2.predict(next_hr_features)
value

array([29.96695273])

In [64]:
# Report the prediction as a whole, non-negative count. In the recorded run
# round() on the NumPy scalar gave a float (30.0), and the regressors here can
# return negative values, so cast to int and floor the result at zero.
max(0, int(round(value[0])))

30.0