In [1]:
#required imports
from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error  
from math import sqrt 
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import QuantileTransformer
from scipy import stats

In [2]:
#load features
# The CSV path is named once so it is easy to spot and change.
features_csv = "least_features.csv"
features = pd.read_csv(features_csv)
# Show the first rows as a quick sanity check of the loaded table.
features.head()

Unnamed: 0,Station ID,Year,Month,Day,Weekday,Season,Holiday,Daily Weather,Daily Weather (Past),Humidity,Humidity (Past),Windspeed,Windspeed (Past),Apparent Temperature (Avg),Apparent Temperature (Avg) (Past),Rented Bikes,Rented Bikes (Future)
0,835,2018,10,277,4,Autumn,False,fog,partly-cloudy-day,0.88,0.76,6.55,6.33,57.905,57.42,1,19
1,835,2018,10,291,4,Autumn,False,partly-cloudy-day,partly-cloudy-day,0.8,0.9,6.87,2.22,50.035,53.7,19,24
2,835,2018,10,292,5,Autumn,False,cloudy,partly-cloudy-day,0.8,0.8,3.14,6.87,51.435,50.035,24,5
3,835,2018,10,293,6,Autumn,False,cloudy,cloudy,0.8,0.8,3.64,3.14,55.035,51.435,5,12
4,835,2018,10,294,7,Autumn,False,cloudy,cloudy,0.8,0.8,4.17,3.64,53.105,55.035,12,22


In [3]:
#transform non-numerical labels to numerical labels
le = LabelEncoder()
# Assign through the column name so the whole column is replaced by the encoder's
# integer codes. Writing through .loc[:, 'Holiday'] sets values into the existing
# column instead, which - depending on the pandas version - can keep the old dtype
# or emit an incompatible-dtype warning.
features['Holiday'] = le.fit_transform(features['Holiday'])

In [None]:

# Integer codes for the categorical columns. Both weather columns share one code
# table, so it is written once and copied for each column.
season_codes = {"Spring": 1, "Summer": 2, "Autumn": 3, "Winter": 4}
weather_codes = {
    "clear-day": 1,
    "partly-cloudy-day": 2,
    "cloudy": 3,
    "wind": 4,
    "rain": 5,
    "fog": 6,
    "snow": 7,
}
# Outer keys are column names, inner dicts map the original label to its code.
cleanup_nums = {
    "Season": dict(season_codes),
    "Daily Weather": dict(weather_codes),
    "Daily Weather (Past)": dict(weather_codes),
}

In [None]:
# Recode every column listed in cleanup_nums with its label -> integer table.
# Values without a mapping are passed through unchanged, so re-running the cell is
# harmless. pd.to_numeric then guarantees a numeric column and raises a ValueError
# if any text label is left over, instead of letting an unmapped category surface
# later inside the scaler.
for column, mapping in cleanup_nums.items():
    recoded = features[column].map(lambda value, mapping=mapping: mapping.get(value, value))
    features[column] = pd.to_numeric(recoded)

In [None]:
#target variable
# Column that the regressor is trained to predict.
target_column = 'Rented Bikes (Future)'
y = features[target_column]
# Number of samples in the target.
y.shape[0]

In [None]:
#Feature Matrix after evaluation: Use only Weekday, Month, Past Data, Apparent Temp Avg, Rented Bikes and Daily Weather
# The selected columns are listed one per line; their order defines the column
# order of X.
feature_columns = [
    'Weekday',
    'Month',
    'Daily Weather',
    'Daily Weather (Past)',
    'Humidity (Past)',
    'Windspeed (Past)',
    'Apparent Temperature (Avg)',
    'Apparent Temperature (Avg) (Past)',
    'Rented Bikes',
]
X = features.loc[:, feature_columns]
X.head()


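[Scikit-learn](https://sklearn.org/modules/neural_networks_supervised.html) recommends scaling the data to [0, 1] or [-1, +1], or standardizing it with the `StandardScaler`. With the `StandardScaler`, however, the model has an RMSE of 103, whereas the `QuantileTransformer`, which also transforms the data to a bounded range of values ([0, 1] with its default settings), is the scaler used in the cells below.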

In [None]:
#Split into train and test set
# Rows are split by position: the first N_TRAIN rows train the model and the next
# N_TEST rows are held out. .iloc is used for X and y alike so both always cover
# the same rows (.loc slices include the end label while positional slices exclude
# the end position, so mixing the two styles can give X and y different lengths).
N_TRAIN = 116
N_TEST = 30
X_train = X.iloc[:N_TRAIN]
X_test = X.iloc[N_TRAIN:N_TRAIN + N_TEST]
y_train = y.iloc[:N_TRAIN]
y_test = y.iloc[N_TRAIN:N_TRAIN + N_TEST]
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)

#use Quantile Transformer to create a uniform distribution with values between 0 and 1:
# n_quantiles is capped at the number of training rows; with the library default
# of 1000 scikit-learn would warn and clamp it to the sample count itself.
n_quantiles = min(1000, len(X_train))
scaler1 = QuantileTransformer(n_quantiles=n_quantiles)  # feature scaler
scaler3 = QuantileTransformer(n_quantiles=n_quantiles)  # target scaler

# Learn the feature quantiles from the training rows only ...
scaler1.fit(X_train)
# ... and apply that same mapping to train and test rows. The test rows must go
# through the transformation the model was trained on; fitting a separate scaler
# on the test rows would give them a different mapping.
X_train_scaled = scaler1.transform(X_train)
X_test_scaled = scaler1.transform(X_test)
# The name scaler2 is kept because later cells refer to it; it is the same
# train-fitted feature scaler.
scaler2 = scaler1

# QuantileTransformer expects a 2D array, so the target is converted to a float
# array and reshaped into a single column of shape (n_samples, 1).
target_train = np.asarray(y_train, dtype=float)
y_train_reshaped = target_train.reshape(-1, 1)
# Learn the target quantiles from the training targets and scale them.
scaler3.fit(y_train_reshaped)
y_train_scaled = scaler3.transform(y_train_reshaped)

In [None]:
#initialize MLPRegressor
# The hyper-parameters are collected in one mapping so they can be inspected
# before the estimator is built.
mlp_params = dict(
    hidden_layer_sizes=(10,5),
    activation='logistic',
    solver='lbfgs',
    alpha=0.001,
    batch_size='auto',
    learning_rate='constant',
    learning_rate_init=0.01,
    power_t=0.5,
    max_iter=1000,
    shuffle=True,
    random_state=9,
    tol=0.0001,
    verbose=False,
    warm_start=False,
    momentum=0.9,
    nesterovs_momentum=True,
    early_stopping=False,
    validation_fraction=0.1,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)
mlp = MLPRegressor(**mlp_params)
#change shape of y to an 1D-array(n_samples, ) and train the network
m = mlp.fit(X_train_scaled, np.ravel(y_train_scaled))

# Predict the (still scaled) target for the scaled test features.
predictions = m.predict(X_test_scaled)
#keep columns 1..8 of the scaled test matrix (column 0 is left out): 8 columns
new = X_test_scaled[:, np.arange(1, 9)]
#append the predictions as a ninth column, giving an array with 9 columns
X_test_scaled2 = np.hstack((new, predictions.reshape(-1, 1)))

In [None]:
#Denormalize testdata
# The network was trained on targets scaled with scaler3, so its predictions are
# in that scaled target space and must be mapped back with scaler3 as well.
# Inverting them through a feature scaler would instead map them through the
# quantiles of an input column ('Rented Bikes'), not of the target.
pred = predictions.reshape(-1,1)
p = scaler3.inverse_transform(pred).ravel()
# Training targets mapped back to their original scale, kept for reference.
denormalizedY = scaler3.inverse_transform(y_train_scaled)
# De-normalised predictions as a one-column table for a quick look.
transformed = pd.DataFrame({'Predicted': p})
transformed.head()

In [None]:
#Compare predicted and actual values
# Start from the actual test targets (keeping their row labels) and attach the
# predictions as a second column.
df = y_test.to_frame(name='Actual').assign(Predicted=p)
df.head()

In [None]:
#RMSE version 1
# Pull both columns out of the comparison table.
a = df['Actual']
b = df['Predicted']
# RMSE is the square root of the mean squared error.
mse = mean_squared_error(a, b)
rmse = sqrt(mse)
"The root mean square error of the model is {0:.3f}".format(rmse)

In [None]:
#Save predicted and actual values into csv
# Actual and predicted values of the test rows; the row labels of y_test are
# written as the first column of the file.
# NOTE(review): the cells further down read 'prediction_least.csv', not the file
# written here - confirm how that file is derived from this one.
df = pd.DataFrame({'Actual': y_test, 'Predicted': p})
df.to_csv('predictionRawLeast.csv')

In [None]:
#Save predicted and actual values of training data into csv
#daysTmp = X.values[:,0]
#day = daysTmp[0:1212].astype(int)
#df = pd.DataFrame({'Actual': y_train, 'Predicted': p})  
#df.to_csv('training_least.csv')

In [None]:
#read csv with the same length as the feature matrix for y values after day 1212
predict = pd.read_csv('prediction_least.csv')
# Convert the table to an array once and pick the columns by position:
# column 0 is used as x and column 2 as the y values for the later plot.
predict_matrix = predict.values
x, ynew = predict_matrix[:, 0], predict_matrix[:, 2]

In [None]:
#read csv with the same length as the feature matrix for y values until day 1212
training = pd.read_csv('trainingCleaned_least.csv')
# Convert the table to an array once and pick the columns by position:
# column 0 is used as x and column 2 as the y values for the later plot.
training_matrix = training.values
xtrain, ytrain = training_matrix[:, 0], training_matrix[:, 2]


In [None]:
from bokeh.plotting import figure, output_file, show
from bokeh.models import SingleIntervalTicker, LinearAxis
# prepare some data: one (x, y) pair of sequences per plotted line
x1 = xtrain
x2 = xtrain
x3 = x

y1 = ytrain
y2 = y
y3 = ynew

# output to static HTML file
output_file("MLPRegressor least used station.html")
# The figure gets its own name: `p` already holds the de-normalised predictions,
# and rebinding it here would break the comparison and CSV cells when re-run.
mlp_plot = figure(
   tools="pan,box_zoom,wheel_zoom,reset,save",
    title="MLPRegressor Prediction",
    y_range=[0, 100],
    x_range=[0, 150],
   x_axis_label='Days', y_axis_label='Frequency',
    plot_width=1200, plot_height=350,
    x_axis_type=None,
    toolbar_location="left"
)
#set xticks
tickerx = SingleIntervalTicker(interval=1, num_minor_ticks=1)
# x_axis_type=None suppresses the default x axis, so the 'Days' label is put on
# the replacement axis; otherwise no axis exists to carry it.
xaxis = LinearAxis(ticker=tickerx, axis_label='Days')
mlp_plot.add_layout(xaxis, 'below')
# add a line renderer with legend and line thickness
# Vertical ray (angle is approximately pi/2 radians).
# NOTE(review): x=1212 lies outside x_range [0, 150]; presumably this should mark
# the train/test boundary of this dataset - confirm the intended position.
mlp_plot.ray(x=1212, y=0, length=800, angle=1.57079633, color='purple')
mlp_plot.line(x1, y1, legend="Predicted Training", line_color="red")
mlp_plot.line(x2, y2, legend="Actual", line_color="green")
mlp_plot.line(x3, y3, legend="Predicted Test", line_color="blue")
mlp_plot.xaxis.major_label_orientation = 'vertical'
# show the results
show(mlp_plot)