In [1]:
#Import pandas and scikitlearn for Machine Learning Models
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, Ridge, HuberRegressor, ElasticNetCV
from sklearn.ensemble import BaggingRegressor, ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor

# Pickle saves the model
import pickle

In [2]:
# Read in cleaned results
df = pd.read_csv('05-13-03-19_station2.csv')

In [3]:
# Transform to a 2D array and assign features and target
# Available bikes as target. Another model can be done for available stands if needed.
features = df[['number', 'weekday', 'hour', 'minute']].values
targets = df['available_bikes'].values


In [4]:
# Cycles through neural network options and selects the most accurate.
def mlp_regressor_grid_search():
    est = MLPRegressor(activation= 'logistic')
    sizes = []
    for size1 in range(5, 20, 5):
        sizes.append(size1)

    param_grid = dict(solver=['lbfgs'], #'sgd','adam'
                      learning_rate=['adaptive','invscaling'],
                      alpha=[0.0001], # [0.0005, 0.001, 0.005, 0.01]
                      max_iter=[200], #np.arange(200, 300, 20)
                      tol=[0.0001], #np.arange(0.00001, 0.0001, 0.00001)
                      hidden_layer_sizes=sizes)
    return GridSearchCV(est, param_grid=param_grid, n_jobs=1, verbose=100)


In [5]:
models = [LinearRegression(),
          Ridge(),
          HuberRegressor(),
          ElasticNetCV(),
          DecisionTreeRegressor(), 
          ExtraTreesRegressor(),
          GradientBoostingRegressor(),
          RandomForestRegressor(),
          BaggingRegressor()]


In [6]:
# Split the data set into test and training data.
def split_by_position(features, targets):
    """
    train 0.80
    test 0.20
    """
    len_train = int(0.80 * len(features))
    train_features = features[0:len_train]
    train_targets = targets[0:len_train]
    test_features = features[len_train:]
    test_targets = targets[len_train:]
    return train_features, test_features, train_targets, test_targets


In [7]:
# Assign training features and target and test features and target
train_features, test_features, train_targets, test_targets = split_by_position(features, targets)



In [8]:
# Normalise the features for training
scaler = StandardScaler()
scaler.fit(train_features)
scaled_train_features = scaler.transform(train_features)




In [9]:
# Tests training features and targets with different neural network options
est = mlp_regressor_grid_search()
est.fit(scaled_train_features, train_targets)




Fitting 3 folds for each of 6 candidates, totalling 18 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] alpha=0.0001, hidden_layer_sizes=5, learning_rate=adaptive, max_iter=200, solver=lbfgs, tol=0.0001 
[CV]  alpha=0.0001, hidden_layer_sizes=5, learning_rate=adaptive, max_iter=200, solver=lbfgs, tol=0.0001, score=-1.8914202777676041, total=   0.1s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.0001, hidden_layer_sizes=5, learning_rate=adaptive, max_iter=200, solver=lbfgs, tol=0.0001 
[CV]  alpha=0.0001, hidden_layer_sizes=5, learning_rate=adaptive, max_iter=200, solver=lbfgs, tol=0.0001, score=-0.9208305092120043, total=   0.1s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.0001, hidden_layer_sizes=5, learning_rate=adaptive, max_iter=200, solver=lbfgs, tol=0.0001 
[CV]  alpha=0.0001, hidden_layer_sizes=5, learning_rate=adaptive, max_iter=200, solver=lbf



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False),
       fit_params=None, iid='warn', n_jobs=1,
       param_grid={'solver': ['lbfgs'], 'learning_rate': ['adaptive', 'invscaling'], 'alpha': [0.0001], 'max_iter': [200], 'tol': [0.0001], 'hidden_layer_sizes': [5, 10, 15]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=100)

In [10]:
# Print best hyper parameters for model
print(est.best_estimator_)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=5, learning_rate='invscaling',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)


In [11]:
# Print score for best estimator on test data
print(est.score(scaler.transform(test_features), test_targets))

0.6449067298301492




In [12]:
# Test algorithms for performance.
for model in models:
    est = model
    est.fit(scaled_train_features, train_targets)
    print(est.score(scaler.transform(test_features), test_targets))



-0.13100233771649616
-0.1308276657796692
-0.14799966782928897
-0.09486833649783666
0.5176025936045243
0.5109056191753276
0.5333341085897598
0.5356191559686072
0.5300990992851324




In [13]:
# Use features to print a prediction
predict_data = [[2.0,2.0,8.0,44.0]]
scaled_predict = scaler.transform(predict_data)


prediction = est.predict(scaled_predict)
print("PREDICTION:", math.floor(prediction))


PREDICTION: [0.2]


In [14]:
# Using elastic net CV as seems to be the best fit currently
# Pickle saves the model
est = ElasticNetCV()
est.fit(scaled_train_features, train_targets)
filename_est = 'model.sav'
filename_scaler = 'scaler.sav'
pickle.dump(est, open(filename_est, 'wb'))
pickle.dump(scaler, open(filename_scaler, 'wb'))


