# Predict Bike Demand in Montreal Using Neural Network using LSTM



#### We have specifically decided to focus on the BIXI bike sharing system in Montreal, Canada, as it is the second largest bike sharing system in North America. Also, extensive datasets can be easily acquired on their website: https://montreal.bixi.com/en

#### Dataset: We have used one-month worth of data  - July 2017 - to make predictions for the next month 
### Goals: 
1. Define two new parameters - net gain and cluster gain
2. Make predictions on the level of occupancy of the stations (ie. how many bikes are station X at time Y and date Z)

### Steps:
1. Data Preprocesing
2. Clustering
3. Define Station Gain
4. Define Cluster Gain
5. Run DNN, RNN and LSTM models
6. Optimize the models in Step 5

## Data Preprocessing

In [1]:
import dask.dataframe as dd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import folium
import gpxpy.geo
from datetime import datetime
import time
import seaborn as sns
import os
import math
import xgboost as xgb
import matplotlib
matplotlib.use('nbagg')
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV, train_test_split
#from sklearn.grid_search import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.cluster import MiniBatchKMeans, KMeans
import warnings
warnings.simplefilter('ignore')
from sklearn.svm import SVC
#HAD TO USE SVR - Support Vector Regression bc SVC doesn't support continuous values
from sklearn.svm import SVR
from sklearn import metrics

In [2]:
new_frame = dd.read_csv("Stations_2019.csv")

In [3]:
new_frame.head()

Unnamed: 0,Code,name,latitude,longitude
0,10002,Métro Charlevoix (Centre / Charlevoix),45.478228,-73.569651
1,4000,Jeanne-d'Arc / Ontario,45.549598,-73.541874
2,4001,Graham / Brookfield,45.520075,-73.629776
3,4002,Graham / Wicksteed,45.516937,-73.640483
4,5002,St-Charles / Montarville,45.533682,-73.515261


## Clustering

In [4]:
montreal_boundaries = new_frame[(new_frame.latitude <= 45.5017) & (new_frame.longitude <= -73.5673 )]

In [5]:
#clustering
coord = montreal_boundaries[["latitude", "longitude"]].values
neighbors = []

def min_distance(regionCenters, totalClusters):
    good_points = 0
    bad_points = 0
    less_dist = []
    more_dist = []
    min_distance = 100000  #any big number can be given here
    for i in range(totalClusters):
        good_points = 0
        bad_points = 0
        for j in range(totalClusters):
            if j != i:
                distance = gpxpy.geo.haversine_distance(latitude_1 = regionCenters[i][0], longitude_1 = regionCenters[i][1], latitude_2 = regionCenters[j][0], longitude_2 = regionCenters[j][1])
                #you can check the documentation of above "gpxpy.geo.haversine_distance" at "https://github.com/tkrajina/gpxpy/blob/master/gpxpy/geo.py"
                #"gpxpy.geo.haversine_distance" gives distance between two latitudes and longitudes in meters. So, we have to convert it into miles.
                distance = distance/(1.60934*1000)   #distance from meters to miles
                min_distance = min(min_distance, distance) #it will return minimum of "min_distance, distance".
                if distance < 2:
                    good_points += 1
                else:
                    bad_points += 1
        less_dist.append(good_points)
        more_dist.append(bad_points)
    print("On choosing a cluster size of {}".format(totalClusters))
    print("Avg. Number clusters within vicinity where inter cluster distance < 2 miles is {}".format(np.ceil(sum(less_dist)/len(less_dist))))
    print("Avg. Number clusters outside of vicinity where inter cluster distance > 2 miles is {}".format(np.ceil(sum(more_dist)/len(more_dist))))
    print("Minimum distance between any two clusters = {}".format(min_distance))
    print("-"*10)
            
def makingRegions(noOfRegions):
    regions = MiniBatchKMeans(n_clusters = noOfRegions, batch_size = 10000).fit(coord)
    regionCenters = regions.cluster_centers_ 
    totalClusters = len(regionCenters)
    return regionCenters, totalClusters

In [6]:
startTime = datetime.now()
for i in range(10, 100, 10):
    regionCenters, totalClusters = makingRegions(i)
    min_distance(regionCenters, totalClusters)
print("Time taken = "+str(datetime.now() - startTime))

On choosing a cluster size of 10
Avg. Number clusters within vicinity where inter cluster distance < 2 miles is 3.0
Avg. Number clusters outside of vicinity where inter cluster distance > 2 miles is 7.0
Minimum distance between any two clusters = 0.9661826309055204
----------
On choosing a cluster size of 20
Avg. Number clusters within vicinity where inter cluster distance < 2 miles is 5.0
Avg. Number clusters outside of vicinity where inter cluster distance > 2 miles is 15.0
Minimum distance between any two clusters = 0.34593524207718
----------
On choosing a cluster size of 30
Avg. Number clusters within vicinity where inter cluster distance < 2 miles is 8.0
Avg. Number clusters outside of vicinity where inter cluster distance > 2 miles is 22.0
Minimum distance between any two clusters = 0.3789738029262929
----------
On choosing a cluster size of 40
Avg. Number clusters within vicinity where inter cluster distance < 2 miles is 12.0
Avg. Number clusters outside of vicinity where inter

In [7]:
#Mini Batch K-means model
coord = montreal_boundaries[["latitude", "longitude"]].values
print(coord)
regions = MiniBatchKMeans(n_clusters = 30, batch_size = 100000).fit(coord)

dask.array<values, shape=(nan, 2), dtype=float64, chunksize=(nan, 2), chunktype=numpy.ndarray>


In [8]:
NewFrame = new_frame[['latitude','longitude', 'Code']].compute()
NewFrame["pickup_cluster"] = regions.predict(NewFrame[["latitude", "longitude"]])

In [9]:
print(NewFrame.head())
print (NewFrame.shape)

    latitude  longitude   Code  pickup_cluster
0  45.478228 -73.569651  10002              25
1  45.549598 -73.541874   4000               8
2  45.520075 -73.629776   4001               1
3  45.516937 -73.640483   4002               1
4  45.533682 -73.515261   5002               8
(615, 4)


In [10]:
NewFrame = NewFrame.sort_values(by=['pickup_cluster'])

In [11]:
mydict = dict(zip(NewFrame.Code, NewFrame.pickup_cluster))
mydict

{6436: 0,
 6093: 0,
 6433: 0,
 6432: 0,
 6194: 0,
 6410: 1,
 6737: 1,
 7004: 1,
 6312: 1,
 6323: 1,
 7064: 1,
 6324: 1,
 6330: 1,
 6331: 1,
 6332: 1,
 6333: 1,
 7148: 1,
 7001: 1,
 7063: 1,
 6412: 1,
 7007: 1,
 6419: 1,
 6240: 1,
 4001: 1,
 6125: 1,
 7099: 1,
 4002: 1,
 6423: 1,
 6717: 1,
 6754: 2,
 7051: 2,
 6744: 2,
 6742: 2,
 6725: 2,
 7048: 2,
 7111: 3,
 6719: 4,
 6369: 4,
 6705: 5,
 6709: 5,
 6707: 5,
 6435: 6,
 6080: 6,
 6434: 6,
 6101: 7,
 6339: 7,
 6356: 7,
 6383: 8,
 6381: 8,
 6386: 8,
 6380: 8,
 6376: 8,
 6384: 8,
 6385: 8,
 6417: 8,
 6388: 8,
 6391: 8,
 6424: 8,
 6393: 8,
 6394: 8,
 6395: 8,
 6396: 8,
 6397: 8,
 6398: 8,
 6411: 8,
 6413: 8,
 6387: 8,
 6421: 8,
 6346: 8,
 6373: 8,
 6264: 8,
 6261: 8,
 6260: 8,
 6259: 8,
 6258: 8,
 6248: 8,
 6237: 8,
 6236: 8,
 6229: 8,
 6226: 8,
 6221: 8,
 6220: 8,
 6219: 8,
 6218: 8,
 6217: 8,
 6216: 8,
 6215: 8,
 6214: 8,
 6213: 8,
 6265: 8,
 6374: 8,
 6266: 8,
 6269: 8,
 6372: 8,
 6371: 8,
 6370: 8,
 6368: 8,
 6367: 8,
 6364: 8,
 6362: 8,


In [12]:
bike_data_frame = pd.read_csv("OD_2018-07.csv")
bike_data_frame = bike_data_frame.sort_values(by=['start_station_code'])
print(bike_data_frame.head())
print (bike_data_frame.shape)

              start_date  start_station_code          end_date  \
849566  2018-07-28 22:03                4000  2018-07-28 22:43   
784874  2018-07-26 21:28                4000  2018-07-26 21:56   
795083  2018-07-27 09:25                4000  2018-07-27 09:30   
768486  2018-07-26 14:35                4000  2018-07-26 15:05   
556753  2018-07-18 19:24                4000  2018-07-18 19:34   

        end_station_code  duration_sec  is_member  
849566              6111          2350          1  
784874              7067          1673          1  
795083              6391           303          1  
768486              7031          1805          0  
556753              6701           570          0  
(953031, 6)


In [13]:
bike_data_frame['pickup_cluster'] = bike_data_frame['start_station_code']\
                               .map(mydict) 
bike_data_frame['dropoff_cluster'] = bike_data_frame['end_station_code']\
                               .map(mydict) \


In [14]:
bike_data_frame.head()

Unnamed: 0,start_date,start_station_code,end_date,end_station_code,duration_sec,is_member,pickup_cluster,dropoff_cluster
849566,2018-07-28 22:03,4000,2018-07-28 22:43,6111,2350,1,8.0,8.0
784874,2018-07-26 21:28,4000,2018-07-26 21:56,7067,1673,1,8.0,8.0
795083,2018-07-27 09:25,4000,2018-07-27 09:30,6391,303,1,8.0,8.0
768486,2018-07-26 14:35,4000,2018-07-26 15:05,7031,1805,0,8.0,8.0
556753,2018-07-18 19:24,4000,2018-07-18 19:34,6701,570,0,8.0,8.0


In [15]:
# Feature engineer the hour and weekday of the start time and end time 
bike_data_frame['start_date'] = pd.to_datetime(bike_data_frame['start_date'], utc=True, format='%Y-%m-%d %H:%M')
bike_data_frame['end_date'] = pd.to_datetime(bike_data_frame['end_date'], utc=True, format='%Y-%m-%d %H:%M')
bike_data_frame['start_hour'] = bike_data_frame['start_date'].dt.hour
bike_data_frame['start_weekday'] = bike_data_frame['start_date'].dt.weekday
bike_data_frame['end_hour'] = bike_data_frame['end_date'].dt.hour
bike_data_frame['end_weekday'] = bike_data_frame['end_date'].dt.weekday
bike_data_frame['day_of_month'] = bike_data_frame['start_date'].dt.day
bike_data_frame.head()


Unnamed: 0,start_date,start_station_code,end_date,end_station_code,duration_sec,is_member,pickup_cluster,dropoff_cluster,start_hour,start_weekday,end_hour,end_weekday,day_of_month
849566,2018-07-28 22:03:00+00:00,4000,2018-07-28 22:43:00+00:00,6111,2350,1,8.0,8.0,22,5,22,5,28
784874,2018-07-26 21:28:00+00:00,4000,2018-07-26 21:56:00+00:00,7067,1673,1,8.0,8.0,21,3,21,3,26
795083,2018-07-27 09:25:00+00:00,4000,2018-07-27 09:30:00+00:00,6391,303,1,8.0,8.0,9,4,9,4,27
768486,2018-07-26 14:35:00+00:00,4000,2018-07-26 15:05:00+00:00,7031,1805,0,8.0,8.0,14,3,15,3,26
556753,2018-07-18 19:24:00+00:00,4000,2018-07-18 19:34:00+00:00,6701,570,0,8.0,8.0,19,2,19,2,18


In [16]:
# data filtering 
bike_data_frame = bike_data_frame[(bike_data_frame.start_hour>4) & (bike_data_frame.start_hour<24)]
bike_data_frame = bike_data_frame[(bike_data_frame.end_hour>4) & (bike_data_frame.end_hour<24)]
bike_data_frame = bike_data_frame[(bike_data_frame.duration_sec>0) & (bike_data_frame.duration_sec<7000)]
#bike_data_frame = bike_data_frame.drop(['start_date', 'end_date', 'is_member'], axis=1)

bike_data_frame.head()

Unnamed: 0,start_date,start_station_code,end_date,end_station_code,duration_sec,is_member,pickup_cluster,dropoff_cluster,start_hour,start_weekday,end_hour,end_weekday,day_of_month
849566,2018-07-28 22:03:00+00:00,4000,2018-07-28 22:43:00+00:00,6111,2350,1,8.0,8.0,22,5,22,5,28
784874,2018-07-26 21:28:00+00:00,4000,2018-07-26 21:56:00+00:00,7067,1673,1,8.0,8.0,21,3,21,3,26
795083,2018-07-27 09:25:00+00:00,4000,2018-07-27 09:30:00+00:00,6391,303,1,8.0,8.0,9,4,9,4,27
768486,2018-07-26 14:35:00+00:00,4000,2018-07-26 15:05:00+00:00,7031,1805,0,8.0,8.0,14,3,15,3,26
556753,2018-07-18 19:24:00+00:00,4000,2018-07-18 19:34:00+00:00,6701,570,0,8.0,8.0,19,2,19,2,18


In [17]:
start_loc = (bike_data_frame['start_station_code'].unique())
start_loc_mapping = dict(zip(start_loc, 
                                 range(0, len(start_loc) + 1)))

bike_data_frame['start_loc'] = bike_data_frame['start_station_code']\
                               .map(start_loc_mapping) \
                               .astype(int)
bike_data_frame['end_loc'] = bike_data_frame['end_station_code'] \
                               .map(start_loc_mapping) \
                               .astype(int)

pickup_cluster = (bike_data_frame['pickup_cluster'].unique())
cluster_mapping = dict(zip(pickup_cluster, 
                                 range(0, len(pickup_cluster)+1 )))

bike_data_frame['pcluster'] = bike_data_frame['pickup_cluster']\
                               .map(cluster_mapping ) \
                               .astype(int)
bike_data_frame['dcluster'] = bike_data_frame['dropoff_cluster'] \
                               .map(cluster_mapping ) \
                               .astype(int)
bike_data_frame.tail()

Unnamed: 0,start_date,start_station_code,end_date,end_station_code,duration_sec,is_member,pickup_cluster,dropoff_cluster,start_hour,start_weekday,end_hour,end_weekday,day_of_month,start_loc,end_loc,pcluster,dcluster
204390,2018-07-07 22:36:00+00:00,10002,2018-07-07 22:38:00+00:00,6349,160,1,25.0,25.0,22,5,22,5,7,546,306,3,3
866649,2018-07-29 14:40:00+00:00,10002,2018-07-29 14:59:00+00:00,6159,1154,0,25.0,8.0,14,6,14,6,29,546,152,3,0
236630,2018-07-09 07:45:00+00:00,10002,2018-07-09 07:49:00+00:00,6921,285,1,25.0,25.0,7,0,7,0,9,546,454,3,3
694073,2018-07-23 12:20:00+00:00,10002,2018-07-23 12:40:00+00:00,6052,1249,0,25.0,8.0,12,0,12,0,23,546,56,3,0
332334,2018-07-11 21:45:00+00:00,10002,2018-07-11 21:51:00+00:00,6727,370,1,25.0,25.0,21,2,21,2,11,546,413,3,3


In [28]:
#bike_data_frame.to_csv('bike_df_cluster.csv')

## Dataset 1: Cluster Gain

In [29]:
station_ID = []
gain_list = []
cluster_ID = []
day_of_month = []
hour_of_day = []

for i in range(0,32): # days of month 0-31
    for j in range(5,24): # hours of day 5 am - 11 pm
      #  for z in range(1,31): # pickup cluster 1 to 30
                new_frame = bike_data_frame[bike_data_frame['day_of_month'] == i]
                new_frame = new_frame[new_frame['start_hour'] == j]
              #  new_frame = new_frame[new_frame['pickup_cluster'] == z]

                n_gain = new_frame['pcluster'].value_counts(dropna=True, sort=True) #count unique values on the same row
                n_gain = -1*n_gain      
                p_gain = new_frame['dcluster'].value_counts(dropna=True, sort=True) 
                gain = pd.concat([p_gain, n_gain], axis=1)
                gain = gain.replace(np.nan, 0)
                tot_gain = gain['pcluster'] + gain['dcluster']
                tot_gain_ind = tot_gain.index.values


                for x in range(0,len(tot_gain)):
                   cluster_ID.append(tot_gain_ind[x])
                   gain_list.append(tot_gain.iloc[x])
                   day_of_month.append(i)
                   hour_of_day.append(j)
                #   cluster.append(z)



In [30]:
zippedList =  list(zip(cluster_ID, gain_list, day_of_month, hour_of_day))
dfObj = pd.DataFrame(zippedList, columns = ['cluster_id' , 'gain', 'day_of_month', 'hour_of_day']) 

dfObj = dfObj.sort_values(by=['cluster_id'])
dfObj

Unnamed: 0,cluster_id,gain,day_of_month,hour_of_day
0,0,8.0,1,5
8089,0,88.0,18,14
843,0,45.0,2,21
8064,0,55.0,18,13
8039,0,89.0,18,12
8014,0,92.0,18,11
7989,0,148.0,18,10
7963,0,305.0,18,9
7938,0,767.0,18,8
7912,0,395.0,18,7


In [31]:
df_x_cluster = dfObj.drop(['gain', 'hour_of_day'], axis=1)

df_y_cluster = dfObj.gain


In [32]:
y_cluster = np.array(df_y_cluster)
x_cluster = np.array(df_x_cluster)

print(x_cluster.shape, y_cluster.shape)

(14286, 2) (14286,)


In [33]:
x_train_cluster, x_test_cluster, y_train_cluster, y_test_cluster = train_test_split(x_cluster, y_cluster, random_state=5)

## Dataset 2: Station Gain

In [18]:
bike_data_frame = pd.read_csv("OD_2018-07.csv")
bike_data_frame = bike_data_frame.sort_values(by=['start_station_code'])
print(bike_data_frame.head())
print (bike_data_frame.shape)

              start_date  start_station_code          end_date  \
849566  2018-07-28 22:03                4000  2018-07-28 22:43   
784874  2018-07-26 21:28                4000  2018-07-26 21:56   
795083  2018-07-27 09:25                4000  2018-07-27 09:30   
768486  2018-07-26 14:35                4000  2018-07-26 15:05   
556753  2018-07-18 19:24                4000  2018-07-18 19:34   

        end_station_code  duration_sec  is_member  
849566              6111          2350          1  
784874              7067          1673          1  
795083              6391           303          1  
768486              7031          1805          0  
556753              6701           570          0  
(953031, 6)


In [19]:
bike_data_frame['pickup_cluster'] = bike_data_frame['start_station_code']\
                               .map(mydict) 
bike_data_frame['dropoff_cluster'] = bike_data_frame['end_station_code']\
                               .map(mydict) \

bike_data_frame['start_date'] = pd.to_datetime(bike_data_frame['start_date'], utc=True, format='%Y-%m-%d %H:%M')
bike_data_frame['end_date'] = pd.to_datetime(bike_data_frame['end_date'], utc=True, format='%Y-%m-%d %H:%M')
bike_data_frame['start_hour'] = bike_data_frame['start_date'].dt.hour
bike_data_frame['start_weekday'] = bike_data_frame['start_date'].dt.weekday
bike_data_frame['end_hour'] = bike_data_frame['end_date'].dt.hour
bike_data_frame['end_weekday'] = bike_data_frame['end_date'].dt.weekday
bike_data_frame['day_of_month'] = bike_data_frame['start_date'].dt.day
bike_data_frame.head()

Unnamed: 0,start_date,start_station_code,end_date,end_station_code,duration_sec,is_member,pickup_cluster,dropoff_cluster,start_hour,start_weekday,end_hour,end_weekday,day_of_month
849566,2018-07-28 22:03:00+00:00,4000,2018-07-28 22:43:00+00:00,6111,2350,1,8.0,8.0,22,5,22,5,28
784874,2018-07-26 21:28:00+00:00,4000,2018-07-26 21:56:00+00:00,7067,1673,1,8.0,8.0,21,3,21,3,26
795083,2018-07-27 09:25:00+00:00,4000,2018-07-27 09:30:00+00:00,6391,303,1,8.0,8.0,9,4,9,4,27
768486,2018-07-26 14:35:00+00:00,4000,2018-07-26 15:05:00+00:00,7031,1805,0,8.0,8.0,14,3,15,3,26
556753,2018-07-18 19:24:00+00:00,4000,2018-07-18 19:34:00+00:00,6701,570,0,8.0,8.0,19,2,19,2,18


In [36]:
# data filtering 
bike_data_frame = bike_data_frame[(bike_data_frame.start_hour>4) & (bike_data_frame.start_hour<24)]
bike_data_frame = bike_data_frame[(bike_data_frame.end_hour>4) & (bike_data_frame.end_hour<24)]
bike_data_frame = bike_data_frame[(bike_data_frame.duration_sec>0) & (bike_data_frame.duration_sec<7000)]
#bike_data_frame = bike_data_frame.drop(['start_date', 'end_date', 'is_member'], axis=1)

bike_data_frame.head()

Unnamed: 0,start_date,start_station_code,end_date,end_station_code,duration_sec,is_member,pickup_cluster,dropoff_cluster,start_hour,start_weekday,end_hour,end_weekday,day_of_month
849566,2018-07-28 22:03:00+00:00,4000,2018-07-28 22:43:00+00:00,6111,2350,1,19.0,19.0,22,5,22,5,28
784874,2018-07-26 21:28:00+00:00,4000,2018-07-26 21:56:00+00:00,7067,1673,1,19.0,0.0,21,3,21,3,26
795083,2018-07-27 09:25:00+00:00,4000,2018-07-27 09:30:00+00:00,6391,303,1,19.0,19.0,9,4,9,4,27
768486,2018-07-26 14:35:00+00:00,4000,2018-07-26 15:05:00+00:00,7031,1805,0,19.0,0.0,14,3,15,3,26
556753,2018-07-18 19:24:00+00:00,4000,2018-07-18 19:34:00+00:00,6701,570,0,19.0,0.0,19,2,19,2,18


In [20]:
start_loc = (bike_data_frame['start_station_code'].unique())
start_loc_mapping = dict(zip(start_loc, 
                                 range(0, len(start_loc) + 1)))

bike_data_frame['start_loc'] = bike_data_frame['start_station_code']\
                               .map(start_loc_mapping) \
                               .astype(int)
bike_data_frame['end_loc'] = bike_data_frame['end_station_code'] \
                               .map(start_loc_mapping) \
                               .astype(int)
bike_data_frame.tail()

Unnamed: 0,start_date,start_station_code,end_date,end_station_code,duration_sec,is_member,pickup_cluster,dropoff_cluster,start_hour,start_weekday,end_hour,end_weekday,day_of_month,start_loc,end_loc
204390,2018-07-07 22:36:00+00:00,10002,2018-07-07 22:38:00+00:00,6349,160,1,25.0,25.0,22,5,22,5,7,546,306
866649,2018-07-29 14:40:00+00:00,10002,2018-07-29 14:59:00+00:00,6159,1154,0,25.0,8.0,14,6,14,6,29,546,152
236630,2018-07-09 07:45:00+00:00,10002,2018-07-09 07:49:00+00:00,6921,285,1,25.0,25.0,7,0,7,0,9,546,454
694073,2018-07-23 12:20:00+00:00,10002,2018-07-23 12:40:00+00:00,6052,1249,0,25.0,8.0,12,0,12,0,23,546,56
332334,2018-07-11 21:45:00+00:00,10002,2018-07-11 21:51:00+00:00,6727,370,1,25.0,25.0,21,2,21,2,11,546,413


In [21]:
bike_data_frame = bike_data_frame.drop(['start_date', 'end_date', 'is_member'], axis=1)

In [39]:
station_ID = []
gain_list = []
day_of_month = []
hour_of_day = []

for i in range(0,32): # days of month 0-31
    for j in range(5,24): # hours of day 5 am - 11 pm
        new_frame = bike_data_frame[bike_data_frame['day_of_month'] == i]
        new_frame = new_frame[new_frame['start_hour'] == j]
        
        n_gain = new_frame['start_loc'].value_counts(dropna=True, sort=True)
        n_gain = -1*n_gain      
        p_gain = new_frame['end_loc'].value_counts(dropna=True, sort=True)
        gain = pd.concat([p_gain, n_gain], axis=1)
        gain = gain.replace(np.nan, 0)
        tot_gain = gain['end_loc'] + gain['start_loc']
        tot_gain_ind = tot_gain.index.values
        
        for x in range(0,len(tot_gain)):
            station_ID.append(tot_gain_ind[x])
            gain_list.append(tot_gain.iloc[x])
            day_of_month.append(i)
            hour_of_day.append(j)

#print(gain.to_string())
#print(p_gain.to_string())
#print(n_gain.to_string())
#print(negative_gain)
#station_ID = np.array(negative_gain)
#print(station_ID)

zippedList =  list(zip(station_ID, gain_list, day_of_month, hour_of_day))
dfObj = pd.DataFrame(zippedList, columns = ['station_id' , 'gain', 'dom', 'hod']) 
dfObj.tail()

Unnamed: 0,station_id,gain,dom,hod
262805,539,3.0,31,23
262806,541,1.0,31,23
262807,543,1.0,31,23
262808,544,1.0,31,23
262809,546,1.0,31,23


In [23]:
df_x = bike_data_frame.drop('end_station_code', axis=1)
df_y = bike_data_frame.end_station_code

y = np.array(df_y)
x = np.array(df_x)

In [40]:
df_x = dfObj.drop(['gain'], axis=1)
df_y = dfObj.gain

# turn x and y into arrays
y = np.array(df_y)
x = np.array(df_x)

print(x.shape, y.shape)

(262810, 3) (262810,)


In [24]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=5)
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

(714773, 11) (238258, 11)
(714773,) (238258,)


## Neural Network Models for Station Gain

## Model 1

In [26]:
# NEURAL NETWORK
# ----------------------------------------------------
n_col = df_x.shape[1]
print(n_col)

import keras
from keras.layers import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras import optimizers

import keras.backend as K

def rmse (y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def model() :   #Created a Model using Keras
    model = keras.models.Sequential()
    model.add(Dropout(0.5,input_shape=(n_col,)))
    model.add(BatchNormalization())
    model.add(Dense(512,activation='relu'))#512 neurons in input layer
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(256,activation='relu')) #256 neurons in hidden layer
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(128, activation='relu')) # 1 neuron in output layer
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(1)) #256 neurons in hidden layer
    
    nadam = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    adadelta =optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    adgrad = optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)
    rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
    
    
    model.compile(optimizer=nadam,loss='mse', metrics=[rmse])
    return model

model = model()
estimator = model.fit(x=x_train,y=y_train, batch_size=1024, epochs=1, 
                    verbose=1, validation_data=(x, y), 
                    shuffle=True)

11
Train on 714773 samples, validate on 953031 samples
Epoch 1/1


## Model 2

In [27]:
n_col = df_x.shape[1]
print(n_col)

import keras
from keras.layers import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

import keras.backend as K

def rmse (y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def model() :   #Created a Model using Keras
    model = keras.models.Sequential()
    model.add(Dropout(0.5,input_shape=(n_col,)))
    model.add(BatchNormalization())
    model.add(Dense(1024,activation='relu'))#512 neurons in input layer
    model.add(Dropout(0.5))
    model.add(Dense(512,activation='relu'))#512 neurons in input layer
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(256,activation='relu')) #256 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(128,activation='relu'))  # 128 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(64,activation='relu'))   # 64 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(32,activation='relu'))   # 32 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(16,activation='relu')) # 16 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(8,activation='relu')) # 8 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(1)) # 1 neuron in output layer
    
    nadam = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    #adadelta =optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    #adgrad = optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)
    #rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
    
    #sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True,clipnorm=0.5)

    model.compile(optimizer=nadam,loss='mse', metrics=[rmse])
    return model


model = model()
estimator = model.fit(x=x_train,y=y_train, batch_size=1024, epochs=1, 
                    verbose=1, validation_data=(x, y), 
                    shuffle=True)

11
Train on 714773 samples, validate on 953031 samples
Epoch 1/1
 79872/714773 [==>...........................] - ETA: 2:07 - loss: nan - rmse: nan

KeyboardInterrupt: 

## Model 3

In [None]:
n_col = df_x.shape[1]
print(n_col)

#x_train=x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
#x_test=x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

import keras
from keras.layers import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras import optimizers

import keras.backend as K

def rmse (y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def model() :   #Created a Model using Keras
    model = keras.models.Sequential()
    model.add(Dropout(0.5,input_shape=(n_col, )))
    model.add(BatchNormalization())
    model.add(Dense(1024,activation='relu'))#512 neurons in input layer
    model.add(Dropout(0.5))
    model.add(Dense(512,activation='relu'))#512 neurons in input layer
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(256,activation='relu')) #256 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(128,activation='relu'))  # 128 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(64,activation='relu'))   # 64 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(32,activation='relu'))   # 32 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(16,activation='relu')) # 16 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(8,activation='relu')) # 8 neurons in hidden layer
    model.add(BatchNormalization())
    model.add(Dense(1)) # 1 neuron in output layer
    
    nadam = optimizers.Nadam(lr=0.004, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    adadelta =optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    adgrad = optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)
    rms = optimizers.RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0)
    
    #sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True,clipnorm=0.5)

    model.compile(optimizer=rms,loss='mse', metrics=[rmse])
    return model


model = model()
estimator = model.fit(x=x_train,y=y_train, batch_size=2048, epochs=3, 
                    verbose=1, validation_data=(x_test, y_test),  shuffle=True)

In [None]:
testPredict= model.predict(x_test)

fig, ax = plt.subplots()
ax.scatter(y_test, testPredict)
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

## RNN

## Model 1

In [28]:
n_col_x = df_x.shape
n_col_y = df_y.shape

print(n_col_x)

(953031, 11)


In [29]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN

x_train=x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
x_test=x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

print (x_train.shape)
print(x_test.shape)

model = Sequential()
model.add(SimpleRNN(units=64, input_shape=(x_train.shape[1:]), activation="relu"))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=[rmse])
model.summary()

model.fit(x_train,y_train, epochs=1, batch_size=16, verbose=2)
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = (x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)
print(trainScore)
print("test score:", testScore)

SyntaxError: invalid syntax (<ipython-input-29-28f2214870f3>, line 22)

## Model 2

In [None]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN


x_train=x_train.reshape(x_train.shape[0], 1,x_train.shape[1])
print (x_train.shape)
print(x_test.shape)
x_test=x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

model = Sequential()
model.add(SimpleRNN(units=128, input_shape=(x_train.shape[1:]), activation="relu"))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=[rmse])
model.summary()

model.fit(x_train,y_train, epochs=4, batch_size=16, verbose=2)
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)

predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)

print("train score:",trainScore)
print("test score:", testScore)

print(x_test.shape)
print(testPredict.shape)

(714773, 1, 11)
(238258, 11)
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 128)               17920     
_________________________________________________________________
dense_18 (Dense)             (None, 8)                 1032      
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 9         
_________________________________________________________________
dense_20 (Dense)             (None, 8)                 16        
_________________________________________________________________
dense_21 (Dense)             (None, 1)                 9         
_________________________________________________________________
dense_22 (Dense)             (None, 8)                 16        
_________________________________________________________________
dense_23 (Dense)         

## Model 3

In [None]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Dropout

#x_train=x_train.reshape(x_train.shape[0], 1,x_train.shape[1])
#x_test=x_test.reshape(x_test.shape[0], 1, x_test.shape[1])
print (x_train.shape)
print(x_test.shape)

model = Sequential()
model.add(SimpleRNN(units=128, input_shape=(x_train.shape[1:]), activation="relu"))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=[rmse])
model.summary()

model.fit(x_train,y_train, epochs=1, batch_size=16, verbose=2)
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

print(x_test.shape)
print(testPredict.shape)

In [None]:
fig, ax = plt.subplots()
ax.scatter(y_test, testPredict)
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

In [None]:
print (testPredict)

## LSTM

In [None]:
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Dense 
import keras.backend as K
from keras.callbacks import EarlyStopping


In [None]:
K.clear_session() 
model = Sequential() # Sequential Model
model.add(LSTM(20, input_shape=(x_train.shape[1:]))) # (timestep, feature)
model.add(Dense(1)) # output = 1
model.compile(loss='mean_squared_error', optimizer='adam', metrics=[rmse])
model.summary()
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)
model.fit(x_train, y_train, epochs=5, batch_size=30, verbose=1, callbacks=[early_stop])
              
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

In [None]:
#x_train=x_train.reshape(x_train.shape[0], 1,x_train.shape[1])
#x_test=x_test.reshape(x_test.shape[0], 1, x_test.shape[1])
print (x_train.shape)
print(x_test.shape)

model1 = Sequential() # Sequential Model
model1.add(LSTM(20, input_shape=(x_train.shape[1:]))) # (timestep, feature)
model1.add(Dense(1)) # output = 1
# Fully connected layer
model1.add(Dense(1, activation='relu'))
# Dropout for regularization
model1.add(Dropout(0.5))
model1.add(Dense(1, activation='relu'))
# Dropout for regularization
model1.add(Dropout(0.5))
model1.compile(loss='mean_squared_error', optimizer='adam', metrics=[rmse])

model1.summary()
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

model.fit(x_train, y_train, epochs=5, batch_size=30, verbose=1, callbacks=[early_stop])
              
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

In [None]:
#RMSprop optimizer
from keras.optimizers import RMSprop

model1 = Sequential()
model1.add(LSTM(20, input_shape=(x_train.shape[1:]))) 
model1.add(Dense(1))
model1.add(Dense(1, activation='relu'))
model1.add(Dropout(0.5))
model1.add(Dense(1, activation='relu'))
model1.add(Dropout(0.5))


rmsprop = RMSprop(lr=0.00001, rho=0.9, epsilon=1e-08)
model1.compile(loss='mean_squared_error', optimizer=rmsprop, metrics=[rmse] )

model1.summary()
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

model.fit(x_train, y_train, epochs=5, batch_size=30, verbose=1, callbacks=[early_stop])
              
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

In [None]:
#RMSprop optimizer
from keras.optimizers import RMSprop

model1 = Sequential() # Sequential Model
model1.add(LSTM(100, input_shape=(x_train.shape[1:]))) # (timestep, feature)
model1.add(Dense(1)) # output = 1
model1.add(Dense(1, activation='relu'))
model1.add(Dropout(0.5))
model1.add(Dense(1, activation='relu'))
model1.add(Dropout(0.5))

rmsprop = RMSprop(lr=10, rho=9, epsilon=1e-08)
model1.compile(loss='mean_squared_error', optimizer=rmsprop, metrics=[rmse] )
model1.summary()
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

model.fit(x_train, y_train, epochs=2, batch_size=30, verbose=1, callbacks=[early_stop])
              
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

In [None]:
#ADAM optimizer
from keras.optimizers import Adam

model = Sequential()
model.add(LSTM(100, input_shape=(x_train.shape[1:]))) 
model.add(Dense(1)) 
model.add(Dense(1, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='relu'))
model.add(Dropout(0.5))

adam = keras.optimizers.Adam( lr=10, beta_1=9, beta_2=0.999, amsgrad=False)
model.compile(loss='mean_squared_error', optimizer=adam, metrics=[rmse] )

model.summary()
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

model.fit(x_train, y_train, epochs=2, batch_size=30, verbose=1, callbacks=[early_stop])
              
trainPredict = model.predict(x_train)
testPredict= model.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train, y_train, verbose=0)
testScore = model.evaluate(x_test, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

In [None]:
#ADAM optimizer
print (x_train.shape)
from keras.optimizers import Adam

model1 = Sequential() # Sequential Model
model1.add(LSTM(100, input_shape=(x_train.shape[1:]))) # (timestep, feature)
model1.add(Dense(1)) # output = 1
model1.add(Dense(1, activation='tanh'))
model1.add(Dropout(0.5))
model1.add(Dense(1, activation='tanh'))
model1.add(Dropout(0.5))
model1.add(Dense(1)) # output = 1
model1.add(Dense(1, activation='tanh'))
model1.add(Dropout(0.5))
model1.add(Dense(1, activation='tanh'))
model1.add(Dropout(0.5))


adam = keras.optimizers.Adam( lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
model1.compile(loss='mean_squared_error', optimizer=rms, metrics=[rmse])

model1.summary()
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

model1.fit(x_train, y_train, epochs=10, batch_size=30, verbose=1, callbacks=[early_stop])
              
trainPredict = model1.predict(x_train)
testPredict= model1.predict(x_test)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model1.evaluate(x_train, y_train, verbose=0)
testScore = model1.evaluate(x_test, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

In [None]:
nadam = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
adadelta =optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
adgrad = optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)
rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)

## Neural Network Models for Cluster Gain

## DNN

In [72]:
# NEURAL NETWORK
# ----------------------------------------------------
n_col = df_x_cluster.shape[1]
print(n_col)

import keras
from keras.layers import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras import optimizers

import keras.backend as K

def rmse (y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def model() :   #Created a Model using Keras
    model = keras.models.Sequential()
    model.add(Dropout(0.5,input_shape=(n_col,)))
    model.add(BatchNormalization())
    model.add(Dense(512,activation='relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(256,activation='relu')) 
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(1)) 
    
    nadam = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    adadelta =optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    adgrad = optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)
    rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
    
    
    model.compile(optimizer=nadam,loss='mse', metrics=[rmse])
    return model

model = model()
#estimator = model.fit(x=x_train_cluster,y=y_train_cluster, batch_size=1024, epochs=1, 
#                    verbose=1, validation_data=(x_cluster, y_cluster), 
#                    shuffle=True)
estimator = model.fit(x=x_train_cluster,y=y_train_cluster, batch_size=1024, epochs=1, 
                    verbose=1, validation_data=(x_cluster, y_cluster), 
                    shuffle=True)
#model.fit(x_train_cluster,y_train_cluster, epochs=3, batch_size=16, verbose=2)
#trainPredict = model.predict(x_test_cluster)

2
Train on 10603 samples, validate on 14138 samples
Epoch 1/1


In [73]:
# NEURAL NETWORK
# ----------------------------------------------------
n_col = df_x_cluster.shape[1]
print(n_col)

import keras
from keras.layers import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras import optimizers

import keras.backend as K

def rmse (y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def model() :   #Created a Model using Keras
    model = keras.models.Sequential()
    model.add(Dropout(0.5,input_shape=(n_col,)))
    model.add(BatchNormalization())
    model.add(Dense(512,activation='relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(256,activation='relu')) 
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(128, activation='relu')) 
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(1)) 
    
    nadam = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    adadelta =optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    adgrad = optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)
    rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
    
    
    model.compile(optimizer=nadam,loss='mse', metrics=[rmse])
    return model

model = model()
estimator = model.fit(x=x_train_cluster,y=y_train_cluster, batch_size=2048, epochs=3, 
                    verbose=1, validation_data=(x_cluster, y_cluster), 
                    shuffle=True)

2
Train on 10603 samples, validate on 14138 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


## RNN

In [48]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Dropout

x_train_cluster=x_train_cluster.reshape(x_train_cluster.shape[0], 1,x_train_cluster.shape[1])
x_test_cluster=x_test_cluster.reshape(x_test_cluster.shape[0], 1, x_test_cluster.shape[1])
print (x_train_cluster.shape)
print(x_test_cluster.shape)

model = Sequential()
model.add(SimpleRNN(units=128, input_shape=(x_train_cluster.shape[1:]), activation="relu"))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.add(Dropout(0.5))
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=[rmse])
model.summary()

model.fit(x_train_cluster,y_train_cluster, epochs=3, batch_size=16, verbose=2)
trainPredict = model.predict(x_train_cluster)
testPredict= model.predict(x_test_cluster)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train_cluster, y_train_cluster, verbose=0)
testScore = model.evaluate(x_test_cluster, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

print(x_test_cluster.shape)
print(testPredict.shape)

(10603, 1, 2)
(3535, 1, 2)
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 128)               16768     
_________________________________________________________________
dense_24 (Dense)             (None, 8)                 1032      
_________________________________________________________________
dense_25 (Dense)             (None, 1)                 9         
_________________________________________________________________
dropout_19 (Dropout)         (None, 1)                 0         
_________________________________________________________________
dense_26 (Dense)             (None, 8)                 16        
_________________________________________________________________
dense_27 (Dense)             (None, 1)                 9         
_________________________________________________________________
dropout_20 (Dropout)       

## LSTM

In [35]:
#ADAM optimizer
from keras.optimizers import Adam
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Dense 
import keras.backend as K
from keras.callbacks import EarlyStopping

#x_train_cluster=x_train_cluster.reshape(x_train_cluster.shape[0], 1,x_train_cluster.shape[1])
#x_test_cluster=x_test_cluster.reshape(x_test_cluster.shape[0], 1, x_test_cluster.shape[1])
print (x_train_cluster.shape)
print(x_test_cluster.shape)

model = Sequential()
model.add(LSTM(100, input_shape=(x_train_cluster.shape[1:]))) 
model.add(Dense(1)) 
model.add(Dense(1, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='relu'))
model.add(Dropout(0.5))

adam = keras.optimizers.Adam( lr=10, beta_1=9, beta_2=0.999, amsgrad=False)
model.compile(loss='mean_squared_error', optimizer=adam, metrics=[rmse] )

model.summary()
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

model.fit(x_train_cluster, y_train_cluster, epochs=2, batch_size=30, verbose=1, callbacks=[early_stop])
              
trainPredict = model.predict(x_train_cluster)
testPredict= model.predict(x_test_cluster)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

trainScore = model.evaluate(x_train_cluster, y_train_cluster, verbose=0)
testScore = model.evaluate(x_test_cluster, testPredict, verbose=0)
print("train score:",trainScore)
print("test score:", testScore)

(10603, 1, 2)
(3535, 1, 2)
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100)               41200     
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 101       
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 2         
_________________________________________________________________
dropout_13 (Dropout)         (None, 1)                 0         
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 2         
_________________________________________________________________
dropout_14 (Dropout)         (None, 1)                 0         
Total params: 41,305
Trainable params: 41,305
Non-trainable params: 0
_______________________