In [44]:
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from torch import nn
import torch
import numpy as np
from torch.autograd import Variable 

In [45]:
mobi_data = pd.read_csv("../data/Mobi_System_Data_2020.csv", compression='zip').dropna()
geo_data = pd.read_csv('../data/geocodings.csv',index_col=0)
data = mobi_data.merge(geo_data.rename({'lat':'Departure lat','long':'Departure long'},axis=1),left_on='Departure station',right_on='address').drop('address',axis=1)
data = data.merge(geo_data.rename({'lat':'Return lat','long':'Return long'},axis=1),left_on='Return station',right_on='address').drop('address',axis=1)
data.shape
# data = data.loc[data["Departure postal code"].str.startswith("V6")]
print(data.columns)
data["time"] = pd.to_datetime(data["Departure"], format="%Y-%m-%d %H:%M:%S")
data["hour"] = data["time"].dt.hour
data["day"] = data["time"].dt.day
data["month"] = data["time"].dt.month
data["year"] = data["time"].dt.year

# train_data = data[["year", "month", "day", "hour", "Departure lat", "Departure long"]]
features_departure = ["month", "day", "hour", "Departure lat", "Departure long"]
counts_data = data.groupby(["month", "day", "hour", "Departure lat", "Departure long", "Departure station"]).size().reset_index(name='counts')




Index(['Unnamed: 0', 'Departure', 'Return', 'Bike', 'Departure station',
       'Return station', 'Membership type', 'Covered distance (m)',
       'Duration (sec.)', 'Departure battery voltage (mV)',
       'Return battery voltage (mV)', 'Departure temperature (C)',
       'Return temperature (C)', 'Stopover duration (sec.)',
       'Number of stopovers', 'postal_code_x', 'Departure lat',
       'Departure long', 'postal_code_y', 'Return lat', 'Return long'],
      dtype='object')


In [46]:
stations = data[["Departure lat", "Departure long", "Departure station"]].drop_duplicates()
max_lat, min_lat = stations["Departure lat"].max(), stations["Departure lat"].min()
max_long, min_long = stations["Departure long"].max(), stations["Departure long"].min()
batch = stations.shape[0]
print(batch)
predict_data_departure = pd.DataFrame(data={
    "month": [], 
    "day": [], 
    "hour": [], 
    "Departure lat": [],
    "Departure long": []})
for row in stations.iterrows():
    time = pd.date_range('2020-01-01', periods=8760, freq='H')

    station_data = pd.DataFrame(data={"time": time})
    station_data["hour"] = station_data["time"].dt.hour
    station_data["day"] = station_data["time"].dt.day
    station_data["month"] = station_data["time"].dt.month
    station_data["Departure lat"] = (row[1]["Departure lat"] - min_lat) / (max_lat - min_lat)
    station_data["Departure long"] = (row[1]["Departure long"] - min_long) / (max_long - min_long)
    station_data["Departure station"] = row[1]["Departure station"]
    station_data = station_data.drop(columns=["time"])
    
    predict_data_departure = pd.concat([predict_data_departure, station_data])

train_data = predict_data_departure.merge(counts_data[["month", "day", "hour", "Departure station", "counts"]], how="left", left_on=["month", "day", "hour", "Departure station"], right_on=["month", "day", "hour", "Departure station"])
train_data["counts"] = train_data["counts"].fillna(0)
train_data["month"] = train_data["month"] / 12
train_data["day"] = train_data["day"] / 31
train_data["hour"] = train_data["hour"] / 23
# train_data = train_data.sort_values(by="Departure station")
print(train_data)
x = train_data[features_departure]
y = train_data["counts"]

221
            month       day      hour  Departure lat  Departure long  \
0        0.083333  0.032258  0.000000       0.693601        0.000627   
1        0.083333  0.032258  0.043478       0.693601        0.000627   
2        0.083333  0.032258  0.086957       0.693601        0.000627   
3        0.083333  0.032258  0.130435       0.693601        0.000627   
4        0.083333  0.032258  0.173913       0.693601        0.000627   
...           ...       ...       ...            ...             ...   
1935955  1.000000  0.967742  0.826087       0.284948        0.841722   
1935956  1.000000  0.967742  0.869565       0.284948        0.841722   
1935957  1.000000  0.967742  0.913043       0.284948        0.841722   
1935958  1.000000  0.967742  0.956522       0.284948        0.841722   
1935959  1.000000  0.967742  1.000000       0.284948        0.841722   

                                Departure station  counts  
0        0099 Vancouver Art Gallery - North Plaza     1.0  
1        00

In [47]:
class LSTM(nn.Module):
    def __init__(self, input_size=5, hidden_layer_size=5, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)

        self.linear = nn.Linear(hidden_layer_size, output_size)

        self.hidden_cell = (torch.zeros(1, batch, self.hidden_layer_size),
                            torch.zeros(1, batch, self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq, self.hidden_cell)
        predictions = self.linear(lstm_out)
        return predictions

In [51]:
train_x = torch.tensor(x.to_numpy()).float()
train_y = torch.tensor(y.to_numpy()).float()
l = train_x.shape[0]
train_x = train_x.reshape((batch, int(l / batch), 5))
train_y = train_y.reshape((batch, int(l / batch), 1))

model = LSTM()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-1)
for e in range(20):
    model.zero_grad()
    model.hidden_cell = (torch.zeros(1, batch, model.hidden_layer_size), torch.zeros(1, batch, model.hidden_layer_size))
    out = model(train_x)
    loss = criterion(out, train_y)
    loss.backward()
    print('Loss:',loss.item())
    optimizer.step()

Loss: 0.7678512334823608
Loss: 0.7913770079612732
Loss: 0.7585370540618896
Loss: 0.7612772583961487
Loss: 0.7645926475524902
Loss: 0.7616039514541626
Loss: 0.7573513388633728
Loss: 0.7548993229866028
Loss: 0.7549518346786499
Loss: 0.7564924359321594
Loss: 0.7578408718109131
Loss: 0.7579511404037476
Loss: 0.7569201588630676
Loss: 0.7555580735206604


KeyboardInterrupt: 

In [50]:
predict_data_departure = pd.DataFrame(data={
    "month": [], 
    "day": [], 
    "hour": [], 
    "Departure lat": [],
    "Departure long": [],})
for row in stations.iterrows():
    time = pd.date_range('2022-01-01', periods=8760, freq='H')

    station_data = pd.DataFrame(data={"time": time})
    station_data["hour"] = station_data["time"].dt.hour / 23
    station_data["day"] = station_data["time"].dt.day / 31
    station_data["month"] = station_data["time"].dt.month / 12
    station_data["Departure lat"] = (row[1]["Departure lat"] - min_lat) / (max_lat - min_lat)
    station_data["Departure long"] = (row[1]["Departure long"] - min_long) / (max_long - min_long)
    station_data["Departure station"] = row[1]["Departure station"]
    station_data = station_data.drop(columns=["time"])
    
    predict_data_departure = pd.concat([predict_data_departure, station_data])
predict_x = predict_data_departure[features_departure].to_numpy()
tensor_x = torch.tensor(predict_x).float()
tensor_x = tensor_x.reshape((batch, int(tensor_x.shape[0] / batch), 5))
with torch.no_grad():
        model.hidden = (torch.zeros(1, batch, model.hidden_layer_size), torch.zeros(1, batch, model.hidden_layer_size))
        output = model(tensor_x)
predict_data_departure["counts"] = torch.flatten(output)
predict_data_departure = predict_data_departure[["Departure station", "counts"]].groupby("Departure station").sum().sort_values("counts")
print(predict_data_departure)
test = counts_data[["Departure station", "counts"]].groupby("Departure station").sum().sort_values("counts")
print(test)

                                                  counts
Departure station                                       
0995 Workshop - On Deck                      1713.635254
0981 Workshop - Service Complete             1761.683228
0245 Woodland & 10th                         1825.209595
0215 Princess & Union                        1837.238159
0281 Windsor & 14th                          1867.220703
...                                                  ...
0209 Stanley Park - Information Booth        2558.562744
0105 Stanley Park - Totem Poles              2558.592773
0101 Stanley Park - Vancouver Aquarium       2558.706543
0103 Stanley Park - Third Beach Parking Lot  2559.119141
0206 8th & Scotia                            2735.822754

[221 rows x 1 columns]
                                        counts
Departure station                             
0995 Workshop - On Deck                      2
0985 Yard - Long Term Storage                2
0986 Quebec Yard - Serviced                  5
