In [1]:
import cycledata as cd
import pandas as pd
import numpy as np
%matplotlib inline

In [None]:
# Import separated weeks for all years.
# cd.Import('split') returns two objects, bound here to WD and WE
# (presumably weekday / weekend data -- confirm against cycledata).
# No empty-dict placeholders are assigned first: if the import fails, the
# names should not silently end up bound to empty containers.
WD, WE = cd.Import('split')

In [None]:
# Import daily counts for all years.
# cd.Import('daily') returns two objects, bound here to WDdays and WEdays.
# No empty-dict placeholders are assigned first: if the import fails, the
# names should not silently end up bound to empty containers.
WDdays, WEdays = cd.Import('daily')

In [None]:
# Combine all daily counts into complete frames for multi-year analysis
WDs, WEs = cd.JoinYears(WDdays, WEdays)

# Persist both joined frames under <cd.wd>\Features.
# Every backslash is written as '\\' so the literal contains no invalid
# escape sequence ('\F'); the resulting path strings are unchanged.
WDs.to_csv(cd.wd + '\\Features\\FullYearWDs.csv')
WEs.to_csv(cd.wd + '\\Features\\FullYearWEs.csv')

In [2]:
# Import full years (non-separated weeks).
# The result is later indexed by year string, e.g. FullYear['2015'].
# No empty-dict placeholder is assigned first: if the import fails, the
# name should not silently end up bound to an empty container.
FullYear = cd.Import('fullyear')

Reading: 2012FullYear.csv
Reading: 2013FullYear.csv
Reading: 2014FullYear.csv
Reading: 2015FullYear.csv
Reading: 2016FullYear.csv


In [None]:
# Find most recent 2 weeks of data from the full-year frames.
# Needs `FullYear` from the cd.Import('fullyear') cell.
recent = cd.RecentFull(FullYear)

In [None]:
# Find most recent 4 weeks of data to model.
# NOTE: WD and WE are notebook-level names that the cd.AdjustOverlap cell
# also assigns, so the result depends on which cell bound them last.
recentWD, recentWE = cd.Recent(WD, WE)

In [None]:
# Model a subset of the data for an individual station.
# The second argument of cd.Model is the station id (the MSE cell passes each
# value of cd.station_range in the same position).
STATION_ID = 14

FullModel = cd.Model(recent, STATION_ID)
FullModel.PreProcess()

# Trailing ';' keeps the Axes repr out of the cell output; the figure itself
# is still rendered inline.
FullModel.WD.plot(figsize=(16, 12));

In [None]:
# Adjust any overlaps within the year frames.
# NOTE: this rebinds WD and WE, which the cd.Import('split') cell also
# assigns; cells that read WD / WE see whichever assignment ran last.
# No empty-dict placeholders are assigned first: if the call fails, the
# previous bindings are left untouched instead of being replaced by {}.
FY, WD, WE = cd.AdjustOverlap(FullYear)

In [None]:
# Create daily average of year frames.
# NOTE: uses whichever WD / WE are currently bound -- both the
# cd.Import('split') cell and the cd.AdjustOverlap(FullYear) cell assign them.
WDdaily, WEdaily = cd.YearsDaily(WD, WE)

In [None]:
# Fetch the address book, plus the stations missing from it.
# `addressbook` is also assigned by the pd.read_csv("Bike_Stations.csv") cell;
# whichever of the two cells runs last determines its value.
addressbook, missingStations = cd.GetAddressBook()

In [None]:
# Read the address book from "Bike_Stations.csv" (path is relative to the
# current working directory).
# Rebinds `addressbook`, replacing any value set by cd.GetAddressBook().
addressbook = pd.read_csv("Bike_Stations.csv")

In [None]:
# Create distances matrix from the address book (WARNING: long runtime, ~30 mins).
# The pd.read_csv("Bike_Station_Distances.csv") cell assigns the same name
# from a CSV file instead.
distances = cd.GetDistances(addressbook)

In [None]:
# Read distances matrix from CSV; the first column becomes the index (index_col=0).
# Rebinds `distances`, replacing any value computed by cd.GetDistances().
distances = pd.read_csv("Bike_Station_Distances.csv", index_col=0)

In [None]:
# Create test dataframe of a single day from `recent` (may need to adjust the date).
# The selection is copied, then reset_index(drop=True) discards its old index
# in favour of a fresh default one.
# NOTE(review): '28-2-2016' reads as day-month-year; how the lookup is resolved
# depends on the type/index of `recent`, which is not visible here -- confirm.
testday = recent['28-2-2016'].copy().reset_index(drop=True)

In [None]:
# Create adjacency matrix of testday for 08:00 - 09:00.
# NOTE(review): 32 and 36 presumably index 15-minute slots
# (32 -> 08:00, 36 -> 09:00) -- confirm against cd.Transform.
adjacency, am_peak = cd.Transform(testday, 32, 36)

In [None]:
# Calculate average speeds for testday (WARNING: can have a long runtime).
# Needs `testday` from the single-day selection cell.
updated = cd.CalcSpeeds(testday)

In [None]:
# Run SARIMA model of weekdays using the most recent dataframe.
# The call's result is unpacked into three names: output, absent and errors.
output, absent, errors = cd.RunWDModel(recent)

In [42]:
# Compare model forecasts against historic values and compute the mean of the
# per-station mean squared errors (total_mse).

# Historic values: take the 2015 frame, index it by 's_date', keep the rows in
# the comparison window, then restore 's_date' as a regular column.
# NOTE(review): the slice bounds are ambiguous (day-first vs month-first);
# confirm the intended window and consider ISO 'YYYY-MM-DD' strings.
historic = (
    FullYear['2015']
    .set_index('s_date')
    .loc['12-07-2015':'12-10-2015']
    .reset_index()
)

# Model output: the first CSV column is the index and column position 2 is
# parsed as dates (presumably 'DateTime' -- confirm against the file layout).
# Only rows flagged as forecasts are kept.
modelOut = pd.read_csv('ModelOutput.csv', index_col=0, parse_dates=[2])
predictions = modelOut.loc[modelOut.Type == 'Forecast']

mse = []
for x in cd.station_range:
    # Forecast values for station x, indexed by timestamp. The index name is
    # cleared with rename_axis(None); `del frame.index.name` raises
    # AttributeError on pandas >= 1.0, where `name` is a property without a
    # deleter.
    x_predictions = (
        predictions.loc[predictions.ID == x, ['count_diff', 'DateTime']]
        .set_index('DateTime')
        .rename_axis(None)
    )

    # Historic values for station x, pre-processed by the project model class
    test = cd.Model(historic, x)
    test.PreProcess(separate=True)

    # Squared forecast errors; the subtraction aligns both series on their index
    Errors = x_predictions.count_diff - test.WD.count_diff
    SqErrors = Errors ** 2

    # The mean is NaN when no non-missing squared error exists for this
    # station; such stations are left out. `not` is used instead of `~`, which
    # only behaves as logical negation on NumPy booleans.
    station_mse = SqErrors.mean()
    if not np.isnan(station_mse):
        mse.append(station_mse)

# Mean over the stations that produced a valid error; NaN if none did
# (avoids NumPy's "mean of empty slice" warning).
total_mse = np.mean(mse) if mse else np.nan