In [109]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import scipy.stats as st
import pickle
import utils
import moran_auto
import gmm
import kmeans_utils
import figure_functions
from sklearn.preprocessing import MinMaxScaler
from sklearn import mixture
from sklearn.cluster import KMeans
import csv
from scipy.misc import imread
from collections import defaultdict
from matplotlib.patches import Ellipse
from matplotlib.patches import Polygon
import glob
from pandas.tseries.holiday import USFederalHolidayCalendar
import datetime
import gmplot
import matplotlib.cm as cm

In [67]:
# Project directories, all resolved relative to the working directory the
# notebook was started in.  `curr_dir` is captured here, so this cell must run
# before anything changes the kernel's working directory.
# os.path.join is used instead of concatenating '/' so the paths are built
# with the platform's own separator.
curr_dir = os.getcwd()
data_path = os.path.join(curr_dir, '..', 'data')
fig_path = os.path.join(curr_dir, '..', 'figs')
results_path = os.path.join(curr_dir, '..', 'results')
animation_path = os.path.join(curr_dir, '..', 'animation')

# Per-neighbourhood directories; a later cell globs them for '*.csv' files.
belltown_path = os.path.join(data_path, 'Belltown_Hour')
commcore_path = os.path.join(data_path, 'CommercialCore_Hour')
pikepine_path = os.path.join(data_path, 'PikePine_Hour')

In [78]:
# Load the pickled block locations.  Later cells index `locations` by the
# integer block key and read coordinates out of each entry by position.
# NOTE: pickle can execute arbitrary code on load -- only use trusted files.
with open(os.path.join(data_path, 'bloclocs.pck'), mode='rb') as locations_file:
    locations = pickle.load(locations_file)

In [177]:
# Draw every block-face with known coordinates on a Google map (one colour per
# paid parking area) and assemble the hourly load data found in each
# neighbourhood directory.
gmap = gmplot.GoogleMapPlotter(47.612676, -122.345028, 16)
block_info = pd.read_csv(os.path.join(data_path, 'Blockface.csv'))
dirs = [belltown_path, commcore_path, pikepine_path]
colors = iter(['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
area_dict = {}

# The holiday list does not depend on the directory, so build it once.
cal = USFederalHolidayCalendar()
holidays = cal.holidays(start='2000-01-01', end=datetime.datetime.now().date()).to_pydatetime()
holidays = [hol.date() for hol in holidays]

for directory in dirs:

    # NOTE(review): these containers are re-created for every directory, so
    # once the loop finishes they describe only the last entry of `dirs`.
    # Confirm that this is intended.
    avg_loads = []
    gps_loc = []
    park_data = {}
    last_key = None

    # Glob on the full path instead of os.chdir()-ing into the directory, so
    # an exception cannot leave the kernel in a different working directory.
    for fi in glob.glob(os.path.join(directory, '*.csv')):
        # Files are named '<block key>.csv'.
        key = int(os.path.basename(fi).split('.')[0])

        # Blocks without known coordinates are skipped entirely.
        if key not in locations:
            continue

        curr_block = locations[key]
        if len(curr_block) == 12:
            print('%s %s' % (key, len(curr_block)))

        lat1, lat2 = curr_block[1], curr_block[-2]
        lon1, lon2 = curr_block[0], curr_block[-3]

        mid_lat = (lat1 + lat2)/2.
        mid_long = (lon1 + lon2)/2.
        gps_loc.append([mid_lat, mid_long])

        # dropna() replaces the fragile `np.nan in areas` membership test.
        areas = (block_info.loc[block_info['ElementKey'] == key, 'PaidParkingArea']
                 .dropna().unique().tolist())

        # Blocks with no listed area share one "unknown" (None) entry instead
        # of raising IndexError.
        area = areas[0] if areas else None

        if area not in area_dict:
            # Fall back to black once the eight base colours are used up,
            # rather than raising StopIteration.
            area_dict[area] = next(colors, 'k')

        color_choice = area_dict[area]

        gmap.plot([lat1, lat2], [lon1, lon2], color=color_choice, edge_width=4)

        block_data = pd.read_csv(fi, names=['Datetime', 'Load'])

        # Clipping the loads to be no higher than 1.5
        # (Series.clip_upper was removed from pandas; clip(upper=...) is the
        # equivalent call.)
        block_data['Load'] = block_data['Load'].clip(upper=1.5)
        block_data['Datetime'] = pd.to_datetime(block_data['Datetime'])

        block_data = block_data.sort_values(by='Datetime').reset_index(drop=True)

        block_data['Date'] = block_data['Datetime'].dt.date
        block_data['Hour'] = block_data['Datetime'].dt.hour
        block_data['Day'] = block_data['Datetime'].dt.weekday

        # Getting rid of Sunday since there is no paid parking.
        block_data = block_data.loc[block_data['Day'] != 6]

        # Dropping the US federal holidays.
        block_data = block_data.loc[~block_data['Date'].isin(holidays)]
        block_data = block_data.reset_index(drop=True)

        park_data[key] = block_data
        last_key = key

        # Getting the average load for each hour of the week for the block.
        avg_load = block_data.groupby(['Day', 'Hour'])['Load'].mean().values.reshape((1,-1))
        avg_loads.append(avg_load)

    avg_loads = np.vstack((avg_loads))
    gps_loc = np.vstack((gps_loc))

    # Use the last block that was actually stored.  The raw loop variable
    # `key` may belong to a file that was skipped above, which would raise
    # KeyError.  Selecting 'Load' keeps the aggregation off the date columns.
    index = park_data[last_key].groupby(['Day', 'Hour'])['Load'].sum().index

    days = index.get_level_values(0).unique().values
    days = np.sort(days)

    hours = index.get_level_values(1).unique().values
    hours = np.sort(hours)

    # Flat index <-> (day, hour) lookups, laid out day-major.
    idx_to_day_hour = {i*len(hours) + j:(days[i], hours[j]) for i in range(len(days))
                                                            for j in range(len(hours))}
    day_hour_to_idx = {v:k for k,v in idx_to_day_hour.items()}

    P = len(idx_to_day_hour)

gmap.draw('test2.html')

54005 12
54006 12
135261


In [170]:
# Block keys of the blocks currently held in `park_data`; iterated by the
# North/South mapping cell further down.
elkeys = park_data.keys()

In [128]:
# Distinct ParkingSpaces values among the block-faces whose paid parking area
# is Belltown.
blocks = (
    pd.read_csv(os.path.join(data_path, 'Blockface.csv'))
    .query("PaidParkingArea == 'Belltown'")
)
blocks['ParkingSpaces'].unique()

array([  6.,   3.,   0.,  11.,   8.,   7.,   9.,   4.,  15.,  13.,  10.,
        16.,  19.,   5.,  12.,  22.,   2.,  14.,   1.,  17.,  20.])

In [131]:
# Unfiltered copy of the block-face table, inspected in the next cell.
# (Same CSV that the mapping cell reads into `block_info`.)
check = pd.read_csv(os.path.join(data_path, 'Blockface.csv'))

In [133]:
# Every distinct paid parking area in the block-face table (NaN is included).
check['PaidParkingArea'].unique()

array(['Green Lake', 'Fremont', 'Uptown Triangle', 'Belltown', nan,
       'Ballard', 'University District', 'Uptown', 'Westlake Ave N',
       'Denny Triangle', 'Pike-Pine', 'Pioneer Square', 'Commercial Core',
       'Cherry Hill', '12th Avenue', 'Capitol Hill', 'Chinatown/ID',
       'South Lake Union', 'First Hill', 'Roosevelt', 'Ballard Locks'], dtype=object)

In [137]:
# Pay-station records, restricted to the rows whose PAIDAREA is Belltown.
df = (
    pd.read_csv(os.path.join(data_path, 'pay_station.csv'))
    .query("PAIDAREA == 'Belltown'")
)

In [138]:
# Column name -> positional index lookup for `df`.
# enumerate() walks the columns once; the previous form rebuilt
# df.columns.tolist() on every iteration and relied on Python-2-only xrange.
col = {name: position for position, name in enumerate(df.columns)}

In [140]:
# Keep only the columns used below: identifiers, weekday/Saturday rate
# schedules, paid-hours boundaries and the station coordinates.
df_important = df.loc[:, [
    'ELMNTKEY', 'PAIDAREA', 'SUBAREA',
    'WKD_RATE1', 'WKD_START1', 'WKD_END1',
    'WKD_RATE2', 'WKD_START2', 'WKD_END2',
    'WKD_RATE3', 'WKD_START3', 'WKD_END3',
    'SAT_RATE1', 'SAT_START1', 'SAT_END1',
    'SAT_RATE2', 'SAT_START2', 'SAT_END2',
    'SAT_RATE3', 'SAT_START3', 'SAT_END3',
    'START_TIME_WKD', 'END_TIME_WKD',
    'START_TIME_SAT', 'END_TIME_SAT',
    'SHAPE_LAT', 'SHAPE_LNG',
]]

In [181]:
# Sub-areas present in the Belltown pay-station rows.
df_important['SUBAREA'].unique().tolist()

['South', 'North']

In [141]:
# Split the Belltown stations by sub-area.
df_north = df_important.query("SUBAREA == 'North'")
df_south = df_important.query("SUBAREA == 'South'")

In [142]:
# Preview the first rows of the Belltown pay-station frame (all columns).
df.head()

Unnamed: 0,OBJECTID,Shape,COMPKEY,ELMNTKEY,COMPTYPE,SEGKEY,DISTANCE,WIDTH,UNITID,UNITTYPE,...,SUN_END3,START_TIME_WKD,END_TIME_WKD,START_TIME_SAT,END_TIME_SAT,START_TIME_SUN,END_TIME_SUN,PEAK_HOUR,SHAPE_LNG,SHAPE_LAT
6,7,"(47.61202450600007, -122.34154800899995)",529576,35909,109,13311,113.5,32.0,PSL-J072,PSL,...,,08AM,08PM,08AM,08PM,,,,-122.341548,47.612025
17,18,"(47.613484723000056, -122.33965395099995)",529626,13305,109,13313,178.0,31.0,PSL-J032,PSL,...,,08AM,08PM,08AM,08PM,,,,-122.339654,47.613485
19,20,"(47.61302384900006, -122.34809164499995)",573950,13521,109,13509,209.0,29.0,PSL-B157,PSL,...,,08AM,08PM,08AM,08PM,,,,-122.348092,47.613024
29,30,"(47.61511158300004, -122.34834377799996)",282382,13345,109,13355,180.0,25.0,PSL-J273,PSL,...,,08AM,08PM,08AM,08PM,,,,-122.348344,47.615112
47,48,"(47.61789829700007, -122.34841041399994)",282134,28961,109,6144,94.0,32.0,PSL-J254,PSL,...,,08AM,08PM,08AM,08PM,,,,-122.34841,47.617898


# Times for Paid Parking

In [145]:
def _print_paid_hours(zone_df):
    """Print the distinct paid-parking start/end times found in `zone_df`.

    `zone_df` must provide the START_TIME_WKD, END_TIME_WKD, START_TIME_SAT
    and END_TIME_SAT columns.  One line is printed per column, in that order.
    """
    for label, column in (('Start time on weekdays', 'START_TIME_WKD'),
                          ('End time on weekdays', 'END_TIME_WKD'),
                          ('Start time on saturdays', 'START_TIME_SAT'),
                          ('End time on saturdays', 'END_TIME_SAT')):
        # A single pre-formatted string prints the same text on Python 2 and 3.
        print('%s %s' % (label, zone_df[column].unique().tolist()))


print('North Times\n')
_print_paid_hours(df_north)

print('\nSouth Times\n')
_print_paid_hours(df_south)

North Times

Start time on weekdays ['08AM']
End time on weekdays ['08PM']
Start time on saturdays ['08AM']
End time on saturdays ['08PM']

South Times

Start time on weekdays ['08AM']
End time on weekdays ['08PM']
Start time on saturdays ['08AM']
End time on saturdays ['08PM']


# Rate Intervals

In [146]:
def _print_rate_intervals(zone_df):
    """Print the three weekday rate intervals found in `zone_df`.

    For each slot 1-3 the distinct WKD_START<n> and WKD_END<n> values are
    divided by 60 and rounded (presumably minutes after midnight converted to
    hours -- confirm against the data dictionary), followed by the distinct
    WKD_RATE<n> values.

    Every value is printed as a list.  np.round works for any number of
    distinct values, whereas the built-in round() only accepts a size-1 array.
    """
    for slot in (1, 2, 3):
        start_hours = np.round(zone_df['WKD_START%d' % slot].unique() / 60.).tolist()
        end_hours = np.round(zone_df['WKD_END%d' % slot].unique() / 60.).tolist()
        prices = zone_df['WKD_RATE%d' % slot].unique().tolist()

        print('Time %d Start %s' % (slot, start_hours))
        print('Time %d End %s' % (slot, end_hours))
        print('Price %d %s' % (slot, prices))
        print('\n')


print('North Rate Intervals\n\n')
_print_rate_intervals(df_north)

print('South Rate Intervals\n\n')
_print_rate_intervals(df_south)

North Rate Intervals


Time 1 Start 8.0
Time 1 End 11.0
Price 1 [ 1.]


Time 2 Start 11.0
Time 2 End 17.0
Price 2 [1.5]


Time 3 Start 17.0
Time 3 End 20.0
Price 3 [1.5]


South Rate Intervals


Time 1 Start 8.0
Time 1 End 11.0
Price 1 [ 2.5]


Time 2 Start 11.0
Time 2 End 17.0
Price 2 [2.5]


Time 3 Start 17.0
Time 3 End 20.0
Price 3 [2.5]




In [147]:
# ELMNTKEY values of the North and South Belltown stations, as NumPy arrays;
# the mapping cell below tests block keys for membership in them.
north_keys = df_north['ELMNTKEY'].values
south_keys = df_south['ELMNTKEY'].values

In [148]:
# Reload the data through the project helper.  This rebinds gps_loc,
# avg_loads, park_data, P, idx_to_day_hour and day_hour_to_idx, replacing the
# values built by the mapping cell above.
params = utils.load_data(data_path)
gps_loc, avg_loads, park_data, N, P, idx_to_day_hour, day_hour_to_idx = params
# NOTE(review): here load_daily_data receives `park_data` and its result is
# bound to one name, but the "Train and Test GMM" section calls it with
# `data_path` and unpacks three values -- confirm which signature is current.
new_park_data = utils.load_daily_data(park_data)

In [150]:
# Preview only the first rows instead of rendering the whole frame.
new_park_data.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Load,Date,Hour,Day
Datetime,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-03-01 08:00:00,1017,0.000000,2016-03-01,8,1
2016-03-01 08:00:00,1018,0.000000,2016-03-01,8,1
2016-03-01 08:00:00,1021,0.031389,2016-03-01,8,1
2016-03-01 08:00:00,1022,0.285833,2016-03-01,8,1
2016-03-01 08:00:00,1025,0.552407,2016-03-01,8,1
2016-03-01 08:00:00,1026,0.258437,2016-03-01,8,1
2016-03-01 08:00:00,1029,0.093507,2016-03-01,8,1
2016-03-01 08:00:00,1030,0.018785,2016-03-01,8,1
2016-03-01 08:00:00,1033,0.000000,2016-03-01,8,1
2016-03-01 08:00:00,1034,0.000000,2016-03-01,8,1


In [175]:
# Draw the loaded blocks on a fresh map, coloured by Belltown sub-area, and
# record which block keys fall in each sub-area.
gmap = gmplot.GoogleMapPlotter(47.612676, -122.345028, 16)

north = []
south = []
other = []

for key in elkeys:
    # Pick the target list and line colour once, so the drawing code below
    # is shared by both sub-areas.
    if key in north_keys:
        zone_members, zone_color = north, 'navy'
    elif key in south_keys:
        zone_members, zone_color = south, 'orangered'
    else:
        # Keys in neither sub-area are reported and collected in `other`,
        # which was previously declared but never filled.
        other.append(key)
        print(key)
        continue

    curr_block = locations[key]
    lat1, lat2 = curr_block[1], curr_block[-2]
    lon1, lon2 = curr_block[0], curr_block[-3]

    zone_members.append(key)
    gmap.plot([lat1, lat2], [lon1, lon2], color=zone_color, edge_width=4)


gmap.draw('test.html')

In [None]:
# Zone-based adjacency: adjacency[i, j] is 1 when blocks i and j (i != j)
# are both 'north' or both 'south', and 0 otherwise.
# NOTE(review): `block_keys` is not defined in any cell visible here; it must
# already exist in the kernel for this cell to run.
N = len(block_keys)

north_set = set(north)
south_set = set(south)

# 0 = north, 1 = south, -1 = in neither list.  Keys in neither list are
# printed once and get no links at all.  Previously they silently reused the
# zone of the preceding block, or raised NameError on the first iteration.
zone_codes = np.full(N, -1, dtype=int)
for position, block_key in enumerate(block_keys):
    if block_key in north_set:
        zone_codes[position] = 0
    elif block_key in south_set:
        zone_codes[position] = 1
    else:
        print(block_key)

has_zone = zone_codes >= 0
same_zone = zone_codes[:, None] == zone_codes[None, :]
adjacency = (same_zone & has_zone[:, None] & has_zone[None, :]).astype(float)
# A block is never adjacent to itself.
np.fill_diagonal(adjacency, 0)

In [None]:
# Directory the paper figures are written to.  The original absolute path is
# kept as the default, so behaviour is unchanged on the author's machine.
# Set PARKING_FIG_PATH to redirect the output elsewhere without editing the
# notebook.
fig_path = os.environ.get('PARKING_FIG_PATH',
                          r'/Users/tfiez/Dropbox/2017Summer/tex/2018ACC/paper/figs')

In [None]:
# For every column of avg_loads, print how many entries are exactly zero.
# The count is vectorised across columns, and print() is called with a single
# argument so the output is identical on Python 2 and 3.
for zero_count in (avg_loads == 0).sum(axis=0):
    print(zero_count)

In [None]:
# Contour plot and mixture plot of the average loads at time index 51, both
# written under fig_path.  `fig` and `ax` end up bound to the mixture plot.
fig, ax = figure_functions.contour_plot(
    loads=avg_loads,
    gps_loc=gps_loc,
    time=51,
    title='',
    N=N,
    filename='contour1.png',
    fig_path=fig_path,
    contours=10,
)

fig, ax = figure_functions.mixture_plot(
    loads=avg_loads,
    gps_loc=gps_loc,
    times=[51],
    N=N,
    fig_path=fig_path,
    shape=(1, 1),
    filename='mixture1.png',
    title='',
)

plt.show()

In [None]:
# Scratch check of NumPy broadcasting: the (3, 1) column `b` is stretched
# across the columns of the (3, 3) matrix `a`, so row i of `a` is scaled by
# b[i].
a = np.random.randn(3, 3)
b = np.random.randn(3, 1)
c = np.multiply(a, b)

In [None]:
# Display the broadcast product computed in the previous cell.
c

In [None]:
# Temporal per-day plots of the average loads, scaled by 100.
# Reuse `fig_path` (set to the paper figure directory a few cells above)
# rather than repeating the absolute path inline.
fig, ax = figure_functions.temporal_day_plots(loads=avg_loads*100, P=P, fig_path=fig_path)

In [None]:
# Time indices used by the plotting cells below (`time2` selects the
# centroid plots).
time = time1 = 50
time2 = 58

In [None]:
# Number of distinct dates that have a Day == 4 (Friday), Hour == 17 record.
len(new_park_data.query('Day == 4 and Hour == 17')['Date'].unique())

In [None]:
# Load the saved means from the working directory.  The context manager
# closes the file handle, which the bare open() call never did.
# NOTE: pickle can execute arbitrary code on load -- only use trusted files.
with open('means.pkl', 'rb') as means_file:
    means = pickle.load(means_file)

In [None]:
# Derive distances and centroids from the loaded means, then the circle
# paths around the centroids that the radius plot below consumes.
distances, centroids = kmeans_utils.get_distances(means)
all_time_points = kmeans_utils.get_centroid_circle_paths(distances, centroids)

In [None]:
# Centroid-radius plot followed by the centroid plot, each for the single
# time index `time2`; the figures are written under fig_path.
fig, ax = figure_functions.centroid_radius(
    centroids,
    all_time_points,
    gps_loc,
    times=[time2],
    fig_path=fig_path,
    shape=(1, 1),
)
plt.show()

fig, ax = figure_functions.centroid_plots(
    means,
    gps_loc,
    N,
    times=[time2],
    fig_path=fig_path,
    shape=(1, 1),
)
plt.show()

In [None]:
# Distances at time index 58 (the value of `time2`), rounded to two decimals.
[round(val, 2) for val in distances[58]]

In [None]:
# Same call as in the earlier distances cell; re-running it refreshes
# `distances` and `centroids` from the current `means`.
distances, centroids = kmeans_utils.get_distances(means)

In [None]:
# Same call as earlier; rebuilds the circle paths from the refreshed
# distances and centroids.
all_time_points = kmeans_utils.get_centroid_circle_paths(distances, centroids)

In [None]:
# Centroid-radius plots for time indices 0-5, requested as a 2 x 3 grid.
fig, ax = figure_functions.centroid_radius(
    centroids,
    all_time_points,
    gps_loc,
    times=range(6),
    fig_path=fig_path,
    shape=(2, 3),
)

# Train and Test GMM

In [None]:
# Reload the daily data for the GMM section; this rebinds park_data, gps_loc
# and N.
# NOTE(review): an earlier cell calls utils.load_daily_data(park_data) and
# binds the result to a single name -- confirm which signature is current.
park_data, gps_loc, N = utils.load_daily_data(data_path)

In [None]:
# All load values in descending order.  np.sort avoids the round trip through
# a Python list and places NaNs deterministically (first after the reversal),
# whereas sorted() gives an unpredictable order when NaNs are present.
# NOTE: this rebinds `check`, which earlier held the block-face DataFrame.
check = np.sort(park_data['Load'].values)[::-1]

In [None]:
# Percentage of load values that are above 1.5.
np.count_nonzero(check > 1.5) / float(len(check)) * 100

In [None]:
# Scratch cell: a ten-row frame with index 0-9 and one column named 'check'.
pd.DataFrame(range(10), index=range(10), columns=['check'])

In [None]:
# Run the GMM locational demand analysis.  The next cell reads fields 0-5 of
# every entry in `results`.
results = gmm.locational_demand_analysis(park_data, gps_loc, N)

In [None]:
# Split `results` into one list per field, in field order 0-5.
# NOTE: this rebinds `days`, `hours` and `means`, which earlier cells used
# for other values.
days, hours, errors, morans_mix, morans_adj, means = (
    [result[field] for result in results] for field in range(6)
)

In [None]:
# Score the time points from the fitted means.  The cells below iterate
# `times` and treat each entry as an integer time index.
scores, times = kmeans_utils.get_time_scores(means)

In [None]:
# Walk `times` in the order given and keep, for each hour of the day and for
# each day, the first time index encountered.
best_times = {}
best_days = {}
for time in times:
    # Each day spans 12 consecutive time indices, the first mapping to hour 8.
    # divmod performs integer division on both Python 2 and 3; plain `/`
    # yields fractional day keys on Python 3.
    day, hour_slot = divmod(time, 12)
    hour = hour_slot + 8
    if hour not in best_times:
        best_times[hour] = time
    # Hours 8, 15, 16 and 17 are never used as a day's representative.
    if day not in best_days and hour not in [8, 15, 16, 17]:
        print('%s %s' % (day, hour))
        best_days[day] = time

In [None]:
# Collect every time index that is not in the 8 or 9 o'clock slot and fit a
# 3-cluster KMeans to the means at that time.
# NOTE: `data`, `kmeans` and `labels` are overwritten on every iteration, so
# only the values for the last retained time survive the loop.
good_times = []

for time in times:
    # time % 12 is the slot within the day; slots 0 and 1 are hours 8 and 9.
    if time % 12 in (0, 1):
        continue

    good_times.append(time)
    data = np.vstack((means[time]))
    # A fixed random_state makes the cluster labels reproducible across runs.
    kmeans = KMeans(n_clusters=3, random_state=0).fit(data)
    labels = kmeans.labels_.tolist()

In [None]:
# One representative time index per day, ordered by day.  Building a real
# list avoids passing a dict view (Python 3) and does not depend on dict
# iteration order.  This replaces the `good_times` collected by the loop above.
good_times = [best_days[day] for day in sorted(best_days)]

In [None]:
# Centroid plots for the selected per-day time indices, requested as a 2 x 3
# grid and written under fig_path.
fig, ax = figure_functions.centroid_plots(means, gps_loc, N, times=good_times, fig_path=fig_path, shape=(2,3))