In [1]:
import pandas as pd
import numpy as np
from geopy.distance import distance
import pydeck as pdk

In [2]:
data = pd.read_csv('../../../capstone_other/files_too_large_for_github/ML_table_with_availability_clusters.csv')

In [3]:
data = data[(data['month'] == 5) & (data['num_day'] == 4) & (data['hour'] == 10)]

In [4]:
data = data[['dock_id', 'dock_name', 'latitude', 'longitude',
           'avail_bikes', 'tot_docks', 'avail_bikes_proportion']]

In [5]:
data = data.groupby('dock_id').apply(pd.DataFrame.sample, random_state = 1, n=1).reset_index(drop=True)

In [6]:
data

Unnamed: 0,dock_id,dock_name,latitude,longitude,avail_bikes,tot_docks,avail_bikes_proportion
0,83,Atlantic Ave & Fort Greene Pl,40.683826,-73.976323,50,62,0.806452
1,119,Park Ave & St Edwards St,40.696089,-73.978034,6,19,0.315789
2,120,Lexington Ave & Classon Ave,40.686768,-73.959282,1,19,0.052632
3,143,Clinton St & Joralemon St,40.692395,-73.993379,22,24,0.916667
4,144,Nassau St & Navy St,40.698399,-73.980689,19,19,1.000000
...,...,...,...,...,...,...,...
312,3505,Lexington Ave & E 127 St,40.805726,-73.936322,1,33,0.030303
313,3506,Lexington Ave & E 120 St,40.801307,-73.939817,5,29,0.172414
314,3507,Park Ave & E 124 St,40.804555,-73.939686,13,36,0.361111
315,3509,Lenox Ave & W 115 St,40.801194,-73.950074,8,31,0.258065


In [7]:
def manhattan_distance(start_lat, start_lon, end_lat, end_lon):
    dist = distance((start_lat, start_lon), (start_lat, end_lon)).miles + \
           distance((end_lat, end_lon), (start_lat, end_lon)).miles
    return dist

In [8]:
data_low = data[data['avail_bikes_proportion'] <= 1/3]

In [9]:
data_high = data[data['avail_bikes_proportion'] >= 2/3]

In [10]:
data_low['deficit'] = round((1/3 - data_low['avail_bikes_proportion']) * data_low['tot_docks']).astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_low['deficit'] = round((1/3 - data_low['avail_bikes_proportion']) * data_low['tot_docks']).astype('int')


In [11]:
data_high['surplus'] = round((data_high['avail_bikes_proportion'] - 2/3) * data_high['tot_docks']).astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_high['surplus'] = round((data_high['avail_bikes_proportion'] - 2/3) * data_high['tot_docks']).astype('int')


In [12]:
data_low = data_low.sort_values(by = 'deficit', ascending = False)

In [13]:
data_high = data_high.sort_values(by = 'surplus', ascending = False)

In [14]:
rebalancing_dict = {}

In [15]:
low_copy = data_low.copy()
high_copy = data_high.copy()

In [16]:
bikes_rebalanced = 0
for low in low_copy.index:
    if low_copy.loc[low, 'deficit'] == 0:
        continue
    for high in high_copy.index:
        if high_copy.loc[high, 'surplus'] == 0:
            continue
        
        if manhattan_distance(low_copy.loc[low, 'latitude'], low_copy.loc[low, 'longitude'], 
                              high_copy.loc[high, 'latitude'], high_copy.loc[high, 'longitude']) < 3:
            stations_key = (low_copy.loc[low, 'dock_id'], high_copy.loc[high, 'dock_id'])
            change = min(low_copy.loc[low, 'deficit'], high_copy.loc[high, 'surplus'])
            low_copy.loc[low, 'deficit'] -= change
            high_copy.loc[high, 'surplus'] -= change
            bikes_rebalanced += change
            if stations_key in rebalancing_dict.keys():
                rebalancing_dict[stations_key] += change
            else:
                rebalancing_dict[stations_key] = change
            if low_copy.loc[low, 'deficit'] == 0:
                break

In [17]:
rebalancing_dict

{(3164, 3141): 19,
 (3164, 524): 1,
 (3161, 524): 18,
 (3161, 164): 2,
 (3295, 164): 14,
 (3295, 3457): 5,
 (3140, 3457): 9,
 (3140, 3458): 9,
 (3316, 3318): 9,
 (3316, 3338): 6,
 (3146, 3458): 4,
 (3146, 3466): 11,
 (3226, 3459): 10,
 (3226, 3235): 5,
 (3165, 491): 14,
 (3452, 3427): 12,
 (3452, 3112): 2,
 (3176, 491): 3,
 (3176, 168): 11,
 (3328, 456): 8,
 (3328, 3132): 5,
 (3320, 3498): 4,
 (3320, 3379): 3,
 (3294, 516): 9,
 (3294, 3127): 4,
 (3372, 3112): 8,
 (3372, 3235): 4,
 (3372, 3233): 1,
 (3288, 3233): 8,
 (3288, 3127): 3,
 (3288, 3122): 2,
 (3376, 3122): 5,
 (3376, 3469): 4,
 (3376, 3126): 4,
 (3139, 3462): 1,
 (3135, 3123): 1,
 (439, 168): 4,
 (439, 195): 9,
 (3096, 386): 12,
 (369, 195): 6,
 (369, 363): 6,
 (483, 462): 12,
 (2012, 462): 2,
 (2012, 3224): 9,
 (3236, 3224): 3,
 (3236, 3472): 8,
 (3354, 3477): 10,
 (238, 363): 8,
 (238, 3472): 2,
 (3175, 3258): 10,
 (312, 3472): 2,
 (312, 3467): 8,
 (336, 3467): 4,
 (336, 3258): 2,
 (336, 260): 4,
 (3077, 392): 10,
 (3346, 39

In [18]:
sorted_rebalancing = dict(sorted(rebalancing_dict.items(), key=lambda x: x[1], reverse = True))

In [19]:
filtered_rebalancing = {key: value for key, value in sorted_rebalancing.items() if value >= 5}

In [20]:
filtered_rebalancing

{(3164, 3141): 19,
 (3161, 524): 18,
 (3295, 164): 14,
 (3165, 491): 14,
 (3452, 3427): 12,
 (3096, 386): 12,
 (483, 462): 12,
 (3146, 3466): 11,
 (3176, 168): 11,
 (3226, 3459): 10,
 (3354, 3477): 10,
 (3175, 3258): 10,
 (3077, 392): 10,
 (3140, 3457): 9,
 (3140, 3458): 9,
 (3316, 3318): 9,
 (3294, 516): 9,
 (439, 195): 9,
 (2012, 3224): 9,
 (3346, 3232): 9,
 (416, 337): 9,
 (307, 319): 9,
 (3073, 315): 9,
 (3328, 456): 8,
 (3372, 3112): 8,
 (3288, 3233): 8,
 (3236, 3472): 8,
 (238, 363): 8,
 (312, 3467): 8,
 (3304, 3409): 8,
 (3102, 437): 8,
 (365, 83): 8,
 (3365, 418): 8,
 (3315, 249): 8,
 (3396, 264): 8,
 (3339, 321): 8,
 (331, 260): 7,
 (174, 414): 7,
 (3321, 390): 7,
 (3316, 3338): 6,
 (369, 195): 6,
 (369, 363): 6,
 (3308, 3399): 6,
 (3423, 3373): 6,
 (3306, 343): 6,
 (241, 3326): 6,
 (389, 3062): 6,
 (399, 3254): 6,
 (3295, 3457): 5,
 (3226, 3235): 5,
 (3328, 3132): 5,
 (3376, 3122): 5,
 (245, 243): 5,
 (396, 3407): 5}

In [21]:
rebalancing_df = pd.DataFrame(filtered_rebalancing.items(), columns = ['dock_ids', 'num_bikes'])

In [22]:
rebalancing_df[['dock_id_receive', 'dock_id_give']] = rebalancing_df['dock_ids'].tolist()
rebalancing_df.drop(['dock_ids'], axis = 1, inplace = True)

In [23]:
data_df = data[['dock_id', 'latitude', 'longitude']]

In [24]:
rebalancing_df = rebalancing_df.merge(data_df, how = 'left', left_on = 'dock_id_receive', right_on = 'dock_id').rename(
    columns = {'latitude': 'latitude_receive', 'longitude': 'longitude_receive'})
rebalancing_df = rebalancing_df.merge(data_df, how = 'left', left_on = 'dock_id_give', right_on = 'dock_id').rename(
    columns = {'latitude': 'latitude_give', 'longitude': 'longitude_give'})
rebalancing_df.drop(['dock_id_x', 'dock_id_y'], axis = 1, inplace = True)

In [25]:
GREEN_RGB = [0, 255, 0, 40]
RED_RGB = [240, 100, 0, 40]

# Specify a deck.gl ArcLayer
arc_layer = pdk.Layer(
    "ArcLayer",
    data = rebalancing_df,
    get_width="num_bikes",
    get_source_position=["longitude_give", "latitude_give"],
    get_target_position=["longitude_receive", "latitude_receive"],
    get_tilt=15,
    get_source_color=RED_RGB,
    get_target_color=GREEN_RGB,
    pickable=True,
    auto_highlight=True,
)

view_state = pdk.ViewState(latitude=40.74, longitude=-74, bearing=45, pitch=50, zoom=8,)


TOOLTIP_TEXT = {"html": "{num_bikes} rebalanced"}
r = pdk.Deck(arc_layer, initial_view_state=view_state, tooltip=TOOLTIP_TEXT, map_style = 'light')
r

In [26]:
bikes_rebalanced

598

In [27]:
data_low['deficit'].sum()

1397

In [28]:
data_high['surplus'].sum()

598

In [29]:
data_low.head(5)

Unnamed: 0,dock_id,dock_name,latitude,longitude,avail_bikes,tot_docks,avail_bikes_proportion,deficit
148,3164,Columbus Ave & W 72 St,40.777057,-73.978985,2,67,0.029851,20
145,3161,W 76 St & Columbus Ave,40.780184,-73.977285,0,59,0.0,20
173,3295,Central Park W & W 96 St,40.79127,-73.964839,1,59,0.016949,19
138,3140,1 Ave & E 78 St,40.771404,-73.953517,0,55,0.0,18
189,3316,W 104 St & Amsterdam Ave,40.798994,-73.966217,1,47,0.021277,15


In [30]:
data_high.head(5)

Unnamed: 0,dock_id,dock_name,latitude,longitude,avail_bikes,tot_docks,avail_bikes_proportion,surplus
139,3141,1 Ave & E 68 St,40.765005,-73.958185,58,59,0.983051,19
79,524,W 43 St & 6 Ave,40.755273,-73.983169,57,57,1.0,19
77,491,E 24 St & Park Ave S,40.740964,-73.986022,52,53,0.981132,17
6,164,E 47 St & 2 Ave,40.753231,-73.970325,47,47,1.0,16
8,168,W 18 St & 6 Ave,40.739713,-73.994564,46,47,0.978723,15
