In [1]:
import pandas as pd
import numpy as np
from geopy.distance import distance

In [2]:
data = pd.read_csv('../../../capstone_other/files_too_large_for_github/ML_table_with_availability_clusters.csv')

In [3]:
data = data[['dock_id', 'dock_name', 'latitude', 'longitude',
           'avail_bikes', 'tot_docks', 'avail_bikes_proportion']]

In [4]:
data = data.groupby('dock_id').apply(pd.DataFrame.sample, random_state = 0, n=1).reset_index(drop=True) 

In [5]:
def manhattan_distance(start_lat, start_lon, end_lat, end_lon):
    dist = distance((start_lat, start_lon), (start_lat, end_lon)).miles + \
           distance((end_lat, end_lon), (start_lat, end_lon)).miles
    return dist

In [6]:
data_low = data[data['avail_bikes_proportion'] <= 1/3]

In [7]:
data_high = data[data['avail_bikes_proportion'] >= 2/3]

In [8]:
data_low['deficit'] = round((1/3 - data_low['avail_bikes_proportion']) * data_low['tot_docks']).astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_low['deficit'] = round((1/3 - data_low['avail_bikes_proportion']) * data_low['tot_docks']).astype('int')


In [9]:
data_high['surplus'] = round((data_high['avail_bikes_proportion'] - 2/3) * data_high['tot_docks']).astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_high['surplus'] = round((data_high['avail_bikes_proportion'] - 2/3) * data_high['tot_docks']).astype('int')


In [10]:
data_low = data_low.sort_values(by = 'deficit', ascending = False)

In [11]:
data_high = data_high.sort_values(by = 'surplus', ascending = False)

In [12]:
rebalancing_dict = {}

In [13]:
low_copy = data_low.copy()
high_copy = data_high.copy()

In [14]:
bikes_rebalanced = 0
for low in low_copy.index:
    if low_copy.loc[low, 'deficit'] == 0:
        continue
    for high in high_copy.index:
        if high_copy.loc[high, 'surplus'] == 0:
            continue
        
        if manhattan_distance(low_copy.loc[low, 'latitude'], low_copy.loc[low, 'longitude'], 
                              high_copy.loc[high, 'latitude'], high_copy.loc[high, 'longitude']) < 3:
            stations_key = (low_copy.loc[low, 'dock_id'], high_copy.loc[high, 'dock_id'])
            change = min(low_copy.loc[low, 'deficit'], high_copy.loc[high, 'surplus'])
            low_copy.loc[low, 'deficit'] -= change
            high_copy.loc[high, 'surplus'] -= change
            bikes_rebalanced += change
            if stations_key in rebalancing_dict.keys():
                rebalancing_dict[stations_key] += change
            else:
                rebalancing_dict[stations_key] = change
            if low_copy.loc[low, 'deficit'] == 0:
                break

In [15]:
rebalancing_dict

{(3140, 3116): 15,
 (3140, 524): 3,
 (469, 524): 11,
 (469, 3165): 5,
 (3134, 3296): 13,
 (3134, 3165): 3,
 (3136, 3165): 4,
 (3136, 3305): 12,
 (386, 392): 10,
 (386, 315): 4,
 (491, 456): 11,
 (491, 3132): 3,
 (355, 315): 6,
 (355, 369): 8,
 (3457, 3132): 7,
 (3457, 3459): 7,
 (3452, 3101): 8,
 (3452, 3056): 6,
 (167, 369): 2,
 (167, 336): 10,
 (167, 3459): 1,
 (168, 3459): 1,
 (168, 3224): 7,
 (168, 3121): 5,
 (3139, 3374): 10,
 (3139, 3318): 3,
 (3284, 3318): 5,
 (3284, 3148): 7,
 (3233, 3148): 1,
 (3233, 3121): 2,
 (3233, 3164): 6,
 (3233, 439): 3,
 (3242, 3418): 10,
 (3242, 3419): 2,
 (3153, 3502): 8,
 (3153, 3351): 4,
 (3146, 3351): 3,
 (3146, 3168): 6,
 (3146, 516): 3,
 (3447, 3112): 5,
 (3447, 516): 1,
 (3447, 3290): 4,
 (3447, 3307): 2,
 (2017, 439): 3,
 (2017, 400): 5,
 (2017, 3176): 4,
 (3372, 3307): 2,
 (3372, 3494): 2,
 (3372, 3123): 2,
 (3372, 3458): 1,
 (3258, 249): 5,
 (3258, 334): 1,
 (337, 3410): 9,
 (337, 3407): 2,
 (3463, 408): 4,
 (3463, 144): 3,
 (3463, 316): 2,


In [16]:
bikes_rebalanced

511

In [17]:
data_low['deficit'].sum()

1045

In [18]:
data_high['surplus'].sum()

520

In [19]:
data_low.head(5)

Unnamed: 0,dock_id,dock_name,latitude,longitude,avail_bikes,tot_docks,avail_bikes_proportion,deficit
143,3140,1 Ave & E 78 St,40.771404,-73.953517,0,55,0.0,18
79,469,Broadway & W 53 St,40.763441,-73.982681,3,57,0.052632,16
138,3134,3 Ave & E 62 St,40.763126,-73.965269,1,51,0.019608,16
140,3136,5 Ave & E 63 St,40.766386,-73.971512,0,47,0.0,16
57,386,Centre St & Worth St,40.714948,-74.002345,0,43,0.0,14


In [20]:
data_high.head(5)

Unnamed: 0,dock_id,dock_name,latitude,longitude,avail_bikes,tot_docks,avail_bikes_proportion,surplus
125,3116,Huron St & Franklin St,40.73266,-73.95826,45,45,1.0,15
83,524,W 43 St & 6 Ave,40.755273,-73.983169,52,57,0.912281,14
183,3296,E 93 St & 2 Ave,40.782454,-73.94892,41,42,0.97619,13
155,3165,Central Park West & W 72 St,40.775793,-73.976203,46,51,0.901961,12
188,3305,E 91 St & 2 Ave,40.781122,-73.949656,38,39,0.974359,12
