In [25]:
import numpy as np
import pandas as pd

import shapely
import geopandas as gpd


import matplotlib.pyplot as plt
import seaborn as sns
import folium

In [26]:
bike_stations = pd.read_csv('ny_bike_stations.csv')
bike_stations.head()

Unnamed: 0,station_id,station_name,station_latitude,station_longitude
0,72,W 52 St & 11 Ave,40.767272,-73.993929
1,79,Franklin St & W Broadway,40.719116,-74.006667
2,82,St James Pl & Pearl St,40.711174,-74.000165
3,83,Atlantic Ave & Fort Greene Pl,40.683826,-73.976323
4,116,W 17 St & 8 Ave,40.741776,-74.001497


In [27]:
NYC_COORDINATES = [40.7128, -74.0060]
m = folium.Map(location=NYC_COORDINATES, tiles='Stamen Terrain', zoom_start=12)

for _, station in bike_stations.iterrows():
    lat, lon = float(station['station_latitude']), float(station['station_longitude'])
    new_marker = folium.CircleMarker(location=[lat, lon], popup=f'{lat},{lon}',
                                     radius=2, color='blue', fill_color='blue', fill_opacity=0.7)
    new_marker.add_to(m)

In [28]:
m

In [29]:
nta = pd.read_csv('nta_region.csv')
# nta = gpd.read_file('nynta.geojson')
nta.set_index("nta_code",inplace=True)
nta.geometry = nta.geometry.apply(lambda g: shapely.wkt.loads(g))
nta.head()

Unnamed: 0_level_0,borough,population,geometry,under_5_years,5-9_years,10-14_years,15-19_years,20-24_years,25-29_years,30-34_years,...,15000_to_24999,25000_to_34999,35000_to_49999,50000_to_74999,75000_to_99999,100000_to_149999,150000_to_199999,200000_or_more,median_income,mean_income
nta_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BK27,Brooklyn,33163,"POLYGON ((-73.9760507905698 40.6312841471042, ...",2210,1826,1922,1657,1726,2850,2771,...,1223,927,1290,2074,1352,1758,706,585,58243.0,75950.0
BK31,Brooklyn,79134,"POLYGON ((-73.79493384594591 40.7578063026709,...",5166,4513,3379,3551,4347,6827,6876,...,3235,3047,3542,5570,4108,4827,2497,2775,63539.0,86319.0
BK75,Brooklyn,75318,"POLYGON ((-73.7757397458527 40.7433339175923, ...",7480,6580,5378,4635,6479,8683,6984,...,3385,2509,3124,3706,2269,2847,1195,1034,39970.0,62616.0
BK29,Brooklyn,64267,"POLYGON ((-73.8037916164017 40.7756183875692, ...",4151,3563,2891,3470,4182,4939,4634,...,2718,2185,2655,3898,2558,2416,1144,844,48097.0,65715.0
BK28,Brooklyn,91646,"POLYGON ((-73.86109864852619 40.7636727485249,...",6498,5162,4529,4648,5793,8036,7766,...,3383,3246,4036,5117,3428,3743,1649,1231,51035.0,70198.0


In [30]:
def corresponding_nta(lat, lon):
    point = shapely.geometry.Point((lon, lat))
    for index, _ in nta.iterrows():
        if point.within(nta.loc[index].geometry):
            return str(index)

In [31]:
bike_stations['nta'] = bike_stations.apply(lambda s: corresponding_nta(s.station_latitude, s.station_longitude), axis=1)

In [32]:
bike_stations.dropna(subset=['nta'], inplace=True) # Drop stations outside of NY

In [33]:
bike_stations.head()

Unnamed: 0,station_id,station_name,station_latitude,station_longitude,nta
0,72,W 52 St & 11 Ave,40.767272,-73.993929,BX37
1,79,Franklin St & W Broadway,40.719116,-74.006667,BX29
2,82,St James Pl & Pearl St,40.711174,-74.000165,QN23
3,83,Atlantic Ave & Fort Greene Pl,40.683826,-73.976323,QN31
4,116,W 17 St & 8 Ave,40.741776,-74.001497,BX28


In [34]:
bike_stations.nta.unique()

array(['BX37', 'BX29', 'QN23', 'QN31', 'BX28', 'MN13', 'BX36', 'BK61',
       'BX41', 'BX22', 'MN14', 'BK63', 'SI99', 'BK45', 'BX30', 'QN49',
       'MN17', 'BX03', 'SI45', 'BX33', 'QN25', 'SI35', 'MN01', 'MN28',
       'QN98', 'MN04', 'MN21', 'BK43', 'MN12', 'QN37', 'SI36', 'BX08',
       'SI25', 'QN20', 'QN06', 'SI12', 'BX27', 'QN57', 'QN63', 'BK81',
       'BK35', 'MN09', 'MN22', 'QN70', 'MN06', 'QN48'], dtype=object)

In [35]:
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'lightblue', 'lightgreen', 'gray']
station_codes = list(bike_stations.nta.unique())
station_prefixes = set(map(lambda x: x[:2],station_codes))
prefix_color = dict(zip(station_prefixes,colors))
nta_color = {c:prefix_color[c[:2]] for c in station_codes}

In [36]:
m = folium.Map(location=NYC_COORDINATES, tiles='Stamen Terrain', zoom_start=12, opacity=0.5)

for _, station in bike_stations.iterrows():
    lat, lon = float(station['station_latitude']), float(station['station_longitude'])
    new_marker = folium.CircleMarker(location=[lat, lon], popup=f'{lat},{lon}',
                                     radius=5, color=nta_color[station.nta], fill_color=nta_color[station.nta], fill_opacity=0.7)
    new_marker.add_to(m)

m

In [37]:
import gmaps
gmaps.configure(api_key='AIzaSyDCnFjCeWSitWkHDC1VcJp0Qfzscaor5B4')

In [38]:
fig = gmaps.figure(center=(40.7128, -74.0060), zoom_level=11)
fig.add_layer(gmaps.bicycling_layer())
fig

Figure(layout=FigureLayout(height='420px'))

In [39]:
bike_stations.to_csv("nyc_bike_stations_ntas.csv")

In [40]:
prefix_color

{'MN': 'red', 'SI': 'blue', 'QN': 'green', 'BX': 'purple', 'BK': 'orange'}

In [41]:
nta_from_num = pd.read_csv("nta-to-num.csv")
nta_from_num.set_index("nta_number",inplace=True)
nta_from_num.head()


Unnamed: 0_level_0,nta
nta_number,Unnamed: 1_level_1
0,MN15
1,MN24
2,MN27
3,BK68
4,MN13


In [42]:
nta_interactions = pd.read_csv("nta_interactions.csv", header=None)
nta_interactions /= nta_interactions.max()
nta_interactions.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,53
0,0.757612,0.071396,0.044469,0.005965,0.202709,0.00199,0.092011,0.343176,0.011412,0.008668,...,0.004088,0.001055,0.064355,0.15272,0.009323,0.004119,0.001295,0.001242,0.0,0.0
1,0.14438,1.0,1.0,0.065435,0.186976,0.054444,0.459883,0.093722,0.242538,0.299461,...,0.000659,0.000703,0.012833,0.043933,0.002578,0.06425,0.073371,0.076159,0.0,0.0
2,0.022224,0.303398,0.978527,0.059712,0.043161,0.057864,0.130232,0.033653,0.080919,0.158115,...,0.0,0.0,0.002298,0.004184,0.001146,0.065898,0.034096,0.098096,0.0,0.0
3,0.002716,0.018928,0.051643,1.0,0.003935,0.349236,0.0079,0.004033,0.194679,0.456972,...,0.000396,0.0,0.000383,0.0,0.000755,0.282537,0.236081,0.125828,0.142857,0.041667
4,0.844922,0.438531,0.374873,0.035476,1.0,0.02288,0.700005,0.658416,0.050592,0.060481,...,0.004813,0.006329,0.085041,0.341004,0.012656,0.008237,0.012085,0.03394,0.0,0.0


In [43]:
nta_region = pd.read_csv("nta_region.csv")
nta_region.geometry = nta_region.geometry.apply(shapely.wkt.loads)
nta_region.set_index("nta_code",inplace=True)
nta_region = gpd.GeoDataFrame(nta_region, geometry=nta_region.geometry)
nta_region["centroid"] = nta_region.geometry.apply(lambda g: g.centroid)

In [44]:
nta_region.head()

Unnamed: 0_level_0,borough,population,geometry,under_5_years,5-9_years,10-14_years,15-19_years,20-24_years,25-29_years,30-34_years,...,25000_to_34999,35000_to_49999,50000_to_74999,75000_to_99999,100000_to_149999,150000_to_199999,200000_or_more,median_income,mean_income,centroid
nta_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BK27,Brooklyn,33163,"POLYGON ((-73.97605 40.63128, -73.97717 40.630...",2210,1826,1922,1657,1726,2850,2771,...,927,1290,2074,1352,1758,706,585,58243.0,75950.0,POINT (-73.98866 40.63096)
BK31,Brooklyn,79134,"POLYGON ((-73.79493 40.75781, -73.79475 40.755...",5166,4513,3379,3551,4347,6827,6876,...,3047,3542,5570,4108,4827,2497,2775,63539.0,86319.0,POINT (-73.80730 40.75412)
BK75,Brooklyn,75318,"POLYGON ((-73.77574 40.74333, -73.77580 40.743...",7480,6580,5378,4635,6479,8683,6984,...,2509,3124,3706,2269,2847,1195,1034,39970.0,62616.0,POINT (-73.78795 40.75104)
BK29,Brooklyn,64267,"POLYGON ((-73.80379 40.77562, -73.80099 40.775...",4151,3563,2891,3470,4182,4939,4634,...,2185,2655,3898,2558,2416,1144,844,48097.0,65715.0,POINT (-73.80955 40.76836)
BK28,Brooklyn,91646,"POLYGON ((-73.86110 40.76367, -73.85993 40.762...",6498,5162,4529,4648,5793,8036,7766,...,3246,4036,5117,3428,3743,1649,1231,51035.0,70198.0,POINT (-73.86840 40.76336)


In [45]:
nta_region.to_csv("nta_region.csv")

In [46]:
nta_m = folium.Map(location=NYC_COORDINATES, tiles='Stamen Terrain', zoom_start=12, opacity=0.5)

for index, _ in nta_region.iterrows():
    if index in nta_color.keys():
        clr = nta_color[index]
        pt = nta_region.loc[index].centroid
        lat, lon = float(pt.y), float(pt.x)
        new_marker = folium.CircleMarker(
            location=[lat, lon], popup=f'{lat},{lon}',
            radius=5, color=clr, fill_color=clr, fill_opacity=0.7)
        new_marker.add_to(nta_m)

        folium.GeoJson(nta_region.loc[index].geometry,
            style_function= lambda x:{
                "color":"black",
                "fillColor":"transparent"
            }
        ).add_to(nta_m)

#for _,station in bike_stations.iterrows():
#    print(station.nta)

nta_m

In [72]:
num_stations = len(nta_interactions.columns)
for i,j in zip(range(num_stations),range(num_stations)):
    ntai = nta_from_num.iloc[i]["nta"]
    ntaj = nta_from_num.iloc[j]["nta"]
    #print(ntai)
    if ntai!="-1" and ntaj!="-1":
        ipt = nta_region.loc[ntai].centroid
        ilat, ilon = float(ipt.x), float(ipt.y)
        jpt = nta_region.loc[ntaj].centroid
        jlat, jlon = float(jpt.x), float(jpt.y)
        folium.PolyLine(locations=[[ilat,ilon],[jlat,jlon]], color="grey", weight=10).add_to(nta_m)

nta_m

In [71]:
nta_from_num.head()

Unnamed: 0_level_0,nta
nta_number,Unnamed: 1_level_1
0,MN15
1,MN24
2,MN27
3,BK68
4,MN13


In [79]:
test = folium.Map(location=NYC_COORDINATES, tiles='Stamen Terrain', zoom_start=12, opacity=0.5)
for i,j in zip(range(num_stations),range(num_stations)):
    ntai = nta_from_num.iloc[i]["nta"]
    ntaj = nta_from_num.iloc[j]["nta"]
    #print(ntai)
    if ntai in nta_color.keys() and ntaj in nta_color.keys():
        ipt = nta_region.loc[ntai].centroid
        ilat, ilon = float(ipt.y), float(ipt.x)
        jpt = nta_region.loc[ntaj].centroid
        jlat, jlon = float(jpt.y), float(jpt.x)
        folium.PolyLine(locations=[[ilat,ilon],[jlat,jlon]], color="grey", weight=10,opacity=1).add_to(test)

test

In [82]:
set(nta_from_num.nta).difference(set(nta_color.keys()))

{'-1',
 'BK09',
 'BK32',
 'BK33',
 'BK37',
 'BK38',
 'BK60',
 'BK64',
 'BK68',
 'BK69',
 'BK72',
 'BK73',
 'BK75',
 'BK76',
 'BK77',
 'BK78',
 'BK79',
 'BK90',
 'BK99',
 'MN03',
 'MN11',
 'MN15',
 'MN19',
 'MN20',
 'MN23',
 'MN24',
 'MN25',
 'MN27',
 'MN31',
 'MN32',
 'MN33',
 'MN34',
 'MN40',
 'MN50',
 'MN99',
 'QN68',
 'QN71',
 'QN72',
 'QN99'}