In [75]:
import json
import numpy as np
import pandas as pd
from geopy import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [76]:
LOCATOR = RateLimiter(Nominatim(user_agent='myGeoCoder').geocode, min_delay_seconds=1.5)

def load_geo_json(filename='Boston_Neighborhoods.geojson'):
    with open(filename, 'r') as f:
        boston_geo_json = json.loads(f.read())
    return boston_geo_json


def create_polygon(shape):
    array = np.array(shape)
    if len(array.shape) == 2:
        return Polygon(shape)
    return Polygon(shape[0])


def create_polygons(neighborhood):
    coords = neighborhood['geometry']['coordinates']
    return [create_polygon(shape) for shape in coords]


# Assigns the neighborhoods listed in Boston_Neighborhoods with shapley polygons based off their longitude/latitude
def get_neighborhood_polygons(geo_json):
    return {
        neighborhood['properties']['Name']: create_polygons(neighborhood) 
        for neighborhood in geo_json['features']
    }


def get_lat_long(location):
    loc = LOCATOR(location)
    return loc.latitude, loc.longitude

# Gathers unique set of neighborhoods from dataset and returns 
def get_neighborhoods(filename='rideshare_kaggle.csv'):
    data = pd.read_csv(filename)
    neighborhoods = set(np.concatenate([data['source'].unique(), data['destination'].unique()]))
    return pd.DataFrame([
        (nb, *get_lat_long(f'{nb}, Boston, MA')) for nb in neighborhoods
    ], columns=['Neighborhood', 'Latitude', 'Longitude'])

In [2]:
def load_geo_json(filename='Boston_Neighborhoods.geojson'):
    with open(filename, 'r') as f:
        boston_geo_json = json.loads(f.read())
    return boston_geo_json

boston_geo_json = load_geo_json('Boston_Neighborhoods.geojson')
boston_geo_json

NameError: name 'json' is not defined

In [78]:
nbs = get_neighborhoods()
nbs

Unnamed: 0,Neighborhood,Latitude,Longitude
0,North End,42.365097,-71.054495
1,South Station,42.352474,-71.054974
2,Financial District,42.355838,-71.055616
3,Northeastern University,42.33903,-71.087913
4,Theatre District,42.356258,-71.060798
5,Boston University,42.350317,-71.101227
6,North Station,42.365282,-71.06015
7,Haymarket Square,42.36295,-71.057845
8,West End,42.363919,-71.063899
9,Back Bay,42.350549,-71.080311


In [79]:
boston_geo_json_polygons = get_neighborhood_polygons(load_geo_json())
boston_geo_json_polygons

{'Roslindale': [<shapely.geometry.polygon.Polygon at 0x7f88fa34bd60>,
  <shapely.geometry.polygon.Polygon at 0x7f88fa34b640>,
  <shapely.geometry.polygon.Polygon at 0x7f8928b98160>],
 'Jamaica Plain': [<shapely.geometry.polygon.Polygon at 0x7f88dab39ca0>],
 'Mission Hill': [<shapely.geometry.polygon.Polygon at 0x7f88dab39820>],
 'Longwood': [<shapely.geometry.polygon.Polygon at 0x7f891bcd12b0>],
 'Bay Village': [<shapely.geometry.polygon.Polygon at 0x7f891bcd1340>],
 'Leather District': [<shapely.geometry.polygon.Polygon at 0x7f891bcd1940>],
 'Chinatown': [<shapely.geometry.polygon.Polygon at 0x7f891bcd1280>],
 'North End': [<shapely.geometry.polygon.Polygon at 0x7f891bcd1460>],
 'Roxbury': [<shapely.geometry.polygon.Polygon at 0x7f891bcd1130>],
 'South End': [<shapely.geometry.polygon.Polygon at 0x7f891bcd1a30>],
 'Back Bay': [<shapely.geometry.polygon.Polygon at 0x7f891bcd17c0>],
 'East Boston': [<shapely.geometry.polygon.Polygon at 0x7f891bcd1520>],
 'Charlestown': [<shapely.geometr

In [80]:
nbs = get_neighborhoods()

In [81]:
boston_geo_json['features'][0]

{'type': 'Feature',
 'properties': {'OBJECTID': 27,
  'Name': 'Roslindale',
  'Acres': 1605.5682375,
  'Neighborhood_ID': '15',
  'SqMiles': 2.51,
  'ShapeSTArea': 69938272.92557049,
  'ShapeSTLength': 53563.912597056624},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[-71.12592717485386, 42.272013107957406],
     [-71.12610933458738, 42.2716219294518],
     [-71.12603188298199, 42.27158985153841],
     [-71.12571713956957, 42.27152070474045],
     [-71.12559042372907, 42.27146017841939],
     [-71.12523676125656, 42.271387313901805],
     [-71.12522437821433, 42.271425073651166],
     [-71.12489533053173, 42.27134458090032],
     [-71.12482468090687, 42.271318140479686],
     [-71.12485155056099, 42.27124753819149],
     [-71.12476329046935, 42.270292339717635],
     [-71.12470249712558, 42.270295367758344],
     [-71.12259088359436, 42.2700534081311],
     [-71.1223931813923, 42.27003085475475],
     [-71.12252039300371, 42.269427196690025],
     [-71.12214745279846, 42.2

In [83]:
coords = boston_geo_json['features'][0]['geometry']['coordinates']
coords

[[[[-71.12592717485386, 42.272013107957406],
   [-71.12610933458738, 42.2716219294518],
   [-71.12603188298199, 42.27158985153841],
   [-71.12571713956957, 42.27152070474045],
   [-71.12559042372907, 42.27146017841939],
   [-71.12523676125656, 42.271387313901805],
   [-71.12522437821433, 42.271425073651166],
   [-71.12489533053173, 42.27134458090032],
   [-71.12482468090687, 42.271318140479686],
   [-71.12485155056099, 42.27124753819149],
   [-71.12476329046935, 42.270292339717635],
   [-71.12470249712558, 42.270295367758344],
   [-71.12259088359436, 42.2700534081311],
   [-71.1223931813923, 42.27003085475475],
   [-71.12252039300371, 42.269427196690025],
   [-71.12214745279846, 42.26937277217365],
   [-71.12192848881409, 42.27038614438027],
   [-71.1221406576364, 42.27041692631439],
   [-71.1220178727131, 42.27096341291406],
   [-71.12201165286537, 42.271016446462525],
   [-71.12190239148987, 42.27099668884986],
   [-71.12184778412909, 42.271128915878194],
   [-71.12185167622407, 42.2

In [25]:
Polygon(coords[0][0]), Polygon(coords[1][0])

(<shapely.geometry.polygon.Polygon at 0x7f88fa34b7f0>,
 <shapely.geometry.polygon.Polygon at 0x7f88fa34b700>)

In [100]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import numpy as np
def create_polygon(shape):
    array = np.array(shape)
    if len(array.shape) == 2:
        return Polygon(shape)
    return Polygon(shape[0])
def create_polygons(neighborhood):
    coords = neighborhood['geometry']['coordinates']
    
    return [create_polygon(shape) for shape in coords]
neighborhood_polygons = {
    neighborhood['properties']['Name']: create_polygons(neighborhood) 
    for neighborhood in boston_geo_json['features']
}



In [85]:
neighborhood_polygons

{'Roslindale': [<shapely.geometry.polygon.Polygon at 0x7f891bce57f0>,
  <shapely.geometry.polygon.Polygon at 0x7f891bce5820>,
  <shapely.geometry.polygon.Polygon at 0x7f891bce5850>],
 'Jamaica Plain': [<shapely.geometry.polygon.Polygon at 0x7f891bce5550>],
 'Mission Hill': [<shapely.geometry.polygon.Polygon at 0x7f891bce5d60>],
 'Longwood': [<shapely.geometry.polygon.Polygon at 0x7f891bce5e20>],
 'Bay Village': [<shapely.geometry.polygon.Polygon at 0x7f891bce5e80>],
 'Leather District': [<shapely.geometry.polygon.Polygon at 0x7f891bce5f10>],
 'Chinatown': [<shapely.geometry.polygon.Polygon at 0x7f891bce5fa0>],
 'North End': [<shapely.geometry.polygon.Polygon at 0x7f891bce5d00>],
 'Roxbury': [<shapely.geometry.polygon.Polygon at 0x7f891bce56d0>],
 'South End': [<shapely.geometry.polygon.Polygon at 0x7f891bce5eb0>],
 'Back Bay': [<shapely.geometry.polygon.Polygon at 0x7f89201e7040>],
 'East Boston': [<shapely.geometry.polygon.Polygon at 0x7f8928b86790>],
 'Charlestown': [<shapely.geometr

In [98]:
def get_neighborhoods(filename='rideshare_kaggle.csv'):
    data = pd.read_csv(filename)
    neighborhoods = set(np.concatenate([data['source'].unique(), data['destination'].unique()]))
    return pd.DataFrame([
        (nb, *get_lat_long(f'{nb}, Boston, MA')) for nb in neighborhoods
    ], columns=['Neighborhood', 'Latitude', 'Longitude'])

neighborhoods = get_neighborhoods('rideshare_kaggle.csv')
neighborhoods

pandas.core.frame.DataFrame

In [101]:
def which_neighborhoods(row):
    point = Point(row['Longitude'], row['Latitude'],)
    for neighborhood, polygons in neighborhood_polygons.items():
        for polygon in polygons:
            if point.within(polygon):
                return neighborhood

In [102]:
neighborhoods['geo_json_neighborhood'] = neighborhoods.apply(which_neighborhoods, axis=1)
neighborhoods

Unnamed: 0,Neighborhood,Latitude,Longitude,geo_json_neighborhood
0,North End,42.365097,-71.054495,North End
1,South Station,42.352474,-71.054974,Downtown
2,Financial District,42.355838,-71.055616,Downtown
3,Northeastern University,42.33903,-71.087913,Fenway
4,Theatre District,42.356258,-71.060798,Downtown
5,Boston University,42.350317,-71.101227,Fenway
6,North Station,42.365282,-71.06015,West End
7,Haymarket Square,42.36295,-71.057845,Downtown
8,West End,42.363919,-71.063899,West End
9,Back Bay,42.350549,-71.080311,Back Bay


In [91]:
LOCATOR = RateLimiter(Nominatim(user_agent='myGeoCoder').geocode, min_delay_seconds=1.5)

In [92]:
def get_lat_long(location):
    loc = LOCATOR(location)
    return loc.latitude, loc.longitude

In [95]:
list(neighborhoods.items())

[('Neighborhood',
  0                   North End
  1               South Station
  2          Financial District
  3     Northeastern University
  4            Theatre District
  5           Boston University
  6               North Station
  7            Haymarket Square
  8                    West End
  9                    Back Bay
  10                Beacon Hill
  11                     Fenway
  Name: Neighborhood, dtype: object),
 ('Latitude',
  0     42.365097
  1     42.352474
  2     42.355838
  3     42.339030
  4     42.356258
  5     42.350317
  6     42.365282
  7     42.362950
  8     42.363919
  9     42.350549
  10    42.358708
  11    42.345187
  Name: Latitude, dtype: float64),
 ('Longitude',
  0    -71.054495
  1    -71.054974
  2    -71.055616
  3    -71.087913
  4    -71.060798
  5    -71.101227
  6    -71.060150
  7    -71.057845
  8    -71.063899
  9    -71.080311
  10   -71.067829
  11   -71.104599
  Name: Longitude, dtype: float64)]

In [96]:
neighborhoods_df = pd.DataFrame(
    [(neighborhood, loc.latitude, loc.longitude) 
    for neighborhood, loc in neighborhoods.items()], 
    columns=['Neighborhood', 'Latitude', 'Longitude']
)
neighborhoods_df

AttributeError: 'Series' object has no attribute 'latitude'

In [108]:
neighborhoods_df['geo_json_neighborhood'] = neighborhoods_df.apply(which_neighborhoods, axis=1)
neighborhoods_df

Unnamed: 0,neighborhood,latitude,longitude,geo_json_neighborhood
0,North End,42.365097,-71.054495,North End
1,Financial District,42.355838,-71.055616,Downtown
2,Theatre District,42.356258,-71.060798,Downtown
3,Beacon Hill,42.358708,-71.067829,Beacon Hill
4,Northeastern University,42.33903,-71.087913,Fenway
5,West End,42.363919,-71.063899,West End
6,North Station,42.365282,-71.06015,West End
7,Back Bay,42.350549,-71.080311,Back Bay
8,Boston University,42.350317,-71.101227,Fenway
9,Haymarket Square,42.36295,-71.057845,Downtown
