## Exploring the New York Geographical Coordinates Dataset
## The dataset is available at: https://geo.nyu.edu/catalog/nyu_2451_34572

## --------------------------------------------------------------------------------------------------------

## The following cells install (where needed) and import all the libraries required for this exploration

In [5]:
import numpy as np  # vectorised numerical arrays
import pandas as pd  # tabular data analysis

# Lift pandas' display truncation so that every column and every row is shown
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

print("Pandas lilbrary imported")

Pandas lilbrary imported


In [6]:
import json # library to handle JSON files

print("Json library imported")

Json library imported


In [8]:

!pip install geopy
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
print("Geopy library imported")

Geopy library imported


In [9]:
import requests # library to handle requests
# Transform nested JSON into a flat pandas dataframe.
# json_normalize has been a top-level pandas function since pandas 1.0 and the
# pandas.io.json location is deprecated, so the old path is kept only as a
# fallback for older installations.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

print("matplotlib imported")

matplotlib imported


In [11]:
! pip install folium==0.5.0
import folium # plotting library

print("Folium imported")

Collecting folium==0.5.0
  Downloading folium-0.5.0.tar.gz (79 kB)
[K     |████████████████████████████████| 79 kB 9.1 MB/s  eta 0:00:01
[?25hCollecting branca
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Building wheels for collected packages: folium
  Building wheel for folium (setup.py) ... [?25ldone
[?25h  Created wheel for folium: filename=folium-0.5.0-py3-none-any.whl size=76240 sha256=7623298231310d1e9211399ebcc73e01b1edbefacd85b20f3190684d521d8bbc
  Stored in directory: /tmp/wsuser/.cache/pip/wheels/b2/2f/2c/109e446b990d663ea5ce9b078b5e7c1a9c45cca91f377080f8
Successfully built folium
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.5.0
Folium imported


In [12]:
import csv # implements classes to read and write tabular data in CSV form

print('csv imported.')

csv imported.


## Accessing the New York data using wget

In [13]:
# Download the New York neighbourhoods JSON and save it as 'newyork_data.json'
# in the current working directory (-q: no progress output, -O: output file name).
# NOTE(review): the shell escape does not raise on failure, so the message below
# is printed even if the download did not succeed — confirm the file exists.
!wget -q -O 'newyork_data.json' https://ibm.box.com/shared/static/fbpwbovar7lf8p5sgddm06cgipa2rxpe.json
print('Data downloaded!')

Data downloaded!


## Load the data 

In [14]:
# Parse the downloaded JSON file into a Python dictionary.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying
# on the platform's default text encoding.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)
print("Data loaded")

Data loaded


## The `features` key in the dataset contains the list of neighbourhoods and their details.
## The following cell defines a new variable which stores this data

In [15]:
# Keep only the list stored under the 'features' key; each entry describes one neighbourhood
neighborhoods_data = newyork_data['features']
# Display the first entry as a quick check of the record structure

neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

## The following cells convert the data, which is in the form of Python dictionaries, into a pandas DataFrame

In [17]:
# Column layout of the neighbourhood table
column_names = [
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# Start from an empty dataframe that carries only the headers
neighborhoods = pd.DataFrame(data=None, columns=column_names)

# Display the (still empty) template
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [18]:
# Collect one record per neighbourhood and build the dataframe in a single call.
# Growing a dataframe with DataFrame.append inside a loop copies the whole frame
# on every iteration, and DataFrame.append no longer exists in pandas 2.0.
# Rebuilding from scratch also makes this cell safe to re-run: rows are not
# appended a second time to an already filled dataframe.
records = []
for data in neighborhoods_data:
    properties = data['properties']
    borough = properties['borough']
    neighborhood_name = properties['name']

    # Coordinates are stored as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    records.append({'Borough': borough,
                    'Neighborhood': neighborhood_name,
                    'Latitude': neighborhood_lat,
                    'Longitude': neighborhood_lon})

# column_names fixes the column order and keeps the headers when there are no records
neighborhoods = pd.DataFrame(records, columns=column_names)
print("Dataframe filled")

Dataframe filled


In [19]:
# Display the first five rows as a quick check of the filled dataframe

neighborhoods.head(5)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [20]:
# Check that all 5 boroughs and 306 neighbourhoods are present
borough_count = len(neighborhoods['Borough'].unique())
neighborhood_count = neighborhoods.shape[0]
summary_template = 'The dataframe has {} boroughs and {} neighborhoods.'
print(summary_template.format(borough_count, neighborhood_count))

The dataframe has 5 boroughs and 306 neighborhoods.


## Converting the neighbourhoods dataframe to csv format

In [21]:
# Write the table to 'BON1_NYC_GEO.csv' in the working directory; index=False leaves out the row index
neighborhoods.to_csv('BON1_NYC_GEO.csv',index=False)

## Collecting the latitude and longitude values of New York City using the Geopy library

In [22]:
# Look up the coordinates of the city through the Nominatim geocoding service
address = 'New York City, NY'

geolocator = Nominatim(user_agent="Jupyter")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear
# message here instead of an AttributeError on the attribute lookups below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


## The following cell generates a map of New York City with its neighbourhoods superimposed on top
## The Folium library is used for this

In [23]:
# Base map centred on the city coordinates obtained from the geocoder
map_NewYork = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per neighbourhood, with a "<neighbourhood>, <borough>" popup
marker_rows = zip(
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    neighborhoods['Borough'],
    neighborhoods['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_NewYork)

# Render the finished map as the cell output
map_NewYork