# Capstone Project - Battle of Neighborhoods

## Finding the best area to set up a small boutique cafe in Manhattan, NY

### Load Libraries

In [None]:
# Install geopy from the conda-forge channel; `--yes` skips the confirmation prompt.
!conda install -c conda-forge geopy --yes
# The folium install below is disabled (the `import folium` line in the imports cell is commented out as well).
#!conda install -c conda-forge folium=0.5.0 --yes

In [3]:
# Core data-handling libraries.
import pandas as pd
import numpy as np
# Remove pandas' display limits so every column and every row of a frame is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
# folium is currently disabled; re-enable together with its install line if maps are needed.
#import folium # map rendering library
# Simple marker so the cell output confirms the imports succeeded.
print("Libraries imported")

Libraries imported


### Data

Load New York data json file and read to a DataFrame

In [None]:
# Download the New York neighborhoods GeoJSON to 'newyork_data.json'.
# Done with `requests` (already imported) rather than `!wget -q`, which is not
# available on every platform and stays silent on failure, so the success
# message below was printed even when nothing had been downloaded.
newyork_response = requests.get('https://cocl.us/new_york_dataset', timeout=30)
newyork_response.raise_for_status()  # stop here on an HTTP error instead of saving an error page
with open('newyork_data.json', 'wb') as newyork_file:
    newyork_file.write(newyork_response.content)
print('Data downloaded!')

In [None]:
# Parse the downloaded file into Python objects.
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which is not UTF-8 everywhere.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

In [None]:
# Show only the top-level keys: echoing the whole parsed document would flood
# the cell output. The records used below live under the 'features' key.
list(newyork_data.keys())

In [None]:
# The per-neighborhood records are stored under the 'features' key.
neighborhoods_data = newyork_data['features']
# Display the first record to see which fields each feature carries.
neighborhoods_data[0]

In [None]:
# define the dataframe columns
# NOTE(review): the name `column_names` is reassigned later for the subway table,
# so this cell must run before the neighborhoods frame is created below.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# create an empty dataframe that has only the column headers
neighborhoods_df = pd.DataFrame(columns=column_names)
# display it to confirm the (empty) layout
neighborhoods_df

In [None]:
# Build the neighborhoods table from the GeoJSON features.
# One plain dict is collected per feature and the frame is created in a single
# call: DataFrame.append copied the whole frame on every iteration (and no longer
# exists in pandas 2.x), and appending to the existing frame duplicated every
# row whenever this cell was re-run.
neighborhood_records = []
for data in neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']

    # The coordinate pair is read as [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhood_records.append({'Borough': borough,
                                 'Neighborhood': neighborhood_name,
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

# Columns are listed explicitly so the layout is fixed even if there are no records.
neighborhoods_df = pd.DataFrame(neighborhood_records,
                                columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'])

neighborhoods_df.head()

To confirm the data is correct, we check that there are 5 boroughs and 306 neighborhoods.

In [27]:
# Sanity check: number of distinct boroughs and number of rows (one row per neighborhood).
borough_count = len(neighborhoods_df['Borough'].unique())
neighborhood_count = neighborhoods_df.shape[0]
summary_template = 'This dataframe has {} boroughs and {} neighborhoods in New York City'
print(summary_template.format(borough_count, neighborhood_count))

This dataframe has 5 boroughs and 306 neighborhoods in New York City


As we are only looking at Manhattan for this project, we will create a new dataframe containing only the Manhattan data.

In [28]:
# Keep only the Manhattan rows and renumber them from 0.
is_manhattan = neighborhoods_df['Borough'] == 'Manhattan'
manhattan_df = neighborhoods_df.loc[is_manhattan].reset_index(drop=True)
manhattan_df.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


Read in New York Crime data

In [56]:
import csv

In [65]:
# Load the NYC crime complaint data straight from the NYC Open Data CSV export.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the mixed-type DtypeWarning this
# read otherwise emits.
crime_data = pd.read_csv(
    'https://data.cityofnewyork.us/api/views/qgea-i56i/rows.csv?accessType=DOWNLOAD',
    low_memory=False,
)
crime_data.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,CMPLNT_NUM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,ADDR_PCT_CD,RPT_DT,KY_CD,OFNS_DESC,PD_CD,PD_DESC,CRM_ATPT_CPTD_CD,LAW_CAT_CD,BORO_NM,LOC_OF_OCCUR_DESC,PREM_TYP_DESC,JURIS_DESC,JURISDICTION_CODE,PARKS_NM,HADEVELOPT,HOUSING_PSA,X_COORD_CD,Y_COORD_CD,SUSP_AGE_GROUP,SUSP_RACE,SUSP_SEX,TRANSIT_DISTRICT,Latitude,Longitude,Lat_Lon,PATROL_BORO,STATION_NAME,VIC_AGE_GROUP,VIC_RACE,VIC_SEX
0,325341655,02/11/2015,15:00:00,,,73.0,02/11/2015,359,OFFENSES AGAINST PUBLIC ADMINI,749.0,VIOLATION OF ORDER OF PROTECTI,COMPLETED,MISDEMEANOR,BROOKLYN,INSIDE,RESIDENCE - PUBLIC HOUSING,N.Y. HOUSING POLICE,2.0,,TILDEN,405.0,1009656.0,181287.0,25-44,BLACK,M,,40.664239,-73.908425,"(40.664239422, -73.908425011)",PATROL BORO BKLYN NORTH,,<18,BLACK,M
1,393816841,03/17/2012,10:30:00,03/17/2012,11:00:00,69.0,03/17/2012,344,ASSAULT 3 & RELATED OFFENSES,114.0,OBSTR BREATH/CIRCUL,COMPLETED,MISDEMEANOR,BROOKLYN,INSIDE,RESIDENCE-HOUSE,N.Y. POLICE DEPT,0.0,,,,1014035.0,174133.0,,,,,40.64459,-73.892672,"(40.644589618, -73.892672426)",PATROL BORO BKLYN SOUTH,,45-64,BLACK,F
2,802896158,10/27/2016,13:48:00,11/03/2016,13:49:00,71.0,11/03/2016,578,HARRASSMENT 2,638.0,"HARASSMENT,SUBD 3,4,5",COMPLETED,VIOLATION,BROOKLYN,INSIDE,PUBLIC SCHOOL,N.Y. POLICE DEPT,0.0,,,,1000222.0,179282.0,<18,BLACK,M,,40.658758,-73.942435,"(40.658758183, -73.942434788)",PATROL BORO BKLYN SOUTH,,18-24,BLACK,M
3,633812343,11/27/2014,19:00:00,11/27/2014,22:30:00,112.0,11/28/2014,104,RAPE,157.0,RAPE 1,COMPLETED,FELONY,QUEENS,INSIDE,RESIDENCE - APT. HOUSE,N.Y. POLICE DEPT,0.0,,,,1025420.0,202485.0,25-44,WHITE HISPANIC,M,,40.722364,-73.851474,"(40.722363687, -73.851473894)",PATROL BORO QUEENS NORTH,,25-44,WHITE,F
4,300349533,12/11/2013,13:30:00,12/11/2013,14:15:00,24.0,12/12/2013,109,GRAND LARCENY,438.0,"LARCENY,GRAND FROM BUILDING (NON-RESIDENCE) UN...",COMPLETED,FELONY,MANHATTAN,INSIDE,DOCTOR/DENTIST OFFICE,N.Y. POLICE DEPT,0.0,,,,992848.0,228356.0,,,,,40.793465,-73.96895,"(40.793464597, -73.968949638)",PATROL BORO MAN NORTH,,45-64,WHITE,F


In [66]:
# (rows, columns) of the raw crime table.
crime_data.shape

(6983207, 35)

In [72]:
# Column dtypes as inferred by read_csv; the date and time columns come in as plain objects.
crime_data.dtypes

CMPLNT_NUM             int64
CMPLNT_FR_DT          object
CMPLNT_FR_TM          object
CMPLNT_TO_DT          object
CMPLNT_TO_TM          object
ADDR_PCT_CD          float64
RPT_DT                object
KY_CD                  int64
OFNS_DESC             object
PD_CD                float64
PD_DESC               object
CRM_ATPT_CPTD_CD      object
LAW_CAT_CD            object
BORO_NM               object
LOC_OF_OCCUR_DESC     object
PREM_TYP_DESC         object
JURIS_DESC            object
JURISDICTION_CODE    float64
PARKS_NM              object
HADEVELOPT            object
HOUSING_PSA           object
X_COORD_CD           float64
Y_COORD_CD           float64
SUSP_AGE_GROUP        object
SUSP_RACE             object
SUSP_SEX              object
TRANSIT_DISTRICT     float64
Latitude             float64
Longitude            float64
Lat_Lon               object
PATROL_BORO           object
STATION_NAME          object
VIC_AGE_GROUP         object
VIC_RACE              object
VIC_SEX       

We will only include the following columns for the analysis

CMPLNT_FR_DT - Date the reported event occurred (start date when the event spans a range; the report date itself is RPT_DT)  <br>
LAW_CAT_CD - Law category  <br>
BORO_NM - Borough Name  <br>
Latitude - Latitude  <br>
Longitude- Longitude

In [75]:
# Narrow the raw crime table down to the five columns used in the analysis.
analysis_columns = ['CMPLNT_FR_DT', 'LAW_CAT_CD', 'BORO_NM', 'Latitude', 'Longitude']
crime_df = crime_data.loc[:, analysis_columns]
crime_df.head()

Unnamed: 0,CMPLNT_FR_DT,LAW_CAT_CD,BORO_NM,Latitude,Longitude
0,02/11/2015,MISDEMEANOR,BROOKLYN,40.664239,-73.908425
1,03/17/2012,MISDEMEANOR,BROOKLYN,40.64459,-73.892672
2,10/27/2016,VIOLATION,BROOKLYN,40.658758,-73.942435
3,11/27/2014,FELONY,QUEENS,40.722364,-73.851474
4,12/11/2013,FELONY,MANHATTAN,40.793465,-73.96895


Filter crime_df to only include crimes in Manhattan

In [101]:
# Keep only complaints recorded for the borough of Manhattan.
crime_df = crime_df[crime_df['BORO_NM'] == 'MANHATTAN']
# Only the last expression of a cell is rendered, so the former mid-cell
# `crime_df.head()` never displayed anything and has been dropped.
crime_df.shape

(98693, 5)

Filter crime_df to only include felony and misdemeanor crimes

In [81]:
# Retain only rows whose law category is FELONY or MISDEMEANOR.
kept_categories = ['FELONY', 'MISDEMEANOR']
in_kept_category = crime_df['LAW_CAT_CD'].isin(kept_categories)
crime_df = crime_df[in_kept_category]
crime_df.shape

(1497457, 5)

Filter crime_df to only include crimes from 2019

In [92]:
# Discard rows that have no complaint date, since the year filter needs it.
has_complaint_date = crime_df['CMPLNT_FR_DT'].notna()
crime_df = crime_df[has_complaint_date]
crime_df.shape

(1497334, 5)

In [96]:
# Keep complaints whose date string contains "2019".
# na=False treats a missing date as "no match", so the boolean mask is valid
# even if rows without a date are still present; regex=False makes the
# match a plain substring test.
crime_df = crime_df[crime_df['CMPLNT_FR_DT'].str.contains("2019", na=False, regex=False)]

In [98]:
# (rows, columns) remaining after the filters above.
crime_df.shape

(98693, 5)

In [100]:
# Preview the first ten filtered rows; the index labels are carried over from the raw table.
crime_df.head(10)

Unnamed: 0,CMPLNT_FR_DT,LAW_CAT_CD,BORO_NM,Latitude,Longitude
1830027,01/02/2019,FELONY,MANHATTAN,40.773332,-73.961074
1830636,01/03/2019,FELONY,MANHATTAN,40.787567,-73.943132
1832787,01/06/2019,MISDEMEANOR,MANHATTAN,40.765024,-73.984836
1832815,01/09/2019,MISDEMEANOR,MANHATTAN,40.815732,-73.94542
1832851,01/14/2019,FELONY,MANHATTAN,40.794515,-73.966324
1832873,01/18/2019,MISDEMEANOR,MANHATTAN,40.723659,-73.991022
1832897,01/27/2019,MISDEMEANOR,MANHATTAN,40.732356,-73.984941
1832910,01/31/2019,MISDEMEANOR,MANHATTAN,40.74978,-73.987781
1832917,01/29/2019,MISDEMEANOR,MANHATTAN,40.823575,-73.937675
1832941,02/04/2019,MISDEMEANOR,MANHATTAN,40.710783,-73.996632


Load Subway Entrances JSON Data

In [None]:
# Fetch the subway-entrance records from NYC Open Data.
content = requests.get("https://data.cityofnewyork.us/resource/he7q-3hwy.json", timeout=30)
content.raise_for_status()  # surface HTTP errors instead of trying to parse an error body
# Decode through the response object rather than `json.loads`: this cell rebinds
# the name `json`, so after its first run `json.loads` no longer existed and
# re-running the cell raised AttributeError.
# NOTE(review): the name `json` is kept because the following cells read it, but
# it still hides the `json` module from here on; rename it together with those cells.
json = content.json()

In [16]:
# Here `json` is the parsed subway-entrance payload assigned in the previous cell, not the json module.
json

[{'objectid': '1734',
  'url': 'http://web.mta.info/nyct/service/',
  'name': 'Birchall Ave & Sagamore St at NW corner',
  'the_geom': {'type': 'Point',
   'coordinates': [-73.86835600032798, 40.84916900104506]},
  'line': '2-5'},
 {'objectid': '1735',
  'url': 'http://web.mta.info/nyct/service/',
  'name': 'Birchall Ave & Sagamore St at NE corner',
  'the_geom': {'type': 'Point',
   'coordinates': [-73.86821300022677, 40.84912800131844]},
  'line': '2-5'},
 {'objectid': '1736',
  'url': 'http://web.mta.info/nyct/service/',
  'name': 'Morris Park Ave & 180th St at NW corner',
  'the_geom': {'type': 'Point',
   'coordinates': [-73.87349900050798, 40.84122300105249]},
  'line': '2-5'},
 {'objectid': '1737',
  'url': 'http://web.mta.info/nyct/service/',
  'name': 'Morris Park Ave & 180th St at NW corner',
  'the_geom': {'type': 'Point',
   'coordinates': [-73.8728919997833, 40.84145300067447]},
  'line': '2-5'},
 {'objectid': '1738',
  'url': 'http://web.mta.info/nyct/service/',
  'name':

Create new Dataframe for JSON data

In [10]:
# Column layout for the subway-entrance table (this reassigns `column_names`,
# which earlier held the neighborhood columns).
column_names=['Object_ID', 'Name', 'URL','Latitude', 'Longitude']
# Empty frame with only the headers; rows are added by the loop further down.
subway_df=pd.DataFrame(columns=column_names)

In [17]:
# Display the subway table. In notebook order this cell sits before the loop
# that fills the frame, so on a fresh top-to-bottom run it shows an empty frame.
subway_df

Unnamed: 0,Object_ID,Name,URL,Latitude,Longitude
0,1734,Birchall Ave & Sagamore St at NW corner,http://web.mta.info/nyct/service/,40.849169,-73.868356
1,1735,Birchall Ave & Sagamore St at NE corner,http://web.mta.info/nyct/service/,40.849128,-73.868213
2,1736,Morris Park Ave & 180th St at NW corner,http://web.mta.info/nyct/service/,40.841223,-73.873499
3,1737,Morris Park Ave & 180th St at NW corner,http://web.mta.info/nyct/service/,40.841453,-73.872892
4,1738,Boston Rd & 178th St at SW corner,http://web.mta.info/nyct/service/,40.840815,-73.879623
5,1739,Boston Rd & E Tremont Ave at NW corner,http://web.mta.info/nyct/service/,40.840434,-73.880005
6,1740,Boston Rd & E Tremont Ave at NE corner,http://web.mta.info/nyct/service/,40.840354,-73.879833
7,1741,Boston Rd & 178th St at SE corner,http://web.mta.info/nyct/service/,40.840639,-73.879555
8,1742,Boston Rd & 178th St at NW corner,http://web.mta.info/nyct/service/,40.841078,-73.879397
9,1743,Boston Rd & 174th St at SW corner,http://web.mta.info/nyct/service/,40.837325,-73.888048


In [None]:
# Build the subway-entrance table from the parsed records held in `json`.
# Rows are gathered in a list and turned into a frame once: DataFrame.append
# copied the frame on every iteration (and no longer exists in pandas 2.x), and
# appending to the existing frame duplicated all rows when the cell was re-run.
subway_records = []
for data in json:
    objectid = data['objectid']
    name = data['name']
    url = data['url']
    # The coordinate pair is read as [longitude, latitude].
    subway_latlon = data['the_geom']['coordinates']
    subway_lat = subway_latlon[1]
    subway_lon = subway_latlon[0]

    subway_records.append({'Object_ID': objectid,
                           'Name': name,
                           'URL': url,
                           'Latitude': subway_lat,
                           'Longitude': subway_lon})

# Columns are listed explicitly so the layout is fixed even if there are no records.
subway_df = pd.DataFrame(subway_records,
                         columns=['Object_ID', 'Name', 'URL', 'Latitude', 'Longitude'])

In [21]:
# (rows, columns) of the subway-entrance table.
subway_df.shape

(98, 5)

In [18]:
# Preview the first five subway entrances.
subway_df.head()

Unnamed: 0,Object_ID,Name,URL,Latitude,Longitude
0,1734,Birchall Ave & Sagamore St at NW corner,http://web.mta.info/nyct/service/,40.849169,-73.868356
1,1735,Birchall Ave & Sagamore St at NE corner,http://web.mta.info/nyct/service/,40.849128,-73.868213
2,1736,Morris Park Ave & 180th St at NW corner,http://web.mta.info/nyct/service/,40.841223,-73.873499
3,1737,Morris Park Ave & 180th St at NW corner,http://web.mta.info/nyct/service/,40.841453,-73.872892
4,1738,Boston Rd & 178th St at SW corner,http://web.mta.info/nyct/service/,40.840815,-73.879623
