# The *Perfect* Fit
  With this Notebook I will be extracting data with the Foursquare API and scrapping RentHop.com for average rent for one bedroom apartments. Once all data is obtained, it will be cleaned and processed by city, by neighborhood. This will utilize the k-means clustering algorithm to further choose between which city is my personal best fit and will contribute to neighborhood decisions as well. 

In [53]:
import numpy as np                          # library to handle data in a vectorized manner
import pandas as pd                         # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json                                 # library to handle JSON files
from geopy.geocoders import Nominatim       # convert an address into latitude and longitude values
import requests                             # library to handle requests
from pandas.io.json import json_normalize   # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium                               # map rendering library

print('Libraries imported.')

Libraries imported.


### Downloading Datasets for each city
  New York City - This data is the same as the week 3's "Segmenting and Clustering Neighborhoods" data  
    
  Boston - https://data.boston.gov/dataset/boston-neighborhoods/resource/13ee2b65-6547-4168-b112-83995f138602 geojson data of Boston's neighborhoods  
    
  Chicago - https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Neighborhoods/bbvz-uum9 geojson data of Chicago's neighborhoods

In [54]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
    print("New York JSON data loaded")

with open('Boston_Neighborhoods.json') as json_data:
    boston_data = json.load(json_data)
    print("Boston JSON data loaded")

with open('Chicago_Neighborhoods.json') as json_data:
    chicago_data = json.load(json_data)
    print("Chicago JSON data loaded")

ny_neighborhoods_data = newyork_data['features']
bos_neighborhoods_data = boston_data['features']
chi_neighborhoods_data = chicago_data['features']

New York JSON data loaded
Boston JSON data loaded
Chicago JSON data loaded


### Transform into pandas df

In [89]:
ny_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
bos_names = ['Neighborhood', 'Latitude', 'Longitude']
chi_names = ['Primary Neighborhood', 'Secondary Neighborhood', 'Latitude', 'Longitude']

ny_hoods = pd.DataFrame(columns=ny_names)
bos_hoods = pd.DataFrame(columns=bos_names)
chi_hoods = pd.DataFrame(columns=chi_names)

### If the JSON files were all configured the same...
  ~~hoods = ny_hoods, bos_hoods, chi_hoods  
  for hood in hoods:~~

In [56]:
# Loop for New York City Data
for data in ny_neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    ny_hoods = ny_hoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [57]:
# Loop for Boston Data
for data in bos_neighborhoods_data:
    neighborhood_name = data["properties"]["Name"]
    address = neighborhood_name + ", MA"

    geolocator = Nominatim(user_agent="bos_explorer")
    location = geolocator.geocode(address)
    try:
        latitude = location.latitude
        longitude = location.longitude
    except AttributeError:
        pass
    #print('The geograpical coordinate of {} are {}, {}.'.format(neighborhood_name, latitude, longitude))
    bos_hoods = bos_hoods.append({'Neighborhood': neighborhood_name,
                                  'Latitude': latitude,
                                  'Longitude': longitude}, ignore_index=True)

In [83]:
ny_hoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [85]:
# Manually fixing specific neighborhoods
### Leather District
bos_hoods.loc[[5], ["Latitude"]] = 42.3505
bos_hoods.loc[[5], ["Longitude"]] = -71.0579

### Harbor Islands
bos_hoods.loc[[25], ["Latitude"]] = 42.3167
bos_hoods.loc[[25], ["Longitude"]] = -70.9681

bos_hoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Roslindale,42.291209,-71.124497
1,Jamaica Plain,42.30982,-71.12033
2,Mission Hill,42.33256,-71.103608
3,Longwood,42.341718,-71.109922
4,Bay Village,42.350011,-71.066948


In [90]:
for data in chi_neighborhoods_data:
    primary_name = data["properties"]["pri_neigh"]
    secondary_name = data["properties"]["sec_neigh"].title()
    address = primary_name + ", Chicago"

    geolocator = Nominatim(user_agent="chi_explorer")
    location = geolocator.geocode(address)
    try:
        latitude = location.latitude
        longitude = location.longitude
    except AttributeError:
        pass
    #print('The geograpical coordinate of {} are {}, {}.'.format(primary_name, latitude, longitude))
    chi_hoods = chi_hoods.append({'Primary Neighborhood': primary_name,
                                  'Secondary Neighborhood': secondary_name,
                                  'Latitude': latitude,
                                  'Longitude': longitude}, ignore_index=True)

In [96]:
# Manually fixing specific neighborhoods
### River North
chi_hoods.loc[[32], ["Latitude"]] = 41.8924
chi_hoods.loc[[32], ["Longitude"]] = -87.6341

### Musuem Campus 
chi_hoods.loc[[97], ["Latitude"]] = 41.8636
chi_hoods.loc[[97], ["Longitude"]] = -87.6163

chi_hoods.head()

Unnamed: 0,Primary Neighborhood,Secondary Neighborhood,Latitude,Longitude
0,Grand Boulevard,Bronzeville,41.813923,-87.617272
1,Printers Row,Printers Row,41.873787,-87.6289
2,United Center,United Center,41.880683,-87.674185
3,Sheffield & DePaul,Sheffield & Depaul,41.880683,-87.674185
4,Humboldt Park,Humboldt Park,41.905767,-87.704174
