In [15]:
# Import libraries
import pandas as pd
import numpy as np
import math

# # For API Call
# from geopy.geocoders import Nominatim
# geolocator = Nominatim(user_agent= 'alison.sadel@gmail.com')
# from geopy.extra.rate_limiter import RateLimiter
# import webbrowser

# For Visualizations
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

# To see full column values
pd.set_option('max_colwidth', 800)


In [2]:
# Create path
file = "Resources/output.csv"

In [5]:
# Read in file
df = pd.read_csv(file)

In [8]:
# Drop 2nd index
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df.head()

Unnamed: 0,Street,Unit,Borough,Neighborhood,City,State,Year,Price,Status,Postalcode,Location,Address,point,Latitude,Longitude
0,9602 4th Avenue,6D,Brooklyn,Bay Ridge,New York,New York,2019-01-01,,RENTED,11209,"9602 4th Avenue, New York, New York, 11209","9602, 4th Avenue, Fort Hamilton, Brooklyn, Kings County, New York, 11209, United States","(40.6151866, -74.032020545154, 0.0)",40.615187,-74.032021
1,94 Degraw Street,,Brooklyn,Columbia Street Waterfront,Brooklyn,New York,2020-01-01,2350000.0,SOLD,11231,"94 Degraw Street, Brooklyn, New York, 11231","94, Degraw Street, Columbia Street Waterfront District, Brooklyn, Kings County, New York, 11231, United States","(40.685799349999996, -74.00335189192313, 0.0)",40.685799,-74.003352
2,933 Lafayette Street,1st floor,Brooklyn,Bedford-Stuyvesant,Brooklyn,New York,2020-01-01,1095000.0,SOLD,11221,"933 Lafayette Street, Brooklyn, New York, 11221",,,40.692015,-73.934678
3,918 Manhattan Ave,#2,Brooklyn,Greenpoint,Brooklyn,New York,2021-01-01,5000.0,RENTED,11222,"918 Manhattan Ave, Brooklyn, New York, 11222","Green Farms Supermarket, 918, Manhattan Avenue, Brooklyn, Kings County, New York, 11222, United States","(40.7307511, -73.9540256, 0.0)",40.730751,-73.954026
4,906 Prospect Place,2A,Brooklyn,Crown Heights,Brooklyn,New York,2021-01-01,735000.0,SOLD,11213,"906 Prospect Place, Brooklyn, New York, 11213","906, Prospect Place, Eastern Parkway, Brooklyn, Kings County, New York, 11213, United States","(40.67376755, -73.94657791384977, 0.0)",40.673768,-73.946578


In [10]:
locations = df[['Latitude', 'Longitude']]
locationlist = locations.values.tolist()
locationlist

[[40.6151866, -74.032020545154],
 [40.68579935, -74.00335189192313],
 [40.692015, -73.934678],
 [40.7307511, -73.9540256],
 [40.67376755, -73.94657791384977],
 [40.69886695, -73.93986610491517],
 [40.68560405, -73.92165635454509],
 [40.69052825, -73.93423862971204],
 [40.6786036, -73.96327695],
 [40.68824595, -73.92800579875049],
 [40.71529507692308, -74.00703146153846],
 [40.7112356, -73.9481786],
 [40.7112356, -73.9481786],
 [40.7112356, -73.9481786],
 [40.6941834, -73.99585531608435],
 [40.7266738, -73.94668815],
 [40.7266738, -73.94668815],
 [40.7247992, -73.94660784727965],
 [40.65299665, -73.91209596183775],
 [40.678081500000005, -73.98419386229119],
 [40.635121, -73.9615148],
 [40.6814318, -73.91167355],
 [40.7062521, -73.89861955],
 [40.68833835, -73.93710600950291],
 [40.72156795, -73.94240743840871],
 [40.68717169999999, -73.94380349754783],
 [40.68717169999999, -73.94380349754783],
 [40.68717169999999, -73.94380349754783],
 [40.68717169999999, -73.94380349754783],
 [40.68717

In [21]:
# Create a map

m_2 = folium.Map(location=[40.6872, -73.9418], tiles='openstreetmap', zoom_start=12)

# Display the map
m_2

# Add points to the map
for idx, row in df.iterrows():
    folium.CircleMarker([row['Latitude'], row['Longitude']], radius = 5, popup= row[['Neighborhood', 'Price', 'Borough']]).add_to(m_2)

# CircleMarker with radius
#folium.CircleMarker(location = [28.5011226, 77.4099794],
                    #radius = 50, popup = ' FRI ').add_to(my_map2)
# Display the map
m_2


# save method of Map object will create a map
#my_map1.save("my_map1.html")

In [16]:
# Create the map
m_3 = folium.Map(location=[40.6872, -73.9418], tiles='cartodbpositron', zoom_start=13)

# Add points to the map
mc = MarkerCluster()
for idx, row in df.iterrows():
    if not math.isnan(row['Longitude']) and not math.isnan(row['Latitude']):
        mc.add_child(Marker([row['Latitude'], row['Longitude']]))
m_3.add_child(mc)

# Display the map
m_3


In [18]:
m = folium.Map(location=[40.6872, -73.9418], tiles='cartodbpositron', zoom_start=13)

locations = list(zip(df.Latitude, df.Longitude))
icons = [folium.Icon(icon="building", prefix="fa") for _ in range(len(locations))]

cluster = MarkerCluster(locations=locations, icons=icons)
m.add_child(cluster)
m

In [19]:
# Create a base map
m_4 = folium.Map(location=[40.6872, -73.9418], tiles='cartodbpositron', zoom_start=13)

def color_producer(val):
    if val <= 1000000:
        return 'royalblue'
    else:
        return 'crimson'
    
# Add a bubble map to the base map
for i in range(0,len(df)):
    Circle(
        location=[df.iloc[i]['Latitude'], df.iloc[i]['Longitude']],
        radius=20,
        color=color_producer(df.iloc[i]['Price'])).add_to(m_4)

# Display the map
m_4

#### Part Two

In [20]:
# Create a base map
m_5 = folium.Map(location=[40.6872, -73.9418], tiles='cartodbpositron', zoom_start=12)

# Add a heatmap to the base map
HeatMap(data=df[['Latitude', 'Longitude']], radius=15).add_to(m_5)

# Display the map
m_5

In [None]:
import pandas as pd
import json
import pyproj
from shapely.geometry import shape, Point
from shapely.ops import transform
from functools import partial
import time
start_time = time.time()
from shapely.geometry import Point, Polygon
import geopandas as gpd

In [None]:
nyc

In [None]:
#Load NYC Zillow Neighborhood GeoJSON file
with open('Resources/nyc.geojson', 'r') as jsonFile:
    nyc = json.load(jsonFile)  
 

In [None]:
   
import geoplot
import geoplot.crs as gcrs

In [None]:
full.columns

In [None]:
long = full.columns.get_loc('Longitude')
lat = full.columns.get_loc('Latitude')

## use shapely to check if lat/lon is within the zillow neighborhood shape
for i in range(len(full)):
    point = full(full.loc['Longitude'],full.loc['Latitude']) ## Longitude, Latitude

    for feature in nyc['features']:
        polygon = shape(feature['geometry'])
        if polygon.contains(point):
            full.iloc[i, full.columns.get_loc('neighborhood')] = feature['properties']['name']
            
#dc_food_final.to_csv("DC_FoodEstablish_Since2005.csv", index = False) ## write the data so we don't have to re-run this every time
        
dc_food_final.head()


In [None]:
# GeoDataFrame with geographical boundaries of NYC Neighborhoods
import geopandas as gpd
districts_full = gpd.read_file('/Users/alison/Desktop/P/code/nynta_21b/nynta.shp')
#districts_full = districts_full.set_index("NTAName")
districts = districts_full[["NTAName", "geometry"]].set_index("NTAName")
p = districts.index.to_list()
p.sort()

districts

In [None]:
# GeoDataFrame with geographical boundaries of NYC Neighborhoods
import geopandas as gpd
districts_full = gpd.read_file('/Users/alison/Desktop/P/code/nynta_21b/nynta.shp')
#districts_full = districts_full.set_index("NTAName")
districts = districts_full[["NTAName", "geometry"]].set_index("NTAName")
districts

# Number of crimes in each police district
plot_dict = full.Neighborhood.value_counts()
plot_dict.head()

In [None]:
filtered = full.groupby(['Borough','Year']).sum().unstack('Year', fill_value=0)
filtered

In [None]:
# then we need to melt the df so it's in tidy format. reshape the table to keep the columns borough, category, year (as variable) and value
melted = pd.melt(violence,  id_vars=['Address', 'Price', 'Neighborhood', 'Unit', 'Borough', 'City', 'State',
       'Status'])

melted['variable'] = melted['value']
melted['value'] = melted['Price']

melted

In [None]:
neighborhood = melted.groupby('Neighborhood').sum()
df2 = melted.groupby('Borough').sum()

df2

In [None]:
filtered

In [None]:
# then let's pivot the dataframe to add on the column 'major_category' and add up the values for each borough by year
crime = melted.pivot_table(values='Price', index=['Neighborhood'], columns='variable', aggfunc=np.sum)
crime.columns = crime.columns.get_level_values(0)
crime.columns = ['2017', '2018', '2019', '2020', '2021']
crime = crime.fillna(0)

crime['Total'] =  crime['2017'] + crime['2018'] + crime['2019'] + crime['2020'] + crime['2021']
crime.to_csv("Resources/test.csv")
# Add Totals columns at Bottom
# total = crime.sum()
# total.name = 'Total'
# crime = crime.append(total)
# crime

In [None]:
crime