# Analysis of Crime, Income, Sales and Development Over Time

In [1]:
# Import libraries for analysis
import pandas as pd
import numpy as np


In [35]:
# Import libraries for visualization
%matplotlib inline
import hvplot.pandas
import plotly.express as px
import matplotlib.pyplot as plt
from panel.interact import interact

In [163]:
# Load the summary CSVs produced by the individual analyses.

# Source files (relative to the notebook's working directory)
crime_path = 'crime_stats.csv'                          # Historical Crime 2003-2018
income_path = 'Data/Income/income_data.csv'             # Historical Income 2004-2016
sales_path = 'Historic_Sales_Analysis/sales_stats.csv'  # Historical Sales 2003-2018
dev_path = 'dev_df.csv'                                 # Historical Development 2000-2019

# One DataFrame per topic
crime_df = pd.read_csv(crime_path)
income_df = pd.read_csv(income_path)
sales_df = pd.read_csv(sales_path)
dev_df = pd.read_csv(dev_path)

Unnamed: 0,Neighborhood,last_income,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr
0,1021,256.526464,0.0762,0.0724,-0.0038,0.0691,-0.0071
1,1026,259.148148,0.0395,0.0205,-0.019,0.0253,-0.0142
2,3004,43.070362,0.019,0.0193,0.0003,0.0163,-0.0027
3,3019,89.128119,0.065,0.0815,0.0165,0.0755,0.0105
4,AIRPORT JFK,47.046511,0.0078,0.0151,0.0073,0.0162,0.0084


## Ranking Metrics

In [None]:
# TODO: add ranking metrics to crime_df (not implemented yet)

In [164]:
# Add ranking metrics to income_df.
# Columns are renamed positionally, so the list below must match the CSV column order.
new_income_columns = ['neighborhood',
                      'last_income',
                      'avg_pct_change',
                      'pct_change_3yr',
                      'comparison_3yr',
                      'pct_change_5yr',
                      'comparison_5yr']

# Rename and re-index only once. Without this guard a second run of the cell
# fails with a length-mismatch error, because 'neighborhood' has already been
# moved into the index and the rank columns have been appended.
if income_df.index.name != 'neighborhood':
    income_df.columns = new_income_columns
    income_df = income_df.set_index(['neighborhood'])

# Rank 1 = largest value. rank() keeps its default tie handling, so tied
# neighborhoods share an averaged (possibly fractional) rank.
income_rank_sources = {'current_rank': 'last_income',
                       'avg_change_rank': 'avg_pct_change',
                       'growth_3yr_rank': 'comparison_3yr',
                       'growth_5yr_rank': 'comparison_5yr'}
for rank_col, source_col in income_rank_sources.items():
    income_df[rank_col] = income_df[source_col].rank(ascending=False)

income_df.head()


Unnamed: 0_level_0,last_income,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr,current_rank,avg_change_rank,growth_3yr_rank,growth_5yr_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1021,256.526464,0.0762,0.0724,-0.0038,0.0691,-0.0071,25.0,3.0,163.0,199.0
1026,259.148148,0.0395,0.0205,-0.019,0.0253,-0.0142,24.0,63.0,183.0,205.0
3004,43.070362,0.019,0.0193,0.0003,0.0163,-0.0027,179.0,166.5,151.0,185.5
3019,89.128119,0.065,0.0815,0.0165,0.0755,0.0105,55.0,10.0,17.0,25.0
AIRPORT JFK,47.046511,0.0078,0.0151,0.0073,0.0162,0.0084,159.0,205.0,100.0,49.0


In [165]:
# Add ranking metrics to sales_df.
# Columns are renamed positionally, so the list below must match the CSV column order.
new_sales_columns = ['neighborhood',
                     'avg_yearly_sales',
                     'avg_pct_change',
                     'pct_change_3yr',
                     'comparison_3yr',
                     'pct_change_5yr',
                     'comparison_5yr']

# Rename and re-index only once. Without this guard a second run of the cell
# fails with a length-mismatch error, because 'neighborhood' has already been
# moved into the index and the rank columns have been appended.
if sales_df.index.name != 'neighborhood':
    sales_df.columns = new_sales_columns
    sales_df = sales_df.set_index(['neighborhood'])

# Rank 1 = largest value. rank() keeps its default tie handling, so tied
# neighborhoods share an averaged (possibly fractional) rank.
sales_rank_sources = {'current_rank': 'avg_yearly_sales',
                      'avg_change_rank': 'avg_pct_change',
                      'growth_3yr_rank': 'comparison_3yr',
                      'growth_5yr_rank': 'comparison_5yr'}
for rank_col, source_col in sales_rank_sources.items():
    sales_df[rank_col] = sales_df[source_col].rank(ascending=False)

sales_df.head()

Unnamed: 0_level_0,avg_yearly_sales,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr,current_rank,avg_change_rank,growth_3yr_rank,growth_5yr_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
MIDTOWN WEST,6054405000.0,0.3036,0.0389,-0.2647,0.1308,-0.1728,1.0,37.0,176.0,179.0
UPPER EAST SIDE (59-79),4235901000.0,0.0831,-0.0774,-0.1605,0.0112,-0.0719,2.0,128.5,150.0,145.0
MIDTOWN CBD,3705496000.0,0.2531,-0.0645,-0.3176,-0.0884,-0.3415,3.0,48.0,181.0,191.0
UPPER WEST SIDE (59-79),3279761000.0,0.1025,0.0332,-0.0693,0.0963,-0.0062,4.0,111.0,113.0,120.0
UPPER EAST SIDE (79-96),2897718000.0,0.0642,-0.0251,-0.0893,0.0421,-0.0221,5.0,157.0,124.0,130.0


In [166]:
# Add ranking metrics to dev_df (development).
# Columns are renamed positionally, so the list below must match the CSV column order.
new_dev_columns = ['neighborhood',
                   'cumulative_sum',
                   'avg_pct_change',
                   'pct_change_3yr',
                   'comparison_3yr',
                   'pct_change_5yr',
                   'comparison_5yr']

# Rename and re-index only once. Without this guard a second run of the cell
# fails with a length-mismatch error, because 'neighborhood' has already been
# moved into the index and the rank columns have been appended.
if dev_df.index.name != 'neighborhood':
    dev_df.columns = new_dev_columns
    dev_df = dev_df.set_index(['neighborhood'])

# Rank 1 = largest value. rank() keeps its default tie handling, so tied
# neighborhoods share an averaged (possibly fractional) rank.
# NOTE(review): the preview shows avg_pct_change values above 100 for a couple
# of neighborhoods, which pushes their comparison ranks to the bottom -
# presumably upstream outliers; confirm before relying on the growth ranks.
dev_rank_sources = {'current_rank': 'cumulative_sum',
                    'avg_change_rank': 'avg_pct_change',
                    'growth_3yr_rank': 'comparison_3yr',
                    'growth_5yr_rank': 'comparison_5yr'}
for rank_col, source_col in dev_rank_sources.items():
    dev_df[rank_col] = dev_df[source_col].rank(ascending=False)

dev_df.head()


Unnamed: 0_level_0,cumulative_sum,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr,current_rank,avg_change_rank,growth_3yr_rank,growth_5yr_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
MIDTOWN WEST,13849590000.0,0.2515,0.1094,-0.1421,0.125,-0.1265,1.0,133.0,69.0,72.0
MIDTOWN CBD,12094720000.0,139.3866,0.1198,-139.2668,0.119,-139.2676,2.0,2.0,186.0,186.0
UPPER EAST SIDE (59-79),6525430000.0,147.6482,0.0966,-147.5516,0.104,-147.5442,3.0,1.0,187.0,187.0
FASHION,5149296000.0,0.2746,0.111,-0.1636,0.12,-0.1546,4.0,106.5,89.0,100.0
FINANCIAL,4768688000.0,0.1909,0.0985,-0.0924,0.141,-0.0499,5.0,174.0,28.0,12.0


## Present Day Ranking of Neighborhoods

In [173]:
# Combine the present-day ranks for income, sales and development into one table.
# (Crime is not included: no crime ranks are computed in this notebook section.)
current_rank_columns = ['income_rank', 'avg_sales_rank', 'cum_development_rank']

# join='inner' keeps only the neighborhoods present in all three frames
current_rank_series = [frame['current_rank'] for frame in (income_df, sales_df, dev_df)]
current_ranking = pd.concat(current_rank_series, axis=1, join='inner')
current_ranking.columns = current_rank_columns

# Number of neighborhoods shared by all three data sets
len(current_ranking)

184

In [174]:
# Preview the first 50 neighborhoods of the combined present-day ranking
current_ranking.head(50)

Unnamed: 0_level_0,income_rank,avg_sales_rank,cum_development_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AIRPORT LA GUARDIA,166.0,200.0,183.0
ALPHABET CITY,34.0,77.0,83.0
ASTORIA,76.0,27.0,30.0
BATH BEACH,89.0,110.0,142.0
BATHGATE,206.0,168.0,110.0
BAY RIDGE,72.0,35.0,52.0
BAYCHESTER,191.0,107.0,120.0
BAYSIDE,97.0,32.0,40.0
BEDFORD PARK/NORWOOD,190.0,91.0,64.0
BEDFORD STUYVESANT,127.0,21.0,22.0


## Ranking Neighborhoods Based on Growth Rate

### Average Increase Across All Years of Available Data

In [177]:
# Combine the average-percent-change ranks for income, sales and development.
# join='inner' keeps only the neighborhoods present in all three frames.
avg_increase_ranking = pd.concat([income_df['avg_change_rank'], sales_df['avg_change_rank'], dev_df['avg_change_rank']],
                                 axis='columns',
                                 join='inner')
avg_increase_rank_columns = ['income_rank', 'sales_rank', 'development_rank']
# Use the list defined on the line above; the previous name
# (avg_change_rank_columns) is not defined anywhere in this notebook and
# raised NameError on a fresh kernel.
avg_increase_ranking.columns = avg_increase_rank_columns
# Number of neighborhoods shared by all three data sets
len(avg_increase_ranking)


184

In [179]:
# Preview the first 10 neighborhoods of the average-increase ranking
avg_increase_ranking.head(10)

Unnamed: 0_level_0,income_rank,sales_rank,development_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AIRPORT LA GUARDIA,150.0,33.0,6.0
ALPHABET CITY,44.0,40.0,128.0
ASTORIA,69.0,132.0,148.0
BATH BEACH,85.0,146.0,53.0
BATHGATE,166.5,27.0,164.0
BAY RIDGE,60.5,148.0,78.0
BAYCHESTER,190.0,108.0,161.0
BAYSIDE,74.0,183.0,92.0
BEDFORD PARK/NORWOOD,171.5,76.0,162.0
BEDFORD STUYVESANT,48.5,133.0,140.0


### Growth in the Last 5 Years in Comparison to Average Growth Rate

In [182]:
# Combine the 5-year growth ranks (based on comparison_5yr) for income,
# sales and development into one table.
growth_5yr_rank_columns = ['income_rank', 'sales_rank', 'development_rank']

# join='inner' keeps only the neighborhoods present in all three frames
growth_5yr_series = [frame['growth_5yr_rank'] for frame in (income_df, sales_df, dev_df)]
growth_5yr_ranking = pd.concat(growth_5yr_series, axis=1, join='inner')
growth_5yr_ranking.columns = growth_5yr_rank_columns

# Number of neighborhoods shared by all three data sets
len(growth_5yr_ranking)

184

In [183]:
# Preview the first 50 neighborhoods of the 5-year growth ranking
growth_5yr_ranking.head(50)

Unnamed: 0_level_0,income_rank,sales_rank,development_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AIRPORT LA GUARDIA,34.5,146.0,182.0
ALPHABET CITY,189.0,151.0,101.5
ASTORIA,59.0,131.0,70.0
BATH BEACH,100.5,93.0,148.0
BATHGATE,168.0,194.0,52.5
BAY RIDGE,63.0,44.0,129.0
BAYCHESTER,164.0,61.0,83.0
BAYSIDE,180.0,83.0,81.0
BEDFORD PARK/NORWOOD,151.5,53.0,44.0
BEDFORD STUYVESANT,20.0,134.0,31.0


### Growth in the Last 3 Years in Comparison to Average Growth Rate

In [180]:
# Combine the 3-year growth ranks (based on comparison_3yr) for income,
# sales and development into one table.
growth_3yr_rank_columns = ['income_rank', 'sales_rank', 'development_rank']

# join='inner' keeps only the neighborhoods present in all three frames
growth_3yr_series = [frame['growth_3yr_rank'] for frame in (income_df, sales_df, dev_df)]
growth_3yr_ranking = pd.concat(growth_3yr_series, axis=1, join='inner')
growth_3yr_ranking.columns = growth_3yr_rank_columns

# Number of neighborhoods shared by all three data sets
len(growth_3yr_ranking)


184

In [181]:
# Preview the first 50 neighborhoods of the 3-year growth ranking
growth_3yr_ranking.head(50)

Unnamed: 0_level_0,income_rank,sales_rank,development_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AIRPORT LA GUARDIA,39.5,11.0,182.0
ALPHABET CITY,186.0,56.0,98.0
ASTORIA,44.0,117.0,76.0
BATH BEACH,84.5,109.0,141.0
BATHGATE,27.5,179.0,70.0
BAY RIDGE,133.5,61.0,116.0
BAYCHESTER,84.5,47.0,65.0
BAYSIDE,148.0,62.0,82.0
BEDFORD PARK/NORWOOD,77.5,44.0,56.0
BEDFORD STUYVESANT,21.0,145.0,42.0
