# Analysis of Crime, Income, Sales and Development Over Time

In [2]:
# Import libraries for analysis
import pandas as pd
import numpy as np


In [7]:
# Load the cleaned csv's produced by the individual analyses.

# Locations of the four input files
crime_path = 'joint_analysis_resources/crime_stats.csv'   # Historical Crime 2003-2018
income_path = 'joint_analysis_resources/income_data.csv'  # Historical Income 2004-2016
sales_path = 'joint_analysis_resources/sales_stats.csv'   # Historical Sales 2003-2018
dev_path = 'joint_analysis_resources/dev_df.csv'          # Historical Development 2000-2019

# Read every file into its own dataframe
crime_df = pd.read_csv(filepath_or_buffer=crime_path)
income_df = pd.read_csv(filepath_or_buffer=income_path)
sales_df = pd.read_csv(filepath_or_buffer=sales_path)
dev_df = pd.read_csv(filepath_or_buffer=dev_path)

## Ranking Metrics

### Added Ranks to Crime Data

In [4]:
# Add ranking metrics to crime_df.

# Short column names that are easy to reference
new_crime_columns = [
    'neighborhood',
    'avg_yearly_crime',
    'avg_pct_change',
    'pct_change_3yr',
    'comparison_3yr',
    'pct_change_5yr',
    'comparison_5yr',
]
crime_df.columns = new_crime_columns

# Index by neighborhood so there is a common index for concatenating
crime_df.set_index(keys=['neighborhood'], inplace=True)

# Rank descending (the largest value in a source column gets rank 1) for the
# current level, the average change, and the 3yr and 5yr comparisons.
crime_rank_sources = {
    'current_rank': 'avg_yearly_crime',
    'avg_change_rank': 'avg_pct_change',
    'growth_3yr_rank': 'comparison_3yr',
    'growth_5yr_rank': 'comparison_5yr',
}
for rank_name, source_name in crime_rank_sources.items():
    crime_df[rank_name] = crime_df[source_name].rank(ascending=False)

# check a few lines
crime_df.head()


Unnamed: 0_level_0,avg_yearly_crime,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr,current_rank,avg_change_rank,growth_3yr_rank,growth_5yr_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ASTORIA,1691.0,-0.041,-0.011,0.03,-0.019,0.022,30.0,143.5,43.0,48.5
BATH,1017.0,-0.047,-0.063,-0.016,-0.062,-0.015,144.5,168.0,109.0,132.0
BATHBEACH,1017.0,-0.047,-0.063,-0.016,-0.062,-0.015,144.5,168.0,109.0,132.0
GRAVESEND,1017.0,-0.047,-0.063,-0.016,-0.062,-0.015,144.5,168.0,109.0,132.0
FINANCIAL,1356.0,-0.048,-0.021,0.027,-0.003,0.045,64.5,178.0,60.5,16.5


### Added Ranks to Income Data

In [8]:
# Add ranking metrics to income_df.

# Short column names that are easy to reference
new_income_columns = [
    'neighborhood',
    'last_income',
    'avg_pct_change',
    'pct_change_3yr',
    'comparison_3yr',
    'pct_change_5yr',
    'comparison_5yr',
]
income_df.columns = new_income_columns

# Remove spaces from the neighborhood names, then use them as the index
income_df['neighborhood'] = income_df['neighborhood'].str.replace(' ','')
income_df.set_index(keys=['neighborhood'], inplace=True)

# Rank ascending: the smallest value in a source column gets rank 1
income_rank_sources = {
    'current_rank': 'last_income',
    'avg_change_rank': 'avg_pct_change',
    'growth_3yr_rank': 'comparison_3yr',
    'growth_5yr_rank': 'comparison_5yr',
}
for rank_name, source_name in income_rank_sources.items():
    income_df[rank_name] = income_df[source_name].rank(ascending=True)

# check a few lines
income_df.head()

Unnamed: 0_level_0,last_income,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr,current_rank,avg_change_rank,growth_3yr_rank,growth_5yr_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1021,256.526464,0.0762,0.0724,-0.0038,0.0691,-0.0071,184.0,206.0,46.0,10.0
1026,259.148148,0.0395,0.0205,-0.019,0.0253,-0.0142,185.0,146.0,26.0,4.0
3004,43.070362,0.019,0.0193,0.0003,0.0163,-0.0027,30.0,42.5,58.0,23.5
3019,89.128119,0.065,0.0815,0.0165,0.0755,0.0105,154.0,199.0,192.0,184.0
AIRPORTJFK,47.046511,0.0078,0.0151,0.0073,0.0162,0.0084,50.0,4.0,109.0,160.0


### Added Ranks to Sales Data

In [9]:
# Add ranking metrics to sales_df.

# Short column names that are easy to reference
new_sales_columns = [
    'neighborhood',
    'avg_yearly_sales',
    'avg_pct_change',
    'pct_change_3yr',
    'comparison_3yr',
    'pct_change_5yr',
    'comparison_5yr',
]
sales_df.columns = new_sales_columns

# Remove spaces from the neighborhood names, then use them as the index
sales_df['neighborhood'] = sales_df['neighborhood'].str.replace(' ','')
sales_df.set_index(keys=['neighborhood'], inplace=True)

# Rank ascending: the smallest value in a source column gets rank 1
sales_rank_sources = {
    'current_rank': 'avg_yearly_sales',
    'avg_change_rank': 'avg_pct_change',
    'growth_3yr_rank': 'comparison_3yr',
    'growth_5yr_rank': 'comparison_5yr',
}
for rank_name, source_name in sales_rank_sources.items():
    sales_df[rank_name] = sales_df[source_name].rank(ascending=True)

# check a few lines
sales_df.head()


Unnamed: 0_level_0,avg_yearly_sales,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr,current_rank,avg_change_rank,growth_3yr_rank,growth_5yr_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
MIDTOWNWEST,6054405000.0,0.3036,0.0389,-0.2647,0.1308,-0.1728,202.0,166.0,27.0,24.0
UPPEREASTSIDE(59-79),4235901000.0,0.0831,-0.0774,-0.1605,0.0112,-0.0719,201.0,74.5,53.0,58.0
MIDTOWNCBD,3705496000.0,0.2531,-0.0645,-0.3176,-0.0884,-0.3415,200.0,155.0,22.0,12.0
UPPERWESTSIDE(59-79),3279761000.0,0.1025,0.0332,-0.0693,0.0963,-0.0062,199.0,92.0,90.0,83.0
UPPEREASTSIDE(79-96),2897718000.0,0.0642,-0.0251,-0.0893,0.0421,-0.0221,198.0,46.0,79.0,73.0


### Added Ranks to Development Data

In [10]:
# Add ranking metrics to dev_df.

# Short column names that are easy to reference
new_dev_columns = [
    'neighborhood',
    'cumulative_sum',
    'avg_pct_change',
    'pct_change_3yr',
    'comparison_3yr',
    'pct_change_5yr',
    'comparison_5yr',
]
dev_df.columns = new_dev_columns

# Remove spaces from the neighborhood names, then use them as the index
dev_df['neighborhood'] = dev_df['neighborhood'].str.replace(' ','')
dev_df.set_index(keys=['neighborhood'], inplace=True)

# Rank ascending: the smallest value in a source column gets rank 1
dev_rank_sources = {
    'current_rank': 'cumulative_sum',
    'avg_change_rank': 'avg_pct_change',
    'growth_3yr_rank': 'comparison_3yr',
    'growth_5yr_rank': 'comparison_5yr',
}
for rank_name, source_name in dev_rank_sources.items():
    dev_df[rank_name] = dev_df[source_name].rank(ascending=True)

# check a few lines
dev_df.head()


Unnamed: 0_level_0,cumulative_sum,avg_pct_change,pct_change_3yr,comparison_3yr,pct_change_5yr,comparison_5yr,current_rank,avg_change_rank,growth_3yr_rank,growth_5yr_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
MIDTOWNWEST,13849590000.0,0.2515,0.1094,-0.1421,0.125,-0.1265,187.0,55.0,119.0,116.0
MIDTOWNCBD,12094720000.0,139.3866,0.1198,-139.2668,0.119,-139.2676,186.0,186.0,2.0,2.0
UPPEREASTSIDE(59-79),6525430000.0,147.6482,0.0966,-147.5516,0.104,-147.5442,185.0,187.0,1.0,1.0
FASHION,5149296000.0,0.2746,0.111,-0.1636,0.12,-0.1546,184.0,81.5,99.0,88.0
FINANCIAL,4768688000.0,0.1909,0.0985,-0.0924,0.141,-0.0499,183.0,14.0,160.0,176.0


## Present Day Snapshot of Neighborhoods

### Combine Current Rankings

In [33]:
# Build one dataframe holding the current rank from each dataset
# (crime, income, sales, development).

# column names for the combined frame
current_rank_columns = ['crime_rank','income_rank', 'avg_sales_rank','cum_development_rank']

# the current-rank column of every dataset, in the same order as the names above
current_rank_sources = [
    crime_df['current_rank'],
    income_df['current_rank'],
    sales_df['current_rank'],
    dev_df['current_rank'],
]

# the inner join keeps only the neighborhoods present in all four indexes
current_ranking = pd.concat(current_rank_sources, axis=1, join='inner')
current_ranking.columns = current_rank_columns

# number of neighborhoods left after the join
print(len(current_ranking))

# check a few lines
current_ranking.head()

183


Unnamed: 0_level_0,crime_rank,income_rank,avg_sales_rank,cum_development_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ASTORIA,30.0,133.0,176.0,158.0
BATHBEACH,144.5,120.0,93.0,46.0
GRAVESEND,144.5,77.0,150.0,126.0
FINANCIAL,64.5,199.0,196.0,183.0
SOUTHBRIDGE,64.5,168.0,139.0,131.0


### Calculate Current Neighborhood Score

In [34]:
# Aggregate the four current ranks into one score per weighting scheme.

# Shared scaling factor applied to every weighted sum of ranks.
# NOTE(review): 206 is hard-coded — confirm it is the intended rank denominator.
current_scale = (1/4)*(1/206)

crime_now = current_ranking['crime_rank']
income_now = current_ranking['income_rank']
sales_now = current_ranking['avg_sales_rank']
dev_now = current_ranking['cum_development_rank']

# aggregate score using equal weights
current_ranking['eq_weight_score'] = (
    current_scale*(crime_now + income_now + sales_now + dev_now)
).round(6)

# aggregate score with weights based on data completeness
# NOTE(review): presumably years of data out of 19; the crime weight (18/19)
# should be confirmed against the crime data's year range.
current_ranking['weights_by_completeness'] = (
    current_scale*(crime_now*(18/19) + income_now*(13/19) + sales_now*(16/19) + dev_now*(1))
).round(6)

# aggregate score with weights based on data reliability
current_ranking['weights_by_reliability'] = (
    current_scale*(crime_now*(.5) + income_now*(.8) + sales_now*(1) + dev_now*(.9))
).round(6)

In [36]:
# check a few lines: the ten highest scores under the reliability weighting
# (limited with head so the cell does not display the whole frame)
current_ranking.sort_values(['weights_by_reliability'], ascending=False).head(10)

Unnamed: 0_level_0,crime_rank,income_rank,avg_sales_rank,cum_development_rank,eq_weight_score,weights_by_completeness,weights_by_reliability
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MIDTOWNWEST,134.5,195.0,202.0,187.0,0.871966,0.749936,0.720328
UPPERWESTSIDE(59-79),134.5,198.0,199.0,177.0,0.859830,0.737225,0.708677
MIDTOWNEAST,140.0,196.0,195.0,176.0,0.858010,0.736587,0.704126
UPPERWESTSIDE(79-96),134.5,191.0,191.0,167.0,0.829490,0.711101,0.681250
MURRAYHILL,140.0,186.0,186.0,172.0,0.830097,0.714231,0.679126
KIPSBAY,140.0,174.0,189.0,174.0,0.821602,0.709760,0.673301
TRIBECA,64.5,208.0,192.0,182.0,0.784587,0.663963,0.672876
FINANCIAL,64.5,199.0,196.0,183.0,0.779733,0.661791,0.670085
GRAMERCY,140.0,193.0,181.0,163.0,0.821602,0.704011,0.670024
UPPEREASTSIDE(59-79),17.5,204.0,201.0,185.0,0.737257,0.619443,0.654672


In [None]:
# write the current ranks and scores to csv for the dashboard visualization
current_rank_csv = 'dash_current_rank.csv'
current_ranking.to_csv(current_rank_csv)

## Ranking Neighborhoods Based on Growth Rate

### Average Increases Across all Years of Available Data

In [13]:
# Build one dataframe holding the average-change rank from each dataset
# (crime, income, sales, development).
avg_change_rank_columns = ['crime_rank','income_rank', 'sales_rank','dev_rank']

avg_change_rank_sources = [
    crime_df['avg_change_rank'],
    income_df['avg_change_rank'],
    sales_df['avg_change_rank'],
    dev_df['avg_change_rank'],
]

# the inner join keeps only the neighborhoods present in all four indexes
avg_change_ranking = pd.concat(avg_change_rank_sources, axis=1, join='inner')
avg_change_ranking.columns = avg_change_rank_columns

# check a few lines
avg_change_ranking.head()

Unnamed: 0_level_0,crime_rank,income_rank,sales_rank,dev_rank
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ASTORIA,143.5,140.0,71.0,40.0
BATHBEACH,168.0,124.0,57.0,135.0
GRAVESEND,168.0,71.5,49.5,56.0
FINANCIAL,178.0,2.0,177.0,14.0
SOUTHBRIDGE,178.0,204.0,189.0,21.0


### Calculate Aggregate Score for Average Increases

In [14]:
# Aggregate the four average-change ranks into one score per weighting scheme.
avg_crime = avg_change_ranking['crime_rank']
avg_income = avg_change_ranking['income_rank']
avg_sales = avg_change_ranking['sales_rank']
avg_dev = avg_change_ranking['dev_rank']

# Shared scaling factor applied to every weighted sum of ranks.
# NOTE(review): 206 is hard-coded — confirm it is the intended rank denominator.
avg_scale = (1/4)*(1/206)

# (crime, income, sales, development) weights for each score column
avg_weight_schemes = {
    'eq_weight_score': (1, 1, 1, 1),
    'weights_by_completeness': (18/19, 13/19, 16/19, 1),
    'weights_by_reliability': (.5, .8, 1, .9),
}
for score_name, (w_crime, w_income, w_sales, w_dev) in avg_weight_schemes.items():
    avg_change_ranking[score_name] = round(
        avg_scale*(avg_crime*w_crime + avg_income*w_income + avg_sales*w_sales + avg_dev*w_dev),
        4)


In [15]:
# preview the first five rows, now including the three score columns
avg_change_ranking.head(5)

Unnamed: 0_level_0,crime_rank,income_rank,sales_rank,dev_rank,eq_weight_score,weights_by_completeness,weights_by_reliability
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ASTORIA,143.5,140.0,71.0,40.0,0.4788,0.4023,0.3529
BATHBEACH,168.0,124.0,57.0,135.0,0.5874,0.5182,0.439
GRAVESEND,168.0,71.5,49.5,56.0,0.4187,0.3711,0.2926
FINANCIAL,178.0,2.0,177.0,14.0,0.4502,0.4042,0.34
SOUTHBRIDGE,178.0,204.0,189.0,21.0,0.7184,0.5927,0.5584


In [74]:
# write the average-change ranks and scores to csv for the dashboard visualization
avg_change_csv = 'dash_avg_change_scores.csv'
avg_change_ranking.to_csv(avg_change_csv)

### Growth in the last 5 Years in Comparison to Average Growth Rate

In [75]:
# Build one dataframe holding the 5-year growth rank from each dataset
# (crime, income, sales, development); the inner join keeps only the
# neighborhoods present in all four indexes.
growth_5yr_ranking = pd.concat([crime_df['growth_5yr_rank'],
                                income_df['growth_5yr_rank'],
                                sales_df['growth_5yr_rank'],
                                dev_df['growth_5yr_rank']],
                               axis='columns',
                               join='inner')
growth_5yr_rank_columns = ['crime_rank','income_rank', 'sales_rank','dev_rank']
growth_5yr_ranking.columns = growth_5yr_rank_columns

# report how many neighborhoods are left after the inner join, so that rows
# dropped by the join are visible in the cell output
print(len(growth_5yr_ranking))

# calculate aggregate scores by three methods
# NOTE(review): 206 is hard-coded — confirm it is the intended rank denominator.

# equal weights
growth_5yr_ranking['eq_weight_score'] = round((1/4)*(1/206)*(growth_5yr_ranking['crime_rank']+
                                                          growth_5yr_ranking['income_rank']+
                                                          growth_5yr_ranking['sales_rank']+
                                                          growth_5yr_ranking['dev_rank']),4)
# weights based on data completeness
growth_5yr_ranking['weights_by_completeness'] = round((1/4)*(1/206)*(growth_5yr_ranking['crime_rank']*(18/19)+
                                                                  growth_5yr_ranking['income_rank']*(13/19)+
                                                                  growth_5yr_ranking['sales_rank']*(16/19)+
                                                                  growth_5yr_ranking['dev_rank']*(1)),4)
# weights based on data reliability
growth_5yr_ranking['weights_by_reliability'] = round((1/4)*(1/206)*(growth_5yr_ranking['crime_rank']*(.5)+
                                                                  growth_5yr_ranking['income_rank']*(.8)+
                                                                  growth_5yr_ranking['sales_rank']*(1)+
                                                                  growth_5yr_ranking['dev_rank']*(.9)),4)

# check a few lines
growth_5yr_ranking.head()

183


Unnamed: 0_level_0,crime_rank,income_rank,sales_rank,dev_rank,eq_weight_score,weights_by_completeness,weights_by_reliability
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ASTORIA,48.5,150.0,72.0,118.0,0.4715,0.3971,0.3913
BATHBEACH,132.0,108.5,110.0,40.0,0.4739,0.4028,0.3626
GRAVESEND,132.0,63.5,103.0,80.0,0.4593,0.4068,0.3541
FINANCIAL,16.5,206.0,8.0,176.0,0.4933,0.4118,0.412
SOUTHBRIDGE,16.5,2.0,186.0,122.0,0.3962,0.3588,0.3709


In [76]:
# write the 5-year growth ranks and scores to csv for the dashboard visualization
growth_5yr_csv = 'dash_5yr_scores.csv'
growth_5yr_ranking.to_csv(growth_5yr_csv)

### Recent Growth (3-year) in Comparison to Average Growth Rate

In [77]:
# Build one dataframe holding the 3-year growth rank from each dataset
# (crime, income, sales, development); the inner join keeps only the
# neighborhoods present in all four indexes.
growth_3yr_ranking = pd.concat([crime_df['growth_3yr_rank'],
                                income_df['growth_3yr_rank'],
                                sales_df['growth_3yr_rank'],
                                dev_df['growth_3yr_rank']],
                               axis='columns',
                               join='inner')
growth_3yr_rank_columns = ['crime_rank','income_rank', 'sales_rank','dev_rank']
growth_3yr_ranking.columns = growth_3yr_rank_columns

# report how many neighborhoods are left after the inner join, so that rows
# dropped by the join are visible in the cell output
print(len(growth_3yr_ranking))

# calculate aggregate scores by three methods
# NOTE(review): 206 is hard-coded — confirm it is the intended rank denominator.

# equal weights
growth_3yr_ranking['eq_weight_score'] = round((1/4)*(1/206)*(growth_3yr_ranking['crime_rank']+
                                                          growth_3yr_ranking['income_rank']+
                                                          growth_3yr_ranking['sales_rank']+
                                                          growth_3yr_ranking['dev_rank']),4)
# weights based on data completeness
growth_3yr_ranking['weights_by_completeness'] = round((1/4)*(1/206)*(growth_3yr_ranking['crime_rank']*(18/19)+
                                                                  growth_3yr_ranking['income_rank']*(13/19)+
                                                                  growth_3yr_ranking['sales_rank']*(16/19)+
                                                                  growth_3yr_ranking['dev_rank']*(1)),4)
# weights based on data reliability
growth_3yr_ranking['weights_by_reliability'] = round((1/4)*(1/206)*(growth_3yr_ranking['crime_rank']*(.5)+
                                                                  growth_3yr_ranking['income_rank']*(.8)+
                                                                  growth_3yr_ranking['sales_rank']*(1)+
                                                                  growth_3yr_ranking['dev_rank']*(.9)),4)
# check a few lines
growth_3yr_ranking.head()

183


Unnamed: 0_level_0,crime_rank,income_rank,sales_rank,dev_rank,eq_weight_score,weights_by_completeness,weights_by_reliability
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ASTORIA,43.0,165.0,86.0,112.0,0.4927,0.4103,0.413
BATHBEACH,109.0,124.5,94.0,47.0,0.4545,0.3818,0.3524
GRAVESEND,109.0,84.0,110.0,115.5,0.5079,0.4477,0.4073
FINANCIAL,60.5,137.0,11.0,160.0,0.4472,0.3887,0.3578
SOUTHBRIDGE,60.5,16.0,16.0,133.5,0.2743,0.2612,0.2175


In [78]:
# write the 3-year growth ranks and scores to csv for the dashboard visualization
growth_3yr_csv = 'dash_3yr_scores.csv'
growth_3yr_ranking.to_csv(growth_3yr_csv)

## The Opportunity Zone

In [11]:
# read the opportunity zone data into its own dataframe
op_path = 'joint_analysis_resources/opp_zone_data.csv'
op_df = pd.read_csv(filepath_or_buffer=op_path)

In [12]:
# remove spaces from the neighborhood names, then use them as the index
neighborhood_labels = op_df['Neighborhood'].str.replace(' ','')
op_df['Neighborhood'] = neighborhood_labels
op_df.set_index(keys=['Neighborhood'], inplace=True)

# check length
len(op_df)

208

## Combine the Recent Growth Dataframe with Opportunity Zone for Analysis

In [82]:
# Combine the 3-year growth rank from each dataset with the opportunity zone
# data; the inner join keeps only the neighborhoods present in every index.
oppzone_3yr_rank_columns = ['crime_rank','income_rank', 'sales_rank','dev_rank','opp_zone']
oppzone_3yr_sources = [
    crime_df['growth_3yr_rank'],
    income_df['growth_3yr_rank'],
    sales_df['growth_3yr_rank'],
    dev_df['growth_3yr_rank'],
    op_df,
]
oppzone_3yr_ranking = pd.concat(oppzone_3yr_sources, axis=1, join='inner')
oppzone_3yr_ranking.columns = oppzone_3yr_rank_columns

# number of neighborhoods left after the join
print(len(oppzone_3yr_ranking))

oz_crime = oppzone_3yr_ranking['crime_rank']
oz_income = oppzone_3yr_ranking['income_rank']
oz_sales = oppzone_3yr_ranking['sales_rank']
oz_dev = oppzone_3yr_ranking['dev_rank']
oz_flag = oppzone_3yr_ranking['opp_zone']

# Scaling factor for the weighted sum of the four ranks; the opp_zone column
# is added on top with a weight of .2.
# NOTE(review): 206 is hard-coded — confirm it is the intended rank denominator.
oz_scale = (1/5)*(1/206)

# equal weights
oppzone_3yr_ranking['eq_weight_score'] = (
    oz_scale*(oz_crime + oz_income + oz_sales + oz_dev) + oz_flag*.2
).round(4)

# weights based on data completeness
oppzone_3yr_ranking['weights_by_completeness'] = (
    oz_scale*(oz_crime*(18/19) + oz_income*(13/19) + oz_sales*(16/19) + oz_dev*(1)) + oz_flag*.2
).round(4)

# weights based on data reliability
oppzone_3yr_ranking['weights_by_reliability'] = (
    oz_scale*(oz_crime*(.5) + oz_income*(.8) + oz_sales*(1) + oz_dev*(.9)) + oz_flag*.2
).round(4)

# check a few lines
oppzone_3yr_ranking.head()


183


Unnamed: 0,crime_rank,income_rank,sales_rank,dev_rank,opp_zone,eq_weight_score,weights_by_completeness,weights_by_reliability
ASTORIA,43.0,165.0,86.0,112.0,1.0,0.5942,0.5282,0.5304
BATHBEACH,109.0,124.5,94.0,47.0,0.0,0.3636,0.3054,0.2819
GRAVESEND,109.0,84.0,110.0,115.5,1.0,0.6063,0.5581,0.5259
FINANCIAL,60.5,137.0,11.0,160.0,0.0,0.3578,0.311,0.2863
SOUTHBRIDGE,60.5,16.0,16.0,133.5,0.0,0.2194,0.209,0.174


In [83]:
# write the 3-year ranks and scores with the opportunity zone column to csv
oppzone_3yr_csv = 'dash_oppzone_3yr_scores.csv'
oppzone_3yr_ranking.to_csv(oppzone_3yr_csv)

## Check How Well the Current Scores Overlap with the Opportunity Zones

In [92]:
# keep only the rows whose op_zone value equals 1, then count them
is_opp_zone = op_df['op_zone'] == 1
oppzone_neighs = op_df[is_opp_zone]
len(oppzone_neighs)

59

# For each scoring method, order the current scores from highest to lowest and
# keep the last 100 rows, i.e. the 100 lowest-scoring ("worst") neighborhoods.
equal_weights = current_ranking.sort_values(by='eq_weight_score', ascending=False).tail(100)
weights_by_completeness = current_ranking.sort_values(by='weights_by_completeness', ascending=False).tail(100)
weights_by_reliability = current_ranking.sort_values(by='weights_by_reliability', ascending=False).tail(100)

In [93]:
# count the neighborhoods each method of score has in common with the opportunity zones
def common_neighs(df1,df2):
    """Return the number of index labels shared by `df1` and `df2`.

    The count is taken from the intersection of the two indexes, so no joined
    frame is built and duplicate labels in either input are counted once
    instead of making the join fail.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame or pandas.Series
        Objects whose index labels are compared.

    Returns
    -------
    int
        How many distinct labels appear in both indexes.
    """
    shared_labels = df1.index.unique().intersection(df2.index.unique())
    return len(shared_labels)

In [107]:
# For each scoring method, the number of opportunity-zone neighborhoods that
# also appear in that method's slice of lowest-scoring neighborhoods.
weighing_the_weights = {
    'Equal Weights': common_neighs(oppzone_neighs,equal_weights),
    'Weights by Data Completeness': common_neighs(oppzone_neighs,weights_by_completeness),
    'Weights by Data Reliability': common_neighs(oppzone_neighs,weights_by_reliability),
}

# show the counts
weighing_the_weights

{'Equal Weights': 31,
 'Weights by Data Completeness': 30,
 'Weights by Data Reliability': 27}

Equal-weight aggregate scoring identified the most Opportunity Zones: 31 of the 59 opportunity-zone neighborhoods fall among its 100 lowest-scoring neighborhoods.