# Calculate initial opportunity and competition level score

Goal:
* Make and easy score for the product demo

### Import and initialize

In [125]:
import arcgis
from arcgis.gis import GIS
from arcgis.features import FeatureLayer, FeatureLayerCollection

import pandas as pd

import psycopg2

import sys
sys.path.append('../../')
from utils import get_config

sys.path.append('../')
from gis_resources import san_diego_county_zips,  execute_sql, create_where_clause, read_exact_food_biz_categories, read_exact_unhealthy_food_biz_categories

In [126]:
username = get_config("arcgis","username")
password = get_config("arcgis","passkey")
gis = GIS("https://ucsdonline.maps.arcgis.com/home", username=username, password=password)

<configparser.ConfigParser object at 0x000001B43B8E4100>
<configparser.ConfigParser object at 0x000001B43B8E41F0>


Definition a function which takes in below variables and give an opportunity score.

### Query feature layers into dataframes

#### Consumer Spending Layer

In [127]:
consumer_spending_layer_a = FeatureLayer(url="https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/a2afc4/FeatureServer/0")
consumer_spending_layer_a

<FeatureLayer url:"https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/a2afc4/FeatureServer/0">

In [128]:
# Picking up sample variables for just poc on rendering maps as per our need.
csa_out_fields = ["fips", "X1002_A", "X1130_A", "X1130FY_A"]

In [129]:
consumer_spending_layer_a_df = consumer_spending_layer_a.query(out_fields=csa_out_fields,
                              as_df=True,
                              return_geometry=False)

del consumer_spending_layer_a_df['FID']

consumer_spending_layer_a_df

Unnamed: 0,fips,x1002_a,x1130_a,x1130fy_a
0,060730100101,6877.52,2942.05,3623.67
1,060730100102,8364.19,3457.18,4231.38
2,060730100103,7101.43,2935.24,3277.84
3,060730100111,10566.02,4519.91,5287.83
4,060730150012,11587.27,4781.76,6792.55
...,...,...,...,...
2052,060730221012,20195.98,8544.28,9033.19
2053,060730221021,22616.71,9568.42,11358.21
2054,060730221022,22722.96,9613.37,11373.74
2055,060730221023,14510.63,6183.54,7345.41


#### Business Data Layer

Note: S01_BUS and N01_BUS gave the same number, so S01_BUS was used.

In [130]:
business_variables_layer = FeatureLayer(url="https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/a633a0/FeatureServer/0")
business_variables_layer

<FeatureLayer url:"https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/a633a0/FeatureServer/0">

In [131]:
business_out_fields = ["fips", "S01_BUS", "S12_BUS", "S16_BUS", "N13_BUS", "N37_BUS"]

In [132]:
business_variables_layer_df = business_variables_layer.query(out_fields=business_out_fields,
                              as_df=True,
                              return_geometry=False)

del business_variables_layer_df['FID']

business_variables_layer_df

Unnamed: 0,fips,n13_bus,n37_bus,s01_bus,s12_bus,s16_bus
0,060730083111,0.0,0.0,47.0,0.0,0.0
1,060730083112,0.0,0.0,25.0,0.0,0.0
2,060730083121,0.0,12.0,101.0,0.0,12.0
3,060730083122,0.0,6.0,35.0,0.0,6.0
4,060730083123,0.0,0.0,17.0,0.0,0.0
...,...,...,...,...,...,...
2052,060730221012,8.0,41.0,1594.0,12.0,40.0
2053,060730221021,0.0,0.0,36.0,1.0,0.0
2054,060730221022,4.0,17.0,180.0,5.0,16.0
2055,060730221023,0.0,0.0,24.0,0.0,0.0


#### Demographics Layer
Note on Diversity Index (DIVINDX_CY): https://storymaps.arcgis.com/stories/94db3c9e75b54e22a0e99978ad77df54

In [133]:
demographics_layer_1 = FeatureLayer(url="https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/aff1b6/FeatureServer/0")
demographics_layer_1

<FeatureLayer url:"https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/aff1b6/FeatureServer/0">

In [134]:
demographics_layer_1_out_fields = ['fips', 'TOTPOP_CY', "POPDENS_CY", "POPDENS_FY", "MEDDI_CY", "DIVINDX_CY"]

In [135]:
demographics_layer_1_df = demographics_layer_1.query(out_fields=demographics_layer_1_out_fields,
                              as_df=True,
                              return_geometry=False)
del demographics_layer_1_df['FID']

demographics_layer_1_df

Unnamed: 0,divindx_cy,fips,meddi_cy,popdens_cy,popdens_fy,totpop_cy
0,46.0,060730001001,129998.0,4714.0,4690.4,1199.0
1,45.6,060730001002,152077.0,4993.9,4864.1,1692.0
2,47.5,060730002011,100616.0,4614.1,4793.2,902.0
3,59.9,060730002021,60616.0,8342.2,8218.7,1283.0
4,62.4,060730002022,45921.0,6809.9,6742.7,911.0
...,...,...,...,...,...,...
2052,58.4,060730221012,113615.0,131.2,129.1,450.0
2053,52.9,060730221021,131072.0,3204.1,3326.8,1802.0
2054,64.2,060730221022,132157.0,3840.9,4090.8,1906.0
2055,80.0,060730221023,87061.0,4379.3,4526.5,2171.0


In [136]:
demographics_layer_2 = FeatureLayer(url="https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/ab8ff7/FeatureServer/0")
demographics_layer_2

<FeatureLayer url:"https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/ab8ff7/FeatureServer/0">

In [137]:
demographics_layer_2_out_fields = ['fips', "TOTPOP_FY"]

In [138]:
demographics_layer_2_df = demographics_layer_2.query(out_fields=demographics_layer_2_out_fields,
                              as_df=True,
                              return_geometry=False)
del demographics_layer_2_df['FID']

demographics_layer_2_df

Unnamed: 0,fips,totpop_fy
0,060730083111,1821.0
1,060730083112,1130.0
2,060730083121,611.0
3,060730083122,1419.0
4,060730083123,769.0
...,...,...
2052,060730221012,443.0
2053,060730221021,1871.0
2054,060730221022,2030.0
2055,060730221023,2244.0


### Merge the feature layers together

In [139]:
opportunity_competition_df = consumer_spending_layer_a_df.merge(business_variables_layer_df, left_on = 'fips', right_on = 'fips')
opportunity_competition_df = opportunity_competition_df.merge(demographics_layer_1_df, left_on = 'fips', right_on = 'fips')
opportunity_competition_df = opportunity_competition_df.merge(demographics_layer_2_df, left_on = 'fips', right_on = 'fips')
opportunity_competition_df

Unnamed: 0,fips,x1002_a,x1130_a,x1130fy_a,n13_bus,n37_bus,s01_bus,s12_bus,s16_bus,divindx_cy,meddi_cy,popdens_cy,popdens_fy,totpop_cy,totpop_fy
0,060730100101,6877.52,2942.05,3623.67,0.0,0.0,11.0,0.0,0.0,78.7,47338.0,15154.6,14831.8,1690.0,1654.0
1,060730100102,8364.19,3457.18,4231.38,2.0,3.0,25.0,2.0,2.0,80.8,45231.0,9018.8,8816.6,2185.0,2136.0
2,060730100103,7101.43,2935.24,3277.84,1.0,3.0,29.0,2.0,3.0,84.8,46459.0,15980.3,15622.9,1565.0,1530.0
3,060730100111,10566.02,4519.91,5287.83,1.0,1.0,16.0,1.0,1.0,84.2,75000.0,7335.9,7390.2,1485.0,1496.0
4,060730150012,11587.27,4781.76,6792.55,2.0,8.0,43.0,1.0,8.0,73.1,59813.0,9046.8,8844.2,1563.0,1528.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2052,060730221012,20195.98,8544.28,9033.19,8.0,41.0,1594.0,12.0,40.0,58.4,113615.0,131.2,129.1,450.0,443.0
2053,060730221021,22616.71,9568.42,11358.21,0.0,0.0,36.0,1.0,0.0,52.9,131072.0,3204.1,3326.8,1802.0,1871.0
2054,060730221022,22722.96,9613.37,11373.74,4.0,17.0,180.0,5.0,16.0,64.2,132157.0,3840.9,4090.8,1906.0,2030.0
2055,060730221023,14510.63,6183.54,7345.41,0.0,0.0,24.0,0.0,0.0,80.0,87061.0,4379.3,4526.5,2171.0,2244.0


In [140]:
var_mapping = {'x1002_a': 'Avg 2022 Food', 'x1003_a': 'Avg 2022 Food at Home', 'x1130_a': 'Avg 2022 Food Away from Home', 'x1002fy_a': 'Avg 2027 Food', 'x1003fy_a': 'Avg 2027 Food at Home', 'x1130fy_a': 'Avg 2027 Food Away from Home', 's01_bus': '2021 Total (SIC01-99) Businesses', 's12_bus': '2021 Food Stores (SIC54) Businesses', 's16_bus': '2021 Eating & Drinking (SIC58) Businesses', 'n01_bus': '2021 Total (NAICS11-99) Businesses', 'n13_bus': '2021 Food & Beverage Stores (NAICS445) Businesses', 'n37_bus': '2021 Food Srv & Drinking Places (NAICS722) Businesses', 'mp28068a_b': '2021 Eat Healthy/Pay Attention to Nutrition: 1-Disagree Completely', 'mp28068a_i': '2021 Eat Healthy/Pay Attention to Nutrition: 1-Disagree Completely: Index', 'mp28069a_b': '2021 Eat Healthy/Pay Attention to Nutrition: 2-Disagree Somewhat', 'mp28069a_i': '2021 Eat Healthy/Pay Attention to Nutrition: 2-Disagree Somewhat: Index', 'mp28070a_b': '2021 Eat Healthy/Pay Attention to Nutrition: 3-Agree Somewhat', 'mp28070a_i': '2021 Eat Healthy/Pay Attention to Nutrition: 3-Agree Somewhat: Index', 'mp28071a_b': '2021 Eat Healthy/Pay Attention to Nutrition: 4-Agree Completely', 'mp28071a_i': '2021 Eat Healthy/Pay Attention to Nutrition: 4-Agree Completely: Index', 'mp28258a_b': '2021 Like Healthier Options at Fast Food: 1-Disagree Completely', 'mp28258a_i': '2021 Like Healthier Options at Fast Food: 1-Disagree Completely: Index', 'mp28259a_b': '2021 Like Healthier Options at Fast Food: 2-Disagree Somewhat', 'mp28259a_i': '2021 Like Healthier Options at Fast Food: 2-Disagree Somewhat: Index', 'mp28260a_b': '2021 Like Healthier Options at Fast Food: 3-Agree Somewhat', 'mp28260a_i': '2021 Like Healthier Options at Fast Food: 3-Agree Somewhat: Index', 'mp28261a_b': '2021 Like Healthier Options at Fast Food: 4-Agree Completely', 'mp28261a_i': '2021 Like Healthier Options at Fast Food: 4-Agree Completely: Index', 'totpop_cy': '2021 Total Population (Esri)', 'popdens_cy': '2021 Population Density (Pop per Square Mile) (Esri)', 'totpop_fy': '2026 Total Population (Esri)', 'popdens_fy': '2026 Population Density (Pop per Square Mile) (Esri)', 'meddi_cy': '2021 Median Disposable Income (Esri)'
              , 'divindx_cy': '2021 Diversity Index (Esri)', 'divindx_fy': '2026 Diversity Index (Esri)'}
var_mapping

{'x1002_a': 'Avg 2022 Food',
 'x1003_a': 'Avg 2022 Food at Home',
 'x1130_a': 'Avg 2022 Food Away from Home',
 'x1002fy_a': 'Avg 2027 Food',
 'x1003fy_a': 'Avg 2027 Food at Home',
 'x1130fy_a': 'Avg 2027 Food Away from Home',
 's01_bus': '2021 Total (SIC01-99) Businesses',
 's12_bus': '2021 Food Stores (SIC54) Businesses',
 's16_bus': '2021 Eating & Drinking (SIC58) Businesses',
 'n01_bus': '2021 Total (NAICS11-99) Businesses',
 'n13_bus': '2021 Food & Beverage Stores (NAICS445) Businesses',
 'n37_bus': '2021 Food Srv & Drinking Places (NAICS722) Businesses',
 'mp28068a_b': '2021 Eat Healthy/Pay Attention to Nutrition: 1-Disagree Completely',
 'mp28068a_i': '2021 Eat Healthy/Pay Attention to Nutrition: 1-Disagree Completely: Index',
 'mp28069a_b': '2021 Eat Healthy/Pay Attention to Nutrition: 2-Disagree Somewhat',
 'mp28069a_i': '2021 Eat Healthy/Pay Attention to Nutrition: 2-Disagree Somewhat: Index',
 'mp28070a_b': '2021 Eat Healthy/Pay Attention to Nutrition: 3-Agree Somewhat',
 'mp

In [141]:
opportunity_competition_df.rename(columns = var_mapping, inplace = True)
opportunity_competition_df

Unnamed: 0,fips,Avg 2022 Food,Avg 2022 Food Away from Home,Avg 2027 Food Away from Home,2021 Food & Beverage Stores (NAICS445) Businesses,2021 Food Srv & Drinking Places (NAICS722) Businesses,2021 Total (SIC01-99) Businesses,2021 Food Stores (SIC54) Businesses,2021 Eating & Drinking (SIC58) Businesses,2021 Diversity Index (Esri),2021 Median Disposable Income (Esri),2021 Population Density (Pop per Square Mile) (Esri),2026 Population Density (Pop per Square Mile) (Esri),2021 Total Population (Esri),2026 Total Population (Esri)
0,060730100101,6877.52,2942.05,3623.67,0.0,0.0,11.0,0.0,0.0,78.7,47338.0,15154.6,14831.8,1690.0,1654.0
1,060730100102,8364.19,3457.18,4231.38,2.0,3.0,25.0,2.0,2.0,80.8,45231.0,9018.8,8816.6,2185.0,2136.0
2,060730100103,7101.43,2935.24,3277.84,1.0,3.0,29.0,2.0,3.0,84.8,46459.0,15980.3,15622.9,1565.0,1530.0
3,060730100111,10566.02,4519.91,5287.83,1.0,1.0,16.0,1.0,1.0,84.2,75000.0,7335.9,7390.2,1485.0,1496.0
4,060730150012,11587.27,4781.76,6792.55,2.0,8.0,43.0,1.0,8.0,73.1,59813.0,9046.8,8844.2,1563.0,1528.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2052,060730221012,20195.98,8544.28,9033.19,8.0,41.0,1594.0,12.0,40.0,58.4,113615.0,131.2,129.1,450.0,443.0
2053,060730221021,22616.71,9568.42,11358.21,0.0,0.0,36.0,1.0,0.0,52.9,131072.0,3204.1,3326.8,1802.0,1871.0
2054,060730221022,22722.96,9613.37,11373.74,4.0,17.0,180.0,5.0,16.0,64.2,132157.0,3840.9,4090.8,1906.0,2030.0
2055,060730221023,14510.63,6183.54,7345.41,0.0,0.0,24.0,0.0,0.0,80.0,87061.0,4379.3,4526.5,2171.0,2244.0


In [142]:
opportunity_competition_df_og = opportunity_competition_df.copy(deep = True)

#### Create variables

Perecentage of spending on food away from home per household

In [143]:
opportunity_competition_df["Food Away From Home Spending Rate"] = (opportunity_competition_df["Avg 2022 Food Away from Home"] 
                                                                   / opportunity_competition_df["Avg 2022 Food"])
opportunity_competition_df["Food Away From Home Spending Rate"]

0       0.427778
1       0.413331
2       0.413331
3       0.427778
4       0.412674
          ...   
2052    0.423068
2053    0.423069
2054    0.423069
2055    0.426139
2056    0.426138
Name: Food Away From Home Spending Rate, Length: 2057, dtype: Float64

Projected spending growth on food away from home 22-27 per household

In [144]:
opportunity_competition_df["Project 5Yr Food Away Home Spending Growth"] = (opportunity_competition_df["Avg 2027 Food Away from Home"] / 
                                                                  opportunity_competition_df["Avg 2022 Food Away from Home"] -1)

opportunity_competition_df["Project 5Yr Food Away Home Spending Growth"]

0       0.231682
1        0.22394
2        0.11672
3       0.169897
4       0.420513
          ...   
2052    0.057221
2053    0.187052
2054    0.183117
2055    0.187897
2056    0.193864
Name: Project 5Yr Food Away Home Spending Growth, Length: 2057, dtype: Float64

Total % of food related business

In [145]:
opportunity_competition_df["Total Pct of Food Related Businesses"] = (opportunity_competition_df["2021 Food & Beverage Stores (NAICS445) Businesses"] + 
 opportunity_competition_df["2021 Food Srv & Drinking Places (NAICS722) Businesses"] + 
 opportunity_competition_df["2021 Food Stores (SIC54) Businesses"] + 
 opportunity_competition_df["2021 Eating & Drinking (SIC58) Businesses"]) / opportunity_competition_df["2021 Total (SIC01-99) Businesses"]

opportunity_competition_df["Total Pct of Food Related Businesses"]

0            0.0
1           0.36
2       0.310345
3           0.25
4        0.44186
          ...   
2052    0.063363
2053    0.027778
2054    0.233333
2055         0.0
2056    0.069486
Name: Total Pct of Food Related Businesses, Length: 2057, dtype: Float64

Total Food Business Per capita 21

In [146]:
opportunity_competition_df["Pct of Food Related Businesses Per Capita"] = (opportunity_competition_df["2021 Food & Beverage Stores (NAICS445) Businesses"] + 
 opportunity_competition_df["2021 Food Srv & Drinking Places (NAICS722) Businesses"] + 
 opportunity_competition_df["2021 Food Stores (SIC54) Businesses"] + 
 opportunity_competition_df["2021 Eating & Drinking (SIC58) Businesses"]) / opportunity_competition_df["2021 Total Population (Esri)"]

opportunity_competition_df["Pct of Food Related Businesses Per Capita"]

0            0.0
1       0.004119
2       0.005751
3       0.002694
4       0.012156
          ...   
2052    0.224444
2053    0.000555
2054    0.022036
2055         0.0
2056    0.032787
Name: Pct of Food Related Businesses Per Capita, Length: 2057, dtype: Float64

Projected population growth

In [147]:
opportunity_competition_df["Projected Population Growth Rate"] = (opportunity_competition_df["2021 Total Population (Esri)"] 
                                                                  / opportunity_competition_df["2026 Total Population (Esri)"] -1)

opportunity_competition_df["Projected Population Growth Rate"] 

0       0.021765
1        0.02294
2       0.022876
3      -0.007353
4       0.022906
          ...   
2052    0.015801
2053   -0.036879
2054   -0.061084
2055   -0.032531
2056    0.020364
Name: Projected Population Growth Rate, Length: 2057, dtype: Float64

In [148]:
opportunity_competition_df

Unnamed: 0,fips,Avg 2022 Food,Avg 2022 Food Away from Home,Avg 2027 Food Away from Home,2021 Food & Beverage Stores (NAICS445) Businesses,2021 Food Srv & Drinking Places (NAICS722) Businesses,2021 Total (SIC01-99) Businesses,2021 Food Stores (SIC54) Businesses,2021 Eating & Drinking (SIC58) Businesses,2021 Diversity Index (Esri),2021 Median Disposable Income (Esri),2021 Population Density (Pop per Square Mile) (Esri),2026 Population Density (Pop per Square Mile) (Esri),2021 Total Population (Esri),2026 Total Population (Esri),Food Away From Home Spending Rate,Project 5Yr Food Away Home Spending Growth,Total Pct of Food Related Businesses,Pct of Food Related Businesses Per Capita,Projected Population Growth Rate
0,060730100101,6877.52,2942.05,3623.67,0.0,0.0,11.0,0.0,0.0,78.7,47338.0,15154.6,14831.8,1690.0,1654.0,0.427778,0.231682,0.0,0.0,0.021765
1,060730100102,8364.19,3457.18,4231.38,2.0,3.0,25.0,2.0,2.0,80.8,45231.0,9018.8,8816.6,2185.0,2136.0,0.413331,0.22394,0.36,0.004119,0.02294
2,060730100103,7101.43,2935.24,3277.84,1.0,3.0,29.0,2.0,3.0,84.8,46459.0,15980.3,15622.9,1565.0,1530.0,0.413331,0.11672,0.310345,0.005751,0.022876
3,060730100111,10566.02,4519.91,5287.83,1.0,1.0,16.0,1.0,1.0,84.2,75000.0,7335.9,7390.2,1485.0,1496.0,0.427778,0.169897,0.25,0.002694,-0.007353
4,060730150012,11587.27,4781.76,6792.55,2.0,8.0,43.0,1.0,8.0,73.1,59813.0,9046.8,8844.2,1563.0,1528.0,0.412674,0.420513,0.44186,0.012156,0.022906
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2052,060730221012,20195.98,8544.28,9033.19,8.0,41.0,1594.0,12.0,40.0,58.4,113615.0,131.2,129.1,450.0,443.0,0.423068,0.057221,0.063363,0.224444,0.015801
2053,060730221021,22616.71,9568.42,11358.21,0.0,0.0,36.0,1.0,0.0,52.9,131072.0,3204.1,3326.8,1802.0,1871.0,0.423069,0.187052,0.027778,0.000555,-0.036879
2054,060730221022,22722.96,9613.37,11373.74,4.0,17.0,180.0,5.0,16.0,64.2,132157.0,3840.9,4090.8,1906.0,2030.0,0.423069,0.183117,0.233333,0.022036,-0.061084
2055,060730221023,14510.63,6183.54,7345.41,0.0,0.0,24.0,0.0,0.0,80.0,87061.0,4379.3,4526.5,2171.0,2244.0,0.426139,0.187897,0.0,0.0,-0.032531


#### Remove bias or unnecessary variables

In [149]:
opportunity_competition_df.drop(columns = ["Avg 2022 Food", "Avg 2022 Food Away from Home", "Avg 2027 Food Away from Home", 
                                          "2021 Food & Beverage Stores (NAICS445) Businesses", "2021 Food Srv & Drinking Places (NAICS722) Businesses",
                                          "2021 Total (SIC01-99) Businesses", "2021 Food Stores (SIC54) Businesses", "2021 Eating & Drinking (SIC58) Businesses",
                                          "2026 Population Density (Pop per Square Mile) (Esri)", "2021 Total Population (Esri)", "2026 Total Population (Esri)",
                                          "2021 Median Disposable Income (Esri)"], inplace = True)
opportunity_competition_df

Unnamed: 0,fips,2021 Diversity Index (Esri),2021 Population Density (Pop per Square Mile) (Esri),Food Away From Home Spending Rate,Project 5Yr Food Away Home Spending Growth,Total Pct of Food Related Businesses,Pct of Food Related Businesses Per Capita,Projected Population Growth Rate
0,060730100101,78.7,15154.6,0.427778,0.231682,0.0,0.0,0.021765
1,060730100102,80.8,9018.8,0.413331,0.22394,0.36,0.004119,0.02294
2,060730100103,84.8,15980.3,0.413331,0.11672,0.310345,0.005751,0.022876
3,060730100111,84.2,7335.9,0.427778,0.169897,0.25,0.002694,-0.007353
4,060730150012,73.1,9046.8,0.412674,0.420513,0.44186,0.012156,0.022906
...,...,...,...,...,...,...,...,...
2052,060730221012,58.4,131.2,0.423068,0.057221,0.063363,0.224444,0.015801
2053,060730221021,52.9,3204.1,0.423069,0.187052,0.027778,0.000555,-0.036879
2054,060730221022,64.2,3840.9,0.423069,0.183117,0.233333,0.022036,-0.061084
2055,060730221023,80.0,4379.3,0.426139,0.187897,0.0,0.0,-0.032531


A little cleanup

In [150]:
opportunity_competition_df['2021 Diversity Index (Esri)'] = opportunity_competition_df['2021 Diversity Index (Esri)']/100

In [151]:
opportunity_competition_df.set_index('fips', inplace = True)

### Make the opportunity score

#### Scale the data

In [152]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [153]:
opportunity_competition_df[opportunity_competition_df.columns] = scaler.fit_transform(opportunity_competition_df)
opportunity_competition_df

Unnamed: 0_level_0,2021 Diversity Index (Esri),2021 Population Density (Pop per Square Mile) (Esri),Food Away From Home Spending Rate,Project 5Yr Food Away Home Spending Growth,Total Pct of Food Related Businesses,Pct of Food Related Businesses Per Capita,Projected Population Growth Rate
fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
060730100101,0.501946,0.717180,0.947194,0.413864,-0.954059,-0.232773,0.495591
060730100102,0.660222,-0.004886,-0.523809,0.331707,1.451413,-0.136343,0.523990
060730100103,0.961701,0.814349,-0.523838,-0.806064,1.119624,-0.098141,0.522437
060730100111,0.916479,-0.202931,0.947211,-0.241767,0.716408,-0.169713,-0.208377
060730150012,0.079876,-0.001591,-0.590763,2.417647,1.998394,0.051814,0.523160
...,...,...,...,...,...,...,...
060730221012,-1.028057,-1.050786,0.467668,-1.437438,-0.530678,5.021704,0.351404
060730221021,-1.442590,-0.689165,0.467694,-0.059731,-0.768451,-0.219781,-0.922195
060730221022,-0.590913,-0.614225,0.467689,-0.101487,0.605044,0.283105,-1.507379
060730221023,0.599927,-0.550866,0.780294,-0.050759,-0.954059,-0.232773,-0.817089


#### Make a score
Sum all columns

In [154]:
opportunity_competition_df['Total Pct of Food Related Businesses'] = (-1)*opportunity_competition_df['Total Pct of Food Related Businesses']
opportunity_competition_df['Pct of Food Related Businesses Per Capita'] = (-1)*opportunity_competition_df['Pct of Food Related Businesses Per Capita']

Cap the total STD for [Project 5Yr Food Away Home Spending Growth] to 4 STD (major outlier is skewing the final score).

In [155]:
opportunity_competition_df.describe()

Unnamed: 0,2021 Diversity Index (Esri),2021 Population Density (Pop per Square Mile) (Esri),Food Away From Home Spending Rate,Project 5Yr Food Away Home Spending Growth,Total Pct of Food Related Businesses,Pct of Food Related Businesses Per Capita,Projected Population Growth Rate
count,2057.0,2057.0,2053.0,2053.0,2057.0,2057.0,2057.0
mean,-7.184876e-16,-1.658048e-16,2.630358e-16,-5.883695e-17,2.0725600000000003e-17,-1.0362800000000002e-17,0.0
std,1.000243,1.000243,1.000244,1.000244,1.000243,1.000243,1.000243
min,-4.419689,-1.066037,-5.053605,-4.375874,-5.727808,-29.86713,-13.914076
25%,-0.6210609,-0.6195329,-0.6834765,-0.4050449,-0.5308007,0.01970318,-0.030612
50%,0.1853938,-0.213793,-0.01325965,-0.0569509,0.2507042,0.1774277,0.40954
75%,0.8637202,0.3180419,0.9012543,0.355751,0.9540585,0.2327729,0.501511
max,1.391307,11.39741,2.91688,27.0514,0.9540585,0.2327729,0.934502


In [156]:
def truncate_std(std, boundary = 4):
    if std > boundary:
        return boundary
    elif std < -boundary:
        return -boundary
    else:
        return std

In [157]:
for col in opportunity_competition_df.columns:
    opportunity_competition_df[col] = opportunity_competition_df[col].apply(lambda x: truncate_std(x))

In [158]:
opportunity_competition_df['Opportunity_Competition_Score'] = opportunity_competition_df.sum(axis=1)
opportunity_competition_df['Opportunity_Competition_Score'].describe()

count    2057.000000
mean        0.023140
std         2.452716
min       -10.698047
25%        -1.608548
50%         0.075404
75%         1.739337
max         7.677773
Name: Opportunity_Competition_Score, dtype: float64

#### Scaled the opportunity score

In [159]:
scoremin= opportunity_competition_df['Opportunity_Competition_Score'].min()
scoremax = opportunity_competition_df['Opportunity_Competition_Score'].max()

opportunity_competition_df['Opportunity_Competition_Score_Scaled'] = ((opportunity_competition_df['Opportunity_Competition_Score']-scoremin) 
                                                                      / (scoremax-scoremin) )

opportunity_competition_df['Opportunity_Competition_Score_Scaled']

fips
060730100101    0.814149
060730100102    0.564339
060730100103    0.579302
060730100111    0.618311
060730150012    0.602758
                  ...   
060730221012    0.246602
060730221021    0.491967
060730221022    0.406163
060730221023    0.644672
060730221024    0.649228
Name: Opportunity_Competition_Score_Scaled, Length: 2057, dtype: float64

In [160]:
opportunity_competition_df['Opportunity_Competition_Score_Scaled'].describe()

count    2057.000000
mean        0.583440
std         0.133475
min         0.000000
25%         0.494645
50%         0.586284
75%         0.676834
max         1.000000
Name: Opportunity_Competition_Score_Scaled, dtype: float64

In [161]:
opportunity_competition_df.describe()

Unnamed: 0,2021 Diversity Index (Esri),2021 Population Density (Pop per Square Mile) (Esri),Food Away From Home Spending Rate,Project 5Yr Food Away Home Spending Growth,Total Pct of Food Related Businesses,Pct of Food Related Businesses Per Capita,Projected Population Growth Rate,Opportunity_Competition_Score,Opportunity_Competition_Score_Scaled
count,2057.0,2057.0,2053.0,2053.0,2057.0,2057.0,2057.0,2057.0,2057.0
mean,0.000204,-0.016175,0.004434,-0.011434,0.002551,0.027602,0.015945,0.02314,0.58344
std,0.999384,0.900347,0.980071,0.804173,0.988661,0.449498,0.891849,2.452716,0.133475
min,-4.0,-1.066037,-4.0,-4.0,-4.0,-4.0,-4.0,-10.698047,0.0
25%,-0.621061,-0.619533,-0.683477,-0.405045,-0.530801,0.019703,-0.030612,-1.608548,0.494645
50%,0.185394,-0.213793,-0.01326,-0.056951,0.250704,0.177428,0.40954,0.075404,0.586284
75%,0.86372,0.318042,0.901254,0.355751,0.954059,0.232773,0.501511,1.739337,0.676834
max,1.391307,4.0,2.91688,4.0,0.954059,0.232773,0.934502,7.677773,1.0


### See how score performs

In [162]:
score_inspection = opportunity_competition_df.merge(opportunity_competition_df_og, left_on = 'fips', right_on = 'fips')
score_inspection.sort_values(by = 'Opportunity_Competition_Score_Scaled', ascending=False).to_csv('Opportunity_Competition_Score.csv', index = False)

#### Export final result

In [163]:
opportunity_competition_df.reset_index(inplace = True, drop = False)

In [165]:
opportunity_competition_df[['fips', 'Opportunity_Competition_Score_Scaled']].to_csv('Fips_Opportunity_Competition_Score.csv', index = False)

# Old Amol Stuff

In [21]:
# Picking up sample variables for just poc on rendering maps as per our need.
out_fields = ["fips", "X1130_A", "X1132_A", "X1137_A", "X1142_A", "X1147_A", 
                      "X1130FY_A", "X1132FY_A", "X1137FY_A", "X1142FY_A", "X1147FY_A", "X1002_A", "X1003_A", "TOTPOP_CY"]

In [22]:
away_food_spending_df = consumer_spending_layer.query(out_fields=out_fields,
                              as_df=True,
                              return_geometry=False)
away_food_spending_df

Exception: Cannot perform query. Invalid query parameters.
Unable to perform query. Please check your parameters.
(Error Code: 400)

##### This is painful: Since we created multiple feature layers, we would always be uncertain as to which variables are present where ?

In [9]:
# Checking TOTPOP_CY variable in 1st layer 
demo_df = demographics_layer_1.query(out_fields=['fips', 'TOTPOP_CY', # 2021 Total Population (Esri)
                                                ],
                                     as_df=True,
                                     return_geometry=False
                                    )
demo_df

Unnamed: 0,FID,fips,totpop_cy
0,1,060730001001,1199.0
1,2,060730001002,1692.0
2,3,060730002011,902.0
3,4,060730002021,1283.0
4,5,060730002022,911.0
...,...,...,...
2052,2053,060730221012,450.0
2053,2054,060730221021,1802.0
2054,2055,060730221022,1906.0
2055,2056,060730221023,2171.0


In [10]:
# Checking HHPOP_CY, TOTHH_CY variable in 2nd layer 
demo_df_2 = demographics_layer_2.query(out_fields=['fips',
                                                 #'TOTPOP_CY', # 2021 Total Population (Esri)
                                                 'HHPOP_CY', # 2021 Household Population (Esri)
                                                 'TOTHH_CY', # 2021 Total Households (Esri)
                                                ],
                                     as_df=True,
                                     return_geometry=False
                                    )
demo_df_2

Unnamed: 0,FID,fips,hhpop_cy,tothh_cy
0,1,060730083111,1744.0,631.0
1,2,060730083112,1140.0,449.0
2,3,060730083121,596.0,250.0
3,4,060730083122,1190.0,517.0
4,5,060730083123,781.0,358.0
...,...,...,...,...
2052,2053,060730221012,450.0,184.0
2053,2054,060730221021,1802.0,686.0
2054,2055,060730221022,1906.0,775.0
2055,2056,060730221023,2171.0,666.0


Picking up the overlayed layer created to ger businesses associated with block groups

In [11]:
block_grp_zip_food_biz_overlay_features = FeatureLayer(gis = gis,
                                                       url="https://services1.arcgis.com/eGSDp8lpKe5izqVc/arcgis/rest/services/San_Diego_Food_Business_Per_Block_Group/FeatureServer/0")

Calculate Count of food businesses per block group

In [12]:
food_biz_df = block_grp_zip_food_biz_overlay_features.query(out_fields=['FIPS','biz_name','biz_type','Join_Count'],
                                             as_df=True,
                                             return_geometry=False)
food_biz_df

Unnamed: 0,FIPS,Join_Count,OBJECTID_1,biz_name,biz_type
0,060730208061,1,1,Amici Pizza Pasta & Subs,Italian restaurant;Restaurant
1,060730004001,1,2,Mess Royale,Bagel shop;Restaurant;Sandwich shop
2,060730004001,1,3,Fiesta Cantina,Mexican restaurant;Bar;Bar & grill;Brunch rest...
3,060730004001,1,4,Creme De La Crepe,French restaurant
4,060730121011,1,5,"W.D. Dickinson: Farm, House, Mercantile",Farm;Event venue;Historical landmark
...,...,...,...,...,...
11508,060730083462,1,11509,Chick-fil-A,Fast food restaurant;Breakfast restaurant;Cate...
11509,060730083591,1,11510,Tasty Pot,Hot pot restaurant
11510,060730173061,1,11511,Starbucks,Coffee shop;Breakfast restaurant;Cafe;Coffee s...
11511,060730162021,1,11512,Fuerte's NY Pizzeria,Pizza restaurant;Bar;Beer store;Chicken wings ...


In [13]:
food_density_biz_df = food_biz_df.groupby('FIPS').size().reset_index(name='count_biz')
food_density_biz_df

Unnamed: 0,FIPS,count_biz
0,060250123012,2
1,060730001002,8
2,060730002011,3
3,060730002012,17
4,060730002021,10
...,...,...
1241,060730221011,1
1242,060730221012,47
1243,060730221022,13
1244,060730221023,1


In [14]:
## Let's Join the consumer spending, demographics and food density
merged_df = pd.merge(away_food_spending_df, demo_df, on='fips', how='inner')
merged_df = pd.merge(merged_df, demo_df_2, on='fips', how='inner')
merged_df = pd.merge(merged_df, food_density_biz_df, left_on='fips', right_on='FIPS', how='left')
merged_df

Unnamed: 0,FID_x,fips,x1130_a,x1130fy_a,x1132_a,x1132fy_a,x1137_a,x1137fy_a,x1142_a,x1142fy_a,x1147_a,x1147fy_a,FID_y,totpop_cy,FID,hhpop_cy,tothh_cy,FIPS,count_biz
0,1,060730100101,2942.05,3623.67,858.36,1057.22,1392.62,1715.26,250.6,308.66,303.81,374.19,76,1690.0,161,1690.0,500.0,,
1,2,060730100102,3457.18,4231.38,1140.71,1396.15,1579.12,1932.75,266.28,325.91,364.58,446.23,406,2185.0,405,2185.0,682.0,060730100102,5.0
2,3,060730100103,2935.24,3277.84,968.49,1081.53,1340.71,1497.2,226.08,252.46,309.54,345.67,931,1565.0,533,1565.0,493.0,060730100103,5.0
3,4,060730100111,4519.91,5287.83,1318.7,1542.75,2139.5,2502.99,385.0,450.41,466.74,546.04,932,1485.0,535,1471.0,364.0,060730100111,2.0
4,5,060730150012,4781.76,6792.55,1459.8,2073.67,2256.23,3205.0,386.28,548.71,475.28,675.15,28,1563.0,68,1558.0,625.0,060730150012,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2052,2053,060730221012,8544.28,9033.19,2484.53,2626.69,4098.08,4332.57,702.16,742.34,826.65,873.95,2053,450.0,2053,450.0,184.0,060730221012,47.0
2053,2054,060730221021,9568.42,11358.21,2782.33,3302.77,4589.28,5447.71,786.32,933.4,925.73,1098.89,2054,1802.0,2054,1802.0,686.0,,
2054,2055,060730221022,9613.37,11373.74,2795.4,3307.29,4610.84,5455.16,790.02,934.68,930.08,1100.39,2055,1906.0,2055,1906.0,775.0,060730221022,13.0
2055,2056,060730221023,6183.54,7345.41,1856.83,2205.72,2940.3,3492.77,515.72,612.62,605.01,718.69,2056,2171.0,2056,2171.0,666.0,060730221023,1.0


In [15]:
## finding float columns
float_cols = merged_df.select_dtypes(include=['float']).columns
merged_df[float_cols] = merged_df[float_cols].fillna(0)

In [16]:
def calculate_competition(count_food_biz, totpop_cy):
    '''
    Return a competition score.
    '''
    # for now lets just return the difference
    
    try:
        score = count_food_biz/totpop_cy
    except ZeroDivisionError:
        score = 0.0
        
    return score

In [17]:
def calculate_opportunity(count_food_biz,
                           X1130_A # 2022 Food Away from Home 
#                           X1132_A, # 2022 Lunch Away from Home
#                           X1137_A, # 2022 Dinner Away from Home
#                           X1142_A, # 2022 Snacks & Beverages Away from Home
#                           X1147_A, # 2022 Breakfast and Brunch Away from Home
#                           X1130FY_A, 
#                           X1132FY_A, 
#                           X1137FY_A, 
#                           X1142FY_A, 
#                           X1147FY_A,
                          ):
    '''
    Return opportunity score.
    '''
    # All these _A variables are spending per household
        
    try:
        score = X1130_A/count_food_biz
    except ZeroDivisionError:
        score = 0.0
  
    return score
    

In [18]:
merged_df['competition_score'] = merged_df.apply(lambda x: calculate_competition(x.count_biz, x.totpop_cy), axis=1)
merged_df['opportunity_score'] = merged_df.apply(lambda x: calculate_opportunity(x.count_biz, x.x1130_a), axis=1)
merged_df

Unnamed: 0,FID_x,fips,x1130_a,x1130fy_a,x1132_a,x1132fy_a,x1137_a,x1137fy_a,x1142_a,x1142fy_a,...,x1147fy_a,FID_y,totpop_cy,FID,hhpop_cy,tothh_cy,FIPS,count_biz,competition_score,opportunity_score
0,1,060730100101,2942.05,3623.67,858.36,1057.22,1392.62,1715.26,250.6,308.66,...,374.19,76,1690.0,161,1690.0,500.0,,0.0,0.000000,0.000000
1,2,060730100102,3457.18,4231.38,1140.71,1396.15,1579.12,1932.75,266.28,325.91,...,446.23,406,2185.0,405,2185.0,682.0,060730100102,5.0,0.002288,691.436000
2,3,060730100103,2935.24,3277.84,968.49,1081.53,1340.71,1497.2,226.08,252.46,...,345.67,931,1565.0,533,1565.0,493.0,060730100103,5.0,0.003195,587.048000
3,4,060730100111,4519.91,5287.83,1318.7,1542.75,2139.5,2502.99,385.0,450.41,...,546.04,932,1485.0,535,1471.0,364.0,060730100111,2.0,0.001347,2259.955000
4,5,060730150012,4781.76,6792.55,1459.8,2073.67,2256.23,3205.0,386.28,548.71,...,675.15,28,1563.0,68,1558.0,625.0,060730150012,8.0,0.005118,597.720000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2052,2053,060730221012,8544.28,9033.19,2484.53,2626.69,4098.08,4332.57,702.16,742.34,...,873.95,2053,450.0,2053,450.0,184.0,060730221012,47.0,0.104444,181.793191
2053,2054,060730221021,9568.42,11358.21,2782.33,3302.77,4589.28,5447.71,786.32,933.4,...,1098.89,2054,1802.0,2054,1802.0,686.0,,0.0,0.000000,0.000000
2054,2055,060730221022,9613.37,11373.74,2795.4,3307.29,4610.84,5455.16,790.02,934.68,...,1100.39,2055,1906.0,2055,1906.0,775.0,060730221022,13.0,0.006821,739.490000
2055,2056,060730221023,6183.54,7345.41,1856.83,2205.72,2940.3,3492.77,515.72,612.62,...,718.69,2056,2171.0,2056,2171.0,666.0,060730221023,1.0,0.000461,6183.540000
