In [1]:
import pandas as pd
import numpy as np
import os

# Zip Code Level Data

We first calculate the number of farm operations. This data is from two seperate files, so the data are added to each other with missing values set to zero to produce the final numbers

In [2]:
df_farms_1 = pd.read_csv("Farms 1.csv")

In [3]:
df_farms_1["Zip Code"] = df_farms_1["Zip Code"].astype(str).str.zfill(5)
df_farms_1['Value'] = pd.to_numeric(df_farms_1['Value'].str.replace(',', ''), errors='coerce')
df_farms_1.rename(columns={'Value': 'Num Farms'}, inplace=True)
grouped_df = df_farms_1.groupby('Zip Code')['Num Farms'].sum().reset_index()
grouped_df.set_index('Zip Code', inplace=True)

In [4]:
df_farms_2 = pd.read_csv("Farms 2.csv")
df_farms_2["Zip Code"] = df_farms_2["Zip Code"].astype(str).str.zfill(5)
df_farms_2['Value'] = pd.to_numeric(df_farms_2['Value'].str.replace(',', ''), errors='coerce')
df_farms_2.rename(columns={'Value': 'Num Farms'}, inplace=True)

In [5]:
grouped_df_2 = df_farms_2.groupby('Zip Code')['Num Farms'].sum().reset_index()
grouped_df_2.set_index('Zip Code', inplace=True)

In [6]:
df_farms = grouped_df.add(grouped_df_2, fill_value=0)

The zip code 99999 represents areas which are not included in any zip code (military bases, federal lands, etc.). This line will be removed prior to statistical analysis but is computationally convenient to leave in the data until the end due to its presence in other data files.

Next, use data from the CBP to determine the number of employer establishments in each zip code. This does not include farms, so it is added to the farm data to produce a total employer establishment estimate.

In [7]:
cbp_df = pd.read_csv("zbp20totals.csv", encoding='latin1')
cbp_df["zip"] = cbp_df["zip"].astype(str).str.zfill(5)
cbp_df.set_index("zip", inplace=True)

In [8]:
cbp_df.drop("99999").describe()

Unnamed: 0,emp,qp1,ap,est
count,35052.0,35052.0,35052.0,35052.0
mean,3638.92514,54099.63,204711.9,227.137111
std,8085.098199,201458.4,694456.1,406.851597
min,0.0,0.0,11.0,3.0
25%,80.0,681.0,2958.0,11.0
50%,423.0,4163.5,17164.0,41.0
75%,3367.0,35430.5,142452.8,255.0
max,177226.0,9898336.0,26270200.0,6893.0


In [9]:
master_df = pd.concat([df_farms, cbp_df[["est", "emp"]]], axis=1)
master_df = master_df.sort_index()
master_df.rename(columns={'est': 'Num Estabs', "emp":"CBP Emp"}, inplace=True)
master_df["Total Emp Bus"] = master_df["Num Farms"].add(master_df["Num Estabs"], fill_value=0)

Next, we calculate the percentage of the population that is held by various racial and ethnic groups for each zip code.

In [10]:
race_df = pd.read_csv("Race By ZIP.csv", header = 1)
race_df["Zip"] = race_df["Geography"].str.slice(-5)
race_df.set_index("Zip", inplace=True)

In [11]:
race_df["Min Share"] = 100 * (1 - (race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!White alone"] / race_df[" !!Total:"]))
race_df["Min Share Excl B"] = 100 * (1 - ((race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!White alone"] + race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!Black or African American alone"]) / race_df[" !!Total:"]))
race_df["Black Share"] = 100 * race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!Black or African American alone"] / race_df[" !!Total:"]
race_df["Hisp Share"] = 100 * race_df[" !!Total:!!Hispanic or Latino"] / race_df[" !!Total:"]
race_df["Asian Share"] = 100 * race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!Asian alone"] / race_df[" !!Total:"]
race_df["White Share"] = 100 * race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!White alone"] / race_df[" !!Total:"]
race_df["Native Share"] =  100 * race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!American Indian and Alaska Native alone"] / race_df[" !!Total:"]

  race_df["Min Share"] = 100 * (1 - (race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!White alone"] / race_df[" !!Total:"]))
  race_df["Min Share Excl B"] = 100 * (1 - ((race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!White alone"] + race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!Black or African American alone"]) / race_df[" !!Total:"]))
  race_df["Black Share"] = 100 * race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!Black or African American alone"] / race_df[" !!Total:"]
  race_df["Hisp Share"] = 100 * race_df[" !!Total:!!Hispanic or Latino"] / race_df[" !!Total:"]
  race_df["Asian Share"] = 100 * race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!Asian alone"] / race_df[" !!Total:"]
  race_df["White Share"] = 100 * race_df[" !!Total:!!Not Hispanic or Latino:!!Population of one race:!!White alone"] / race_df[" !!Total:"]
  race_df["Native Share"] =  100 * race_df[" !!Total:!!No

In [12]:
master_df = pd.concat([master_df, race_df[["Min Share", "Min Share Excl B", "Black Share", "Hisp Share", 
                                           "Asian Share", "Native Share", "White Share", 
                                           " !!Total:"]]], axis=1)
master_df.rename(columns={' !!Total:': 'Total Pop'}, inplace=True)
master_df = master_df.sort_index()

In [13]:
rural_df = pd.read_csv("Rural.csv")
rural_df["ZIP"] = rural_df["GEOCODE"].astype(str).str.zfill(5)
rural_df.set_index("ZIP", inplace=True)
master_df["Rural"] = rural_df["Percent Rural"] * 100
master_df

Unnamed: 0,Num Farms,Num Estabs,CBP Emp,Total Emp Bus,Min Share,Min Share Excl B,Black Share,Hisp Share,Asian Share,Native Share,White Share,Total Pop,Rural
00501,,5.0,49.0,5.0,,,,,,,,,
00601,,,,,99.785408,99.779608,0.005800,99.669412,0.000000,0.000000,0.214592,17242.0,54.187449
00602,,,,,99.432726,99.406094,0.026633,99.280920,0.026633,0.007990,0.567274,37548.0,0.604560
00603,,,,,98.827403,98.666774,0.160630,98.377640,0.098386,0.010039,1.172597,49804.0,0.252992
00606,,,,,99.480934,99.460970,0.019964,99.301258,0.039928,0.000000,0.519066,5009.0,100.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99927,,,,,14.285714,14.285714,0.000000,4.081633,0.000000,0.000000,85.714286,49.0,100.000000
99928,1.0,4.0,15.0,5.0,,,,,,,,,
99929,2.0,93.0,474.0,95.0,36.459836,36.459836,0.000000,3.607504,1.202501,17.364117,63.540164,2079.0,100.000000
99950,3.0,12.0,43.0,15.0,,,,,,,,,


In [14]:
df = pd.read_csv("nhgis0004_ds254_20215_zcta.csv")
df["ZIP"] = df["GISJOIN"].astype(str).str[1:]
df.set_index("ZIP", inplace=True)
master_df["PC Inc"] = df["AORME001"]
master_df

Unnamed: 0,Num Farms,Num Estabs,CBP Emp,Total Emp Bus,Min Share,Min Share Excl B,Black Share,Hisp Share,Asian Share,Native Share,White Share,Total Pop,Rural,PC Inc
00501,,5.0,49.0,5.0,,,,,,,,,,
00601,,,,,99.785408,99.779608,0.005800,99.669412,0.000000,0.000000,0.214592,17242.0,54.187449,7587.0
00602,,,,,99.432726,99.406094,0.026633,99.280920,0.026633,0.007990,0.567274,37548.0,0.604560,10699.0
00603,,,,,98.827403,98.666774,0.160630,98.377640,0.098386,0.010039,1.172597,49804.0,0.252992,12280.0
00606,,,,,99.480934,99.460970,0.019964,99.301258,0.039928,0.000000,0.519066,5009.0,100.000000,8574.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99927,,,,,14.285714,14.285714,0.000000,4.081633,0.000000,0.000000,85.714286,49.0,100.000000,
99928,1.0,4.0,15.0,5.0,,,,,,,,,,
99929,2.0,93.0,474.0,95.0,36.459836,36.459836,0.000000,3.607504,1.202501,17.364117,63.540164,2079.0,100.000000,31080.0
99950,3.0,12.0,43.0,15.0,,,,,,,,,,


In [15]:
df = pd.read_csv("nhgis0004_ds255_20215_zcta.csv")
df["ZIP"] = df["GISJOIN"].astype(str).str[1:]
df.set_index("ZIP", inplace=True)
master_df["Gini"] = df["AO72E001"] * 100
master_df

Unnamed: 0,Num Farms,Num Estabs,CBP Emp,Total Emp Bus,Min Share,Min Share Excl B,Black Share,Hisp Share,Asian Share,Native Share,White Share,Total Pop,Rural,PC Inc,Gini
00501,,5.0,49.0,5.0,,,,,,,,,,,
00601,,,,,99.785408,99.779608,0.005800,99.669412,0.000000,0.000000,0.214592,17242.0,54.187449,7587.0,44.90
00602,,,,,99.432726,99.406094,0.026633,99.280920,0.026633,0.007990,0.567274,37548.0,0.604560,10699.0,49.26
00603,,,,,98.827403,98.666774,0.160630,98.377640,0.098386,0.010039,1.172597,49804.0,0.252992,12280.0,57.02
00606,,,,,99.480934,99.460970,0.019964,99.301258,0.039928,0.000000,0.519066,5009.0,100.000000,8574.0,43.73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99927,,,,,14.285714,14.285714,0.000000,4.081633,0.000000,0.000000,85.714286,49.0,100.000000,,
99928,1.0,4.0,15.0,5.0,,,,,,,,,,,
99929,2.0,93.0,474.0,95.0,36.459836,36.459836,0.000000,3.607504,1.202501,17.364117,63.540164,2079.0,100.000000,31080.0,41.46
99950,3.0,12.0,43.0,15.0,,,,,,,,,,,


In [16]:
df = pd.read_csv("nhgis0006_ds244_20195_zcta.csv")
df["ZIP"] = df["GISJOIN"].astype(str).str[1:]
df.set_index("ZIP", inplace=True)
master_df["UR"] = df["ALY3E005"] / df["ALY3E002"] * 100
master_df

Unnamed: 0,Num Farms,Num Estabs,CBP Emp,Total Emp Bus,Min Share,Min Share Excl B,Black Share,Hisp Share,Asian Share,Native Share,White Share,Total Pop,Rural,PC Inc,Gini,UR
00501,,5.0,49.0,5.0,,,,,,,,,,,,
00601,,,,,99.785408,99.779608,0.005800,99.669412,0.000000,0.000000,0.214592,17242.0,54.187449,7587.0,44.90,34.986667
00602,,,,,99.432726,99.406094,0.026633,99.280920,0.026633,0.007990,0.567274,37548.0,0.604560,10699.0,49.26,11.262690
00603,,,,,98.827403,98.666774,0.160630,98.377640,0.098386,0.010039,1.172597,49804.0,0.252992,12280.0,57.02,20.851813
00606,,,,,99.480934,99.460970,0.019964,99.301258,0.039928,0.000000,0.519066,5009.0,100.000000,8574.0,43.73,12.011372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99927,,,,,14.285714,14.285714,0.000000,4.081633,0.000000,0.000000,85.714286,49.0,100.000000,,,
99928,1.0,4.0,15.0,5.0,,,,,,,,,,,,
99929,2.0,93.0,474.0,95.0,36.459836,36.459836,0.000000,3.607504,1.202501,17.364117,63.540164,2079.0,100.000000,31080.0,41.46,6.626506
99950,3.0,12.0,43.0,15.0,,,,,,,,,,,,


In [17]:
df = pd.read_csv("nhgis0006_ds254_20215_zcta.csv")
df["ZIP"] = df["GISJOIN"].astype(str).str[1:]
df.set_index("ZIP", inplace=True)
master_df["%BachOrMore"] = 100 * (df["AOP8E022"] + df["AOP8E023"] + df["AOP8E024"]+ df["AOP8E025"]) / df["AOP8E001"]
master_df

Unnamed: 0,Num Farms,Num Estabs,CBP Emp,Total Emp Bus,Min Share,Min Share Excl B,Black Share,Hisp Share,Asian Share,Native Share,White Share,Total Pop,Rural,PC Inc,Gini,UR,%BachOrMore
00501,,5.0,49.0,5.0,,,,,,,,,,,,,
00601,,,,,99.785408,99.779608,0.005800,99.669412,0.000000,0.000000,0.214592,17242.0,54.187449,7587.0,44.90,34.986667,14.562560
00602,,,,,99.432726,99.406094,0.026633,99.280920,0.026633,0.007990,0.567274,37548.0,0.604560,10699.0,49.26,11.262690,21.817471
00603,,,,,98.827403,98.666774,0.160630,98.377640,0.098386,0.010039,1.172597,49804.0,0.252992,12280.0,57.02,20.851813,24.267387
00606,,,,,99.480934,99.460970,0.019964,99.301258,0.039928,0.000000,0.519066,5009.0,100.000000,8574.0,43.73,12.011372,10.361217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99927,,,,,14.285714,14.285714,0.000000,4.081633,0.000000,0.000000,85.714286,49.0,100.000000,,,,0.000000
99928,1.0,4.0,15.0,5.0,,,,,,,,,,,,,
99929,2.0,93.0,474.0,95.0,36.459836,36.459836,0.000000,3.607504,1.202501,17.364117,63.540164,2079.0,100.000000,31080.0,41.46,6.626506,15.172855
99950,3.0,12.0,43.0,15.0,,,,,,,,,,,,,


In [18]:
master_df.drop("99999", inplace=True)
master_df.tail()

Unnamed: 0,Num Farms,Num Estabs,CBP Emp,Total Emp Bus,Min Share,Min Share Excl B,Black Share,Hisp Share,Asian Share,Native Share,White Share,Total Pop,Rural,PC Inc,Gini,UR,%BachOrMore
99926,1.0,11.0,51.0,12.0,90.332907,90.076825,0.256082,2.944942,0.0,74.327785,9.667093,1562.0,100.0,24979.0,38.18,17.690058,9.295199
99927,,,,,14.285714,14.285714,0.0,4.081633,0.0,0.0,85.714286,49.0,100.0,,,,0.0
99928,1.0,4.0,15.0,5.0,,,,,,,,,,,,,
99929,2.0,93.0,474.0,95.0,36.459836,36.459836,0.0,3.607504,1.202501,17.364117,63.540164,2079.0,100.0,31080.0,41.46,6.626506,15.172855
99950,3.0,12.0,43.0,15.0,,,,,,,,,,,,,


In [19]:
master_df.describe()

Unnamed: 0,Num Farms,Num Estabs,CBP Emp,Total Emp Bus,Min Share,Min Share Excl B,Black Share,Hisp Share,Asian Share,Native Share,White Share,Total Pop,Rural,PC Inc,Gini,UR,%BachOrMore
count,32109.0,35052.0,35052.0,37322.0,33631.0,33631.0,33631.0,33631.0,33631.0,33631.0,33631.0,33774.0,33631.0,32768.0,32405.0,32538.0,33131.0
mean,61.825968,227.137111,3638.92514,266.512513,26.309044,18.998565,7.310479,10.415032,2.316485,1.77479,73.690956,9910.777107,64.4572,34018.67627,41.173309,5.457529,25.701774
std,88.443966,406.851597,8085.098199,414.475046,24.866075,20.157029,14.854133,16.399289,5.762396,9.231636,24.866075,14918.152573,44.097957,16924.633394,8.190941,6.166137,17.743436
min,1.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,289.0,0.02,0.0,0.0
25%,9.0,11.0,80.0,24.0,7.695221,6.483358,0.264276,1.863354,0.150038,0.082109,62.592946,650.0,8.526885,24837.0,37.35,2.374202,13.765205
50%,31.0,41.0,423.0,90.0,16.28866,10.917816,1.001669,4.099142,0.508504,0.220264,83.71134,2649.5,100.0,30755.0,41.57,4.277693,21.38093
75%,78.0,255.0,3367.0,326.0,37.407054,23.085094,6.340451,10.763758,1.719629,0.519481,92.304779,13359.0,100.0,38831.75,45.77,6.736421,33.676333
max,1634.0,6893.0,177226.0,6897.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,135256.0,100.0,701688.0,82.76,100.0,100.0


# County Level Data

In [20]:
county_df = pd.read_csv("ZIP-COUNTY-FIPS_2017-06.csv")
county_df["ZIP"] = county_df["ZIP"].astype(str).str.zfill(5)
county_df["STCOUNTYFP"] = county_df["STCOUNTYFP"].astype(str).str.zfill(5)

In [21]:
county_df_grouped = county_df.groupby('ZIP')['STCOUNTYFP'].agg(list).reset_index()
county_df_grouped.set_index('ZIP', inplace=True)

In [22]:
master_df = pd.concat([master_df, county_df_grouped], axis=1)

In [23]:
emp_df = pd.read_csv("Employer Race by County.csv", header = 1)
emp_df = emp_df[["Geographic identifier code", 
                 "Geographic Area Name", 
                 "Sex code", 
                 "Meaning of Sex code", 
                 "Ethnicity code",
                 "Meaning of Ethnicity code", 
                 "Race code",
                 "Meaning of Race code",
                 "Veteran code",
                 "Meaning of Veteran code",
                 "Year",
                 "Number of employer firms"]]
emp_df["FIPS"] = emp_df["Geographic identifier code"].str.slice(-5)

In oder to filter rows to just those needed, we filter the values of the other characteristic columns to those only identified as totals.

In [24]:
emp_df_filtered = emp_df[emp_df["Sex code"] == 1]
emp_df_filtered = emp_df_filtered[emp_df_filtered["Ethnicity code"] == 1]
emp_df_filtered = emp_df_filtered[emp_df_filtered["Veteran code"] == 1]
emp_df_filtered["Number of employer firms"].replace("S", np.nan, inplace=True)

In [25]:
index = sorted(list(set(emp_df_filtered["FIPS"])))
columns = list(set(emp_df_filtered["Meaning of Race code"]))
owner_df = pd.DataFrame(index = index, columns = columns).fillna(0)

In [26]:
for column in owner_df.columns:
    sub_df = emp_df_filtered[emp_df_filtered["Meaning of Race code"] == column]
    sub_df.set_index("FIPS", inplace=True)
    owner_df[column] = sub_df["Number of employer firms"].astype(np.double)

Becaude Hispanic is accounted for in another column, we calculate their share of employer businesses seperately.

In [27]:
emp_df_2 = emp_df[emp_df["Sex code"] == 1]
emp_df_2 = emp_df_2[emp_df_2["Veteran code"] == 1]
emp_df_2 = emp_df_2[emp_df_2["Race code"] == 0]
emp_df_2["Number of employer firms"].replace("S", np.nan, inplace=True)

In [28]:
owner_df[["Hispanic", "Non-Hispanic"]] = 0

In [29]:
for category in ["Hispanic", "Non-Hispanic"]:
    sub_df = emp_df_2[emp_df_2["Meaning of Ethnicity code"] == category]
    sub_df.set_index("FIPS", inplace=True)
    owner_df[category] = sub_df["Number of employer firms"].astype(np.double)

In [30]:
owner_df["Emp Min Share"] = owner_df["Minority"] / owner_df["Total"]
owner_df["Emp Black Share"] = owner_df["Black or African American"] / owner_df["Total"]
owner_df["Emp White Share"] = owner_df["White"] / owner_df["Total"]
owner_df["Emp Asian Share"] = owner_df["Asian"] / owner_df["Total"]
owner_df["Emp Hisp Share"] = owner_df["Hispanic"] / owner_df["Total"]
owner_df["Emp Non-Hisp Share"] = owner_df["Non-Hispanic"] / owner_df["Total"]
owner_df.describe()

Unnamed: 0,American Indian and Alaska Native,Total,Asian,White,Equally minority/nonminority,Black or African American,Native Hawaiian and Other Pacific Islander,Minority,Nonminority,Hispanic,Non-Hispanic,Emp Min Share,Emp Black Share,Emp White Share,Emp Asian Share,Emp Hisp Share,Emp Non-Hisp Share
count,173.0,2972.0,666.0,2667.0,313.0,370.0,37.0,1041.0,2652.0,477.0,2686.0,1031.0,367.0,2639.0,664.0,475.0,2657.0
mean,75.300578,2164.483513,785.462462,1855.20135,220.72524,285.440541,95.324324,939.705091,1712.736425,609.81761,1969.518243,0.115679,0.029252,0.798482,0.066516,0.05619,0.823044
std,107.058188,7324.429619,2782.302697,5714.036688,445.289248,511.731815,161.23121,3604.659095,4952.981741,2099.655255,6396.526661,0.087411,0.031003,0.071444,0.052491,0.073591,0.068547
min,20.0,20.0,21.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,0.013952,0.002319,0.357963,0.008291,0.003944,0.183206
25%,31.0,221.0,69.25,210.0,42.0,40.25,31.0,63.0,202.0,56.0,208.0,0.058591,0.011579,0.761289,0.035497,0.018301,0.798561
50%,44.0,520.5,168.5,487.0,94.0,106.0,46.0,158.0,475.0,145.0,496.5,0.092402,0.020918,0.807671,0.053566,0.033228,0.834123
75%,79.0,1387.5,507.75,1301.5,219.0,289.75,82.0,510.0,1268.25,390.0,1344.75,0.1413,0.033303,0.845813,0.07621,0.065245,0.862944
max,961.0,228558.0,55036.0,156111.0,5282.0,4408.0,884.0,81550.0,130465.0,33891.0,192444.0,0.679245,0.30463,1.0,0.534213,0.602947,1.0


In [31]:
missing_values_count = owner_df.isnull().sum()
missing_df = pd.DataFrame(missing_values_count)
missing_df["% missing"] = missing_df[0] / 3140
missing_df["# available"] = 3140 - missing_df[0]
missing_df

Unnamed: 0,0,% missing,# available
American Indian and Alaska Native,2967,0.944904,173
Total,168,0.053503,2972
Asian,2474,0.787898,666
White,473,0.150637,2667
Equally minority/nonminority,2827,0.900318,313
Black or African American,2770,0.882166,370
Native Hawaiian and Other Pacific Islander,3103,0.988217,37
Minority,2099,0.668471,1041
Nonminority,488,0.155414,2652
Hispanic,2663,0.848089,477


In [32]:
def calc_weighted_avg(fips_list, col_name):
    if not isinstance(fips_list, list):
        return None

    total_sum = 0
    weighted_sum = 0

    for fips in fips_list:
        if fips in owner_df.index:
            weighted_sum += owner_df.loc[fips, col_name] * owner_df.loc[fips, 'Total']
            total_sum += owner_df.loc[fips, 'Total']

    return weighted_sum / total_sum if total_sum != 0 else None

In [33]:
demographic_cols = ['Emp Min Share', 
                    'Emp Black Share',
                    'Emp White Share',
                    'Emp Asian Share',
                    'Emp Hisp Share',
                    'Emp Non-Hisp Share']

for col in demographic_cols:
    new_col_name = 'W_Avg ' + col
    master_df[new_col_name] = master_df['STCOUNTYFP'].apply(calc_weighted_avg, col_name=col)

In [34]:
master_df.columns

Index(['Num Farms', 'Num Estabs', 'CBP Emp', 'Total Emp Bus', 'Min Share',
       'Min Share Excl B', 'Black Share', 'Hisp Share', 'Asian Share',
       'Native Share', 'White Share', 'Total Pop', 'Rural', 'PC Inc', 'Gini',
       'UR', '%BachOrMore', 'STCOUNTYFP', 'W_Avg Emp Min Share',
       'W_Avg Emp Black Share', 'W_Avg Emp White Share',
       'W_Avg Emp Asian Share', 'W_Avg Emp Hisp Share',
       'W_Avg Emp Non-Hisp Share'],
      dtype='object')

In [35]:
master_df.to_csv("ZIPdf.csv")

In [36]:
def weighted_average(df, values_col, weights_col):
    # Element-wise multiplication, ignoring NaNs
    weighted_values = df[values_col] * df[weights_col]
    
    # Sum of weighted values and weights, ignoring NaNs
    sum_weighted_values = weighted_values.sum(skipna=True)
    sum_weights = df[weights_col].sum(skipna=True)
    
    # Calculate weighted average, returning NaN if not computable
    if sum_weights == 0:
        return np.nan
    else:
        return sum_weighted_values / sum_weights

weighted_average(master_df, "Min Share", "Total Pop")

42.723896750764816

<p><b>Release Date:</b> 2020-05-19</p>

<p>The Census Bureau has reviewed this data product for unauthorized disclosure of confidential information and has approved the disclosure avoidance practices applied (Approval ID: CBDRB-FY20-008).</p>

<p><b>Release Schedule:</b><br />
Data in this file come from estimates of business ownership by sex, ethnicity, race, and veteran status from the 2018 Annual Business Survey (ABS) collection. Data are also obtained from administrative records, the 2017 Economic Census and other economic surveys.</p>

<p><i>Note:</i> The collection year is the year in which the data are collected. A reference year is the year that is referenced in the questions on the survey and in which the statistics are tabulated. For example, the 2018 ABS collection year produces statistics for the 2017 reference year. The "Year" column in the table is the reference year. The ABS has a larger sample size during the benchmark year of 2017. Due to the larger size, more detailed data are shown for reference year 2017.</p>

<p>For more information about ABS planned data product releases, see <a href="https://www.census.gov/programs-surveys/abs/newsroom/updates/tentative-schedule.html">Tentative ABS Schedule</a>.</p>

<p><b>Key Table Information:</b><br />
Includes U.S. firms with paid employees, operating during the reference year with receipts of $1,000 or more, which are classified in the North American Industry Classification System (NAICS), Sectors 11 through 99, except for NAICS 111, 112, 482, 491, 521, 525, 813, 814, and 92 which are not covered. Employer firms with more than one domestic establishment are counted in each geographic area and industry in which they operate, but only once in the U.S. and state totals for all sectors. Employment reflects the number of paid employees during the pay period in the reference year that included March 12.</p>

<p><b>Data Items and Other Identifying Records:</b><br />
Data include estimates on:
<ul><li>Number of employer firms (firms with paid employees)</li>
    <li>Sales and receipts of employer firms (reported in $1,000s of dollars)</li>
    <li>Number of employees (during the March 12 pay period)</li>
    <li>Annual payroll (reported in $1,000s of dollars)</li>
</ul></p>

<p>These data are aggregated by the following demographic classifications of firm for:
<ul><li>All firms
    <ul><li>Classifiable (firms classifiable by sex, ethnicity, race, and veteran status)
    <ul>
	<li>Sex
        <ul><li>Female</li>
            <li>Male</li>
            <li>Equally male/female</li>
        </ul></li>
    <li>Ethnicity
	    <ul><li>Hispanic</li>
            <li>Equally Hispanic/non-Hispanic</li>
            <li>Non-Hispanic</li>
        </ul></li>
    <li>Race
	    <ul><li>White</li>
            <li>Black or African American</li>
            <li>American Indian and Alaska Native</li>
            <li>Asian</li>
            <li>Native Hawaiian and Other Pacific Islander</li>
            <li>Minority (Firms classified as any race and ethnicity combination other than non-Hispanic and White)</li>
            <li>Equally minority/nonminority</li>
            <li>Nonminority (Firms classified as non-Hispanic and White)</li>
        </ul></li>
    <li>Veteran Status (defined as having served in any branch of the U.S. Armed Forces)
	    <ul><li>Veteran</li>
            <li>Equally veteran/nonveteran</li>
            <li>Nonveteran</li>
        </ul></li>
    </ul>
	</li>
        <li>Unclassifiable (firms not classifiable by sex, ethnicity, race, and veteran status)</li>
    </ul></li>
</ul></p>

<p>Moreover, the 2017 reference year statistics include detailed race and ethnicity data tabulated for:
<ul><li>Hispanic subgroups
    <ul><li>Mexican, Mexican American, Chicano</li>
        <li>Puerto Rican</li>
        <li>Cuban</li>
        <li>Other Hispanic, Latino, or Spanish</li>
    </ul></li>
    <li>Asian subgroups
    <ul><li>Asian Indian</li>
        <li>Chinese</li>
        <li>Filipino</li>
        <li>Japanese</li>
        <li>Korean</li>
        <li>Vietnamese</li>
        <li>Other Asian</li>
    </ul></li>
    <li>Native Hawaiian and Other Pacific Islander subgroups
	<ul><li>Native Hawaiian</li>
	<li>Guamanian or Chamorro</li>
        <li>Samoan</li>
        <li>Other Pacific Islander</li>
    </ul></li>
</ul></p>

<p><i>Data Notes:</i>
<ol>
    <li>Business ownership is defined as having 51 percent or more of the stock or equity in the business. Data are provided for businesses owned equally (50% / 50%) by men and women, by Hispanics and non-Hispanics, by minorities and nonminorities, and by veterans and nonveterans. Firms not classifiable by sex, ethnicity, race, and veteran status are counted and tabulated separately.</li>
    <li>The detail may not add to the total or subgroup total because a Hispanic or Latino firm may be of any race, and because a firm could be tabulated in more than one racial group. For example, if a firm responded as both Chinese and Black majority owned, the firm would be included in the detailed Asian and Black estimates but would only be counted once toward the higher level all firms' estimates.</li>
    <li>References such as "Mexican-owned," "Puerto Rican-owned," "Cuban-owned" or "other Hispanic- or Latino-owned" businesses refer only to businesses operating in the 50 states and the District of Columbia that self-identified 51 percent or more of their ownership in 2017 to be by individuals of Mexican, Puerto Rican, Cuban or other Hispanic or Latino origin. The ABS does not distinguish between U.S. residents and nonresidents. Companies owned by foreign governments or owned by other companies, foreign or domestic, are included in the category "Unclassifiable."</li>
</ol></p>

<p><b>Industry and Geography Coverage:</b><br />

The data are shown for the total for all sectors (00) and the 2-, 3-, 4-, 5-, and 6-digit NAICS code levels for:

<ul><li>United States</li>
    <li><a href="https://www.census.gov/programs-surveys/economic-census/geographies/levels/2017-levels.html#par_textimage_38">States and the District of Columbia</a></li>
</ul></p>

<p>In addition, the total of all sectors (00) and the 2-digit NAICS code levels are shown for:

<ul><li>Metro Areas, including:
    <ul>
        <li><a href="https://www.census.gov/programs-surveys/economic-census/geographies/levels/2017-levels.html#par_textimage_6">Metropolitan Statistical Areas</a></li>
        <li><a href="https://www.census.gov/programs-surveys/economic-census/geographies/levels/2017-levels.html#par_textimage_7">Micropolitan Statistical Areas</a></li>
        <li><a href="https://www.census.gov/programs-surveys/economic-census/geographies/levels/2017-levels.html#par_textimage_8">Metropolitan Divisions</a></li>
        <li><a href="https://www.census.gov/programs-surveys/economic-census/geographies/levels/2017-levels.html#par_textimage_9">Combined Statistical Areas</a></li>
    </ul></li>
    <li><a href="https://www.census.gov/programs-surveys/economic-census/geographies/levels/2017-levels.html#par_textimage">Counties</a></li>
    <li><a href="https://www.census.gov/programs-surveys/economic-census/geographies/levels/2017-levels.html#par_textimage_19">Economic Places</a></li>
</ul></p>

<p>For more information about NAICS, see <a href="https://www.census.gov/programs-surveys/economic-census/guidance/understanding-naics.html">NAICS Codes & Understanding Industry Classification Systems</a>. For information about geographies used by economic programs at the Census Bureau, see <a href="https://www.census.gov/programs-surveys/economic-census/geographies.html">Economic Census: Economic Geographies.</a></p>

<p><b>Footnotes:</b><br />
Footnote 660 - Agriculture, forestry, fishing and hunting (Sector 11): Crop and Animal Production (NAICS 111 and 112) are out of scope.<br />
Footnote 661 - Transportation and warehousing (Sector 48-49): Rail Transportation (NAICS 482) and the Postal Service (NAICS 491) are out of scope.<br />
Footnote 662 - Finance and insurance (Sector 52): Monetary Authorities-Central Banks (NAICS 521) and Funds, Trusts, and Other Financial Vehicles (NAICS 525) are out of scope.<br />
Footnote 663 - Other services, except public administration (Sector 81): Religious, Grantmaking, Civic, Professional, and Similar Organizations (NAICS 813) and Private Households (NAICS 814) are out of scope.</p>

<p><b>FTP Download:</b><br />
Download the entire table at: <a href="https://www2.census.gov/programs-surveys/abs/data/2017/AB1700CSA01.zip">https://www2.census.gov/programs-surveys/abs/data/2017/AB1700CSA01.zip</a>.</p>

<p><b>API Information:</b><br />
Annual Business Survey data are housed in the Census Bureau API. For more information, see <a href="https://api.census.gov/data/2017/abscs.html ">https://api.census.gov/data/2017/abscs.html</a>.</p>

<p><b>Methodology:</b><br />
To maintain confidentiality, the Census Bureau suppresses data to protect the identity of any business or individual. The census results in this file contain sampling and/or nonsampling error. Data users who create their own estimates using data from this file should cite the Census Bureau as the source of the original data only. For information on confidentiality protection, sampling error, nonsampling error, and definitions, see <a href="https://www.census.gov/programs-surveys/abs/technical-documentation/methodology.html">Survey Methodology</a>.</p>

<p><b>Symbols:</b><br />
 <b>S</b> - Estimate does not meet publication standards because of high sampling variability, poor response quality, or other concerns about the estimate quality. Unpublished estimates derived from this table by subtraction are subject to these same limitations and should not be attributed to the U.S. Census Bureau. For a description of publication standards and the total quantity response rate, see link to program methodology <a href="https://www.census.gov/programs-surveys/abs/technical-documentation/methodology.html">page</a>.<br />
 <b>N</b> - Not available or not comparable<br />
 <b>X</b> - Not applicable<br />
For a complete list of all economic programs symbols, see the <a href="https://www.census.gov/programs-surveys/economic-census/technical-documentation/data-dictionary.html">Symbols Glossary</a>.</p>

<p><b>Source:</b><br />
U.S. Census Bureau, Annual Business Survey (ABS)<br />
For more information about the survey, please visit <a href="https://www.census.gov/programs-surveys/abs.html">https://www.census.gov/programs-surveys/abs.html</a>.</p>

<p><b>Contact Information:</b><br />
To contact the Annual Business Survey staff:
<ul><li>Email general, nonsecure, and unencrypted messages to <a href="mailto:adep.annual.business.survey@census.gov">adep.annual.business.survey@census.gov</a>.</li>
    <li>Call 301.763.3316 between 7 a.m. and 5 p.m. (EST), Monday through Friday.</li>
</ul>
</p>
