# Applying County Information to Pantry and Client Data Sets

In [1]:
import pandas as pd
import re
import json
import geopandas as gpd

In [2]:
masterAgency = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Final Data\agencyDfFinal.csv")
counties = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Raw Data\NCDOT_City_Boundaries.csv")

## Create Agenct Pivot Table

In [3]:
masterAgency = masterAgency.rename(columns={'Unnamed: 0':'ID'})

In [4]:
city = r',\s([A-Za-z]+),'
masterAgency['City'] = masterAgency['Address Master'].str.extract(city, expand=True)

In [5]:
masterAgency['City']

0           Raleigh
1           Raleigh
2         Henderson
3               NaN
4      Hillsborough
           ...     
883             NaN
884      Whiteville
885          Currie
886           Selma
887      Greenville
Name: City, Length: 888, dtype: object

In [6]:
masterAgency = masterAgency.set_index('City')

In [7]:
counties = counties[['MunicipalBoundaryName', 'CountyName1']]


In [8]:
counties = counties.set_index('MunicipalBoundaryName')

In [9]:
master = masterAgency.join(counties)

In [10]:
master.info()

<class 'pandas.core.frame.DataFrame'>
Index: 888 entries, Aberdeen to nan
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                886 non-null    object 
 1   Site Name Master  888 non-null    object 
 2   Lat               888 non-null    float64
 3   Lon               888 non-null    float64
 4   Address Master    436 non-null    object 
 5   CSFP              217 non-null    object 
 6   TEFAP             237 non-null    object 
 7   Score             888 non-null    float64
 8   CountyName1       337 non-null    object 
dtypes: float64(3), object(6)
memory usage: 69.4+ KB


In [11]:
#split off the null addresses to geocode and get city info
masterNone = master[master['Address Master'].isnull()]

In [12]:
masterNone.info()

<class 'pandas.core.frame.DataFrame'>
Index: 452 entries, nan to nan
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                452 non-null    object 
 1   Site Name Master  452 non-null    object 
 2   Lat               452 non-null    float64
 3   Lon               452 non-null    float64
 4   Address Master    0 non-null      object 
 5   CSFP              0 non-null      object 
 6   TEFAP             121 non-null    object 
 7   Score             452 non-null    float64
 8   CountyName1       0 non-null      object 
dtypes: float64(3), object(6)
memory usage: 35.3+ KB


In [13]:
masterNone.to_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\masterNone.csv")

**Inbetween these two steps I ran my pycharm reverse geocoder to get addresses and then read the file back in**

In [14]:
#this is the geocoded lat lons that had null addresses from above
countyFix = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Final Data\masterNoneFix.csv")

In [15]:
cityTwo = r',\s([A-Za-z]+?\s?[A-Za-z]+),'
countyFix['City'] = countyFix['Address'].str.extract(cityTwo, expand=True)

In [16]:
countyFix = countyFix.set_index('City')
countyFix = countyFix.join(counties, rsuffix='N')

In [17]:
countyFix = countyFix.drop(
    ['Unnamed: 0', 'Unnamed: 0.1', 'CountyName1'], axis=1).rename(
    columns={'CountyName1N':'CountyName1'})

In [18]:
countyFix['Address Master'] = countyFix['Address']

In [19]:
countyFix.info()

<class 'pandas.core.frame.DataFrame'>
Index: 452 entries, Aberdeen to nan
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                452 non-null    object 
 1   Site Name Master  452 non-null    object 
 2   Lat               452 non-null    float64
 3   Lon               452 non-null    float64
 4   Address Master    452 non-null    object 
 5   CSFP              0 non-null      float64
 6   TEFAP             121 non-null    object 
 7   Score             452 non-null    float64
 8   Address           452 non-null    object 
 9   CountyName1       422 non-null    object 
dtypes: float64(4), object(6)
memory usage: 38.8+ KB


In [20]:
master = master.dropna(subset=['Address Master'])

In [21]:
master.info()

<class 'pandas.core.frame.DataFrame'>
Index: 436 entries, Aberdeen to nan
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                434 non-null    object 
 1   Site Name Master  436 non-null    object 
 2   Lat               436 non-null    float64
 3   Lon               436 non-null    float64
 4   Address Master    436 non-null    object 
 5   CSFP              217 non-null    object 
 6   TEFAP             116 non-null    object 
 7   Score             436 non-null    float64
 8   CountyName1       337 non-null    object 
dtypes: float64(3), object(6)
memory usage: 34.1+ KB


In [22]:
conc = [master, countyFix]
master = pd.concat(conc)

In [23]:
master = master.drop('Address', axis=1).drop_duplicates(subset='Address Master')
master

Unnamed: 0,ID,Site Name Master,Lat,Lon,Address Master,CSFP,TEFAP,Score,CountyName1
Aberdeen,S1000,SFSP - Aberdeen Recreation,35.135607,-79.428411,"301 Lk Pk Xing, Aberdeen, NC 28315, USA",,,1.0,MOORE
Aberdeen,S1010,CKC - BGC Sandhills Aberdeen,35.139833,-79.425729,"503 N Sandhills Blvd, Aberdeen, NC 28315, USA",,,1.0,MOORE
Angier,R1012,Angier Area Food Pantry,35.508211,-78.739661,"455 W Depot St, Angier, NC 27501, USA",,TEFAP,5.0,HARNETT
Apex,R1418,Western Wake Crisis Ministry,35.730734,-78.851024,"1600 Olive Chapel Rd, Apex, NC 27502, USA",CSFP,,7.0,WAKE
Apex,R1419,White Oak Foundation,35.779971,-78.918960,"1621 White Oak Church Rd, Apex, NC 27523, USA",CSFP,TEFAP,7.0,WAKE
...,...,...,...,...,...,...,...,...,...
Youngsville,R1270,Oak Level Christian Church,36.039261,-78.448398,"2279 Cedar Creek Rd, Youngsville, NC 27596, USA",,,1.0,FRANKLIN
Zebulon,R1011,An Elegant Affair,35.821272,-78.312597,"100 W Vance St, Zebulon, NC 27597, USA",,,1.0,WAKE
,D1172,DD-Henderson Towers Resident,35.997846,-36.086900,8975XWX7+46,,,1.0,
,R1156,Fuquay-Varina Emergency,35.584398,-78.802087,"225 W Academy St, Fuquay-Varina, NC 27526, USA",,TEFAP,1.0,


In [24]:
master.to_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Final Data\masterAgencyV1.csv")

## Create table for Acency Data by County

In [25]:
master['CSFP'] = master['CSFP'].fillna('')
master['TEFAP'] = master['TEFAP'].fillna('')
master['TEFAP'] = master['TEFAP'].str.replace(' TEMP', '')
master = master.set_index('ID')
master['Services'] = master['CSFP'] + ' ' + master['TEFAP']
master['Services'] = master['Services'].str.strip()
andpat = r'\b[\s]\b'
master['Services'] = master['Services'].str.replace(andpat, '&', regex=True)
master

Unnamed: 0_level_0,Site Name Master,Lat,Lon,Address Master,CSFP,TEFAP,Score,CountyName1,Services
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
S1000,SFSP - Aberdeen Recreation,35.135607,-79.428411,"301 Lk Pk Xing, Aberdeen, NC 28315, USA",,,1.0,MOORE,
S1010,CKC - BGC Sandhills Aberdeen,35.139833,-79.425729,"503 N Sandhills Blvd, Aberdeen, NC 28315, USA",,,1.0,MOORE,
R1012,Angier Area Food Pantry,35.508211,-78.739661,"455 W Depot St, Angier, NC 27501, USA",,TEFAP,5.0,HARNETT,TEFAP
R1418,Western Wake Crisis Ministry,35.730734,-78.851024,"1600 Olive Chapel Rd, Apex, NC 27502, USA",CSFP,,7.0,WAKE,CSFP
R1419,White Oak Foundation,35.779971,-78.918960,"1621 White Oak Church Rd, Apex, NC 27523, USA",CSFP,TEFAP,7.0,WAKE,CSFP&TEFAP
...,...,...,...,...,...,...,...,...,...
R1270,Oak Level Christian Church,36.039261,-78.448398,"2279 Cedar Creek Rd, Youngsville, NC 27596, USA",,,1.0,FRANKLIN,
R1011,An Elegant Affair,35.821272,-78.312597,"100 W Vance St, Zebulon, NC 27597, USA",,,1.0,WAKE,
D1172,DD-Henderson Towers Resident,35.997846,-36.086900,8975XWX7+46,,,1.0,,
R1156,Fuquay-Varina Emergency,35.584398,-78.802087,"225 W Academy St, Fuquay-Varina, NC 27526, USA",,TEFAP,1.0,,TEFAP


In [26]:
master = master.rename(columns={'CountyName1': 'County'})
masterPivot = pd.pivot_table(master, index='County', columns='Services', aggfunc='size')

In [27]:
masterPivot = masterPivot.drop(['71', 'HOKE'])


In [28]:
masterPivot = masterPivot.rename(columns={'': 'No Services'}).fillna(0.0)

In [29]:
masterPivot[['No Services', 'CSFP', 'TEFAP', 'CSFP&TEFAP']] = masterPivot[['No Services', 'CSFP', 'TEFAP', 'CSFP&TEFAP']].astype(int)

In [30]:
masterPivot['Total Agencies'] = masterPivot.sum(axis=1).astype(int)

In [31]:
masterPivot

Services,No Services,CSFP,CSFP&TEFAP,TEFAP,Total Agencies
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALAMANCE,1,1,0,0,2
BEAUFORT,1,0,0,0,1
BRUNSWICK,12,1,1,3,17
CARTERET,2,0,0,3,5
CHATHAM,2,1,1,2,6
COLUMBUS,4,0,1,1,6
CRAVEN,7,1,0,4,12
DUPLIN,5,0,0,2,7
DURHAM,55,12,1,11,79
EDGECOMBE,2,1,1,4,8


In [32]:
masterPivot.to_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Final Data\agencyPivotForDash.csv")

## Clients

In [33]:
clients1 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress1.csv")
clients2 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress2.csv")
clients3 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress3.csv")
clients4 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress4.csv")
clients5 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress5.csv")
clients6 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress6.csv")
clients7 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress7.csv")
clients8 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress8.csv")
clients9 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress9.csv")
clients10 = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\clientAddress10.csv")

In [34]:
clientConcat = [clients1, clients2, clients3, clients4, clients5, clients6, clients7, clients8, clients9, clients10]
clients = pd.concat(clientConcat)

In [35]:
program = pd.read_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Processed Data\dashDemographicsUnclean.csv")

In [36]:
#there are many IDs that have duplicates so we will delete the duplicates and keep only the most recent
program.drop_duplicates(subset='Client ID', keep='last', inplace=True)

#now filter out all members who are not 'active' in the `client status` column
program = program[
    ((program['Client CSFP Status'] == 'active') 
    | (program['Client CSFP Status'] == 'closed') 
    | (program['Client CSFP Status'] == 'waiting-list') 
    | (program['Client CSFP Status'] == 'suspended'))]

In [37]:
program = program[['Client ID', 'Program Name']]

In [38]:
program = program.set_index('Client ID')

In [39]:
clients.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8076 entries, 0 to 84
Data columns (total 22 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Unnamed: 0.1                     8076 non-null   int64  
 1   Unnamed: 0                       8076 non-null   int64  
 2   Visit Date                       8076 non-null   object 
 3   Client ID                        8076 non-null   float64
 4   Client Status                    8076 non-null   object 
 5   Client Age                       8076 non-null   float64
 6   Client Gender                    8076 non-null   object 
 7   Client Ethnicities               8076 non-null   object 
 8   Client Disability                8076 non-null   object 
 9   Client Employment                8076 non-null   object 
 10  Highest Education Level          8076 non-null   object 
 11  Client CSFP Status               8076 non-null   object 
 12  City                  

In [40]:
clients = clients.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis=1)

In [41]:
cityThree = r',\s([A-Za-z]+?\s?-?[A-Za-z]+?\s?[A-Za-z]+),'
clients['City'] = clients['Address'].str.extract(cityThree, expand=True)

In [42]:
clients = clients.set_index('City')

In [43]:
clients = clients.join(counties)

In [44]:
clients['CountyName1'] = clients['CountyName1'].fillna(clients['County'])

In [45]:
nanCounty = clients[clients.loc[:,'CountyName1'].isna()]

In [46]:
clients = clients.dropna(subset=['CountyName1'])

In [47]:
nullPat = r',\s([A-Za-z]+?\s?-?[A-Za-z]+?\s?[A-Za-z]+),'
nullPat2 = r'(?<=\w\w\w\w\+\w\w\s)([A-Za-z]+?\s?-?[A-Za-z]+?\s?[A-Za-z]+)+'
nanCounty.loc[:,'City'] = nanCounty.loc[:,'Address'].str.extract(nullPat)
nanCounty.loc[:,'City2'] = nanCounty.loc[:,'Address'].str.extract(nullPat2)
nanCounty.loc[:,'City'] = nanCounty.loc[:,'City'].fillna(nanCounty['City2'])
nanCounty = nanCounty.drop('City2', axis=1)

In [48]:
nanCounty = nanCounty.set_index('City')

In [49]:
citcount = {'Ash':'Brunswick', 'Bahama':'Durham', 'Battleboro':'Nash', 'Bear Creek':'Chatham', 
            'Belmont':'Gaston', 'Bennett':'Chatham', 'Bullock':'Granville', 'Castle Hayne':'New Hanover', 'Cedar Grove':'Orange', 'Centerville':'Franklin', 
            'Eagle Springs':'Moore', 'Efland':'Orange', 'Hampstead':'Pender', 
            'Hollister':'Halifax', 'Hubert':'Onslow', 'Jackson Springs':'Moore', 
            'Laurel Hill':'Scotland', 'Longwood':'Brunswick', 'Marston':'Richmond', 
            'Moncure':'Chatham', 'Mt Olive':'Duplin', 'New Hill':'Wake', 
            'Rougemont':'Person', 'Supply':'Brunswick', 'Timberlake':'Person', 
            'West End':'Moore', 'Willard':'Pender', 'Willow Spring':'Wake', 'Winnabow':'Brunswick', 
            'Heathsville':'Halifax', 'Creedmoor':'Granville', 'Culbreth':'Granville', 'Butner':'Granville', 
            'Stem':'Granville', 'Scotland':'Scotland', 'Kinston':'Lenoir', 'Tillery':'Halifax', 'Oxford':'Granville',
            'Laurinburg':'Scotland', 'Wallace':'Pender', 'Pittsboro':'Chatham', 'Halifax':'Halifax', 'Moriah':'Person',
            'Siler':'Chatham', 'Justice':'Franklin', 'Castalia':'Nash', 'Ellerbe':'Richmond', 
            'Littleton':'Halifax', 'Norman':'Richmond', 'Airlie':'New Hanover', 'Sanford':'Lee', 'Enfield':'Halifax', 
            'Bonlee':'Chatham', 'Stovall':'Granville', 'Hobgood':'Halifax', 'Maysville':'Jones',
            'Rocky':'Nash', 'Goldston':'Chatham', 'Garland':'Sampson', 'Wendell':'Wake', 'Lockville':'Chatham', 
            'Rockingham':'Richmond'}

In [50]:
cntydf = pd.DataFrame.from_dict(citcount, orient='index', columns=['County'])

In [51]:
nanCounty = nanCounty.join(cntydf, rsuffix='NN')

In [52]:
nanCounty['CountyName1'] = nanCounty['CountyName1'].fillna(nanCounty['CountyNN'])

In [53]:
nanCounty = nanCounty.drop('County', axis=1)

In [54]:
nanCounty = nanCounty.rename(columns={'CountyName1':'County'}).drop('CountyNN', axis=1)

In [55]:
nanCounty = nanCounty.dropna(subset=['County'])

In [56]:
clients = clients.drop('County', axis=1).rename(columns={'CountyName1':'County'})

In [57]:
countyAdds = [clients, nanCounty]
clients = pd.concat(countyAdds)

In [58]:
clients = clients.dropna(subset=['County'])

In [59]:
clients['County'] = clients['County'].str.upper()

In [60]:
nonCountyList = ['ALAMANCE', 'BEAUFORT', 'BERTIE', 'BLADEN', 'CASWELL', 'CHOWAN', 'DAVIDSON', 'FORSYTH',
                                  'GASTON', 'GUILFORD', 'HOKE', 'HERTFORD', 'MARTIN', 'MONTGOMERY',
                                 'NORTHAMPTON', 'PASQUOTANK', 'RANDOLPH', 'ROBESON', 'WASHINGTON']
clients = clients[~clients['County'].isin(nonCountyList)]

In [61]:
clients.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7919 entries, Aberdeen to Winnabow
Data columns (total 19 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Visit Date                       7919 non-null   object 
 1   Client ID                        7919 non-null   float64
 2   Client Status                    7919 non-null   object 
 3   Client Age                       7919 non-null   float64
 4   Client Gender                    7919 non-null   object 
 5   Client Ethnicities               7919 non-null   object 
 6   Client Disability                7919 non-null   object 
 7   Client Employment                7919 non-null   object 
 8   Highest Education Level          7919 non-null   object 
 9   Client CSFP Status               7919 non-null   object 
 10  Household Primary Income Source  7919 non-null   object 
 11  Total Monthly Gross Income       7919 non-null   float64
 12  Dietary Consid

In [62]:
clients = clients.reset_index().set_index('Client ID')

In [63]:
clients = clients.join(program)

In [64]:
moneyPivot = pd.pivot_table(clients, values='Total Monthly Gross Income', index='County', aggfunc='median')

In [65]:
moneyPivot = moneyPivot.rename(columns={'Total Monthly Gross Income': 'Median Monthly Income'})

In [66]:
agePivot = pd.pivot_table(clients, values='Client Age', index='County', aggfunc='mean')

In [67]:
agePivot = agePivot.rename(columns={'Client Age': 'Average Age'}).astype(int)

In [68]:
genderPivot = pd.pivot_table(clients, index='County', columns='Client Gender', aggfunc='size')

In [69]:
programPivot = pd.pivot_table(clients, index='County', columns='Program Name', aggfunc='size')

In [70]:
programPivot['Total Clients'] = programPivot.sum(axis=1).astype(int)

In [71]:
ethnicityPivot = pd.pivot_table(clients, index='County', columns='Client Ethnicities', aggfunc='size')
ethnicityPivot = ethnicityPivot[['American Indian', 'Asian', 'Black', 'Hispanic Latino', 'White Anglo']].fillna(0.0).astype(int)

In [72]:
finalClientPivot = programPivot.join(moneyPivot)

In [73]:
finalClientPivot = finalClientPivot.join(agePivot)

In [74]:
finalClientPivot = finalClientPivot.join(genderPivot)

In [75]:
finalClientPivot = finalClientPivot.join(ethnicityPivot)

In [76]:
finalClientPivot = finalClientPivot.fillna(0).astype(int).rename(columns={'TEFAP Visit': 'Client CSFP Visit', 'Food Pantry Visit':'Client Pantry Visit', 'CSFP Visit':'Client CSFP Visit', 'female':'Female', 'male':'Male'}).drop('undisclosed', axis=1)

In [77]:
finalClientPivot

Unnamed: 0_level_0,Client CSFP Visit,Client Pantry Visit,TEFAP Pantry Visit,Total Clients,Median Monthly Income,Average Age,Female,Male,American Indian,Asian,Black,Hispanic Latino,White Anglo
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
BRUNSWICK,108,80,29,217,1045,70,129,87,0,1,34,6,159
CHATHAM,192,84,40,316,1079,70,215,100,2,2,171,11,117
COLUMBUS,4,2,0,6,500,69,4,2,0,0,4,0,2
CRAVEN,224,0,0,224,948,71,163,61,1,0,171,0,48
DUPLIN,8,1,1,10,1244,74,10,0,0,0,7,1,1
DURHAM,662,8,15,685,975,71,455,230,4,4,586,8,68
EDGECOMBE,71,0,1,72,902,73,59,13,0,0,72,0,0
FRANKLIN,106,1,2,109,1000,73,73,36,3,0,83,0,22
GRANVILLE,447,1,2,450,987,72,316,130,1,0,349,1,75
GREENE,39,0,116,155,1067,71,116,39,0,1,90,0,60


In [78]:
finalClientPivot.to_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Final Data\clientPivot.csv")

## Join the Agency and Client pivot Tables

In [79]:
thePivot = masterPivot.join(finalClientPivot).rename(
    columns={
        'No Services': 'Standard Pantry', 
        'CSFP':'CSFP Pantry',
        'CSFP&TEFAP':'CSFP&TEFAP Pantry', 
        'TEFAP':'TEFAP Pantry'}
).drop(['ALAMANCE', 'BEAUFORT', 'MONTGOMERY'], axis=0
      ).fillna(0).reset_index()

In [80]:
thePivot.to_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Final Data\thePivot.csv")
thePivot


Unnamed: 0,County,Standard Pantry,CSFP Pantry,CSFP&TEFAP Pantry,TEFAP Pantry,Total Agencies,Client CSFP Visit,Client Pantry Visit,TEFAP Pantry Visit,Total Clients,Median Monthly Income,Average Age,Female,Male,American Indian,Asian,Black,Hispanic Latino,White Anglo
0,BRUNSWICK,12,1,1,3,17,108.0,80.0,29.0,217.0,1045.0,70.0,129.0,87.0,0.0,1.0,34.0,6.0,159.0
1,CARTERET,2,0,0,3,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,CHATHAM,2,1,1,2,6,192.0,84.0,40.0,316.0,1079.0,70.0,215.0,100.0,2.0,2.0,171.0,11.0,117.0
3,COLUMBUS,4,0,1,1,6,4.0,2.0,0.0,6.0,500.0,69.0,4.0,2.0,0.0,0.0,4.0,0.0,2.0
4,CRAVEN,7,1,0,4,12,224.0,0.0,0.0,224.0,948.0,71.0,163.0,61.0,1.0,0.0,171.0,0.0,48.0
5,DUPLIN,5,0,0,2,7,8.0,1.0,1.0,10.0,1244.0,74.0,10.0,0.0,0.0,0.0,7.0,1.0,1.0
6,DURHAM,55,12,1,11,79,662.0,8.0,15.0,685.0,975.0,71.0,455.0,230.0,4.0,4.0,586.0,8.0,68.0
7,EDGECOMBE,2,1,1,4,8,71.0,0.0,1.0,72.0,902.0,73.0,59.0,13.0,0.0,0.0,72.0,0.0,0.0
8,FRANKLIN,4,1,1,2,8,106.0,1.0,2.0,109.0,1000.0,73.0,73.0,36.0,3.0,0.0,83.0,0.0,22.0
9,GRANVILLE,5,1,1,3,10,447.0,1.0,2.0,450.0,987.0,72.0,316.0,130.0,1.0,0.0,349.0,1.0,75.0


In [81]:
countylist = thePivot['County'].to_list()

**Make a geodf

In [82]:
geo = open(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Raw Data\NCDOT_County_Boundaries.geojson")
geojson = json.load(geo)



In [83]:
geodf = gpd.GeoDataFrame.from_features(geojson).rename(columns={'UpperCountyName':'County'})

In [84]:
geodf = geodf[geodf['County'].isin(countylist)]

In [85]:
geodf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 34 entries, 6 to 92
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   geometry       34 non-null     geometry
 1   OBJECTID       34 non-null     int64   
 2   FIPS           34 non-null     int64   
 3   CountyName     34 non-null     object  
 4   County         34 non-null     object  
 5   SapCountyId    34 non-null     object  
 6   DOTDistrictID  34 non-null     int64   
 7   DOTDivisionID  34 non-null     int64   
 8   SAP_CNTY_NBR   34 non-null     int64   
 9   CNTY_NBR       34 non-null     int64   
 10  DSTRCT_NBR     34 non-null     int64   
 11  DIV_NBR        34 non-null     int64   
 12  NAME           34 non-null     object  
 13  SHPNumber      34 non-null     int64   
 14  ShapeSTArea    34 non-null     float64 
 15  ShapeSTLength  34 non-null     float64 
dtypes: float64(2), geometry(1), int64(9), object(4)
memory usage: 4.5+ KB


In [86]:
geodf = geodf.merge(thePivot, on='County')

In [87]:
geodf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 34 entries, 0 to 33
Data columns (total 34 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   geometry               34 non-null     geometry
 1   OBJECTID               34 non-null     int64   
 2   FIPS                   34 non-null     int64   
 3   CountyName             34 non-null     object  
 4   County                 34 non-null     object  
 5   SapCountyId            34 non-null     object  
 6   DOTDistrictID          34 non-null     int64   
 7   DOTDivisionID          34 non-null     int64   
 8   SAP_CNTY_NBR           34 non-null     int64   
 9   CNTY_NBR               34 non-null     int64   
 10  DSTRCT_NBR             34 non-null     int64   
 11  DIV_NBR                34 non-null     int64   
 12  NAME                   34 non-null     object  
 13  SHPNumber              34 non-null     int64   
 14  ShapeSTArea            34 non-null  

In [88]:
geodf.to_csv(r"C:\Users\htwal\Jupyter Projects\6a.food_bank_client_mapping\Final Data\thePivotPlusGeodata.csv")

In [89]:
geodf.columns.to_list()

['geometry',
 'OBJECTID',
 'FIPS',
 'CountyName',
 'County',
 'SapCountyId',
 'DOTDistrictID',
 'DOTDivisionID',
 'SAP_CNTY_NBR',
 'CNTY_NBR',
 'DSTRCT_NBR',
 'DIV_NBR',
 'NAME',
 'SHPNumber',
 'ShapeSTArea',
 'ShapeSTLength',
 'Standard Pantry',
 'CSFP Pantry',
 'CSFP&TEFAP Pantry',
 'TEFAP Pantry',
 'Total Agencies',
 'Client CSFP Visit',
 'Client Pantry Visit',
 'TEFAP Pantry Visit',
 'Total Clients',
 'Median Monthly Income',
 'Average Age',
 'Female',
 'Male',
 'American Indian',
 'Asian',
 'Black',
 'Hispanic Latino',
 'White Anglo']

In [90]:
geodf

Unnamed: 0,geometry,OBJECTID,FIPS,CountyName,County,SapCountyId,DOTDistrictID,DOTDivisionID,SAP_CNTY_NBR,CNTY_NBR,...,Total Clients,Median Monthly Income,Average Age,Female,Male,American Indian,Asian,Black,Hispanic Latino,White Anglo
0,"POLYGON ((-78.90607 35.86810, -78.90611 35.867...",7,183,Wake,WAKE,92,1,5,92,91,...,1045.0,1028.0,72.0,749.0,294.0,4.0,27.0,791.0,21.0,161.0
1,"POLYGON ((-78.25598 35.81813, -78.25583 35.818...",8,69,Franklin,FRANKLIN,35,3,5,35,34,...,109.0,1000.0,73.0,73.0,36.0,3.0,0.0,83.0,0.0,22.0
2,"POLYGON ((-78.01193 34.73198, -78.01213 34.732...",9,141,Pender,PENDER,71,1,3,71,70,...,31.0,849.0,75.0,19.0,12.0,0.0,0.0,30.0,0.0,1.0
3,"POLYGON ((-77.71049 34.29797, -77.71080 34.298...",10,129,New Hanover,NEW HANOVER,65,3,3,65,64,...,149.0,1007.0,74.0,124.0,25.0,0.0,0.0,60.0,3.0,76.0
4,"POLYGON ((-78.80234 36.23580, -78.80268 36.235...",11,63,Durham,DURHAM,32,2,5,32,31,...,685.0,975.0,71.0,455.0,230.0,4.0,4.0,586.0,8.0,68.0
5,"POLYGON ((-78.96899 36.13432, -78.96894 36.134...",12,135,Orange,ORANGE,68,1,7,68,67,...,267.0,1014.0,72.0,198.0,68.0,2.0,10.0,163.0,4.0,78.0
6,"POLYGON ((-79.01630 35.86322, -79.02654 35.862...",13,37,Chatham,CHATHAM,19,1,8,19,18,...,316.0,1079.0,70.0,215.0,100.0,2.0,2.0,171.0,11.0,117.0
7,"POLYGON ((-79.09623 35.19199, -79.09590 35.192...",15,85,Harnett,HARNETT,43,2,6,43,42,...,5.0,1000.0,72.0,3.0,2.0,0.0,0.0,4.0,0.0,1.0
8,"POLYGON ((-79.45876 35.04365, -79.45841 35.044...",16,125,Moore,MOORE,63,2,8,63,62,...,63.0,1125.0,70.0,40.0,23.0,0.0,0.0,36.0,0.0,27.0
9,"POLYGON ((-77.18912 35.41837, -77.18945 35.418...",19,147,Pitt,PITT,74,1,2,74,73,...,625.0,997.0,72.0,467.0,153.0,1.0,0.0,531.0,2.0,61.0
