In [6]:
from arcgis.gis import GIS
import pandas as pd
import requests
import json
import sys
import numpy as np
import io
from datetime import date
np.set_printoptions(threshold=sys.maxsize)

## Get raw data from [map](http://acgov.org/maps/food-services.htm)

I tried to [download the data directly](https://developers.arcgis.com/labs/python/download-data/), but it doesn't appear to be available that way, so the cells below query the ArcGIS REST API instead.

Map ID = `4ce2efca2cae4334bb1a2c667ea68374`

ArcGIS REST API [documentation](https://developers.arcgis.com/rest/services-reference/query-feature-service-layer-.htm)

In [3]:
# Query endpoint of the feature layer behind the food-services map.
url = "https://services5.arcgis.com/ROBnTHSNjoZ2Wm1P/arcgis/rest/services/COVID19_Services_v2/FeatureServer/0/query"
# Query params copied from network requests
params = {
    "f":"json",
    "where":"1=1",
    "returnGeometry":"true",
    "spatialRel":"esriSpatialRelIntersects",
    "geometry":"{\"xmin\":-13660615.650249345,\"ymin\":4501059.565786781,\"xmax\":-13490925.447456336,\"ymax\":4570770.135582829,\"spatialReference\":{\"wkid\":102100,\"latestWkid\":3857}}",
    "geometryType":"esriGeometryEnvelope",
    "inSR":"102100",
    "outFields":"USER_Area_Served,USER_Provided_by_Agency,USER_Provided_by__add_website_l,USER_Category,USER_Service,USER_Who_is_Eligible_,USER_Site_Location,USER_Hours_of_Operation,USER_Phone__,USER_Address,USER_City,USER_Zip_Code,USER_Special_Indicator,OBJECTID",
    "orderByFields":"OBJECTID ASC",
    "outSR":"102100"
}
# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors here rather than as a confusing
# KeyError when the response body is parsed later.
# NOTE(review): the service may cap the number of rows returned per request;
# if results look truncated, check the response for a transfer-limit flag.
res = requests.get(url, params=params, timeout=30)
res.raise_for_status()

In [4]:
# Flatten each feature's nested objects into dotted column names
# (e.g. attributes.USER_City, geometry.x), one row per feature.
payload = json.loads(res.text)
raw_df = pd.json_normalize(payload['features'])
raw_df.head(10)

Unnamed: 0,attributes.USER_Area_Served,attributes.USER_Provided_by_Agency,attributes.USER_Provided_by__add_website_l,attributes.USER_Category,attributes.USER_Service,attributes.USER_Who_is_Eligible_,attributes.USER_Site_Location,attributes.USER_Hours_of_Operation,attributes.USER_Phone__,attributes.USER_Address,attributes.USER_City,attributes.USER_Zip_Code,attributes.USER_Special_Indicator,attributes.OBJECTID,geometry.x,geometry.y
0,Berkeley and Albany,Berkeley Food Pantry,https://www.berkeleyfoodpantry.org/,Food Pick-Up Distribution Site,Food pantry,Qualifying Albany and Berkeley residents,Berkeley Food Pantry,"M,W,F, 2-4pm",510-525-2280,1600 Sacramento St.,Berkeley,94702,,1,-13612490.0,4561976.0
1,Albany,City of Albany,https://www.albanyca.org/our-city/covid-19-res...,Services,Portable toilets and hand washing stations,Unsheltered,Albany Community Center,,510-524-9122,1249 Marin Ave.,Albany,94706,,2,-13613590.0,4563608.0
2,Berkeley,Berkeley Unified School District,BUSD,Food Pick-Up Distribution Site,Meal distribution,Youth age 18 and under,Longfellow Middle School,"M,W,F, 11am-1pm",510-644-6360,1522 Ward St.,Berkeley,94703,,3,-13612000.0,4559344.0
3,Berkeley,City of Berkeley,https://www.cityofberkeley.info/Parks_Rec_Wate...,Services,Shower program,Unsheltered,West Campus Pool,"M-F, 5;30-8:30pm; S & Su, 9am-12pm",510-981-5150,2701 Telegraph Ave.,Berkeley,94704,,4,-13609800.0,4559854.0
4,Berkeley,Berkeley Unified School District,BUSD,Food Pick-Up Distribution Site,Meal distribution,Youth age 18 and under,Rosa Parks Elementary,"M,W,F, 11am-1pm",510-644-8812,920 Allston Way,Berkeley,94710,,5,-13613790.0,4560466.0
5,Berkeley,Berkeley Unified School District,BUSD,Food Pick-Up Distribution Site,Meal distribution,Youth age 18 and under,Berkeley High School,"M,W,F, 11am-1pm",510-644-6120,1980 Allston Way,Berkeley,94704,,6,-13611130.0,4560908.0
6,Berkeley,Berkeley Unified School District,BUSD,Food Pick-Up Distribution Site,Meal distribution,Youth age 18 and under,Martin Luther King Jr. Middle School,"M,W,F, 11am-1pm",510-644-6280,1781 Rose St.,Berkeley,94703,,7,-13611900.0,4562810.0
7,Berkeley,Berkeley Unified School District,BUSD,Food Pick-Up Distribution Site,Meal distributions,Youth age 18 and under,Willard Middle School,"M,W,F, 11am-1pm",510-644-6330,2425 Stuart St.,Berkeley,94705,,8,-13609710.0,4559694.0
8,Berkeley,Berkeley Unified School District,BUSD,Food Pick-Up Distribution Site,Meal distribution,Youth age 18 and under,Berkeley Arts Magnet Elementary,"M,W,F, 11am-1pm",510-644-6225,2015 Virginia St.,Berkeley,94709,,9,-13611070.0,4562047.0
9,Dublin,Dublin Unified School District,https://cityservetrivalley.org/,Food Pick-Up Distribution Site,Meal distribution,Youth age 18 and under,Elenor Murray Fallon Middle School,"M-F, 10am-12pm",925-875-9376,3601 Kohnen Way,Dublin,94568,Friday grocery pick-up for any community member,10,-13566170.0,4539323.0


In [7]:
# Snapshot the untouched API result under a date-stamped name (MM_DD_YYYY)
# before any derived columns are added.
today = date.today()

date_stamp = today.strftime("%m_%d_%Y")
filename = 'alameda_raw_%s.csv' % date_stamp
raw_df.to_csv(filename)

## Convert columns
Manual mappings from Alameda columns to mega map database columns based on data dictionary.

### Resource

In [9]:
# Known service descriptions, grouped by the resource type they map to.
# Every entry is lower-cased once here so lookups can be case-insensitive;
# entries that collapse to the same lower-cased text are kept as-is.

# Services coded as 'grocery'.
services_grocery = [service.lower() for service in (
    'Delivery of free groceries',
    'Food delivery',
    'Emergency Food Distribution program',
    'Emergency Food Provider',
    'Emergency food pantry access',
    'Emergency food provider',
    'Food Pantry',
    'Food pantry',
    'Grab & Go Bags of Groceries',
    'Non-perishable food for child care providers caring for essential worker\'s children',
    'Produce pick-up',
    'Provide transportation for grocery shopping',
)]

# Services coded as 'meal'.
services_meal = [service.lower() for service in (
    'Delivery of hot and frozen lunches',
    'Food distribution',
    'Home delivered meals',
    'Home-delivered meals',
    'Bagged dinners',
    'Emergency Food Provider',
    'Emergency food pantry access',
    'Emergency food provider',
    'Family meal program',
    'Food distirbution',
    'Food Distribution',
    'Food distribution',
    'Food distribution and daily bagged lunch',
    'Food pick-up distribution site',
    'Grab and Go Meals',
    'Home delivered meals',
    'Lunch meal distribution',
    'Meal distirbution',
    'Meal distribution',
    'Meal distribution',
    'Meal distribution Curbside-To-Go',
    'Meal distributions',
    'Meal pick-up',
    'Meal-to-go',
    'Take home meals',
    'Take-out breakfast',
    'Breakfast to-go, showers, and laundry',
    'Clothing, emergency transportation, and limited food delivery',
    'Sat.: Hot food, hygiene bags, clothing, snack bags, & other necessities Sun.: Hot coffee, breakfast food & pastries, snacks & other necessities',
    'Showers and lunch distribution',
    'Showers, meals-to-go, and laundry',
    'Showers, meals-to-go, laundry, and Abode services',
    'To go breakfast, lunch, shower, and laundry services',
    'To go food options, hygiene products, and diapers',
)]

# Services coded as 'enrollment_support'.
services_enrollment = [service.lower() for service in (
    'Adult Protective Services, Area Agency on Aging',
    'Food vouchers, breastfeeding services, and breast pump pick-up',
    'Foster Care benefits',
    'In-Home Supportive Services (IHSS)',
    'Public Benefits--CalFresh, CalWORKs, General Assistance, Medi-Cal, & Refugee Cash Assistance',
)]

# Services coded as 'health'.
services_health = [service.lower() for service in (
    'Portable toilets and hand washing stations',
    'Shower Program',
    'Shower program',
)]

def code_resource(Category, Service):
  """Map a feature's category and service text to a resource code.

  Rules are applied in order and the first hit wins:
    1. Explicit categories ('COVID-19 Testing', 'Hygiene Stations',
       'Food Pick-Up and Home Delivery', 'Shelter').
    2. Keywords inside the service text (meal words first, then grocery words).
    3. Exact match against the services_* lookup lists.

  Args:
    Category: value of the category column, compared exactly (case-sensitive).
    Service: free-text service description; may be None/NaN when the feed
      leaves it blank.

  Returns:
    One of 'health', 'grocery', 'housing', 'meal', 'enrollment_support',
    or '' when nothing matches.
  """
  # First, check explicit categories
  if Category in ('COVID-19 Testing', 'Hygiene Stations'):
    return 'health'
  if Category == 'Food Pick-Up and Home Delivery':
    return 'grocery'
  if Category == 'Shelter':
    return 'housing'

  # A missing service arrives as None/NaN, which has no .lower(); with no
  # text to inspect there is nothing left to match on.
  if not isinstance(Service, str):
    return ''

  # Make case insensitive and ignore stray surrounding whitespace so the
  # exact-match lookups below are not defeated by padding.
  Service = Service.strip().lower()

  # Next, check for general patterns in services we didn't explicitly cover
  if any(word in Service for word in ('meal', 'breakfast', 'lunch', 'dinner')):
    return 'meal'
  if any(word in Service for word in ('grocery', 'groceries')):
    return 'grocery'

  # Finally, check explicit services (grocery is checked before meal, so a
  # service present in both lists is coded as grocery)
  if Service in services_grocery:
    return 'grocery'
  if Service in services_meal:
    return 'meal'
  if Service in services_enrollment:
    return 'enrollment_support'
  if Service in services_health:
    return 'health'

  # If nothing matches, return empty str
  return ''

In [10]:
def _resource_for_row(row):
  """Classify one feature row from its category and service columns."""
  return code_resource(row['attributes.USER_Category'], row['attributes.USER_Service'])

raw_df['resource'] = raw_df.apply(_resource_for_row, axis=1)

### Who is eligible

In [11]:
# Lookup lists for the "who is eligible" text. Each list names the exact
# strings that switch on one audience flag; a string may appear in more than
# one list when it covers several audiences.
# NOTE(review): entries are matched exactly, and several contain misspellings
# ('wormen', 'familes', 'resiedents', 'onlu') -- presumably copied verbatim
# from the feed. Confirm against the data before "correcting" any of them.

eligible_children = [
    'Youth age 18 and under',
    'Youth ages 18-24',
    'Children and caregivers',
]

# Restricted to an organization's existing clients, patients or members.
eligible_clients = [
    'Tri City Café/ Meals on Wheels Homebond Seniors ( 60 and older /Older adults in need in the 94608',
    'Parents of youth enrolled in the program.',
    '200 families that are already served by the organization and classified as high need',
    'Kaiser patients only',
    'ONLY Kaiser Patients',
    'Tri-City Health Members',
    'La Clinica Alta Vista Members',
    'Stanford Health members and community partners',
    'Asian Health Services patients',
    'Tri-City patients',
    'Davis St. patients',
    'Open to existing members and those without a primary doctor.',
    'Axis Community patients only',
    'Sutter Health patients only',
    'Tiburcio Vasquez patients only',
    'Lifelong members & Unsheltered',
    'Roots patients and General public',
]

eligible_disabled = [
    'Age 60+ homebound older adults',
    'Older adults and disabled individuals',
    'Homebound older adults',
]

eligible_health_provider = [
    'First Responders, Health Care Workers, & General Public with symptoms',
    'First Responders, General Public with symptoms,Uninsured, & Unsheltered',
    'Long Term Care Facilities residents, unsheltered, Health Care workers, and outbreaks',
]

eligible_homeless = [
    'Unsheltered',
    'Lifelong members & Unsheltered',
    'First Responders, General Public with symptoms,Uninsured, & Unsheltered',
    'Long Term Care Facilities residents, unsheltered, Health Care workers, and outbreaks',
    'Self-reported unsheltered or low income status',
]

eligible_immigrants = ['Immigrants']

eligible_low_income = [
    'Self-reported unsheltered or low income status',
    'Low income familes',
    'D6 Low income familes',
    'Low income older adults',
]

eligible_native_american = ['Native American patients only']

eligible_public = [
    'All',
    'Families/individuals in need',
    'Families and individuals in need',
    'Families',
    'Everyone',
    'General Public',
    'Roots patients and General public',
]

eligible_residents = [
    'Qualifying Albany and Berkeley residents',
    'For on-site residents only',
    'For on-site resiedents only',
    'For on-site residents onlu',
    'For on site residents only',
    'San Leandro residents',
    'Fremont, Newark, and Union City residents',
    'Cherryland residents',
    'Berkeley and Albany residents',
    'Pleasanton, Livermore, and Dublin residents',
    'Livermore residents',
    'Lifelong members and/or individuals who live and work in Berkeley',
]

eligible_seniors = [
    'Older adults',
    'Older adults and families',
    'Ages 55+ during COVID-19 Shelter in Place',
    'Older adults and single parents',
    'Older adults and disabled individuals',
    'Older Adults',
    'Age 60+ Older Adults',
    'Homebound older adults',
    'Age 60+ homebound older adults',
    'Low income older adults',
]

eligible_women = [
    'Pregnant, breastfeeding wormen, infants, and children under 5',
    'Pregnant, breastfeeding wormen, infants, and children under 5',
    'Caregivers and providers to a foster child',
]

In [12]:
# Trim surrounding whitespace so the exact-match eligibility lookups are not
# defeated by padded values.
eligibility_column = 'attributes.USER_Who_is_Eligible_'
raw_df[eligibility_column] = raw_df[eligibility_column].str.strip()

In [13]:
# Output flag column -> list of eligibility strings that switch it on.
# Dict order is the order in which the columns are added to raw_df.
eligibility_flags = {
    'children': eligible_children,
    'clients': eligible_clients,
    'disabled': eligible_disabled,
    'health_provider': eligible_health_provider,
    'homeless': eligible_homeless,
    'immigrants': eligible_immigrants,
    # NOTE(review): the only flag name containing a space -- confirm it
    # matches the target schema before renaming.
    'low income': eligible_low_income,
    'native_american': eligible_native_american,
    'public': eligible_public,
    'residents': eligible_residents,
    'seniors': eligible_seniors,
    'women': eligible_women,
}

who_is_eligible = raw_df['attributes.USER_Who_is_Eligible_']
for flag_column, accepted_values in eligibility_flags.items():
  # Vectorized exact-match membership test: 1 when the row's eligibility text
  # is in the list, 0 otherwise (missing values never match).
  raw_df[flag_column] = who_is_eligible.isin(accepted_values).astype('int64')

### Med testing

In [14]:
# 1 for rows whose category is exactly 'COVID-19 Testing', else 0. Compared
# on the whole column at once rather than row by row; missing categories
# compare unequal and therefore yield 0.
raw_df['med_testing'] = (raw_df['attributes.USER_Category'] == 'COVID-19 Testing').astype('int64')

### Renamed columns

In [15]:
# Target column name -> source column copied into it unchanged.
# The original attributes.* columns are kept alongside the copies.
renamed_columns = {
    'notes': 'attributes.USER_Special_Indicator',
    'weblink': 'attributes.USER_Provided_by__add_website_l',
    'contact': 'attributes.USER_Phone__',
    'zip': 'attributes.USER_Zip_Code',
    'city': 'attributes.USER_City',
    'address': 'attributes.USER_Address',
    'provider_addloc': 'attributes.USER_Site_Location',
    'provider_name': 'attributes.USER_Provided_by_Agency',
}
for target_column, source_column in renamed_columns.items():
  raw_df[target_column] = raw_df[source_column]

### Days hours

Example values
```
normal, range: 'M-F, 12-1pm'
normal, specific days: 'M & W, 9am-12p'
normal, diff hours: 'M, T, & W, 1-5pm; Th, 1-5pm; F, 12-4pm', 'M-Sat., 8:30am-4pm & Sun. 1-4pm'
----------------------------------------------------------------
no time specified: 'M & TH meals delivered (7 meals total, 3 on Monday and Thursdays)'
time in words: 'Th, evenings'
call ahead: 'call ahead to confirm', 'Pending'
two times in a day: 'M-F, 8:30am-12pm  & 1-4:30pm'
certain days of the month: '2nd & 4th Tues., 2:30-4pm'
```

In [16]:
import re

# Lower-case day abbreviation -> weekday position (Monday = 0 ... Sunday = 6).
# Each tuple holds the accepted spellings for one day; the tuple's position
# in the list is that day's index.
abbrev_to_index = {
  alias: weekday
  for weekday, aliases in enumerate([
    ('m', 'mon'),
    ('t', 'tues'),
    ('w', 'wed'),
    ('th', 'thr', 'thurs'),
    ('f', 'fri'),
    ('s', 'sat'),
    ('su', 'sun'),
  ])
  for alias in aliases
}

In [17]:
def parse_days_hours(dh_str, day_index=None):
  """Split a free-text "days, hours" string into per-weekday hour strings.

  The text is lower-cased and split on ';' into segments. In each segment the
  hours are the span from the first digit to the last 'p', 'a' or 'm'; what
  remains is read as a ','/'&'-separated list of day abbreviations or
  'start-end' day ranges, and the hours are assigned to each of those days.

  Args:
    dh_str: hours-of-operation text, e.g. 'M-F, 12-1pm'. Non-string values
      (None/NaN) give the all-blank result.
    day_index: optional mapping of lower-case day abbreviation to weekday
      position (0 = Monday ... 6 = Sunday). Defaults to the module-level
      abbrev_to_index.

  Returns:
    A list of 8 items: hour strings for Monday through Sunday ('' when
    nothing was parsed for that day) followed by a call-in-advance flag.
    Text containing 'call' returns 'call number' for every day with flag 1.
    If any segment has no recognizable hours, the whole result is blank
    with flag 0.
  """
  if day_index is None:
    day_index = abbrev_to_index

  day_cols = ['']*7 + [0]

  if not isinstance(dh_str, str):
    return day_cols

  dh_str = dh_str.strip().lower()

  if 'call' in dh_str:
    return ['call number']*7 + [1]

  for dh in dh_str.split(';'):
    # Remove periods ('Sat.' -> 'sat'); plain replace avoids the invalid
    # '\.' escape the non-raw regex string used to carry.
    dh = dh.strip().replace('.', '')

    # Parse hours
    hours = re.search(r'\d.*[pam]', dh)
    if not hours:
      return ['']*7 + [0]
    hours_str = hours.group(0)                  # Extract hours
    dh = dh[:hours.start()] + dh[hours.end():]  # And remove from dh

    # Assign hours to the correct day(s)
    for day in re.split('[,&]', dh):
      day = day.strip()
      if not day:                   # Empty day
        continue
      if '-' in day:                # Day range (ex. M-F)
        # Strip each endpoint so 'm - f' is read the same as 'm-f'.
        endpoints = [part.strip() for part in day.split('-')]
        if len(endpoints) == 2 and all(part in day_index for part in endpoints):
          start_index = day_index[endpoints[0]]
          end_index = day_index[endpoints[1]]
          # Walk forward modulo 7 so a range that crosses the end of the
          # week (ex. Su-T) covers Sun, Mon, Tues instead of nothing.
          span = (end_index - start_index) % 7
          for offset in range(span + 1):
            day_cols[(start_index + offset) % 7] = hours_str
      elif day in day_index:        # Single day
        day_cols[day_index[day]] = hours_str

  return day_cols

In [18]:
# parse_days_hours yields one 8-item list per row (Mon..Sun hours plus the
# call-in-advance flag); zip(*...) transposes those rows into 8 columns.
hours_by_row = raw_df['attributes.USER_Hours_of_Operation'].map(parse_days_hours)
(
  raw_df['mon'],
  raw_df['tues'],
  raw_df['wed'],
  raw_df['thr'],
  raw_df['fri'],
  raw_df['sat'],
  raw_df['sun'],
  raw_df['call_in_advance'],
) = zip(*hours_by_row)

### Lat long
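The API returns point geometry in Web Mercator (EPSG:3857, requested via `outSR=102100`), so the coordinates need to be converted to latitude/longitude (EPSG:4326).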

In [19]:
from pyproj import Transformer

# Web Mercator (EPSG:3857) -> geographic coordinates (EPSG:4326).
# NOTE(review): the (lat, lon) unpacking below relies on pyproj's default
# axis order for EPSG:4326 -- revisit it if always_xy=True is ever passed.
transformer = Transformer.from_crs("epsg:3857", "epsg:4326")

def transform_geometry_coords(x, y):
  """Convert EPSG:3857 x/y to EPSG:4326 using the module-level transformer.

  Args:
    x: easting(s) -- a scalar or an array of values.
    y: northing(s), same shape as x.

  Returns:
    The pair returned by transformer.transform, unpacked by the caller
    as (lat, lon).
  """
  return transformer.transform(x, y)

# Transform both coordinate columns in one call instead of once per row.
raw_df['lat'], raw_df['lon'] = transform_geometry_coords(
  raw_df['geometry.x'].to_numpy(dtype=float),
  raw_df['geometry.y'].to_numpy(dtype=float),
)

### Download converted data

In [20]:
# Write the frame, including every derived column added above, to a
# date-stamped CSV (MM_DD_YYYY).
today = date.today()
date_stamp = today.strftime("%m_%d_%Y")
filename = 'alameda_transformed_%s.csv' % date_stamp
raw_df.to_csv(filename)