In [3]:
# Date: Dec 6, 2022
# Author: Sashka Warner
# Desc: Prep sites data for use in web app
import pandas as pd

In [4]:
# Read the raw sites export into a DataFrame. The path is relative to the
# notebook's working directory.
sites_raw = pd.read_csv(
    filepath_or_buffer="../../uwwi_datasets/uwwi_dataset_sites.csv",
)

# Preview the first five rows as the cell output
sites_raw.head(n=5)

Unnamed: 0,AgencySystem_Name,Agency_Id,Site_Id,Site_AgencyId,Site_CreateStamp,Site_EditStamp,Site_AuditStamp,Site_Status,SiteSystem_Active,SiteSystem_Name,...,SiteAddressus_SiteAddressus_validated,SiteAddressus_SiteAddressus_custom_location,SiteAddressus_SiteAddressus_zip_latitude,SiteAddressus_SiteAddressus_zip_longitude,SiteAddressus_SiteAddressus,SiteOption_PermanentlyInactiveSite,SiteOption_RecordOwner,SiteOption_Accessibility,SiteCustom_NonStandardHoursText,SiteHoursofoperation_ModuleHoursofoperation.open
0,INTEGRATED COMMUNITY SOLUTIONS,1,414.0,1.0,2017-03-16T10:32:21.216012-05:00,2022-05-12T12:53:25.967449-05:00,,active,Yes,INTEGRATED COMMUNITY SOLUTIONS,...,True,False,44.489906,-88.06991,"2605 S ONEIDA ST Suite 106 GREEN BAY WI, 54304",[],['BCUW'],[],Monday-Friday 8am-4:30pm,"[{'day': -1, 'end_min': None, 'end_hour': None..."
1,INTEGRATED COMMUNITY SOLUTIONS,1,418.0,1.0,2017-03-16T10:32:22.867221-05:00,2021-10-21T19:48:22.74763-05:00,,active,No,ZZZINACTIVE LEAVING HOMELESSNESS BEHIND,...,False,False,44.489906,-88.06991,2605 South Oneida Street Suite 106 Green Bay W...,[],['BCUW'],[],"Monday-Friday, 8:00am-4:30pm","[{'day': -1, 'end_min': None, 'end_hour': None..."
2,INTERIM HEALTH CARE,2,419.0,2.0,2017-03-16T10:32:23.227182-05:00,2022-11-30T13:36:50.613023-06:00,,active,Yes,INTERIM HEALTH CARE,...,False,False,44.542973,-88.05582,"1600 Shawano Avenue Suite 201 Green Bay WI, 54303",[],['BCUW'],[],Monday-Friday 8:30am-5:00pm,"[{'day': -1, 'end_min': None, 'end_hour': None..."
3,zzinactive_INTERNATIONAL TRANSLATORS,3,420.0,3.0,2017-03-16T10:32:23.567045-05:00,2021-07-27T23:57:41.974815-05:00,,active,No,zzinactive_VILLA REAL DBA INTERNATIONAL TRANSL...,...,False,False,44.483376,-88.02269,529 South Jefferson Street Suite 203 Green Bay...,[],['BCUW'],"['Elevators', 'Outside Ramps']","Monday-Friday, 8:00am-5:00pm; Interpreters ava...","[{'day': -1, 'end_min': None, 'end_hour': None..."
4,JACKIE NITSCHKE CENTER,4,421.0,4.0,2017-03-16T10:32:23.869975-05:00,2022-09-28T10:20:58.151543-05:00,,active,Yes,JACKIE NITSCHKE CENTER,...,True,False,44.483376,-88.02269,"630 CHERRY STREET GREEN BAY WI, 54301",[],['BCUW'],[],Business hours: Monday-Thursday 7am-6:30pm; Fr...,"[{'day': -1, 'end_min': None, 'end_hour': None..."


In [5]:
# Build the cleaned `sites` table from the raw export.
#
# Steps, in order:
#   1. keep only the identifier, status and location columns,
#   2. give the location columns short names,
#   3. drop rows whose site latitude or longitude is missing,
#   4. drop rows whose site latitude or longitude is exactly 0,
#   5. renumber the index from 0.
#
# Every step returns a new frame (no `inplace=True`), so `sites_raw` is never
# modified and re-running this cell always produces the same `sites`.
# Selecting the columns already yields a new frame, so the raw data does not
# need to be copied in full first.
sites = (
    # Extract columns of interest
    sites_raw[[
        "AgencySystem_Name",
        "SiteSystem_Name",
        "Agency_Id",
        "Site_Id",
        "Site_AgencyId",
        "Site_Status",
        "SiteAddressus_SiteAddressus_latitude",
        "SiteAddressus_SiteAddressus_longitude",
        "SiteAddressus_SiteAddressus_zip_latitude",
        "SiteAddressus_SiteAddressus_zip_longitude",
        "SiteAddressus_SiteAddressus",
    ]]
    # Rename location columns
    .rename(columns={
        "SiteAddressus_SiteAddressus_latitude": "site_lat",
        "SiteAddressus_SiteAddressus_longitude": "site_long",
        "SiteAddressus_SiteAddressus_zip_latitude": "site_zip_lat",
        "SiteAddressus_SiteAddressus_zip_longitude": "site_zip_long",
        "SiteAddressus_SiteAddressus": "site_address",
    })
    # Remove null lat/long records
    .dropna(subset=["site_lat", "site_long"])
    # Remove sites that have zero values for lat/long
    # TODO: Go back and get zip long/lat if exists?
    .loc[lambda df: (df["site_lat"] != 0) & (df["site_long"] != 0)]
    # Reset index after removing null and zero values
    .reset_index(drop=True)
)

# Make sure no null or zero coordinates survived the filtering above
assert sites[["site_lat", "site_long"]].notna().all().all(), "null lat/long remains"
assert (sites["site_lat"] != 0).all(), "zero latitude remains"
assert (sites["site_long"] != 0).all(), "zero longitude remains"

# Optional manual checks (uncomment to run):
# Check that all of the lat/long data is of numeric type
#all(sites["site_lat"].apply(lambda x: isinstance(x, float)))
#all(sites["site_long"].apply(lambda x: isinstance(x, float)))

# Check if any of the Site IDs have nulls
#any(sites["Site_Id"].isna())

In [12]:
# Check that all sites are active (even though some names have inactive?) TODO: ask about active status?
#sites["Site_Status"].unique()

array(['active'], dtype=object)

In [6]:
# Inspect the sites data
#print(sites.shape)

# Write the cleaned sites table to CSV. Neither the index nor the header row
# is written, so the file contains data rows only.
# NOTE(review): presumably the consumer relies on column position — confirm.
sites.to_csv(
    path_or_buf="../data/uwwi_dataset_sites_cleaned.csv",
    header=False,
    index=False,
)

In [5]:
# Save to JSON
#sites_json = sites[["SiteSystem_Name", "site_lat", "site_long"]]
#sites_json.to_json("./getsites/data/sites.json", orient="index")


In [7]:
#print(sites.shape)

(19478, 11)
