In [31]:
using HTTP
using JSON
using CSV, DataFrames
using ProgressMeter

In [5]:
# Set the COLUMNS environment variable to 1000 (presumably to widen table display — confirm).
ENV["COLUMNS"] = string(1000);

In [3]:
# Load the facility-level weekly capacity/admissions timeseries CSV into a DataFrame.
rawdata =
    CSV.File("../rawdata/reported_hospital_capacity_admissions_facility_level_weekly_average_timeseries_20201221_0.csv") |>
    DataFrame;

In [49]:
# Build the hospital table: keep only the identifying/location columns, title-case the
# name, address and city (leaving missing values untouched), and keep the first row seen
# for each `hospital_id`.
data = unique!(
    select(
        rawdata,
        :hospital_name => ByRow(titlecase) => :hospital,
        :hospital_pk => :hospital_id,
        :state,
        :address => ByRow(s -> ismissing(s) ? missing : titlecase(s)) => :address,
        :city => ByRow(s -> ismissing(s) ? missing : titlecase(s)) => :city,
        :zip,
        :fips_code,
    ),
    :hospital_id,
);

In [50]:
# Preview the first five rows of the deduplicated hospital table.
first(data, 5)

Unnamed: 0_level_0,hospital,hospital_id,state,address,city,zip,fips_code
Unnamed: 0_level_1,String,String,String,String?,String?,Int64?,Int64?
1,Surgery Center Of Zachary,3b081d5ef1c552538e4af4aa593a857bb922a4f364a412e69f912f826ae12879,LA,missing,missing,70791,missing
2,Healthpromed,ff9032c172057190bf52f523d253c79af2d5f74d007084e83bb0995c637403d1,PR,missing,missing,775,missing
3,Hospital San Antonio,df2363988746840134806aa7b2accf9c89259601776986d49f0c579f6d84532e,PR,"#18 Norte, Calle Dr",Mayagüez,680,missing
4,Cdt Susana Centeno,36db6bad3679dfdcccc301fc608438109ad907943c702fb3d57dc50cb7b5ba0b,PR,missing,missing,765,missing
5,Centro Medico Correccional De Bayamon,9922a3f069ff4686fe467885206fb1172f609233cfe517f5de214545d2458253,PR,missing,missing,960,missing


In [51]:
"""
    get_address(row)

Build a single-line address string for `row` by joining its `address`, `city`, `state`
and `zip` fields, in that order, with `", "`.

Fields whose value is `missing` are left out. If every field is missing the result is
the empty string. Non-string fields (e.g. an integer `zip`) are rendered with their
printed representation.
"""
function get_address(row)
    # `skipmissing` drops absent components; `join` prints each remaining value, so no
    # untyped accumulator vector or explicit `string` conversion is needed.
    parts = (row.address, row.city, row.state, row.zip)
    return join(skipmissing(parts), ", ")
end;

In [28]:
# Google Geocoding API key. It is read from the GOOGLE_MAPS_API_KEY environment variable
# so that the secret is never stored in the notebook or committed to version control.
# NOTE(review): a literal key was previously embedded on this line; treat it as leaked
# and rotate it.
apikey = get(ENV, "GOOGLE_MAPS_API_KEY", "");
if isempty(apikey)
    @warn "GOOGLE_MAPS_API_KEY is not set; geocoding requests will be sent without a key."
end

In [29]:
# Build the Geocoding API request URL for a free-form address string; the address is
# URI-escaped and the key is taken from the global `apikey` at call time.
urltemplate = addr -> string(
    "https://maps.googleapis.com/maps/api/geocode/json?address=",
    HTTP.escapeuri(addr),
    "&key=",
    apikey,
);

In [None]:
# Accumulators filled by the geocoding loop: the parsed API responses and one location
# record per hospital. Both are untyped because their elements are heterogeneous.
raw_responses = Any[]
hospital_locations = Any[]

In [61]:
# Geocode every hospital in `data` with the Google Geocoding API. For each row the parsed
# JSON response is appended to `raw_responses` and one location record (a NamedTuple) is
# appended to `hospital_locations`; rows with no match get a record whose geocoded fields
# are `missing`.
@showprogress for (i, row) in enumerate(eachrow(data))
    # Resume support: each processed row appends exactly one location record, so rows
    # already covered by `hospital_locations` are skipped when this cell is re-run after
    # a failure. This replaces a hard-coded restart index, which silently skipped the
    # leading rows on a fresh run.
    if i <= length(hospital_locations)
        continue
    end

    addr = get_address(row)
    http_response = HTTP.request("GET", urltemplate(addr))
    response = JSON.parse(String(http_response.body))

    # Quota, authentication and request errors also come back with empty `results`.
    # Stop here rather than record them as "no match", so the run can be resumed once
    # the cause is fixed.
    status = get(response, "status", "MISSING_STATUS")
    if !(status in ("OK", "ZERO_RESULTS"))
        detail = get(response, "error_message", "no error message")
        error("Geocoding failed for row $i ($(row.hospital)): $status ($detail)")
    end

    results = response["results"]
    if isempty(results)
        loc = (
            hospital_name = row.hospital,
            hospital_id = row.hospital_id,
            address_v1 = addr,
            address_v2 = missing,
            lat = missing,
            long = missing,
            place_id = missing,
        )
        println("Error! No matching place found for: $(row.hospital)")
    else
        # Only the first candidate returned by the API is used.
        best = results[1]
        loc = (
            hospital_name = row.hospital,
            hospital_id = row.hospital_id,
            address_v1 = addr,
            address_v2 = best["formatted_address"],
            lat = best["geometry"]["location"]["lat"],
            long = best["geometry"]["location"]["lng"],
            place_id = best["place_id"],
        )
    end

    # Append both records together, after everything that can throw, so the two
    # collections stay aligned and the resume check above remains valid.
    push!(raw_responses, response)
    push!(hospital_locations, loc)
end

[32mProgress:  46%|███████████████████                      |  ETA: 0:00:04[39m

Error! No matching place found for: Sanford Worthington Medical Center


[32mProgress:  61%|█████████████████████████▏               |  ETA: 0:03:31[39m

Error! No matching place found for: Peconic Bay Medical Center


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:20:20[39m


In [63]:
# Persist the parsed API responses as JSON (4-space indent).
open(
    io -> JSON.print(io, raw_responses, 4),
    "../rawdata/hhs_hospital_locs_raw.json",
    "w",
)

In [64]:
# Persist the location records as JSON (4-space indent), dropping exact duplicate records.
open(
    io -> JSON.print(io, unique(hospital_locations), 4),
    "../rawdata/hhs_hospital_locs_processed.json",
    "w",
)

In [66]:
# Index the location records by hospital id. If an id appears more than once, the record
# that comes last in `hospital_locations` is the one kept.
hospital_locations_dict = Dict(
    loc.hospital_id => loc for loc in hospital_locations
);

In [67]:
# Start from an independent copy of `data` and attach the geocoding results, looking up
# each row's record by `hospital_id`. Each pair below maps a new column name to the
# field of the location record it is filled from.
data_v2 = deepcopy(data);
for (column, field) in (
    :lat => :lat,
    :long => :long,
    :address_alt_1 => :address_v1,
    :address_alt_2 => :address_v2,
    :google_place_id => :place_id,
)
    data_v2[!, column] = [
        getproperty(hospital_locations_dict[id], field) for id in data_v2.hospital_id
    ]
end

In [76]:
"""
    mean(xs)

Return the arithmetic mean of the non-missing elements of `xs`: their sum divided by the
number of elements that are not `missing`.
"""
function mean(xs)
    total = sum(skipmissing(xs))
    n_present = count(!ismissing, xs)
    return total / n_present
end;

In [79]:
# Impute coordinates for hospitals that could not be geocoded: every missing latitude /
# longitude is overwritten in place with the mean of the non-missing values in that column.
replace!(data_v2.lat, missing => mean(data_v2.lat));
replace!(data_v2.long, missing => mean(data_v2.long));

In [80]:
# Write the final hospital location table to CSV.
CSV.write("../data/hhs_hospital_locations.csv", data_v2)

"../data/hhs_hospital_locations.csv"