In [1]:
using HTTP
using JSON
using Dates
using CSV, DataFrames
using ProgressMeter

In [2]:
# Report a display width of 1000 columns through the COLUMNS environment
# variable (presumably so wide tables are not truncated when rendered).
# Environment values are stored as strings, so the string form is set directly.
ENV["COLUMNS"] = "1000";

In [3]:
# Read the raw facility-level CSV export into a DataFrame.
rawdata = CSV.File("../rawdata/md_data_2020_12_01.csv") |> DataFrame;

In [4]:
"""
    compute_id(h)

Return a short identifier for `h`: the letter `h` followed by the last eight
characters of the decimal string form of `hash(h)`.

NOTE(review): the id is only as stable as `hash` itself; confirm that `hash`
values are reproducible across the Julia versions and machines that need to
agree on these ids.
"""
function compute_id(h)
    hashtext = string(hash(h))
    return "h" * hashtext[end-7:end]
end;

In [5]:
# Sorted list of the distinct values in the "Facility Name" column.
hospitals = rawdata[!, "Facility Name"] |> unique |> sort;
"""
    convert_date(d)

Parse `d` with the `m/d/y` date format and shift the result forward by 2000
years, so that a two-digit year such as `20` becomes `2020`.

NOTE(review): a four-digit year in the input would also be shifted by 2000;
this assumes the source always uses two-digit years — confirm.
"""
function convert_date(d)
    parsed = Date(d, dateformat"m/d/y")
    return parsed + Year(2000)
end;

# Per-hospital metadata table: rename the raw columns, derive `hospital_id`
# from the facility name, keep one row per hospital, and order by name.
# `unique!` and `sort!` both return the table they mutate, so the three steps
# are written as one nested expression.
data = sort!(
    unique!(
        select(
            rawdata,
            "Facility Name" => :hospital,
            "Facility Name" => ByRow(compute_id) => :hospital_id,
            "Region" => :healthcare_region,
            "County" => :county_name,
        ),
        :hospital,
    ),
    :hospital,
);

In [6]:
# Flag each hospital by whether it also appears in the forecast data set.
forecast_data = CSV.File("../data/data_forecast.csv") |> DataFrame
# Hospital names present in both tables, sorted.
forecast_hosp = sort(unique(forecast_data.hospital) ∩ unique(data.hospital))
# `in(forecast_hosp)` is the curried membership test `h -> h in forecast_hosp`.
data.in_forecast = map(in(forecast_hosp), data.hospital);

In [7]:
# Preview the first five rows of the hospital metadata table.
first(data, 5)

Unnamed: 0_level_0,hospital,hospital_id,healthcare_region,county_name,in_forecast
Unnamed: 0_level_1,String,String,String,String,Bool
1,Anne Arundel Medical Center,h39740093,Region III,Anne Arundel County,1
2,Atlantic General Hospital,h92616877,Region IV,Worcester County,1
3,Baltimore Convention Center Field Hospital,h94720559,Region III,Baltimore City,0
4,Baltimore Washington Medical Center,h75227743,Region III,Anne Arundel County,1
5,Bowie Medical Center,h26261837,Region V,Prince Georges County,0


In [8]:
"""
    get_address(h)

Build the free-text query used for geocoding: the `hospital` and `county_name`
properties of `h` followed by the literal state name, separated by `", "`.
"""
function get_address(h)
    return string(h.hospital, ", ", h.county_name, ", Maryland")
end;

In [9]:
"""
    find_component(components, key)

Return the `"long_name"` of the first entry in `components` whose `"types"`
collection contains `key`, or `missing` when no entry matches.

Each entry is indexed by the string keys `"types"` and `"long_name"`.
"""
function find_component(components, key)
    # Collect every matching entry first; only the first one is used.
    matching = [entry for entry in components if key in entry["types"]]
    isempty(matching) && return missing
    return first(matching)["long_name"]
end;

In [10]:
# Google Geocoding API key.
# The key is read from the GOOGLE_MAPS_API_KEY environment variable rather
# than being written into the notebook, so that the credential is never
# committed with the source. Any key that was previously stored in this cell
# should be treated as exposed and rotated.
apikey = get(ENV, "GOOGLE_MAPS_API_KEY", "");
if isempty(apikey)
    @warn "GOOGLE_MAPS_API_KEY is not set; geocoding requests will not be authorized"
end;

In [11]:
# Build the geocoding request URL for a free-text address. The address is
# URI-escaped; the key is taken from the global `apikey` at call time.
urltemplate = function (address)
    escaped = HTTP.escapeuri(address)
    return "https://maps.googleapis.com/maps/api/geocode/json?address=$(escaped)&key=$(apikey)"
end;

In [12]:
# Accumulators filled by the geocoding loop: the parsed API payloads as
# received, and one flattened record per hospital.
raw_responses = Any[];
hospital_locations = Any[];

In [13]:
"""
    location_record(row, result)

Assemble one output record for the hospital in `row`.

`result` is the first geocoding match, or `nothing` when the service returned
no match. In the no-match case only the geocoded fields (`address`, `city`,
`zipcode`, `lat`, `long`, `place_id`) are `missing`; `county` and
`healthcare_region` come from `row` in both cases, since they are already
known from the input table.
"""
function location_record(row, result)
    found = result !== nothing
    # With no match there are no components, so both lookups yield `missing`.
    components = found ? result["address_components"] : []
    coords = found ? result["geometry"]["location"] : nothing
    return (
        hospital = row.hospital,
        hospital_id = row.hospital_id,
        in_forecast = row.in_forecast,
        address = found ? result["formatted_address"] : missing,
        county = row.county_name,
        healthcare_region = row.healthcare_region,
        city = find_component(components, "locality"),
        zipcode = find_component(components, "postal_code"),
        lat = found ? coords["lat"] : missing,
        long = found ? coords["lng"] : missing,
        place_id = found ? result["place_id"] : missing,
    )
end

# Geocode every hospital: one request per row. The parsed payload is always
# kept in `raw_responses`, and exactly one record per row is appended to
# `hospital_locations`, so both stay aligned with `data`.
@showprogress for row in eachrow(data)
    url = urltemplate(get_address(row))

    http_response = HTTP.request("GET", url)
    payload = JSON.parse(String(http_response.body))
    push!(raw_responses, payload)

    # Error payloads may not carry a "results" entry; treat that as no match.
    results = get(payload, "results", [])
    if isempty(results)
        @warn "No matching place found for: $(row.hospital)" status = get(payload, "status", missing)
        push!(hospital_locations, location_record(row, nothing))
    else
        # Only the first (best-ranked) match is used.
        push!(hospital_locations, location_record(row, results[1]))
    end
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:04[39m


In [14]:
# Save every parsed geocoding payload as JSON, indented by 4 spaces.
open(io -> JSON.print(io, raw_responses, 4), "../rawdata/hospital_locs_raw.json", "w")

In [15]:
# Save the de-duplicated per-hospital records as JSON, indented by 4 spaces.
open(
    io -> JSON.print(io, unique(hospital_locations), 4),
    "../rawdata/hospital_locs_processed.json",
    "w",
)

In [16]:
# Turn the collected per-hospital records into a table (one column per field).
data_out = hospital_locations |> DataFrame;

In [17]:
# Every row gets the same constant state name and abbreviation.
data_out[!, :state] = fill("Maryland", size(data_out, 1));
data_out[!, :state_abbrev] = fill("MD", size(data_out, 1));

In [18]:
"""
    zipcode_cvt(z)

Return the string form of `z`, left-padded with `'0'` to a width of five
characters (values that are already five or more characters long are returned
unpadded).
"""
function zipcode_cvt(z)
    return lpad(string(z), 5, '0')
end;

In [19]:
# Attach HSA / HRR identifiers and names to each hospital by zipcode.
hsahrr_data = CSV.File("../rawdata/ZipHsaHrr18.csv") |> DataFrame
# Lookup table keyed by the zero-padded 5-character zipcode string.
hsahrr_dict = Dict(zip(zipcode_cvt.(hsahrr_data.zipcode18), eachrow(hsahrr_data)));

# Stand-in used for zipcodes that are absent from the lookup table (including
# `missing` zipcodes); it exposes the same fields, all `missing`.
missing_row = (
    hsanum = missing,
    hsacity = missing,
    hsastate = missing,
    hrrnum = missing,
    hrrcity = missing,
    hrrstate = missing,
)
hsahrrs = map(z -> get(hsahrr_dict, z, missing_row), data_out.zipcode);

# `*` is used for the names so that a `missing` part yields a `missing` name.
data_out.hsa_id = map(h -> h.hsanum, hsahrrs);
data_out.hsa_name = map(h -> h.hsacity * ", " * h.hsastate, hsahrrs);
data_out.hrr_id = map(h -> h.hrrnum, hsahrrs);
data_out.hrr_name = map(h -> h.hrrcity * ", " * h.hrrstate, hsahrrs);

In [20]:
# Display the assembled hospital metadata table.
data_out

Unnamed: 0_level_0,hospital,hospital_id,in_forecast,address,county,healthcare_region,city,zipcode,lat,long,place_id,state,state_abbrev,hsa_id,hsa_name,hrr_id,hrr_name
Unnamed: 0_level_1,String,String,Bool,String,String,String,String,String?,Float64,Float64,String,String,String,Int64?,String?,Int64?,String?
1,Anne Arundel Medical Center,h39740093,1,"2002 Medical Pkwy, Annapolis, MD 21401, USA",Anne Arundel County,Region III,Annapolis,21401,38.9904,-76.5374,ChIJwQbdhNf2t4kR9wSFLYAT0Go,Maryland,MD,21001,"Annapolis, MD",113,"Washington, DC"
2,Atlantic General Hospital,h92616877,1,"9733 Healthway Dr, Berlin, MD 21811, USA",Worcester County,Region IV,Berlin,21811,38.3396,-75.2115,ChIJrTSS5gInuYkRtT2krn-1-Jw,Maryland,MD,21029,"Salisbury, MD",225,"Salisbury, MD"
3,Baltimore Convention Center Field Hospital,h94720559,0,"1 W Pratt St, Baltimore, MD 21201, USA",Baltimore City,Region III,Baltimore,21201,39.2854,-76.6171,ChIJqSGk9WADyIkRDVkLuWmZKdM,Maryland,MD,21002,"Baltimore, MD",223,"Baltimore, MD"
4,Baltimore Washington Medical Center,h75227743,1,"301 Hospital Dr, Glen Burnie, MD 21061, USA",Anne Arundel County,Region III,Glen Burnie,21061,39.1379,-76.6218,ChIJofV1-bz8t4kRUG1GNyt_7cA,Maryland,MD,21016,"Glen Burnie, MD",223,"Baltimore, MD"
5,Bowie Medical Center,h26261837,0,"15001 Health Center Dr, Bowie, MD 20716, USA",Prince Georges County,Region V,Bowie,20716,38.9494,-76.746,ChIJn0x8bI7rt4kR2KeO6HUDCGY,Maryland,MD,21020,"Lanham, MD",226,"Takoma Park, MD"
6,Calvert Memorial Hospital,h51968182,1,"100 Hospital Rd, Prince Frederick, MD 20678, USA",Calvert County,Region V,Prince Frederick,20678,38.5594,-76.5963,ChIJGf93tYmCt4kRUHcfYxctVoM,Maryland,MD,21025,"Prince Frederick, MD",113,"Washington, DC"
7,Carroll Hospital Center,h73144964,1,"200 Memorial Ave, Westminster, MD 21157, USA",Carroll County,Region III,Westminster,21157,39.5576,-76.9903,ChIJX6DoR2s4yIkR_eqhMva6JnY,Maryland,MD,21033,"Westminster, MD",223,"Baltimore, MD"
8,Charles Regional (UM),h61059656,1,"5 Garrett Ave, La Plata, MD 20646, USA",Charles County,Region V,La Plata,20646,38.529,-76.9722,ChIJL0cYbucIt4kR8Bh1L4nlJtc,Maryland,MD,21019,"La Plata, MD",113,"Washington, DC"
9,Chestertown (UMSRH),h95946725,1,"223 High St, Chestertown, MD 21620, USA",Kent County,Region IV,Chestertown,21620,39.2085,-76.0657,ChIJ_1Qaz0OLx4kRb8E80Y10zW0,Maryland,MD,21005,"Chestertown, MD",223,"Baltimore, MD"
10,Doctors Community Hospital,h82233897,1,"8118 Good Luck Rd, Lanham, MD 20706, USA",Prince Georges County,Region V,Lanham,20706,38.9823,-76.865,ChIJlxkz022lt4kR4BGhrREiUM0,Maryland,MD,21020,"Lanham, MD",226,"Takoma Park, MD"


In [21]:
# Write the final hospital metadata table to CSV; the call returns the path.
CSV.write("../data/hospital_meta.csv", data_out)

"../data/hospital_meta.csv"