In [1]:
#Import libraries
import requests
import json
import pandas as pd
import geopy
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

ModuleNotFoundError: No module named 'geopy'

In [16]:
###Alternative retrieval method (kept commented out for reference): easier, but does not take SSL certificate validation into account

#Connect to CMS POS API
# query = ("https://data.cms.gov/resource/wiec-cn98.json?"
#         "$$app_token=GrSIGDag6lvWk0o9Yv4uxCVYA"  #App token not required but created to dispel throttling
#         "&$select=prvdr_ctgry_cd,city_name,fac_name,state_cd,st_adr,phne_num,zip_cd,bed_cnt" #Only categories pertinent to project
#         "&state_cd=NE") #Only need data from the state of Nebraska

# #Read the data in
# raw_data = pd.read_json(query)

# raw_data.head(10)
# raw_data

In [13]:
###This method for data retrieval asks for SSL certificate validation and is more secure

#Search filters
# NOTE(review): the app token is hardcoded here; consider loading it from an environment variable before sharing this notebook
payload = {"$$app_token": "GrSIGDag6lvWk0o9Yv4uxCVYA", #App token not required but created to dispel throttling
           "$select": "prvdr_ctgry_cd,city_name,fac_name,state_cd,st_adr,phne_num,zip_cd,bed_cnt", #Only categories pertinent to project
           "$limit": 2000, #ensure getting all facilities
           "state_cd": "NE"} #Only need data from the state of Nebraska

#Retrieve data from CMS POS API (`url` holds the HTTP response object)
#A timeout keeps a stalled connection from hanging the kernel indefinitely
url = requests.get("https://data.cms.gov/resource/wiec-cn98.json", params=payload, timeout=30)

#Check status code for proper data return: fail here with a clear HTTPError
#instead of leaving `data` undefined and hitting a NameError further down
url.raise_for_status()

#Brings the JSON response into a pandas dataframe
data = pd.json_normalize(url.json())

#Filter to providers of interest
providers = ["1", "12", "21"]

#Filter and reset the index in one chain so `data` is a fresh frame;
#this avoids SettingWithCopyWarning when the new column is assigned below
data = data[data.prvdr_ctgry_cd.isin(providers)].reset_index(drop=True)

#Create full address for geocoding
data['geo_address'] = data['st_adr'] + ", " + data['city_name'] + ", " + data['state_cd'] + ", " + data['zip_cd']
data.head()

Unnamed: 0,prvdr_ctgry_cd,city_name,fac_name,state_cd,st_adr,phne_num,zip_cd,bed_cnt,geo_address
0,1,OMAHA,VA HOSPITAL,NE,4101 WOOLWORTH AVE,4024490600.0,68105,486,"4101 WOOLWORTH AVE, OMAHA, NE, 68105"
1,1,LINCOLN,UNIVERSITY HEALTH CENTER,NE,15TH + U STS,,68508,32,"15TH + U STS, LINCOLN, NE, 68508"
2,1,FAIRBURY,JEFFERSON COMMUNITY HEALTH CENTER,NE,2200 NORTH H STREET PO BOX 277,4027293351.0,68352,33,"2200 NORTH H STREET PO BOX 277, FAIRBURY, NE..."
3,1,LINCOLN,BRYAN MEDICAL CENTER,NE,1600 SOUTH 48TH ST,4024811111.0,68506,630,"1600 SOUTH 48TH ST, LINCOLN, NE, 68506"
4,1,LOUP CITY,SACRED HEART HOSPITAL INC,NE,626 N STREET,3087450503.0,68853,26,"626 N STREET, LOUP CITY, NE, 68853"


In [14]:
#create CSV for data backup
#Forward slashes are used because '\d' and '\p' are invalid escape sequences in a
#normal string literal, and a backslash path only resolves to the data folder on Windows
data.to_csv('./data/pos_nebraska.csv', index=False)

In [None]:
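# BEFORE CONTINUING #
# Manually go through pos_nebraska.csv (in the data folder) and edit the geo_address column to remove PO boxes, suite numbers, and non-address text, then save the result as pos_geocode.csv in the same folder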

In [22]:
#Load in the manually cleaned data to geocode
#Forward slashes: '\d' and '\p' are invalid escape sequences, and a backslash path is Windows-only
readydata = pd.read_csv('./data/pos_geocode.csv')

#Use Nominatim from OSM
locator = Nominatim(user_agent="Andrew Laws")

#Set delay to 1 second as per Nominatim use license
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

#Apply geocode to each address and put the returned object in a new column.
#Blank addresses come back from the CSV as NaN; skip them (leaving None) rather
#than sending a NaN value to the geocoder as if it were an address
readydata["loc"] = readydata["geo_address"].apply(
    lambda address: geocode(address) if pd.notna(address) else None
)

In [25]:
# Convert each geocoded location to longitude/latitude (None where there is no location object)
readydata["longitude"] = readydata["loc"].apply(lambda location: location.longitude if location else None)
readydata["latitude"] = readydata["loc"].apply(lambda location: location.latitude if location else None)

#drop rows with no lat/long values (not geocoded)
readydata = readydata.dropna(subset=['longitude', 'latitude'])

#Save to csv
#Forward slash: '\p' is an invalid escape sequence, and a backslash path is Windows-only
readydata.to_csv("data/pos_locations_ready.csv", index=False)
readydata.head()

Unnamed: 0,prvdr_ctgry_cd,city_name,fac_name,state_cd,st_adr,phne_num,zip_cd,bed_cnt,geo_address,loc,longitude,latitude
0,1,OMAHA,VA HOSPITAL,NE,4101 WOOLWORTH AVE,4024491000.0,68105,486.0,"4101 WOOLWORTH AVE, OMAHA, NE, 68105","(Omaha VA Medical Center, 4101, Woolworth Aven...",-95.973635,41.243687
3,1,LINCOLN,BRYAN MEDICAL CENTER,NE,1600 SOUTH 48TH ST,4024811000.0,68506,630.0,"1600 SOUTH 48TH ST, LINCOLN, NE, 68506","(Bryan Health East Campus, 1600, South 48th St...",-96.651305,40.797095
4,1,LOUP CITY,SACRED HEART HOSPITAL INC,NE,626 N STREET,3087451000.0,68853,26.0,"626 N STREET, LOUP CITY, NE, 68853","(626, N Street, Loup City, Sherman County, Neb...",-98.965485,41.275926
5,1,LINCOLN,BRYANLGH MEDICAL CENTER WEST,NE,2300 S 16TH ST,4024751000.0,68502,252.0,"2300 S 16TH ST, LINCOLN, NE, 68502","(2300, South 16th Street, Irvingdale, Lincoln,...",-96.698538,40.792039
6,1,KEARNEY,CHI HEALTH GOOD SAMARITAN,NE,"P O BOX 1990, 10 EAST 31ST ST",3088657000.0,68848,165.0,"10 EAST 31ST ST, KEARNEY, NE, 68848","(CHI Health Good Samaritan, 10, East 31st Stre...",-99.081395,40.708266
