In [2]:
import requests
import json
import pandas as pd
from config import geoapify_key
import time
import re

#Show all 100 rows
pd.set_option('display.max_rows', 100)


In [3]:
# Read the cleaned data
df = pd.read_csv('../data/nhomes_cleaned.csv', delimiter=',')

In [4]:
# Set the timeout between the attempts
timeout = 10

# Limit the number of attempts
maxAttempt = 10

def getLocations(locations):
    url = "https://api.geoapify.com/v1/batch/geocode/search?apiKey=" + geoapify_key
    response = requests.post(url, json = locations)
    result = response.json()
    
    # The API returns the status code 202 to indicate that the job was accepted and pending
    status = response.status_code
    if (status != 202):
        print('Failed to create a job. Check if the input data is correct.')
        return
    jobId = result['id']
    getResultsUrl = url + '&id=' + jobId

    time.sleep(timeout)
    result = getLocationJobs(getResultsUrl, 0)
    if (result):
        print(result)
        print('You can also get results by the URL - ' + getResultsUrl)
        return result
    else:
        print('You exceeded the maximal number of attempts. Try to get results later. You can do this in a browser by the URL - ' + getResultsUrl)

def getLocationJobs(url, attemptCount):
    response = requests.get(url)
    result = response.json()
    status = response.status_code
    if (status == 200):
        print('The job is succeeded. Here are the results:')
        return result
    elif (attemptCount >= maxAttempt):
        return
    elif (status == 202):
        print('The job is pending...')
        time.sleep(timeout)
        return getLocationJobs(url, attemptCount + 1)

In [5]:
# Get list of nurinsg home addresses
addresses = df['Name & Address'].tolist()

# Extract Coordinates form API response
data = getLocations(addresses)


The job is pending...
The job is pending...
The job is pending...
The job is pending...
The job is pending...
The job is succeeded. Here are the results:
[{'query': {'text': 'Abbeygate Care Centre 2 Leys Road Brockmoor Brierley Hill DY5 3UR', 'parsed': {'house': 'abbeygate care centre', 'housenumber': '2', 'street': 'leys road', 'suburb': 'brockmoor', 'postcode': 'dy5 3ur', 'city': 'brierley hill', 'expected_type': 'amenity'}}, 'datasource': {'sourcename': 'openstreetmap', 'attribution': '© OpenStreetMap contributors', 'license': 'Open Database License', 'url': 'https://www.openstreetmap.org/copyright'}, 'name': 'Leys Road', 'ref': 'B4180', 'country': 'United Kingdom', 'country_code': 'gb', 'state': 'England', 'county': 'West Midlands Combined Authority', 'city': 'Dudley', 'village': 'Brierley Hill', 'postcode': 'DY5 3UR', 'suburb': 'Brockmoor', 'quarter': 'Dudley Fields', 'street': 'Leys Road', 'lon': -2.1341733, 'lat': 52.4857514, 'state_code': 'ENG', 'result_type': 'street', 'format

In [6]:
data_dict = {}
# data_dict = {d['query']['text']: [d['lat'], d['lon']] for d in data}
for d in data:
    if 'query' in d and 'text' in d['query'] and 'lat' in d and 'lon' in d:
        data_dict[d['query']['text']] = [d['lat'], d['lon']]
# Add coordinates to dataframe
df['Coordinates'] = df['Name & Address'].map(data_dict)

In [7]:
df.head(100)

Unnamed: 0,Name & Address,Email,Telephone No,Age Range,CQC Registered For,Provide \nNursing,Capacity,Post Code,Care Home Name,Min Age,Max Age,Coordinates
0,Abbeygate Care Centre 2 Leys Road Brockmoor Br...,abbeygatecare1@gmail.com,01384 571295,Age 65+,Dementia \nMental Health Condition \nOld Age \...,No,17.0,DY5 3UR,Abbeygate Care,65.0,,"[52.4857514, -2.1341733]"
1,Abbeymere 12 Eggington Road Wollaston Stourbri...,abbeymere@karelink.co.uk,01384 395195,Ages 65+,Dementia \nMental Health Condition \nOld Age \...,No,18.0,DY8 2QJ,Abbeymere,65.0,,"[52.4584, -2.16295]"
2,Allenbrook Nursing Home 209 Spies Lane Halesow...,manager@allenbrooknursing\nhome.co.uk,0121 422 5844,Ages 55+,Dementia \nMental Health Condition \nNo Medica...,Yes,36.0,B62 9SJ,Allenbrook Nursing,55.0,,"[52.4613003, -2.0139452]"
3,Amberley Care Home 481-483 Stourbridge Road Ha...,amberleycarehome@hotmail\n.co.uk,01384 482365,Ages 65+,Dementia \nOld Age,No,25.0,DY5 1LB,Amberley Care Home,65.0,,"[52.4927718, -2.1164204]"
4,Arcare For Forte 440 Birmingham New Road Bilst...,ksharma@arcarehomes.co.u\nk,01902 880108,18+,Learning Disability \nMental Health Condition ...,No,9.0,WV14 9QB,Arcare For Forte,18.0,,"[52.5481562, -2.094362]"
5,Ashbourne Care Ltd Lightwood Road Dudley DY1 2RS,ashbourne.m@fshc.co.uk,01384 242200,Ages 65+,Dementia \nOld Age,No,38.0,DY1 2RS,Ashbourne Care Ltd,65.0,,"[52.517467, -2.101861]"
6,Ashgrove Nursing Home 9 Dudley Wood Road Nethe...,cea@ashgrovecare.com,01384 413913,Ages 65+,Dementia \nOld Age \nSensory Impairment,Yes,57.0,DY2 0DA,Ashgrove Nursing,65.0,,"[52.4777428, -2.0859231]"
7,Avondale 45 Norton Road Norton Stourbridge DY8...,avondaleresthome@hotmail.\ncom,01384 442731,Ages 65+,Old Age,No,15.0,DY8 2AH,Avondale,65.0,,"[52.4453886, -2.157669]"
8,Beatrice House 25 Bell Street Pensnett Brierle...,beatricehouse@alphonsusse\nrvices.co.uk,01384 482963,Not Stated,Learning Disability,No,3.0,DY5 4HG,Beatrice House,,,"[52.4809461, -2.12446]"
9,Belvidere 41-43 Stourbridge Road Holly Hall Du...,belvidere@gmail.com,01384 211850,Ages 55+,Dementia \nDetention Under Mental \nHealth Act...,No,28.0,DY1 2DH,Belvidere,55.0,,"[52.5037783, -2.1017361]"


In [8]:
# Showing any coordinates with a null value to be able to manually check
# and fix them.
df[df['Coordinates'].isnull()]

Unnamed: 0,Name & Address,Email,Telephone No,Age Range,CQC Registered For,Provide \nNursing,Capacity,Post Code,Care Home Name,Min Age,Max Age,Coordinates
24,Halas Homes Wassell Road Hasbury Halesowen B63...,info@halashomes.co.uk,0121 550 8778,18+,Learning Disability,No,30.0,B63 4JX,Halas Homes,18.0,,


In [15]:
# print name and address from row number 24
print(df.iloc[58]['Name & Address'])

Riverside Care (Kingswinford) Ltd Wolverhampton Road Wall Heath Kingswinford DY6 7DA


In [10]:
# Manually fixing the coordinates for row 24
# Corrdinates obtaine form the web
df.at[24, 'Coordinates'] = [52.44464220440522, -2.0660808868916276]

In [11]:
# Splitting the coordinates into two columns
df[['Latitude', 'Longitude']] = pd.DataFrame(df['Coordinates'].tolist(), index=df.index)

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74 entries, 0 to 73
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Name & Address      74 non-null     object 
 1   Email               74 non-null     object 
 2   Telephone No        72 non-null     object 
 3   Age Range           73 non-null     object 
 4   CQC Registered For  73 non-null     object 
 5   Provide 
Nursing    73 non-null     object 
 6   Capacity            73 non-null     float64
 7   Post Code           74 non-null     object 
 8   Care Home Name      74 non-null     object 
 9   Min Age             66 non-null     float64
 10  Max Age             3 non-null      float64
 11  Coordinates         74 non-null     object 
 12  Latitude            74 non-null     float64
 13  Longitude           74 non-null     float64
dtypes: float64(5), object(9)
memory usage: 8.2+ KB


In [13]:
# Save the data to a new csv file
df.to_csv('../data/nhomes_cleaned_coordinates.csv', index=False)