In [None]:
#title: amenities_osm.ipynb
# get amenities per district meeting certain criteria from openstreetmap
# first get the district relation ids from OSM
# then sort them in alphabetical order of name
# then for each district get the amenities of interest within the district boundary
# then filter by certain criteria (e.g. name contains certain keywords)
# updated so that a few cells can be rerun, after manually adjusting them, to resume from the Overpass Turbo (OT) request that failed

In [None]:
# %%
# get district relation ids of Andhra Pradesh (rel_id: 2022095) from OSM
#

import requests

# %%
# Overpass API endpoint that receives the query
overpass_url = "https://overpass-api.de/api/interpreter"

# An Overpass area id is the OSM relation id plus this fixed offset.
OSM_RELATION_AREA_OFFSET = 3600000000
# Relation id of Andhra Pradesh, the state whose districts are listed.
ANDHRA_PRADESH_REL_ID = 2022095

# Overpass Turbo (OT) query: CSV rows of (id, name) for every
# administrative boundary relation with admin_level=5 inside the state area.
query="""
[out:csv(::id, name)][timeout:60];
area(%d)->.searchArea;
(   relation["boundary"="administrative"]["admin_level"="5"](area.searchArea););
out body;

""" % (OSM_RELATION_AREA_OFFSET + ANDHRA_PRADESH_REL_ID)
# %%
# Send the district query to the Overpass API.
# The client-side timeout stops a stalled connection from hanging the cell
# forever; it is set above the 60 s server-side timeout declared in the query.
response = requests.post(overpass_url, data={"data": query}, timeout=120)

# Fail loudly on any non-200 answer so later cells never parse an error page
if response.status_code == 200:
    print("✅ Query executed successfully.")
else:
    raise Exception(f"Overpass API request failed with status code {response.status_code}")

# %%
# Turn the tab-separated Overpass answer into the district table `df`
import pandas as pd
from io import StringIO

df = (
    pd.read_csv(StringIO(response.text), sep='\t')
    # Overpass labels the id column '@id'; expose it as 'relation_id'
    .rename(columns={'@id': 'relation_id'})
    # alphabetical by district name, with a fresh 0..n-1 index
    .sort_values(by='name')
    .reset_index(drop=True)
)

In [None]:
# Initialisation for the harvesting loop. Run once before the first pass;
# the next two cells can then be rerun if the API requests fail part-way.
# Re-running this cell resets amenity_df to an empty frame, discarding
# any rows collected so far.
import time
amenity_df = pd.DataFrame()

In [None]:
# Resume position for the district loop in the next cell.
# Fresh run: leave this at 0, run this cell, then run the next one.
# After a failed request: find the district_index logged for the failing
# district, set dist_index to that value minus 1, and make sure the loop in
# the next cell starts at that same row of df (adjust the start of its slice
# over df['relation_id'] if it does not already follow dist_index). Then rerun
# this cell followed by the next one, repeating until processing is complete.
dist_index=0

In [None]:
# Harvest named government offices for every district that is still pending.
#
# Resume behaviour: `dist_index` is the number of districts whose rows are
# already stored in amenity_df. The loop starts at that row of df and advances
# the counter only after a district has been appended. If a request fails,
# rerunning this cell picks up at the failed district without re-fetching
# (and duplicating) the earlier ones. Reset dist_index to 0 only together with
# re-initialising amenity_df, i.e. for a completely fresh start.

REQUEST_TIMEOUT_S = 120  # client-side limit, above the 90 s timeout set in the query
MAX_ATTEMPTS = 4         # first try plus three retries
RETRY_WAIT_S = 10        # pause between retries
POLITE_WAIT_S = 5        # pause between districts to avoid overloading the server

# An Overpass area id is the OSM relation id plus this fixed offset.
OSM_RELATION_AREA_OFFSET = 3600000000

# Overpass Turbo (OT) query template: nodes and ways tagged office=government
# that carry a name, inside one district area, as CSV with a centre coordinate.
AMENITY_QUERY = """
    [out:csv(::id, ::type,  ::lat, ::lon, name)]
    [timeout:90];
    area(%d)->.searchArea;
    (
    nw
        [office=government]
        [name]
        (area.searchArea);
    );
    out center;

    """

pending = df.iloc[dist_index:]
for rel, district_name in zip(pending['relation_id'], pending['name']):
    print(f"Processing relation_id: {rel}, district_index: {dist_index + 1} of {len(df)}")
    query = AMENITY_QUERY % (OSM_RELATION_AREA_OFFSET + rel)

    # %%
    # Send the request, retrying on both bad HTTP statuses and network errors
    print(f"Try query for relation_id: {rel}")
    attempt = 1
    while True:
        try:
            response = requests.post(overpass_url, data={"data": query}, timeout=REQUEST_TIMEOUT_S)
            status = response.status_code
        except requests.RequestException as exc:
            # connection errors and timeouts count as a failed attempt
            status = f"no response ({exc})"
        if status == 200:
            break
        print(f" Query failed, attempt: {attempt} response status: {status}")
        if attempt >= MAX_ATTEMPTS:
            raise Exception(f"Overpass API request failed with status code after several tries {status}")
        print(f"Will wait for {RETRY_WAIT_S} seconds and retry query")
        time.sleep(RETRY_WAIT_S)
        attempt += 1
    print(f" Query success, attempt: {attempt} response status: {response.status_code}")

    # %%
    # Parse the tab-separated answer for this district
    ddf = pd.read_csv(StringIO(response.text), sep='\t')

    # Keep only rows whose name contains 'Sachi' (case-insensitive) and tag them
    # with their district. assign() builds a new frame, so no values are written
    # into a filtered view (avoids SettingWithCopyWarning).
    is_match = ddf['name'].str.contains('Sachi', case=False, na=False)
    ddf = ddf[is_match].assign(district=rel, district_name=district_name)

    # Append to the running result; fall back to ddf if amenity_df was never initialised
    try:
        amenity_df = pd.concat([amenity_df, ddf], ignore_index=True)
    except NameError:
        amenity_df = ddf

    # This district is now safely stored, so move the resume position past it
    dist_index += 1
    time.sleep(POLITE_WAIT_S)  # to avoid overloading the server

In [None]:
# Number of matched amenities per district (grouped by 'district_name' only)
district_counts = amenity_df.groupby(['district_name']).size()

# Overall total, taken from the per-district table
total_amenities = district_counts.sum()
print(f"Total number of amenities found: {total_amenities}")

print(district_counts)
# How many districts have at least one match
print(f"Total number of districts with amenities: {len(district_counts)}")

# Districts listed in df whose name never appears in amenity_df['district_name']
has_amenities = df['name'].isin(amenity_df['district_name'])
zero_count_districts = df.loc[~has_amenities, 'name'].tolist()
print("Districts with zero amenities found:")
print(zero_count_districts)

In [None]:
# Matches broken down by OSM element type ('@type') within each district
type_counts = amenity_df.groupby(['@type','district_name']).size()
print(type_counts)

# Matches per element type across all districts
total_type_amenities = amenity_df.groupby(['@type']).size()
print(total_type_amenities)

# Overall total, taken from the per-type / per-district table
total_amenities = type_counts.sum()
print(f"Total number of amenities found: {total_amenities}")