<h1 align='center'>Collecting POI data of Bengaluru City from open street maps (OSM)</h1>

We will use overpy library which is a python client for OverpassAPI 

In [1]:
import overpy
import pandas as pd

### Let's define a dictionary to store fetched POI data

In [2]:
# Defining dictionary to store data
data = {"id":[] , "category":[], "sub_category":[], "lat":[], "lon":[] }

### Let's define some functions to automate the process of fetching POI data from OverpassAPI

In [3]:
def add_data(category, sub_category, ways):
    """
    Helper function to add results of API call to data dictionary
    """
    for way in ways:
        data["id"].append(way.id)
        data["category"].append(category)
        data["sub_category"].append(sub_category)
        data["lat"].append(float(way.center_lat))
        data["lon"].append(float(way.center_lon))

In [4]:
def get_query_string(key, value=None, city="Bengaluru"):
    """
    Helper function to create query string
    """
    if value is None:
        query_string = f"""
        [out:json];
        area[name="{city}"];
        way["{key}"](area);
        out center;    
    """
    else:
        query_string = f"""
            [out:json];
            area[name="{city}"];
            way["{key}"="{value}"](area);
            out center;    
        """
        
    return query_string     

In [5]:
def api_call(key, category="", value_list= None, value=None, city="Bengaluru"):
    """
    Custom Function to call OverpassAPI
    """
    api = overpy.Overpass()
    
    if value_list is not None:
        for val in value_list:
            query_string = get_query_string(key=key, value=val, city=city)
            result = api.query(query_string)
            add_data(category, val, result.ways)
    elif value is not None:
        get_query_string(key, value, city)
        result = api.query(query_string)
        add_data(category, key, result.ways)
    else:
        query_string = get_query_string(key=key, city=city)
        result = api.query(query_string)
        add_data(category, key, result.ways)

### Fetch all POIs related to Sustenance

In [6]:
# Sustenance
sustenance = ["bar", "bbq", "cafe", "biergarten", "fast_food", "food_court", "pub", "restaurant", "ice_cream"]
api_call(key="amenity", category="sustenance", value_list=sustenance)
len(data["id"])

140

### Fetch all POIs related to Education

In [7]:
# Education
education = ["college", "school", "university", "library", "language_school", "kindergarten", "driving_school"]
api_call(key="amenity", category="education", value_list=education)
len(data["id"])

745

### Fetch all POIs related to Healthcare

In [8]:
# Healthcare
healthcare = ["hospital", "clinic", "nursing_home", "pharmacy", "veterinary", "doctors", "dentist"]
api_call(key="amenity", category="healthcare", value_list=healthcare)
len(data["id"])

923

### Fetch all POIs related to Entertainment

In [9]:
# Entertainment
entertainment = ["arts_centre", "casino", "cinema", "communtity_centre", "gambling", "nightclub", "planetarium", "theater"]
api_call(key="amenity", category="entertainment", value_list=entertainment)
len(data["id"])

967

In [10]:
api_call(key="leisure", category="entertainment")
len(data["id"])

2789

### Fetch all POIs related to Accomodation

In [11]:
# Accomodation
accomodation = ["apartments", "bunglow", "dormitory", "house", "residential"]
api_call(key="building", category="accomodation", value_list=accomodation)
len(data["id"])

5750

### Fetch all POIs related to Commercial Activites

In [12]:
# Commercial
commercial = ["commercial", "industrial", "kiosk", "office", "retail", "supermarket", "warehouse"]
api_call(key="building", category="commercial", value_list=commercial)
len(data["id"])

6685

### Fetch all POIs related to Religious Activities

In [13]:
# Religious
religious = ["cathedral", "chapel", "church", "mosque", "religious", "shrine", "temple"]
api_call(key="building", category="religious", value_list=religious)
len(data["id"])

6765

### Fetch all POIs related to Agriculture

In [14]:
# Agriculture
agriculture = ["barn", "conservatory", "cowshed", "farm_auxiliary", "greenhouse", "stable", "sty"]
api_call(key="building", category="agriculture", value_list=agriculture)
len(data["id"])

6767

### Fetch all POIs related to History

In [15]:
# Historic
api_call(key="historic", category="historic")
len(data["id"])

6786

### Fetch all POIs related to Tourism

In [16]:
# Tourism
api_call(key="tourism", category="tourism")
len(data["id"])

6932

### Creating pandas dataframe to store all POI data fetched from OSM

In [17]:
# Creating dataframe to store data
df = pd.DataFrame(data)

In [18]:
df.head()

Unnamed: 0,id,category,sub_category,lat,lon
0,346745629,sustenance,bar,12.960247,77.646907
1,347188390,sustenance,bar,12.933091,77.630678
2,351811154,sustenance,bar,13.002424,77.633937
3,609903172,sustenance,bar,12.91295,77.683123
4,181695452,sustenance,cafe,12.979373,77.714872


In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6932 entries, 0 to 6931
Data columns (total 5 columns):
id              6932 non-null int64
category        6932 non-null object
sub_category    6932 non-null object
lat             6932 non-null float64
lon             6932 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 270.9+ KB


In [22]:
df.shape

(6932, 5)

## Removing duplicates rows based on their IDs

In [24]:
# Deleting duplicate rows based on id
df.drop_duplicates(subset ="id", 
                     keep = False, inplace = True) 

In [25]:
df.shape

(6879, 5)

## Save the collected POI data as a CSV file: *bengaluru_geo_data.csv*

In [27]:
df.to_csv("bengaluru_geo_data.csv", index=False)

<hr>