In [1]:
import requests
import json
import overpy as opy
import pandas as pd
from bs4 import BeautifulSoup

Here will be using an example, to see the capabilites of what we can do with OSM data. I selected some vineyards in France

In [2]:
#How to access the overpass query interperter

api = opy.Overpass()
overpass_query = api.query("""
[bbox:44.453388800301774,-0.56304931640625,46.240651955001695,2.3345947265625]
[timeout:25]
;
(
  node["landuse"="vineyard"];
  way["landuse"="vineyard"];
  relation["landuse"="vineyard"];
);
out center;
""")




I'll also be adding a function that generates a query statement, based on a user defined bounding box

In [2]:
def overpass_query_constructor_bbox(min_lat, min_lon, max_lat, max_lon, key = 'landuse', value = 'vineyard', timeout = 25):

    '''
    Constructs a query using overpy. First a bounding box using a min_lat min_lon, max_lat and max long. and then you
    can select the catefory

        Parameters:
            min_lat (float): Minimum Latitude (WGS-84)
            min_lon (float): Minimum Longitude (WGS-84)
            max_lat (float): Maximum Latitude (WGS-84)
            max_lon (float): Maximum Longitude (WGS-84)
            key (str): Key for OSM category (ex: https://wiki.openstreetmap.org/wiki/Key:landuse)
            value (str): Value for OSM category
            timeout (int): Timeout for query (seconds), if your query is timeout might be a lot of data.


        Returns:
            query_creator (str): Query to run using the overpy api


    '''

    #constructs a query based on a key, value and you can set timeout also
    query_creator = """
    [bbox: {0}, {1}, {2}, {3}]
    [timeout:{4}]
    ;
    (
        node[{5} = {6}];
        way[{5} = {6}];
        relation[{5} = {6}];
    );
    out center;

    """.format(str(min_lat), str(min_lon), str(max_lat), str(max_lon), str(timeout), key, value)

    return query_creator
    

It might be useful to see what kind of values exist for specific keys as well. For that we will scrape OSM's wikipedia and
pull out the table from the values column

In [8]:
def view_values(key, base_url = "http://wiki.openstreetmap.org/wiki/Key:"):
    '''
    Scrape OSM data given a specific key. Returns a table containg, the key, value, element, description, and rendering

        Parameters:
            key (str): A key from OSM. Here is
            an example of the key landuse https://wiki.openstreetmap.org/wiki/Key:landuse


        Returns:
            final_table (str): Returns a table containing all of the different valuies, elements and descriptions.


    '''


    full_url = base_url + key
    #send a requests to the created URL
    response = requests.get(full_url)

    #instantiate our beautiful soup object
    soup = BeautifulSoup(response.text, 'html.parser')
    soup_response = soup.find('table', {'class':'wikitable'})

    #put into a table pandas table
    read_html = pd.read_html(str(soup_response))
    final_frame = pd.DataFrame(read_html[0])

    return final_frame


Now let's see the keys from the landuse value.

In [9]:
landuse_table = view_values('landuse')
landuse_table

Unnamed: 0_level_0,Key,Value,Element,Description,Rendering,Photo,Unnamed: 6_level_0
Unnamed: 0_level_1,Common landuse key values - developed land,Common landuse key values - developed land,Common landuse key values - developed land,Common landuse key values - developed land,Common landuse key values - developed land,Common landuse key values - developed land,Common landuse key values - developed land
0,landuse,commercial,,Predominantly commercial businesses and their ...,,,
1,landuse,construction,,A site which is under active development and c...,,,
2,landuse,education,,An area predominately used for educational pur...,,,
3,landuse,industrial,,Predominantly industrial landuses such as work...,,,
4,landuse,residential,,Land where people reside; predominantly reside...,,,
5,landuse,retail,,Predominantly retail businesses such as shops....,,,
6,landuse,institutional,,"Land used for institutional purposes, see Inst...",,,
7,Key,Value,Element,Description,Rendering,Photo,
8,Common landuse key values - rural and agricult...,Common landuse key values - rural and agricult...,Common landuse key values - rural and agricult...,Common landuse key values - rural and agricult...,Common landuse key values - rural and agricult...,Common landuse key values - rural and agricult...,Common landuse key values - rural and agricult...
9,landuse,aquaculture,,Aquaculture is the farming of freshwater and s...,currently not rendered by osm-carto,,


Example Box
44.453388800301774,-0.56304931640625,46.240651955001695,2.3345947265625

In [4]:
the_query = overpass_query_constructor_bbox(44.453388800301774, -0.56304931640625, 46.240651955001695, 2.3345947265625, timeout = 30)
print(the_query)


    [bbox: 44.453388800301774, -0.56304931640625, 46.240651955001695, 2.3345947265625]
    [timeout:30]
    ;
    (
        node[landuse = vineyard];
        way[landuse = vineyard];
        relation[landuse = vineyard];
    );
    out center;

    


Let's now save all of the output from OSM into coordinates. For the ways and relations, we
are taking the center polygon.

In [3]:
node_coords = [(float(node.lon), float(node.lat)) 
           for node in overpass_query.nodes]
way_coords = [(float(way.center_lon), float(way.center_lat)) 
           for way in overpass_query.ways]
relation_coords = [(float(rel.center_lon), float(rel.center_lat)) 
           for rel in overpass_query.relations]

Let's see what information we can get out of this using the overpy package

In [4]:
def metadata_info(result_overpass):

    coords = []

    print("Number of ways: ", len(overpass_query.ways))
    print("Number of nodes: ", len(overpass_query.nodes))
    print("Number of relations: ", len(overpass_query.relations))



metadata_info(overpass_query)

Number of ways:  10436
Number of nodes:  2
Number of relations:  460


In [5]:
print(overpass_query.ways[0])

<overpy.Way id=25308659 nodes=[275673915, 275673935, 275673936, 275673964, 2156225512, 275673915]>


In [6]:
print(overpass_query.ways[0].get_nodes(resolve_missing = True))

[<overpy.Node id=275673915 lat=45.8371811 lon=1.1282429>, <overpy.Node id=275673935 lat=45.8368817 lon=1.1325448>, <overpy.Node id=275673936 lat=45.8357616 lon=1.1319743>, <overpy.Node id=275673964 lat=45.8352961 lon=1.1306867>, <overpy.Node id=2156225512 lat=45.8365380 lon=1.1279466>, <overpy.Node id=275673915 lat=45.8371811 lon=1.1282429>]


Let's see the same thing except with some of the relations

In [7]:
print(overpass_query.relations[0].members)

[<overpy.RelationWay ref=326355422 role=outer>, <overpy.RelationWay ref=326360676 role=outer>, <overpy.RelationWay ref=326328889 role=outer>, <overpy.RelationWay ref=326382332 role=outer>, <overpy.RelationWay ref=326328887 role=outer>, <overpy.RelationWay ref=326240440 role=outer>, <overpy.RelationWay ref=41647159 role=outer>, <overpy.RelationWay ref=326356418 role=outer>, <overpy.RelationWay ref=327061500 role=outer>, <overpy.RelationWay ref=326355627 role=outer>, <overpy.RelationWay ref=326336181 role=outer>, <overpy.RelationWay ref=326329682 role=outer>, <overpy.RelationWay ref=326360675 role=outer>, <overpy.RelationWay ref=41647162 role=inner>, <overpy.RelationWay ref=41647163 role=inner>, <overpy.RelationWay ref=41647164 role=inner>]


In [8]:
print(overpass_query.relations[1].members[0])

<overpy.RelationWay ref=41647395 role=outer>


In [9]:
print(overpass_query.relations[1].members[1].resolve(resolve_missing = True))

<overpy.Way id=41647398 nodes=[511284551, 511284553, 511284556, 511284559, 511284561, 511284566, 511284568, 511284571, 511284574, 511284579, 511284582, 511284585, 511284606, 511284551]>


In [10]:
print(overpass_query.relations[1].members[1].resolve(resolve_missing = True).get_nodes(resolve_missing = True))

[<overpy.Node id=511284551 lat=45.2957196 lon=-0.5742611>, <overpy.Node id=511284553 lat=45.2963902 lon=-0.5740606>, <overpy.Node id=511284556 lat=45.2966443 lon=-0.5739994>, <overpy.Node id=511284559 lat=45.2970968 lon=-0.5754785>, <overpy.Node id=511284561 lat=45.2989639 lon=-0.5761570>, <overpy.Node id=511284566 lat=45.2997947 lon=-0.5804391>, <overpy.Node id=511284568 lat=45.2993153 lon=-0.5838307>, <overpy.Node id=511284571 lat=45.2985870 lon=-0.5835819>, <overpy.Node id=511284574 lat=45.2962429 lon=-0.5839311>, <overpy.Node id=511284579 lat=45.2957817 lon=-0.5761114>, <overpy.Node id=511284582 lat=45.2951633 lon=-0.5762590>, <overpy.Node id=511284585 lat=45.2948259 lon=-0.5746477>, <overpy.Node id=511284606 lat=45.2957218 lon=-0.5742980>, <overpy.Node id=511284551 lat=45.2957196 lon=-0.5742611>]


Let's construct an OSM class on top of this overpy wrapper so that we can extract any data we want given a bounding box or
we can even do it for districts or for other things.

In [None]:
class OSM:

    def __init__()