In [13]:
import json
import time
from os import makedirs, path

from geopy.geocoders import Nominatim
from requests import request

In [3]:
# Load the original organizations dataset.
# The encoding is given explicitly so that decoding does not depend on the
# platform's locale default; it matches the UTF-8 used by save_dataset().
with open("../datasets/original/organization_en.json", 'r', encoding='utf-8') as fp:
    dep_data = json.load(fp)

In [11]:
def save_dataset(dataset, name, file_format):
    """
    Save the dataset given in input to ``../datasets/<name>.<file_format>``

    Missing parent directories of the target file are created. The file is
    written as UTF-8 encoded JSON with an indentation of 2 spaces.

    :param dataset: Dataset to save (must be serializable by ``json.dump``)
    :param name: Name of the dataset, may include sub-directories
    :param file_format: Format the dataset should be saved in, only 'json' is supported
    :raises ValueError: If ``file_format`` is not a supported format
    """
    # Validate before touching the filesystem, so an unsupported format
    # cannot leave an empty file (or stray directories) behind.
    if file_format != 'json':
        raise ValueError(f"Unsupported file format: {file_format!r}")
    filename = f'../datasets/{name}.{file_format}'
    makedirs(path.dirname(filename), exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, indent=2)

def set_total_size(dataset):
    """
    Store the number of entries of ``dataset['value']['data']`` in both the
    'total' and the 'size' field of ``dataset['value']``

    :param dataset: Dataset to manipulate (modified in place)
    """
    payload = dataset['value']
    entry_count = len(payload['data'])
    # Both counters always carry the same value: the number of data entries.
    payload.update(total=entry_count, size=entry_count)

def get_address_information(address):
    """
    Fetch and return the address information from OpenStreetMap

    The address is first geocoded with Nominatim; the OSM element referenced
    by the result ('osm_type' and 'osm_id') is then downloaded from the
    OSM API in JSON format.

    :param address: Address to look up in OSM
    :return: Dictionary with the information for the input address, or an
             empty dictionary if the address could not be geocoded, the result
             carries no 'osm_id', or the OSM API answered with an error status
    :raises requests.exceptions.Timeout: If the OSM API does not answer in time
    """
    # Check the Nominatim TOS before using this, it allows maximum 1 request per second
    # https://operations.osmfoundation.org/policies/nominatim/
    # Also check the OSM wiki regarding the API
    # https://wiki.openstreetmap.org/wiki/API_v0.6
    # NOTE(review): the Nominatim usage policy asks for a User-Agent that
    # identifies the application; these browser strings should probably be
    # replaced with the project's own identifier - confirm.
    geolocator = Nominatim(
        user_agent="Mozilla/5.0 (Windows NT 10.0; rv:105.0) Gecko/20100101 Firefox/105.0")
    location = geolocator.geocode(query=address)
    if not location:
        return {}

    osm_id = location.raw.get('osm_id')
    if osm_id is None:
        # Nothing to look up; avoids a pointless request for ".../None.json".
        return {}
    # Nominatim results may be nodes, ways or relations. Requesting the id under
    # the wrong element type yields a 404 or an unrelated element, so the type
    # reported by Nominatim is used ('way' only as fallback when it is absent).
    osm_type = location.raw.get('osm_type') or 'way'

    # A timeout keeps a stalled connection from blocking the caller forever.
    r = request('get', f"https://www.openstreetmap.org/api/0.6/{osm_type}/{osm_id}.json", headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:106.0) Gecko/20100101 Firefox/106.0'},
        timeout=30)
    if not r.ok:
        return {}
    return json.loads(r.text)

def append_data(dataset, to_append):
    """
    Small convenience wrapper that adds one entry to the 'data' list
    stored under ``dataset['value']``

    :param dataset: Dataset to append data to (modified in place)
    :param to_append: Data to append
    """
    records = dataset['value']['data']
    records.append(to_append)

In [14]:
# Collect the distinct addresses of all organizations.
addresses = {organization['address'] for organization in dep_data['value']['data']}

# Organizations without an address carry an empty string. discard() is used
# because remove() raises KeyError when no such entry exists.
addresses.discard('')

osm_data = {
        "value": {
            "total": 0,
            "size": 0,
            "language": "en",
            "data": []
        }
    }
# OSM tags copied into the generated dataset; tags missing on an element are
# stored as empty strings.
tags_to_use = {'addr:city',
               'addr:country',
               'addr:housenumber',
               'addr:postcode',
               'addr:street',
               'alt_name',
               'amenity',
               'email',
               'long_name',
               'name',
               'name:en',
               'name:it',
               'old_name',
               'opening_hours',
               'phone',
               'short_name',
               'website',
               'wheelchair'}
# Pause between two lookups, in seconds (the Nominatim usage policy allows at
# most 1 request per second).
REQUEST_DELAY_SECONDS = 1.5

# Sets have no stable iteration order across runs; sorting makes the saved
# file reproducible.
for address in sorted(addresses):
    info = get_address_information(address)
    tags = {}
    elements = info.get('elements') if info else None
    if elements:
        element = elements[0]
        # Not every OSM element carries tags, so a missing 'tags' entry is
        # treated like an empty one instead of raising KeyError.
        element_tags = element.get('tags', {})
        for tag in sorted(tags_to_use):
            tags[tag] = element_tags.get(tag, '')
        tags['timestamp'] = element.get('timestamp', '')
    append_data(osm_data, {'address': address, 'osm_tags': tags})
    time.sleep(REQUEST_DELAY_SECONDS)
set_total_size(osm_data)
save_dataset(osm_data, 'generated/buildings', 'json')
