In [1]:
import pandas as pd
import io
import requests
from retrying import retry
import pickle
from collections import defaultdict


################################
# Useful functions
#################################

def retry_if_connection_error(exception):
    return isinstance(exception, ConnectionError)

def retry_if_status_code_not_200(result):
    return result.status_code!=200

# if exception retry with 2 second wait  
@retry(retry_on_exception=retry_if_connection_error, retry_on_result=retry_if_status_code_not_200, wait_fixed=2000)
def safe_request(url, **kwargs):
    return requests.get(url, **kwargs)


######################
# Obtain US state ids
######################
overpass_url = "https://overpass-api.de/api/interpreter"

# Residential lines
overpass_query = f"""
[out:csv(::id,"ISO3166-2",name)];
rel[boundary=administrative][admin_level=4]["ISO3166-2"~"^US"];
map_to_area;
out tags;
"""
response = requests.get(overpass_url, params={'data': overpass_query})

us_state_names = pd.read_csv(io.StringIO(response.text))['@id\tISO3166-2\tname'].str.split('\t',expand=True)
us_state_names.columns = ['overpass_id', 'state_short', 'state']
us_state_names = us_state_names.sort_values('state')

# Not states:
# American Samoa, District of Columbia,
# Guam, Northern Mariana Islands,
# Puerto Rico, United States Virgin Islands
us_state_names = us_state_names[(us_state_names['state']!='American Samoa') &
                                (us_state_names['state']!='District of Columbia') &
                                (us_state_names['state']!='Guam') &
                                (us_state_names['state']!='Northern Mariana Islands') &
                                (us_state_names['state']!='Puerto Rico') &
                                (us_state_names['state']!='United States Virgin Islands')]

state_overpass_ids = list(us_state_names['overpass_id'])

print(len(state_overpass_ids))

50


In [14]:
feature_query_dict = {}

####################
# roads (3 features): total length (in km)
####################
# residential
types = ['residential']
query_string = ""
for type in types:
    query_string+=f"way(area.searchArea)[highway={type}]; "
feature_query_dict['residential_roads'] = query_string

# other
types = ['primary', 'secondary', 'tertiary', 'unclassified', 'service', 'primary_link',
'secondary_link', 'tertiary_link','living_street', 'pedestrian', 'track', 'road']
query_string = ""
for type in types:
    query_string+=f"way(area.searchArea)[highway={type}]; "
feature_query_dict['other_roads'] = query_string

# main
main_road_type = ['motorway', 'trunk', 'motorway_link', 'trunk_link']
query_string = ""
for type in types:
    query_string+=f"way(area.searchArea)[highway={type}]; "
feature_query_dict['main_roads'] = query_string

################
# land use (5 features): total area (in km2)
################
# residential
types = ['residential']
query_string = ""
for type in types:
    query_string+=f"relation(area.searchArea)[landuse={type}]; "
feature_query_dict['residential_land_use'] = query_string
# commercial
types = ['commercial']
query_string = ""
for type in types:
    query_string+=f"relation(area.searchArea)[landuse={type}]; "
feature_query_dict['commercial_land_use'] = query_string
# industrial
types = ['industrial', 'garages', 'port', 'quarry']
query_string = ""
for type in types:
    query_string+=f"relation(area.searchArea)[landuse={type}]; "
feature_query_dict['industrial_land_use'] = query_string
# retail
types = ['retail']
query_string = ""
for type in types:
    query_string+=f"relation(area.searchArea)[landuse={type}]; "
feature_query_dict['retail_land_use'] = query_string
# natural
landuse_types = ['farmland', 'farmyard', 'forest', 'grass',
        'greenfield', 'greenhouse_horticulture',
        'meadow', 'orchard', 'plant_nursery',
        'recreation_ground', 'village_green', 'vineyard']
leisure_types = ['park', 'garden', 'common', 'dog_park','nature_reserve', 'playground']
boundary_types = ['national_park', 'protected_area']
building_types = ['greenhouse']
query_string = ""
for type in landuse_types:
    query_string+=f"relation(area.searchArea)[landuse={type}]; "
for type in leisure_types:
    query_string+=f"relation(area.searchArea)[leisure={type}]; "
for type in boundary_types:
    query_string+=f"relation(area.searchArea)[boundary={type}]; "
for type in building_types:
    query_string+=f"relation(area.searchArea)[building={type}]; "
feature_query_dict['natural_land_use'] = query_string

################
# transport (2 features): count
################
# point
amenity_types = ['bus_station', 'car_rental', 'ferry_terminal']
public_transport_types = ['station', 'platform']
query_string = ""
for type in amenity_types:
    query_string+=f"node(area.searchArea)[amenity={type}]; "
for type in public_transport_types:
    query_string+=f"node(area.searchArea)[public_transport={type}]; "
feature_query_dict['point_transport'] = query_string
# building
amenity_types = ['bus_station', 'car_rental', 'ferry_terminal']
building_types = ['train_station', 'transportation', 'parking']
public_transport_types = ['station', 'platform']
query_string = ""
for type in amenity_types:
    query_string+=f"relation(area.searchArea)[amenity={type}]; "
for type in building_types:
    query_string+=f"relation(area.searchArea)[building={type}]; "
for type in public_transport_types:
    query_string+=f"relation(area.searchArea)[public_transport={type}]; "
feature_query_dict['building_transport'] = query_string

################
# food (2 features): count
################
# point
amenity_types = ['bar', 'biergarten', 'cafe', 'fast_food',
                                       'food_court', 'ice_cream', 'pub', 'restaurant']
shop_types = ['alcohol', 'bakery', 'beverages',
                                    'brewing_supplies', 'butcher', 'cheese',
                                    'chocolate', 'coffee', 'confectionery',
                                    'convenience', 'deli', 'dairy', 'farm',
                                    'frozen_food', 'greengrocer', 'health_food',
                                    'ice_cream', 'organic', 'pasta', 'pastry',
                                    'seafood', 'spices', 'tea', 'water',
                                    'department_store', 'general', 'kiosk', 'mall',
                                    'supermarket', 'wholesale']
query_string = ""
for type in amenity_types:
    query_string+=f"node(area.searchArea)[amenity={type}]; "
for type in shop_types:
    query_string+=f"node(area.searchArea)[shop={type}]; "
feature_query_dict['point_food'] = query_string
# building
amenity_types = ['bar', 'biergarten', 'cafe', 'fast_food',
                                      'food_court', 'ice_cream', 'pub', 'restaurant']
shop_types = ['alcohol', 'bakery', 'beverages',
                                   'brewing_supplies', 'butcher', 'cheese',
                                   'chocolate', 'coffee', 'confectionery',
                                   'convenience', 'deli', 'dairy', 'farm',
                                   'frozen_food', 'greengrocer', 'health_food',
                                   'ice_cream', 'organic', 'pasta', 'pastry',
                                   'seafood', 'spices', 'tea', 'water',
                                   'department_store', 'general', 'kiosk', 'mall',
                                   'supermarket', 'wholesale']
query_string = ""
for type in amenity_types:
    query_string+=f"relation(area.searchArea)[amenity={type}]; "
for type in shop_types:
    query_string+=f"relation(area.searchArea)[shop={type}]; "
feature_query_dict['building_food'] = query_string

################
# health (2 features): count
################
# point
types = ['clinic', 'dentist', 'doctors', 'hospital',
        'pharmacy', 'social_facility', 'veterinary']
query_string = ""
for type in types:
    query_string+=f"node(area.searchArea)[amenity={type}]; "
feature_query_dict['point_health'] = query_string
# building
amenity_types = ['clinic', 'dentist', 'doctors', 'hospital',
                                        'pharmacy', 'social_facility', 'veterinary']
building_types = ['hospital']
query_string = ""
for type in amenity_types:
    query_string+=f"relation(area.searchArea)[amenity={type}]; "
for type in building_types:
    query_string+=f"relation(area.searchArea)[building={type}]; "
feature_query_dict['building_health'] = query_string

################
# education (2 features): count
################
# point
types = ['college', 'kindergarten', 'library',
        'school', 'university', 'research_institute',
        'music_school', 'language_school']
query_string = ""
for type in types:
    query_string+=f"node(area.searchArea)[amenity={type}]; "
feature_query_dict['point_education'] = query_string
# building
amenity_types = ['college', 'kindergarten', 'library',
                'school', 'university', 'research_institute',
                'music_school', 'language_school']
building_types = ['kindergarten', 'school', 'university']
query_string = ""
for type in amenity_types:
    query_string+=f"relation(area.searchArea)[amenity={type}]; "
for type in building_types:
    query_string+=f"relation(area.searchArea)[building={type}]; "
feature_query_dict['building_education'] = query_string

################
# retail (2 features): count
################
# point
shop_types = ['alcohol', 'bakery', 'beverages',
                                      'brewing_supplies', 'butcher', 'cheese',
                                      'chocolate', 'coffee', 'confectionery',
                                      'convenience', 'deli', 'dairy', 'farm',
                                      'frozen_food', 'greengrocer', 'health_food',
                                      'ice_cream', 'organic', 'pasta', 'pastry',
                                      'seafood', 'spices', 'tea', 'water',
                                      'department_store', 'general', 'kiosk', 'mall',
                                      'supermarket', 'wholesale']
amenity_type = ['marketplace', 'post_office']
highway_types = ['rest_area']
query_string = ""
for type in shop_types:
    query_string+=f"node(area.searchArea)[shop={type}]; "
for type in amenity_type:
    query_string+=f"node(area.searchArea)[amenity={type}]; "
for type in highway_types:
    query_string+=f"node(area.searchArea)[highway={type}]; "
feature_query_dict['point_retail'] = query_string
# building
shop_types = ['alcohol', 'bakery', 'beverages',
                                     'brewing_supplies', 'butcher', 'cheese',
                                     'chocolate', 'coffee', 'confectionery',
                                     'convenience', 'deli', 'dairy', 'farm',
                                     'frozen_food', 'greengrocer', 'health_food',
                                     'ice_cream', 'organic', 'pasta', 'pastry',
                                     'seafood', 'spices', 'tea', 'water',
                                     'department_store', 'general', 'kiosk', 'mall',
                                     'supermarket', 'wholesale']
amenity_type = ['marketplace', 'post_office']
highway_types = ['rest_area']
query_string = ""
for type in shop_types:
    query_string+=f"relation(area.searchArea)[shop={type}]; "
for type in amenity_type:
    query_string+=f"relation(area.searchArea)[amenity={type}]; "
for type in highway_types:
    query_string+=f"relation(area.searchArea)[highway={type}]; "
feature_query_dict['building_retail'] = query_string

print(len(feature_query_dict.keys()))
print(feature_query_dict.keys())

18
dict_keys(['residential_roads', 'other_roads', 'main_roads', 'residential_land_use', 'commercial_land_use', 'industrial_land_use', 'retail_land_use', 'natural_land_use', 'point_transport', 'building_transport', 'point_food', 'building_food', 'point_health', 'building_health', 'point_education', 'building_education', 'point_retail', 'building_retail'])


In [124]:
response_data_dict = defaultdict(dict)
final_data_dict = defaultdict(dict)

for overpass_id in state_overpass_ids:
  
  # roads
  for road_type in ['residential_roads', 'other_roads', 'main_roads']:
    overpass_query = f"""
    [out:json];
    area({overpass_id})->.searchArea;
    (
      {feature_query_dict[road_type] }
    );
    make stat number=count(ways),length=sum(length());
    out;
    """

    response = safe_request(overpass_url, params={'data': overpass_query})
    data = response.json()

    response_data_dict[overpass_id][road_type] = data
    #final_data_dict[overpass_id][road_type+'number'] = data['elements'][0]['tags']['number']
    try:
      final_data_dict[overpass_id][road_type+'_length'] = data['elements'][0]['tags']['length'] 
    except:
      print(overpass_id, road_type)

  # land use TBA (5 features)

  # all other
  all_other_list = ['point_transport', 'building_transport', 'point_food', 'building_food',
                    'point_health', 'building_health', 'point_education', 'building_education',
                    'point_retail', 'building_retail']
  for type in all_other_list:
    overpass_query = f"""
    [out:json];
    area({overpass_id})->.searchArea;
    (
      {feature_query_dict[type]}
    );
    out count;
    """
    response = safe_request(overpass_url, params={'data': overpass_query})
    data = response.json()

    response_data_dict[overpass_id][type] = data

    try:
      final_data_dict[overpass_id][type] = data['elements'][0]['tags']['total']
    except:
      print(overpass_id, type)

with open('response_data_dict.pickle', 'wb') as handle:
    pickle.dump(response_data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('final_data_dict.pickle', 'wb') as handle:
    pickle.dump(final_data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

3600165475 other_roads
3600165475 main_roads
3600165475 point_food
3600165475 point_retail
3600162050 other_roads
3600162050 main_roads
3600162050 point_food
3600162050 point_retail
3600161957 other_roads
3600161957 main_roads
3600122586 other_roads
3600165789 main_roads
3600061320 other_roads
3600061320 main_roads
3600224045 other_roads
3600224045 main_roads
3600162061 other_roads
3600162109 main_roads
3600114690 other_roads
3600114690 main_roads
3600224042 other_roads
3600224042 main_roads


In [2]:
with open('../Input_datasets/Mobility_flow_prediction_shared/Large_files/response_data_dict.pickle', 'rb') as f:
    response_data_dict = pickle.load(f)

with open('../Input_datasets/Mobility_flow_prediction_shared/Large_files/final_data_dict.pickle', 'rb') as f:
    final_data_dict = pickle.load(f)

In [13]:
pd.DataFrame(final_data_dict).transpose().isna().sum()

residential_roads_length    0
other_roads_length          9
main_roads_length           9
point_transport             0
building_transport          0
point_food                  2
building_food               0
point_health                0
building_health             0
point_education             0
building_education          0
point_retail                2
building_retail             0
dtype: int64

In [21]:
final_df = pd.DataFrame(final_data_dict).transpose().fillna(0).reset_index().rename(columns={'index':'overpass_id'})
final_df = pd.merge(final_df, us_state_names, on='overpass_id')
final_df.to_csv("../Input_datasets/Mobility_flow_prediction_shared/overpass_features.csv")

# Debugging unparsed data

In [8]:
response_data_dict.keys()

dict_keys(['3600161950', '3601116270', '3600162018', '3600161646', '3600165475', '3600161961', '3600165794', '3600162110', '3600162050', '3600161957', '3600166563', '3600162116', '3600122586', '3600161816', '3600161650', '3600161644', '3600161655', '3600224922', '3600063512', '3600162112', '3600061315', '3600165789', '3600165471', '3600161943', '3600161638', '3600162115', '3600161648', '3600165473', '3600067213', '3600224951', '3600162014', '3600061320', '3600224045', '3600161653', '3600162061', '3600161645', '3600165476', '3600162109', '3600392915', '3600224040', '3600161652', '3600161838', '3600114690', '3600161993', '3600060759', '3600224042', '3600165479', '3600162068', '3600165466', '3600161991'])

In [18]:
final_data_dict['3600161950']

{'residential_roads_length': '126645806.987',
 'other_roads_length': '108306315.09',
 'main_roads_length': '108306315.09',
 'point_transport': '900',
 'building_transport': '1',
 'point_food': '2512',
 'building_food': '11',
 'point_health': '358',
 'building_health': '21',
 'point_education': '1778',
 'building_education': '169',
 'point_retail': '949',
 'building_retail': '7'}

In [16]:
overpass_query = f"""
    [out:json];
    area({3600165475})->.searchArea;
    (
      {feature_query_dict['other_roads'] }
    );
    make stat number=count(ways),length=sum(length());
    out;
    """

response = safe_request(overpass_url, params={'data': overpass_query})
data = response.json()

In [19]:
feature_query_dict['other_roads']

'way(area.searchArea)[highway=primary]; way(area.searchArea)[highway=secondary]; way(area.searchArea)[highway=tertiary]; way(area.searchArea)[highway=unclassified]; way(area.searchArea)[highway=service]; way(area.searchArea)[highway=primary_link]; way(area.searchArea)[highway=secondary_link]; way(area.searchArea)[highway=tertiary_link]; way(area.searchArea)[highway=living_street]; way(area.searchArea)[highway=pedestrian]; way(area.searchArea)[highway=track]; way(area.searchArea)[highway=road]; '

In [21]:
data

{'version': 0.6,
 'generator': 'Overpass API 0.7.59.2 0994154d',
 'osm3s': {'timestamp_osm_base': '2023-03-05T11:09:38Z',
  'timestamp_areas_base': '2023-03-05T10:46:23Z',
  'copyright': 'The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.'},
 'elements': [],
 'remark': 'runtime error: Query timed out in "area-query" at line 5 after 181 seconds.'}

In [20]:
us_state_names

Unnamed: 0,overpass_id,state_short,state
19,3600161950,US-AL,Alabama
53,3601116270,US-AK,Alaska
25,3600162018,US-AZ,Arizona
10,3600161646,US-AR,Arkansas
38,3600165475,US-CA,California
21,3600161961,US-CO,Colorado
42,3600165794,US-CT,Connecticut
31,3600162110,US-DE,Delaware
26,3600162050,US-FL,Florida
20,3600161957,US-GA,Georgia


# Ad-hoc, messy

In [None]:
import requests

def get_restaurant_count(city):
    overpass_url = "https://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    area["name"="{city}"]->.searchArea;
    (
      node["amenity"="restaurant"](area.searchArea);
    );
    out count;
    """
    response = requests.get(overpass_url, params={'data': overpass_query})
    print(response.status_code)
    data = response.json()
    return data['elements'][0]

print("Number of restaurants in New York:", get_restaurant_count("Budapest"))

In [60]:
overpass_url = "https://overpass-api.de/api/interpreter"
overpass_query = f"""
[out:json];
area["name"="Alabama"]->.searchArea;
(
  relation(area.searchArea)[boundary='national_park'];
);
out geom;
"""
response = requests.get(overpass_url, params={'data': overpass_query})
data = response.json()

In [71]:
data['elements'][0]['members'][3]

{'type': 'way',
 'ref': 129319790,
 'role': 'outer',
 'geometry': [{'lat': 34.7925014, 'lon': -87.3695333},
  {'lat': 34.7929821, 'lon': -87.3694672},
  {'lat': 34.7931946, 'lon': -87.3695481},
  {'lat': 34.7935666, 'lon': -87.3691274},
  {'lat': 34.7942575, 'lon': -87.3704055},
  {'lat': 34.7944435, 'lon': -87.3711012},
  {'lat': 34.7948553, 'lon': -87.3716836},
  {'lat': 34.7952406, 'lon': -87.3725734},
  {'lat': 34.7954664, 'lon': -87.3734146},
  {'lat': 34.7960776, 'lon': -87.3744338},
  {'lat': 34.7967364, 'lon': -87.3760367}]}

In [13]:
# Residential lines
final_data_dict = {}

for overpass_id in state_overpass_ids:
  
  overpass_query = f"""
  [out:json];
  area({overpass_id}})->.searchArea;
  (
    {query_string}
  );
  make stat number=count(ways),length=sum(length());
  out;
  """
  response = requests.get(overpass_url, params={'data': overpass_query})
  data = response.json()

In [72]:
# roads
road_types = ['residential', 'primary', 'secondary', 'tertiary',
                'unclassified', 'service', 'primary_link',
              'secondary_link', 'tertiary_link',
               'living_street', 'pedestrian', 'track', 'road',
             'motorway', 'trunk',
             'motorway_link', 'trunk_link']
query_string = ""
for road_type in road_types:
    query_string+=f"way(area.searchArea)[highway={road_type}]; "

overpass_query = f"""
[out:json];
area(3600161950)->.searchArea;
(
  {query_string}
);
make stat number=count(ways),length=sum(length());
out;
"""
response = requests.get(overpass_url, params={'data': overpass_query})
data = response.json()

In [73]:
data['elements'][0]['tags']['number']
data['elements'][0]['tags']['length']

'244810760.77399'

In [22]:
len(data['elements'])

1

In [10]:
len(data['elements'])

614272

In [111]:
len(data['elements'])

11518

In [25]:
data['elements'][0]

{'type': 'way',
 'id': 6197466,
 'timestamp': '2012-12-27T02:31:55Z',
 'version': 2,
 'changeset': 14420692,
 'user': 'bot-mode',
 'uid': 451693,
 'nodes': [51579309, 51579310, 51579312, 51579315],
 'tags': {'highway': 'residential',
  'name': 'Odell Street',
  'tiger:cfcc': 'A41',
  'tiger:county': 'Autauga, AL',
  'tiger:name_base': 'Odell',
  'tiger:name_type': 'St',
  'tiger:reviewed': 'no',
  'tiger:zip_left': '36066',
  'tiger:zip_right': '36066'}}

In [None]:
overpass_url = "https://overpass-api.de/api/interpreter"
overpass_query = f"""
[out:json];
area["name"="Alabama"]->.searchArea;
(
  node(area.searchArea)[building];
  node(area.searchArea)[highway];
  node(area.searchArea)[railway];
);
out body;
"""
response = requests.get(overpass_url, params={'data': overpass_query})
data = response.json()

In [97]:
data['elements'][0]

{'type': 'way',
 'id': 6227017,
 'nodes': [51952114, 51952117, 51881208],
 'tags': {'highway': 'unclassified',
  'name': 'J J Taylor Trail',
  'note:old_railway_operator': 'Gulf Ports Terminal Railway',
  'old_railway_operator': 'Frisco',
  'railway': 'abandoned',
  'surface': 'unpaved',
  'tiger:cfcc': 'A41',
  'tiger:county': 'Baldwin, AL',
  'tiger:name_base': 'J J Taylor',
  'tiger:name_type': 'Trl',
  'tiger:reviewed': 'no'}}

In [93]:
len(data['elements'])

6926

In [22]:
import geojson
with open('myfile.geojson') as f:
    gj = geojson.load(f)
features = gj['features'][0]

In [32]:
gj['features'][520]

{"geometry": {"coordinates": [-117.072603, 33.019965], "type": "Point"}, "properties": {"access": null, "aeroway": null, "amenity": "restaurant", "beds": null, "building": null, "capacity": null, "denomination": null, "emergency": null, "fuel": null, "health_facility_bed": null, "health_facility_level": null, "health_facility_type": null, "healthcare": null, "historic": null, "isced_level": null, "landuse": null, "leisure": null, "medical_system_western": null, "name": "Casa Lahori", "opening_hours": null, "operator": null, "operator_type": null, "osm_id": 2397160592, "public_transport": null, "religion": null, "rooms": null, "shop": null, "staff_count_doctors": null, "staff_count_nurses": null, "status": null, "toilets_disposal": null, "toilets_handwashing": null, "tourism": null}, "type": "Feature"}