# Countries

In [None]:
import pandas as pd

# Source table: the "ISO-3166 countries with regional codes" CSV on GitHub.
URL = 'https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv'

# Every column is read as text. NA detection is switched off so that the
# country code "NA" stays a string instead of being parsed as a missing value.
countries = pd.read_csv(
    URL,
    na_filter=False,
    dtype=str,
)
countries

In [None]:
def to_ascii(val):
    """Return ``val`` lower-cased with every space replaced by a hyphen.

    Despite the name, no transliteration is performed: characters other
    than spaces are only lower-cased.
    """
    lowered = val.lower()
    return '-'.join(lowered.split(' '))
    
# Turn the region and sub-region names into lower-case, hyphenated slugs.
for column in ('region', 'sub-region'):
    countries[column] = countries[column].apply(to_ascii)

# Restrict the table to the four columns listed here.
countries = countries[['alpha-2', 'name', 'region', 'sub-region']]
countries


In [None]:
# Save the trimmed country table as a tab-separated file without the row index.
# index=False is the documented way to omit the index (None only worked because it is falsy).
countries.to_csv('countries.tsv', encoding='utf8', sep='\t', index=False)

# Borders

In [None]:
# Extracts all country boundaries: filters the planet file down to the ways
# and relations ("wr/") tagged admin_level=2 and writes them to temp/.
# --remove-tags: per the osmium-tool manual, objects that are only included as
# references (not matched by the filter) are written without their tags.
# --overwrite replaces an existing output file; `time` reports the run time.
! time osmium tags-filter --remove-tags --overwrite --output="temp/country-borders-raw.osm.pbf" planet-*.osm.pbf "wr/admin_level=2"

In [None]:
# Converts the filtered boundaries from PBF to a GeoJSON text sequence
# (--output-format=geojsonseq), written to temp/country-borders-raw.geo.jsonseq.
# --overwrite replaces an existing output file.
! time osmium export --overwrite --output-format=geojsonseq --output="temp/country-borders-raw.geo.jsonseq" "temp/country-borders-raw.osm.pbf"


In [None]:
import geopandas as gpd

# Columns requested from the exported boundaries. Only the ISO3166-1 code is
# active; the commented-out entries are presumably other tags present in the
# data that can be switched on here when needed.
COUNTRY_TAGS = [
    #'boundary',
    #'admin_level',
    'ISO3166-1',
    #'ISO3166-1:alpha2',
    #'ISO3166-1:alpha3',
    #'ISO3166-1:numeric',
    #'ISO3166-2',
    #'name:en',
    #'name',
    #'alt_name',
    #'wikidata',
    #'wikipedia',
    #'geometry',
]

# Load the GeoJSON sequence written by `osmium export`, restricted to the
# columns selected above.
borders = gpd.read_file(
    'temp/country-borders-raw.geo.jsonseq',
    engine="pyogrio",
    columns=COUNTRY_TAGS,
)
borders

In [None]:
# Discard every feature that has a missing value in any of its columns.
borders = borders.dropna(how='any')
borders

In [None]:
# Visual sanity check: draw all remaining borders on one large map.
borders.plot(cmap='tab20', figsize=(20, 20))

In [None]:
import json
import os
from tqdm import tqdm

# Write one GeoJSON file per border feature, named after its ISO3166-1 code.
# Make sure the output directory exists before the first write.
os.makedirs('extracts', exist_ok=True)

# Iterate the code column directly instead of range(len(...)) + iloc lookups;
# tqdm still shows a total because the column has a length.
for index, CC in enumerate(tqdm(borders['ISO3166-1'])):
    # The list indexer [[index]] keeps a one-row GeoDataFrame, which has to_file().
    borders.iloc[[index]].to_file(f"extracts/{CC}.borders.geojson")

# Regions & sub-regions

In [None]:

# Attach name, region and sub-region to each border by matching its ISO3166-1
# code against the alpha-2 column; the duplicate key column is then dropped.
# The default (inner) join keeps only borders with a matching country row.
borders = (
    borders
    .merge(countries, left_on='ISO3166-1', right_on='alpha-2')
    .drop(columns=['alpha-2'])
)
borders

In [None]:
# Merge the borders of each region into a single geometry; the ISO3166-1
# codes of the merged rows are concatenated with ';'.
regions = (
    borders[['ISO3166-1', 'region', 'geometry']]
    .dissolve(by='region', aggfunc=';'.join)
    .reset_index()
)
regions.plot(cmap='tab20', figsize=(20, 20))

In [None]:
# Display the dissolved table: one row per region with its ';'-joined country codes.
regions

In [None]:
# Merge the borders of each sub-region into a single geometry; the ISO3166-1
# codes of the merged rows are concatenated with ';'.
sub_regions = (
    borders[['ISO3166-1', 'sub-region', 'geometry']]
    .dissolve(by='sub-region', aggfunc=';'.join)
    .reset_index()
)
sub_regions.plot(cmap='tab20', figsize=(20, 20))

In [None]:
#from shapely.geometry import box
#bbox = box(-180, -90, +180, +90)

#sub_regions['geometry'] = sub_regions['geometry'].simplify(0.1)
#sub_regions['geometry'] = sub_regions['geometry'].buffer(1).simplify(1).clip(bbox)
#sub_regions.geometry.concave_hull.plot(cmap='tab20', figsize=(20,20))
#sub_regions.plot(cmap='tab20', figsize=(20,20))

In [None]:
# Display the dissolved table: one row per sub-region with its ';'-joined country codes.
sub_regions

# Split planet into sub-regions

In [None]:
import json
from tqdm import tqdm

# Build the `osmium extract` config that cuts the planet into one PBF per
# sub-region, and save each sub-region's polygon as GeoJSON in temp/.
extracts = []

# Iterate the sub-region column directly instead of range(len(...)) + iloc lookups.
for index, SR in enumerate(tqdm(sub_regions['sub-region'])):
    # The list indexer [[index]] keeps a one-row GeoDataFrame, which has to_file().
    sub_regions.iloc[[index]].to_file(f"temp/{SR}.borders.geojson")

    extracts.append({
        'output': f'temp/{SR}.osm.pbf',
        'output_format': 'pbf',
        'multipolygon': {
            "file_name": f"{SR}.borders.geojson", # Path relative to config!
            "file_type": "geojson"
        }
    })

config = {
    "extracts": extracts
}

# Context manager so the file is closed even if json.dump() raises.
with open('temp/extracts-into-sub-regions.json', 'wt', encoding='UTF8') as fp:
    json.dump(config, fp, sort_keys=True, indent=4, ensure_ascii=False)

In [None]:
# Beware, this line splits the planet into multiple sub-regions and takes hours.
# The extracts to produce are listed in temp/extracts-into-sub-regions.json.
# --strategy=simple: see the osmium-extract manual for how this strategy
# treats objects that cross an extract boundary.
! time osmium extract --overwrite --strategy=simple --config='temp/extracts-into-sub-regions.json' planet-*.osm.pbf

# Split sub-regions into countries

In [None]:
import json
from tqdm import tqdm

# For every sub-region, write an `osmium extract` config that splits the
# sub-region into one PBF per country listed in its ';'-joined ISO3166-1 codes.
for SR, codes in zip(sub_regions['sub-region'], sub_regions['ISO3166-1']):
    extracts = [
        {
            'output': f'extracts/{CC}.osm.pbf',
            'output_format': 'pbf',
            'output_header': {
                'generator': 'https://openstreetdata.org'
            },
            'multipolygon': {
                "file_name": f"../extracts/{CC}.borders.geojson", # relative to config file
                "file_type": "geojson"
            }
        }
        for CC in codes.split(';')
    ]

    config = {
        "extracts": extracts
    }

    # Context manager so the file is closed even if json.dump() raises.
    with open(f'temp/extracts-config-{SR}.json', 'wt', encoding='UTF8') as fp:
        json.dump(config, fp, sort_keys=True, indent=4, ensure_ascii=False)

In [None]:
# Runs `osmium extract` once per sub-region: temp/<sub-region>.osm.pbf is split
# according to temp/extracts-config-<sub-region>.json.
for SR in sub_regions['sub-region']:
    print(SR)
    # {SR} is filled in from the Python variable by IPython's `!` shell escape.
    ! time osmium extract --overwrite --strategy=simple --config="temp/extracts-config-{SR}.json" temp/{SR}.osm.pbf
