# Convert an OSM PBF file into a JSON file containing its node, way, and relation data

- Use curl to download the .pbf file in a Jupyter Notebook cell, saving it under the file name that the conversion code below reads:
- !curl -o schleswig_holstein.osm.pbf https://download.geofabrik.de/europe/germany/schleswig-holstein-latest.osm.pbf


In [None]:
import osmium
import json

# Handler that keeps every node, way, and relation it is given in memory
class OSMHandler(osmium.SimpleHandler):
    """Collect OSM elements as plain dictionaries and count them.

    The ``node``, ``way`` and ``relation`` methods are the callbacks this
    handler exposes to osmium; each one stores a dictionary describing the
    element it receives and bumps the matching counter.

    Attributes set by ``__init__``:
        nodes / ways / relations: lists of dictionaries, one per element.
        node_count / way_count / relation_count: number of elements seen.
    """

    def __init__(self):
        super().__init__()
        self.nodes, self.ways, self.relations = [], [], []
        self.node_count = self.way_count = self.relation_count = 0

    def node(self, n):
        """Store the id, latitude/longitude and tags of node *n*."""
        self.node_count += 1
        position = n.location
        record = {
            'id': n.id,
            'location': {'lat': position.lat, 'lon': position.lon},
            'tags': dict(n.tags),
        }
        self.nodes.append(record)

    def way(self, w):
        """Store the id, referenced node ids and tags of way *w*."""
        self.way_count += 1
        node_refs = [entry.ref for entry in w.nodes]
        record = {'id': w.id, 'nodes': node_refs, 'tags': dict(w.tags)}
        self.ways.append(record)

    def relation(self, r):
        """Store the id, members (type, ref, role) and tags of relation *r*."""
        self.relation_count += 1
        members = []
        for member in r.members:
            members.append(
                {'type': member.type, 'ref': member.ref, 'role': member.role}
            )
        record = {'id': r.id, 'members': members, 'tags': dict(r.tags)}
        self.relations.append(record)

# Convert an OSM PBF file into one JSON document and report element counts
def process_osm_pbf(osm_pbf_file, json_output_file):
    """Read *osm_pbf_file* with an ``OSMHandler`` and dump the result as JSON.

    Args:
        osm_pbf_file: path of the OSM PBF file handed to the handler.
        json_output_file: path of the JSON file to (over)write.

    The node, way and relation counts are printed first. The output file is
    UTF-8 encoded, indented by four spaces, keeps non-ASCII characters
    unescaped, and has the top-level keys ``nodes``, ``ways`` and
    ``relations``.
    """
    collector = OSMHandler()

    # Feed the whole input file through the handler callbacks
    collector.apply_file(osm_pbf_file)

    # Report how many elements of each kind were collected
    print(f"Node count: {collector.node_count}")
    print(f"Way count: {collector.way_count}")
    print(f"Relation count: {collector.relation_count}")

    payload = dict(
        nodes=collector.nodes,
        ways=collector.ways,
        relations=collector.relations,
    )

    # Serialize everything in one go
    with open(json_output_file, 'w', encoding='utf-8') as out:
        json.dump(payload, out, ensure_ascii=False, indent=4)

    print(f"Conversion complete. JSON saved to {json_output_file}")

# Input PBF file and the JSON file the conversion writes
osm_pbf_file, json_output_file = (
    'schleswig_holstein.osm.pbf',
    'schleswig_holstein_data.json',
)

# Run the conversion for the paths above
process_osm_pbf(osm_pbf_file=osm_pbf_file, json_output_file=json_output_file)

# Storing the data in MongoDB in batches

In [None]:
import ijson
from pymongo import MongoClient
from decimal import Decimal

# Recursively replace Decimal values with floats
def convert_decimals(item):
    """Return *item* with every ``Decimal`` inside it converted to ``float``.

    Dictionaries and lists are rebuilt recursively (dictionary keys are kept
    as they are); a bare ``Decimal`` becomes a ``float``; any other value is
    returned unchanged.
    """
    if isinstance(item, Decimal):
        return float(item)
    if isinstance(item, list):
        return [convert_decimals(element) for element in item]
    if isinstance(item, dict):
        return {key: convert_decimals(value) for key, value in item.items()}
    return item

# Connect to the MongoDB server
client = MongoClient('mongodb://mongodb:27017')

# Database that receives the imported OSM data
db = client['Schleswig_Holstein']

# One collection per OSM element type
nodes_collection = db['nodes_collection']
ways_collection = db['ways_collection']
relations_collection = db['relations_collection']

# Empty the collections first so that re-running this cell does not
# append a second copy of the data
nodes_collection.delete_many({})
ways_collection.delete_many({})
relations_collection.delete_many({})

json_file_path = 'schleswig_holstein_data.json'

# Maximum number of documents sent to MongoDB per insert_many call
batch_size = 20000

# Running totals; updated after every successful insert so they stay
# meaningful even if the import stops part-way
total_nodes = 0
total_ways = 0
total_relations = 0


def _iter_batches(source, prefix, size):
    """Yield lists of at most *size* items streamed from *source* under *prefix*.

    Each item produced by ``ijson.items`` is passed through
    ``convert_decimals`` before it is added to a batch. The final batch may
    be shorter than *size*; nothing is yielded when there are no items.
    """
    batch = []
    for item in ijson.items(source, prefix):
        batch.append(convert_decimals(item))
        if len(batch) >= size:
            yield batch
            batch = []
    if batch:
        yield batch


try:
    # Binary mode: ijson parses bytes; handing it a text-mode file is
    # deprecated and slower.
    with open(json_file_path, 'rb') as file:
        # Process 'nodes'
        for batch in _iter_batches(file, 'nodes.item', batch_size):
            nodes_collection.insert_many(batch)
            total_nodes += len(batch)

        # Rewind so the next pass parses the file from the beginning
        file.seek(0)

        # Process 'ways'
        for batch in _iter_batches(file, 'ways.item', batch_size):
            ways_collection.insert_many(batch)
            total_ways += len(batch)

        file.seek(0)

        # Process 'relations'
        for batch in _iter_batches(file, 'relations.item', batch_size):
            relations_collection.insert_many(batch)
            total_relations += len(batch)

    print(f"Inserted {total_nodes} nodes, {total_ways} ways, and {total_relations} relations into MongoDB.")

except Exception as e:
    # Cell-level boundary: report the failure instead of raising; the
    # total_* counters show how far the import got.
    print(f'Error: {e}')