In [None]:
import pandas as pd
from pyrosm import OSM

# Path of the PBF extract to read; replace with the location of your own file
pbf_path = "your_local_path/UK.osm.pbf"
osm = OSM(pbf_path)

# Pull the three layers used further down out of the extract
pois = osm.get_pois()            # points of interest (e.g., amenities)
buildings = osm.get_buildings()  # buildings
network = osm.get_network()      # network data

# Print each frame's column index so the available tags can be inspected
for heading, frame in (
    ("Network Columns:", network),
    ("Buildings Columns:", buildings),
    ("POIs Columns:", pois),
):
    print(heading, frame.columns)

# Define a function to filter amenities and other features
def _matching_rows(frame, tag, wanted=None):
    """Return the rows of *frame* whose *tag* column is set, or equals *wanted*."""
    if wanted is None:
        mask = frame[tag].notna()
    else:
        mask = frame[tag] == wanted
    return frame[mask]


def _point_records(rows, types):
    """Build one ``id/type/latitude/longitude`` dict per row of *rows*."""
    return [
        {'id': osm_id, 'type': kind, 'latitude': lat, 'longitude': lon}
        for osm_id, kind, lat, lon in zip(
            rows['id'], types, rows['lat'], rows['lon']
        )
    ]


def _way_records(rows, types):
    """Build one ``id/type/nodes`` dict per row; 'nodes' is the row's geometry."""
    return [
        {'id': osm_id, 'type': kind, 'nodes': geometry}
        for osm_id, kind, geometry in zip(rows['id'], types, rows['geometry'])
    ]


def extract_features(pois, buildings, network):
    """Group rows of the three OSM frames into per-category record lists.

    Every tag column is optional: when a frame lacks the column a category
    needs, a "No '<tag>' tag found in ..." message is printed (once per
    frame/column pair) and the affected categories stay empty instead of
    raising ``KeyError``.

    Args:
        pois: DataFrame of points of interest. Read columns: ``id``, ``lat``,
            ``lon`` and, when present, ``amenity``, ``natural``, ``historic``
            and ``shop``.
        buildings: DataFrame of buildings. Read columns: ``id``, ``geometry``
            and, when present, ``leisure`` and ``landuse``.
        network: DataFrame of network ways. Read columns: ``id``,
            ``geometry`` and, when present, ``highway``, ``railway`` and
            ``waterway``.

    Returns:
        dict mapping category name to a list of dicts. Point categories
        (amenities, lakes, forests, monuments, schools, hospitals, shops)
        hold ``{'id', 'type', 'latitude', 'longitude'}``; the remaining
        categories hold ``{'id', 'type', 'nodes'}`` where ``nodes`` is the
        row's ``geometry`` value. ``other_features`` is never populated.
        Coordinates are copied as-is, so rows without ``lat``/``lon`` values
        keep whatever missing-value marker the frame holds.

    Raises:
        KeyError: if a frame has matching rows but lacks ``id`` or the
            coordinate/geometry column needed to describe them.
    """
    features = {
        "amenities": [],
        "highways": [],
        "parks": [],
        "lakes": [],
        "forests": [],
        "residential": [],
        "commercial": [],
        "railways": [],
        "schools": [],
        "hospitals": [],
        "monuments": [],
        "shops": [],
        "waterways": [],
        "other_features": []
    }

    in_pois = "POIs"
    in_buildings = "the buildings data"
    in_network = "the network data"

    # One entry per category:
    #   (category, frame, frame description used in messages, tag column,
    #    required tag value or None for "any value",
    #    fixed 'type' label or None to copy the tag value, record builder)
    # NOTE(review): the 'forest' filter on `natural` and the 'park' filter on
    # the buildings frame are kept exactly as they were; confirm these match
    # how the extract actually tags woodland and parks.
    plan = (
        ('amenities', pois, in_pois, 'amenity', None, None, _point_records),
        ('highways', network, in_network, 'highway', None, None, _way_records),
        ('railways', network, in_network, 'railway', None, None, _way_records),
        ('waterways', network, in_network, 'waterway', None, None, _way_records),
        ('parks', buildings, in_buildings, 'leisure', 'park', 'park', _way_records),
        ('lakes', pois, in_pois, 'natural', 'water', 'lake', _point_records),
        ('forests', pois, in_pois, 'natural', 'forest', 'forest', _point_records),
        ('monuments', pois, in_pois, 'historic', 'monument', 'monument', _point_records),
        ('residential', buildings, in_buildings, 'landuse', 'residential', 'residential', _way_records),
        ('commercial', buildings, in_buildings, 'landuse', 'commercial', 'commercial', _way_records),
        ('schools', pois, in_pois, 'amenity', 'school', 'school', _point_records),
        ('hospitals', pois, in_pois, 'amenity', 'hospital', 'hospital', _point_records),
        ('shops', pois, in_pois, 'shop', None, None, _point_records),
    )

    # Several categories share a column; report a missing column only once.
    reported = set()
    for category, frame, where, tag, wanted, label, build in plan:
        if tag not in frame.columns:
            if (where, tag) not in reported:
                reported.add((where, tag))
                print(f"No '{tag}' tag found in {where}.")
            continue

        rows = _matching_rows(frame, tag, wanted)
        # Skip empty selections before touching id/lat/lon/geometry, so a
        # frame lacking those columns is tolerated when nothing matched.
        if rows.empty:
            continue

        types = rows[tag] if label is None else [label] * len(rows)
        features[category].extend(build(rows, types))

    return features

# Run the extraction over the three frames loaded earlier
features = extract_features(pois, buildings, network)

# Build one DataFrame per category, in the order the categories are stored
all_features = [pd.DataFrame(records) for records in features.values()]

# Stack every category into a single table with a fresh running index
all_features_df = pd.concat(all_features, ignore_index=True)

# Write the combined table to disk, leaving the index out of the file
all_features_df.to_csv('osm_features_pyrosm.csv', index=False)

# Show the first rows of the combined table as a quick sanity check
print(all_features_df.head())
