In [1]:
# General OSM highway "type" statistics as a table, for the entire DB Rad+ dataset
# Zur Erstellung des Codes wurde die generative Künstliche Intelligenz (KI) „Claude AI“ des Anbieters Anthropic in Version 3.7 genutzt

import pandas as pd
import pyarrow.parquet as pq

# Input configuration: dataset location and the columns the analysis reads
PARQUET_FILE = "data/network_all_months_plus_25833_length_with_fahrradstrasse.parquet"
# Name of the column holding the street type
COLUMN_TYPE = "type"
# Name of the column holding the segment length
COLUMN_LENGTH = "length_m"

def _aggregate_street_types(parquet_path, column_type, column_length):
    """Accumulate per-street-type segment count and summed length, one row group at a time."""
    street_type_data = {}

    parquet_file = pq.ParquetFile(parquet_path)
    for rg in range(parquet_file.metadata.num_row_groups):
        # Read only the two columns that are needed, so a row group stays small in memory
        df_chunk = parquet_file.read_row_group(
            rg, columns=[column_type, column_length]
        ).to_pandas()

        # Segments without a length cannot contribute to either statistic
        df_chunk = df_chunk.dropna(subset=[column_length])

        # NOTE: pandas' groupby skips null keys by default, so rows whose
        # street type is missing are not counted.
        for street_type, group in df_chunk.groupby(column_type):
            totals = street_type_data.setdefault(street_type, {'count': 0, 'length_m': 0})
            totals['count'] += len(group)
            totals['length_m'] += group[column_length].sum()

    return street_type_data


def _format_street_type_tsv(result_df):
    """Render the summary as tab-delimited text with a header row (lengths with 2 decimals)."""
    lines = ["street_type\tcount\tlength_km\n"]
    rows = result_df[['street_type', 'count', 'length_km']].itertuples(index=False, name=None)
    for street_type, count, length_km in rows:
        lines.append(f"{street_type}\t{count}\t{length_km:.2f}\n")
    return "".join(lines)


def get_all_unique_street_types(parquet_path=None, column_type=None, column_length=None):
    """Summarise the dataset by street type and print the result as two tables.

    The parquet file is processed row group by row group. Rows with a missing
    length are dropped; rows with a missing street type are excluded by the
    groupby. For every remaining street type the number of segments and the
    summed length (divided by 1000 to convert to km) are reported.

    Parameters
    ----------
    parquet_path : str, optional
        Parquet file to read. Defaults to the module-level ``PARQUET_FILE``.
    column_type : str, optional
        Column holding the street type. Defaults to ``COLUMN_TYPE``.
    column_length : str, optional
        Column holding the segment length. Defaults to ``COLUMN_LENGTH``.

    Returns
    -------
    pandas.DataFrame
        Columns ``street_type``, ``count`` and ``length_km``, sorted by
        ``count`` in descending order. Empty (but with these columns) when
        no rows were aggregated.

    Side effects
    ------------
    Prints a fixed-width table and a tab-delimited copy of it to stdout.
    """
    # Resolve defaults at call time so that re-assigning the module constants
    # in a later cell is honoured without re-defining this function.
    parquet_path = PARQUET_FILE if parquet_path is None else parquet_path
    column_type = COLUMN_TYPE if column_type is None else column_type
    column_length = COLUMN_LENGTH if column_length is None else column_length

    print("Processing parquet file to identify unique street types...")
    street_type_data = _aggregate_street_types(parquet_path, column_type, column_length)

    # Explicit columns keep the frame well-formed when nothing was aggregated;
    # without them an empty result has no 'count' column and sorting raises KeyError.
    result_df = pd.DataFrame(
        [
            {
                'street_type': street_type,
                'count': data['count'],
                'length_km': data['length_m'] / 1000,
            }
            for street_type, data in street_type_data.items()
        ],
        columns=['street_type', 'count', 'length_km'],
    )
    result_df = result_df.sort_values('count', ascending=False)

    # Human-readable, fixed-width table
    print("\nAll unique OSM street types in the dataset:")
    print(f"{'Street Type':<25} {'Count':<12} {'Length (km)':<15}")
    print("-" * 53)

    for street_type, count, length_km in result_df.itertuples(index=False, name=None):
        print(f"{street_type:<25} {count:<12,d} {length_km:<15,.2f}")

    print(f"\nTotal unique street types: {len(result_df)}")

    # Tab-delimited copy for pasting into a spreadsheet
    excel_output = _format_street_type_tsv(result_df)

    print("\n\nExcel-friendly table (tab-delimited, copy the following):")
    print("------------------------------------------------------------")
    print(excel_output)

    return result_df

# Run the analysis: this prints the fixed-width table and the tab-delimited
# table, and keeps the returned summary DataFrame (sorted by count, descending)
# in street_types_df.
street_types_df = get_all_unique_street_types()

Processing parquet file to identify unique street types...

All unique OSM street types in the dataset:
Street Type               Count        Length (km)    
-----------------------------------------------------
footway                   212,443      6,125.76       
residential               85,972       3,261.74       
service_driveway          29,602       499.59         
secondary                 26,965       963.41         
service                   23,330       847.52         
tertiary                  18,014       603.71         
cycleway                  15,294       424.69         
path                      14,564       990.31         
primary                   8,942        355.91         
service_parking_aisle     8,147        237.13         
track                     7,439        722.35         
living_street             4,487        167.02         
steps                     3,761        34.04          
unclassified              3,156        134.70         
service_emergency