In [89]:
import pandas as pd
import requests
import os
import warnings
warnings.filterwarnings("ignore")

# Local folder for the raw KBA workbooks (created on first run).
storage_path = "../../data/raw/kba/"
os.makedirs(storage_path, exist_ok=True)

base_url = "https://www.kba.de/SharedDocs/Downloads/DE/Statistik/Fahrzeuge/FZ10/"
report_type = "fz10"

# Reporting months to fetch, formatted as "YYYY_MM".
year_months = ["2025_10", "2025_09", "2025_08"]

# Download one Excel file per reporting month
for year_month in year_months:
    file_path = os.path.join(storage_path, f"{report_type}_{year_month}.xlsx")

    # Check if file already exists to avoid re-downloading
    if os.path.exists(file_path):
        print(f"File already exists: {file_path}")
        continue

    print("Downloading data for", year_month)
    url = f"{base_url}{report_type}_{year_month}.xlsx?__blob=publicationFile&v=2"

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, timeout=60)
        # Without this check an HTTP error page would be written to disk as
        # ".xlsx" and, because of the exists-check above, never be replaced.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Download failed for {year_month}: {exc}")
        continue

    # Save the file locally
    with open(file_path, "wb") as file:
        file.write(response.content)
    print(f"File downloaded and saved to {file_path}")


File already exists: ../../data/raw/kba/fz10_2025_10.xlsx
File already exists: ../../data/raw/kba/fz10_2025_09.xlsx
File already exists: ../../data/raw/kba/fz10_2025_08.xlsx


In [86]:
# List all downloaded Excel files.
# - keep only *.xlsx so stray files in the folder are not passed to the parser
# - skip "~$..." lock files that Excel creates while a workbook is open
# - sort, because os.listdir() returns entries in arbitrary order
available_files = sorted(
    name
    for name in os.listdir(storage_path)
    if name.endswith(".xlsx") and not name.startswith("~$")
)
print("Downloaded files:", available_files)

Downloaded files: ['fz10_2025_08.xlsx', 'fz10_2025_09.xlsx', 'fz10_2025_10.xlsx']


In [151]:
def data_cleaning(file_path: str, 
                  sheet_name: str = "FZ 10.1", 
                  skiprows: int = 7 
                  ) -> pd.DataFrame:
    """Load one KBA workbook and return it as a long-format DataFrame.

    The reporting month is taken from the file name, which must end in
    ``_<YYYY>_<MM>.<ext>`` (e.g. ``fz10_2025_08.xlsx``).

    Args:
        file_path: Path to the Excel file.
        sheet_name: Worksheet to read.
        skiprows: Number of leading rows skipped before the header row.

    Returns:
        DataFrame with the columns ``OEM``, ``Model``, ``drive_type``,
        ``Value``, ``Date`` (last day of the reporting month) and ``ts_key``
        (``"<OEM>_<Model>_<drive_type>"``).

    Raises:
        ValueError: If the file name does not contain a year and month part.
    """

    # Rename German columns to short English versions
    column_mapping = {
        'Insgesamt': 'Total',
        'mit Dieselantrieb': 'Diesel',
        'mit Hybridantrieb \n(incl. Plug-in-Hybrid)': 'Hybrid_All',
        'Benzin-Hybridantrieb \n(incl. Plug-in-Hybrid)': 'Hybrid_Petrol_All',
        'Diesel-Hybridantrieb \n(incl. Plug-in-Hybrid)': 'Hybrid_Diesel_All',
        'Hybridantrieb \n(ohne Plug-in-Hybrid)': 'Hybrid_NonPlugin',
        'Benzin-Hybridantrieb \n(ohne Plug-in-Hybrid)': 'Hybrid_Petrol_NonPlugin',
        'Diesel-Hybridantrieb \n(ohne Plug-in-Hybrid)': 'Hybrid_Diesel_NonPlugin',
        'Plug-in-Hybridantrieb': 'Hybrid_Plugin',
        'Benzin-Plug-in-Hybridantrieb': 'Hybrid_Petrol_Plugin',
        'Diesel-Plug-in-Hybridantrieb': 'Hybrid_Diesel_Plugin',
        'mit Elektroantrieb (BEV)': 'Electric_BEV',
        'mit Allradantrieb': 'All_Wheel_Drive',
        'Cabriolets': 'Convertibles'
    }

    # Parse year/month from the file NAME only. Splitting the whole path on
    # "_" breaks as soon as any directory name contains an underscore.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    name_parts = stem.split("_")
    if len(name_parts) < 3:
        raise ValueError(
            f"Cannot derive year and month from file name: {file_path!r}"
        )
    year, month = name_parts[-2], name_parts[-1]
    # MonthEnd(0) rolls the first of the month forward to that month's last day.
    year_month_date = pd.to_datetime(f"{year}-{month}-01") + pd.offsets.MonthEnd(0)

    # Load the Excel file
    df = pd.read_excel(file_path, 
                       sheet_name=sheet_name, 
                       skiprows=skiprows)

    # The OEM and model columns have no header text in the sheet
    df = df.rename(columns={"Unnamed: 1": "OEM", 
                            "Unnamed: 2": "Model"})

    # Drop the first row and the last five rows
    # (presumably a sub-header row and trailing footnotes -- confirm against the sheet)
    df = df.iloc[1:-5]

    # Keep only labelled columns; str() guards against non-string labels
    selected_columns = [x for x in df.columns if 'Unnamed' not in str(x)]
    df = df[selected_columns].copy()

    # Filter out rows where OEM is 'ZUSAMMEN'
    df = df[~df['OEM'].str.contains('ZUSAMMEN', na=False)].copy()

    # Fill out missing values in 'OEM' column.
    # Assign the result back: `df['OEM'].ffill(inplace=True)` only modifies a
    # temporary Series and is a no-op under pandas Copy-on-Write.
    df['OEM'] = df['OEM'].ffill()

    # Fill out missing values "-" with 0 
    df = df.replace("-", 0)

    # Validate no missing values remain (warn only; an `assert` would be
    # stripped when Python runs with -O)
    null_counts = df.isnull().sum()
    if null_counts.any():
        print("Warning: There are still missing values in the DataFrame.")
        print(null_counts)

    df = df.rename(columns=column_mapping)

    # Wide -> long: one row per (OEM, Model, drive_type)
    df = df.melt(id_vars=["OEM", "Model"], 
        var_name="drive_type", 
        value_name="Value")
    
    df["Date"] = year_month_date

    df['ts_key'] = df['OEM'] +  "_" + df['Model'] + "_" + df['drive_type']

    return df

In [152]:
# Clean every downloaded workbook and collect the long-format frames.
dfs = []

for file_name in available_files:

    file_path = os.path.join(storage_path, file_name)
    
    print("Cleaning data for file ", file_name)
    
    try:
        df_cleaned = data_cleaning(file_path)
    except Exception as e:
        # Report the problem and skip this file. Falling through to the
        # append below would raise NameError (df_cleaned is unbound) and
        # hide the real error.
        print(e) 
        continue
    
    dfs.append(df_cleaned)
    
# Join all dataframes
df = pd.concat(dfs)

Cleaning data for file  fz10_2025_08.xlsx
Cleaning data for file  fz10_2025_09.xlsx
Cleaning data for file  fz10_2025_10.xlsx


In [153]:
# Sanity check: (rows, columns) of the concatenated long-format frame.
df.shape

(19390, 6)

In [154]:
# Preview the first rows to confirm the column layout after melting.
df.head()

Unnamed: 0,OEM,Model,drive_type,Value,Date,ts_key
0,ALFA ROMEO,GIULIA,Total,85,2025-08-31,ALFA ROMEO_GIULIA_Total
1,ALFA ROMEO,JUNIOR,Total,278,2025-08-31,ALFA ROMEO_JUNIOR_Total
2,ALFA ROMEO,STELVIO,Total,118,2025-08-31,ALFA ROMEO_STELVIO_Total
3,ALFA ROMEO,TONALE,Total,88,2025-08-31,ALFA ROMEO_TONALE_Total
4,ALPINE,A110,Total,14,2025-08-31,ALPINE_A110_Total


In [155]:
# Distinct OEM values across all loaded files, in order of first appearance.
df['OEM'].unique()

array(['ALFA ROMEO', 'ALPINE', 'ASTON MARTIN', 'AUDI', 'BENTLEY', 'BMW',
       'BYD', 'CADILLAC', 'CHERY', 'CITROEN', 'DACIA', 'DAF TRUCKS', 'DS',
       'FERRARI', 'FIAT', 'FISKER', 'FORD', 'GWM', 'HONDA', 'HYUNDAI',
       'INEOS', 'IVECO', 'JAGUAR', 'JEEP', 'KGM', 'KIA', 'LADA',
       'LAMBORGHINI', 'LANCIA', 'LAND ROVER', 'LEAPMOTOR', 'LEXUS',
       'LOTUS', 'LUCID', 'LYNK & CO', 'MAN', 'MASERATI', 'MAXUS', 'MAZDA',
       'MERCEDES', 'MG ROEWE', 'MINI', 'MITSUBISHI', 'MORGAN', 'NIO',
       'NISSAN', 'OPEL', 'PEUGEOT', 'POLESTAR', 'PORSCHE', 'RENAULT',
       'ROLLS ROYCE', 'SEAT', 'SKODA', 'SMART', 'SUBARU', 'SUZUKI',
       'TESLA', 'TOGG', 'TOYOTA', 'VINFAST', 'VOLVO', 'VW', 'XPENG',
       'DEEPAL', 'OMODA'], dtype=object)

In [156]:
# Spot-check one series: rows whose ts_key matches the Alfa Romeo Giulia total.
df.loc[df["ts_key"].eq('ALFA ROMEO_GIULIA_Total')]

Unnamed: 0,OEM,Model,drive_type,Value,Date,ts_key
0,ALFA ROMEO,GIULIA,Total,85,2025-08-31,ALFA ROMEO_GIULIA_Total
0,ALFA ROMEO,GIULIA,Total,46,2025-09-30,ALFA ROMEO_GIULIA_Total
0,ALFA ROMEO,GIULIA,Total,55,2025-10-31,ALFA ROMEO_GIULIA_Total


In [157]:
# Spot-check a second series: rows whose ts_key matches the Mercedes A-Klasse total.
df.loc[df["ts_key"].eq('MERCEDES_A-KLASSE_Total')]

Unnamed: 0,OEM,Model,drive_type,Value,Date,ts_key
242,MERCEDES,A-KLASSE,Total,1354,2025-08-31,MERCEDES_A-KLASSE_Total
248,MERCEDES,A-KLASSE,Total,2062,2025-09-30,MERCEDES_A-KLASSE_Total
251,MERCEDES,A-KLASSE,Total,2454,2025-10-31,MERCEDES_A-KLASSE_Total
