In [2]:
import requests
import zipfile
import io
import os

# Download the vehicle register archive and unpack it into the working directory.
url = "https://opendata.traficom.fi/Content/Ajoneuvorekisteri.zip"
# (connect, read) timeouts in seconds: without them a stalled connection would
# block this cell indefinitely. The read timeout applies per socket read, not
# to the download as a whole.
response = requests.get(url, timeout=(10, 120))
response.raise_for_status()  # fail loudly on HTTP errors instead of unzipping an error page
# The whole archive is held in memory; ZipFile needs a seekable file-like object.
zip_bytes = io.BytesIO(response.content)
with zipfile.ZipFile(zip_bytes) as archive:
    archive.extractall(os.getcwd())

In [3]:
import pandas as pd

# CSV to read from the working directory.
# NOTE(review): presumably this is the file extracted from the downloaded archive;
# the name is hard-coded, so verify it still matches after a new data release.
input_filename = "Ajoneuvojen_avoin_data_5_23.csv"
input_file = os.path.join(os.getcwd(), input_filename)

# Source column name -> name used in the rest of the notebook.
# Only these columns are read from the CSV.
columns = {
    "ensirekisterointipvm": "registration_date",
    "ajoneuvoluokka": "classification",
    "vari": "color",
    "kayttovoima": "driving_force",
    "sahkohybridi": "is_hybrid",
    "merkkiSelvakielinen": "maker",
    "kunta": "municipality",
    "matkamittarilukema": "odometer",
}

df = pd.read_csv(
    input_file,
    sep=";",
    quotechar='"',
    encoding="latin",
    low_memory=False,
    memory_map=True,
    usecols=list(columns),
    # Code-like columns are read as strings so values such as "01" are not
    # turned into numbers.
    dtype={"vari": str, "kayttovoima": str, "kunta": str, "sahkohybridi": str},
)

df = df.rename(columns=columns)

# Filter to only M1 class.
# Built as one chain that returns a new frame, so the later column assignments
# do not operate on a filtered slice of the original.
df = (
    df.loc[df["classification"] == "M1"]
    .drop(columns="classification")
    .reset_index(drop=True)
)

# Clean up dates: values that do not match YYYY-MM-DD become NaT
df["registration_date"] = pd.to_datetime(
    df["registration_date"], format="%Y-%m-%d", errors="coerce"
)

# Driving power grouping
# The hybrid flag is read as text; only the exact string "true" counts as hybrid
# (missing values compare as False).
df["is_hybrid"] = df["is_hybrid"] == "true"

def group_driving_force(row):
    """Return the numeric driving-force group for a single vehicle row.

    Groups: 1 = petrol, 2 = diesel, 3 = hybrid, 4 = electricity, 5 = other.

    Parameters
    ----------
    row : mapping with "is_hybrid" and "driving_force" entries.

    Returns
    -------
    int
        3 whenever the hybrid flag is truthy, regardless of the raw code;
        otherwise the group of the raw code, or 5 for any unlisted code.
    """
    # The hybrid flag takes precedence over the raw driving-force code.
    if row["is_hybrid"]:
        return 3

    # Raw codes that have a group of their own; everything else is "other".
    code_to_group = {"01": 1, "02": 2, "04": 4}
    return code_to_group.get(row["driving_force"], 5)
    
# Vectorised equivalent of group_driving_force: map the raw codes to their
# groups, send every unlisted or missing code to 5 (other), then let the hybrid
# flag override the result with 3. This avoids one Python call per row.
fuel_code_groups = {"01": 1, "02": 2, "04": 4}
df["driving_force"] = (
    df["driving_force"]
    .map(fuel_code_groups)
    .fillna(5)
    .astype("int64")
    .mask(df["is_hybrid"], 3)
)

# Odometer to numeric: unparseable values become NaN, so the column is float
# rather than integer
df["odometer"] = pd.to_numeric(df["odometer"], errors="coerce")

df.head()

Unnamed: 0,registration_date,color,driving_force,is_hybrid,maker,municipality,odometer
0,1984-07-09,1,1,False,Ford,740,
1,1990-05-08,9,1,False,Citroen,91,
2,2003-10-02,6,1,False,Honda,837,284104.0
3,2006-03-17,Y,1,False,Toyota,989,155944.0
4,2007-01-05,2,2,False,Toyota,694,2692651.0


In [4]:
# Number of vehicles per (driving force, municipality) pair.
# NOTE(review): groupby drops rows whose key is NaN by default, so vehicles
# without a municipality are excluded from every count below - confirm this
# is intended.
grouped = df.groupby(["driving_force", "municipality"]).size().reset_index(name="count")
total = grouped["count"].sum()
# Each pair's percentage of all counted vehicles (plain column arithmetic
# instead of a row-wise apply).
grouped["share"] = grouped["count"] / total * 100

# Collapse the municipalities to get one row per driving force.
driving_grouped = grouped.groupby(["driving_force"]).agg({"count": "sum", "share": "sum"}).reset_index()
# Summary row; the "total" label makes the driving_force column hold both
# numbers and a string.
driving_totals = pd.DataFrame({
    "driving_force": ["total"],
    "count": [driving_grouped["count"].sum()],
    "share":  [driving_grouped["share"].sum()]
})
driving = pd.concat([driving_grouped, driving_totals], ignore_index=True)

def driving_force_text(x):
    """Translate a numeric driving-force group into its display label.

    Values equal to 1-5 become "petrol", "diesel", "hybrid", "electricity"
    and "other" respectively. Any other value (for example the "total"
    marker) is returned unchanged.
    """
    labels = (
        (1, "petrol"),
        (2, "diesel"),
        (3, "hybrid"),
        (4, "electricity"),
        (5, "other"),
    )
    # Compared with == one by one, so numerically equal values (e.g. 1.0) match too.
    for group, label in labels:
        if x == group:
            return label
    return x
    
def format_with_whitespace(value):
    """Format a number with spaces as thousands separators.

    Floats that hold a whole number are shown without a decimal part;
    other values are formatted as they are.
    """
    is_whole_float = isinstance(value, float) and value.is_integer()
    number = int(value) if is_whole_float else value
    # Group digits with commas first, then swap the commas for spaces.
    return format(number, ",").replace(",", " ")
    
# Presentation only: label the groups, space-separate the counts and show the
# share as a percentage. The underlying `driving` frame is not modified.
disp = (
    driving.style
    .format({
        "driving_force": driving_force_text,
        "count": format_with_whitespace,
        # Fixed two decimals so every row has the same precision
        # (round() would print 0.8% and 100.0% next to 61.99%).
        "share": lambda x: f"{x:.2f}%",
    })
    .set_caption("M1 class vehicles")
    .hide(axis="index")
)
disp

driving_force,count,share
petrol,1 635 748,61.99%
diesel,617 932,23.42%
hybrid,272 990,10.35%
electricity,90 823,3.44%
other,21 024,0.8%
total,2 638 517,100.0%
