# Web Scraping Car Data from Statistik Austria

Link: https://www.statistik.at/statistiken/tourismus-und-verkehr/fahrzeuge/kfz-bestand

In [8]:
from bs4 import BeautifulSoup
import json

# Read the locally saved copy of the Statistik Austria "Kfz-Bestand" page.
file_path = 'Kfz-Bestand - STATISTIK AUSTRIA - Die Informationsmanager.htm'
with open(file_path, 'r', encoding='utf-8') as file:
    content = file.read()

# Build the parse tree with the stdlib-backed HTML parser.
soup = BeautifulSoup(content, 'html.parser')

# Maps year (int) -> value of the second right-aligned cell, kept as a
# raw string with non-breaking spaces removed.
data = {}

# Table body rows; the class attribute is matched against either of the
# two exact "odd"/"even" class strings.
rows = soup.find_all("tr", class_=["datatable__tr odd", "datatable__tr even"])

print(f"Found {len(rows)} rows")

for tr in rows:
    # The first (control) cell of a row is expected to hold the year.
    year_cell = tr.find("td", class_="datatable__td dtr-control")
    if not year_cell:
        continue

    # Rows whose first cell is not a plain integer are ignored.
    try:
        year = int(year_cell.get_text().strip())
    except ValueError:
        continue

    # Only the years 2000 through 2020 (inclusive) are of interest.
    if not 2000 <= year <= 2020:
        continue

    # The second right-aligned cell is taken as the Personenkraftwagen
    # (passenger car) column; rows with fewer than two such cells are skipped.
    right_cells = tr.find_all("td", class_="datatable__td datatable__td--right")
    if len(right_cells) < 2:
        continue

    # Strip the non-breaking spaces from the cell text.
    data[year] = right_cells[1].get_text().strip().replace('\xa0', '')

print(data)

# Serialize the collected values; json.dumps turns the int keys into strings.
json_file_path = 'personenkraftwagen_2000_2020.json'
json_data = json.dumps(data, indent=4)

# Persist the JSON text next to the notebook.
with open(json_file_path, 'w') as json_file:
    json_file.write(json_data)

print(f"Data scraped and saved to {json_file_path}")


Found 60 rows
{2000: '4097145', 2005: '4156743', 2006: '4204969', 2007: '4245583', 2008: '4284919', 2009: '4359944', 2010: '4441027', 2011: '4513421', 2012: '4584202', 2013: '4641308', 2014: '4694921', 2015: '4748048', 2016: '4821557', 2017: '4898578', 2018: '4978852', 2019: '5039548', 2020: '5091827'}
Data scraped and saved to personenkraftwagen_2000_2020.json
