# Daten der Raddauerzählstellen München - Jahreszahlen

## Wie finde ich die Daten?

1. Datensatz im Portal öffnen:
https://open.bydata.de/datasets/https-opendata-muenchen-de-dataset-daten-der-raddauerzaehlstellen-muenchen-jahreszahlen?locale=de
2. Link in Beschreibung (Über diesen Datensatz) öffnen:
https://opendata.muenchen.de/pages/raddauerzaehlstellen
3. Karte in externem Link öffnen:
https://opendata.muenchen.de/dataset/raddauerzaehlstellen-muenchen/resource/211e882d-fadd-468a-bf8a-0014ae65a393?view_id=11a47d6c-0bc1-4bfa-93ea-126089b59c3d

![Karte](img/map.png)

## Wie viele Radfahrende wurden insgesamt von 2020 bis 2024 gemessen?
Lösung:
1. Datensatz im Portal öffnen:
https://open.bydata.de/datasets/https-opendata-muenchen-de-dataset-daten-der-raddauerzaehlstellen-muenchen-jahreszahlen?locale=de
2. Geeignete Distribution finden und Link kopieren:
https://opendata.muenchen.de/dataset/022a11ff-4dcb-4f03-b7dd-a6c94a094587/resource/66be7619-a672-4382-bf88-e3688c5abc2b/download/rad_2008_15min_06_06_23_r.csv
(Link ist für alle Jahre gleich, nur die Jahreszahl muss geändert werden)
3. Einlesen der Downloadlinks über die open.bydata-API
4. Einlesen der CSVs mit pandas
5. Radfahrende aufsummieren

In [None]:
import requests
import pandas as pd

# URL to get the full dataset details from the open.bydata search API;
# the JSON answer is read below for its list of distributions (one per file)
api_url = "https://open.bydata.de/api/hub/search/datasets/https-opendata-muenchen-de-dataset-daten-der-raddauerzaehlstellen-muenchen-jahreszahlen"

# Function to get download links from the API response
def get_download_links(api_url, timeout=30):
    """Collect the download URLs of the 15-minute CSV files, keyed by year.

    Args:
        api_url: Dataset endpoint of the open.bydata search API.
        timeout: Seconds to wait for the API before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        dict mapping the year (int) to the first access URL of the matching
        distribution. Empty if the API did not answer with HTTP 200.
    """
    response = requests.get(api_url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve data from API. Status code: {response.status_code}")
        return {}

    download_links = {}
    for dist in response.json()['result']['distributions']:
        # Collapse whitespace runs (including non-breaking spaces) so the
        # substring test does not depend on how the title was typed.
        title = " ".join(dist['title']['de'].split())
        if "15 Minuten Werte" not in title:
            continue  # not one of the 15-minute files

        year = title.split()[-1]  # the year is expected to be the last word
        if not year.isdecimal():
            # A matching title without a trailing year cannot be keyed; skip
            # it instead of aborting the whole lookup with a ValueError.
            continue
        download_links[int(year)] = dist['access_url'][0]
    return download_links

# Get the download links (year -> CSV URL) and print them as a quick sanity check
download_links = get_download_links(api_url)
print(download_links)

In [None]:
# Function to download and process the data for a given year
def process_year(year, url):
    """Load one year's CSV into a DataFrame.

    Args:
        year: Year label, used only in the progress and error messages.
        url: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object).

    Returns:
        The parsed DataFrame, or an empty DataFrame if the file could not be
        fetched or parsed.
    """
    try:
        df = pd.read_csv(url, low_memory=False)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, OSError) as e:
        # OSError covers FileNotFoundError as well as urllib's URLError and
        # HTTPError, which is what a broken download link actually raises.
        print(f"Error processing year {year}: {e}")
        return pd.DataFrame()  # Return an empty dataframe in case of error
    print(f"Processed year {year}.")
    return df

# Build the year -> DataFrame mapping, loading only the years 2020 through 2024
dataframes_per_year = {
    year: process_year(year, url)
    for year, url in download_links.items()
    if year in range(2020, 2025)
}

In [None]:
# Show column names, dtypes and non-null counts of the 2022 data
dataframes_per_year[2022].info()

In [None]:
# Preview the first rows of the 2022 data
dataframes_per_year[2022].head()

In [None]:
# Calculate the total number of cyclists over all years.
# Years whose download failed are stored as empty DataFrames without a
# 'gesamt' column, so they are skipped. Values are coerced to numbers so a
# stray text entry becomes NaN (which sum() ignores) instead of breaking the sum.
total_cyclists = sum(
    pd.to_numeric(df['gesamt'], errors='coerce').sum()
    for df in dataframes_per_year.values()
    if not df.empty
)

# Output the total number of cyclists measured from 2020 to 2024
print(f"Total number of cyclists measured from 2020 to 2024: {total_cyclists}")

## Wie viele Radfahrer wurden in München im Schnitt pro Jahr gemessen?

In [None]:
# Calculate the average number of cyclists per year.
# Only years that actually delivered data count towards the denominator;
# a failed download (empty DataFrame) contributes nothing to the total either.
years_with_data = sum(1 for df in dataframes_per_year.values() if not df.empty)

# Report NaN rather than dividing by zero when no year could be loaded
average_cyclists_per_year = total_cyclists / years_with_data if years_with_data else float('nan')

# Print the average number of cyclists per year
print(f"Average number of cyclists per year from 2020 to 2024: {average_cyclists_per_year}")

## Wie viele Radfahrer gab es im Schnitt pro Tag im gegebenen Zeitraum?

In [None]:

# Collect every distinct value of the 'datum' column across all loaded years.
# Empty DataFrames (failed downloads) have no 'datum' column and are skipped.
unique_dates = set().union(
    *(df['datum'].unique() for df in dataframes_per_year.values() if not df.empty)
)

total_days = len(unique_dates)

# Report NaN rather than dividing by zero when no data could be loaded
avg_cyclists_per_day = total_cyclists / total_days if total_days else float('nan')

# Print the average number of cyclists per day
print(f"Average number of cyclists per day from 2020 to 2024: {avg_cyclists_per_day:.2f}")

## Wie sieht die Gesamtentwicklung der Radfahrenden pro Jahr im Zeitraum von 2020-2024 aus (alle Zählstellen)?

In [None]:
import matplotlib.pyplot as plt

# Yearly sum of the 'gesamt' column, leaving out years without any rows
total_cyclists_per_year = {
    yr: frame['gesamt'].dropna().sum()
    for yr, frame in dataframes_per_year.items()
    if not frame.empty
}

# Re-key in ascending year order so the line is drawn chronologically
sorted_total_cyclists_per_year = {
    yr: total_cyclists_per_year[yr] for yr in sorted(total_cyclists_per_year)
}

# Parallel x/y sequences for matplotlib
years = list(sorted_total_cyclists_per_year)
totals = [sorted_total_cyclists_per_year[yr] for yr in years]

# Line chart of the yearly totals
plt.figure(figsize=(10, 6))
plt.plot(years, totals)
plt.title('Total Number of Cyclists per Year (2020-2024)')
plt.xlabel('Year')
plt.ylabel('Total Number of Cyclists')
plt.grid(True)
plt.show()

# Echo the numbers behind the chart so they can be checked by eye
print("Yearly totals:", sorted_total_cyclists_per_year)

## Jährlicher Verlauf - Zählstellen im Vergleich: In welchen Monaten radeln die Münchner am liebsten?

In [None]:
# Function to determine the total cyclists per month across different counting stations
def get_monthly_totals(df):
    """Sum the 'gesamt' counts per calendar month.

    Args:
        df: Frame with a 'datum' column (parseable as dates) and a 'gesamt'
            column. The frame is not modified.

    Returns:
        Series of summed counts indexed by the month number of 'datum'
        (only months present in the data appear). An empty float Series if
        ``df`` has no rows.
    """
    if df.empty:
        # Explicit dtype avoids the object-dtype default of a bare pd.Series()
        return pd.Series(dtype='float64')
    # Work on derived Series instead of writing back into the caller's frame
    months = pd.to_datetime(df['datum']).dt.month
    counts = pd.to_numeric(df['gesamt'], errors='coerce')  # text entries -> NaN, skipped by sum
    return counts.groupby(months).sum()

# Monthly totals for every year that has data (year -> Series indexed by month)
monthly_totals_per_year = {year: get_monthly_totals(df) for year, df in dataframes_per_year.items() if not df.empty}

# One column per year, one row per month. Months a year has no data for stay
# NaN on purpose: filling them with 0 would count "not recorded" as
# "zero cyclists" and pull the average down for incomplete years.
combined_monthly_totals = pd.DataFrame(monthly_totals_per_year)

# Average per month across the years that have data for it (mean skips NaN).
# Reindexing to months 1-12 keeps the bars aligned with the 12 tick labels below.
average_monthly_totals = combined_monthly_totals.mean(axis=1).reindex(range(1, 13))

# Plot the average number of cyclists per month
plt.figure(figsize=(12, 6))
average_monthly_totals.plot(kind='bar', color='skyblue')
plt.xlabel('Month')
plt.ylabel('Average Number of Cyclists')
plt.title('Average Number of Cyclists per Month (2020-2024)')
plt.xticks(range(12), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.grid(True)
plt.show()

## Freizeitverhalten versus Nutzung des Fahrrads für den Weg zur Arbeit: Auf welchen Strecken wird mehr unter der Woche geradelt, auf welchen Strecken mehr am Wochenende?

In [None]:
# Helper function to preprocess the dataframe
def preprocess_df(df):
    """Convert 'datum' to datetimes and the count columns to numbers, in place.

    Args:
        df: Frame with the columns 'datum', 'richtung_1', 'richtung_2' and
            'gesamt'. It is modified in place.

    Returns:
        The same DataFrame object. Values in the count columns that cannot be
        read as numbers become NaN. A frame without rows is returned unchanged.
    """
    if df.empty:
        # Failed downloads are stored as column-less empty frames; indexing
        # their columns would raise KeyError, and there is nothing to convert.
        return df
    df['datum'] = pd.to_datetime(df['datum'])
    for column in ('richtung_1', 'richtung_2', 'gesamt'):
        df[column] = pd.to_numeric(df[column], errors='coerce')
    return df

# Preprocess all dataframes in the dictionary to ensure numeric columns
# (preprocess_df also parses 'datum' into datetimes, which the weekday split below relies on)
dataframes_per_year = {year: preprocess_df(df) for year, df in dataframes_per_year.items()}

# Function to determine the total cyclists per day of the week
def get_weekday_weekend_totals(df):
    """Sum both directions per counting station, split into weekdays and weekends.

    Args:
        df: Frame with a datetime 'datum' column plus 'zaehlstelle',
            'richtung_1' and 'richtung_2'. The frame is not modified.

    Returns:
        Tuple ``(weekday_totals, weekend_totals)`` of Series indexed by
        'zaehlstelle'. Two empty float Series if ``df`` has no rows.
    """
    if df.empty:
        return pd.Series(dtype='float64'), pd.Series(dtype='float64')

    # Build the helper columns in a scratch frame so the caller's data does
    # not silently gain 'weekday' and 'total' columns.
    work = pd.DataFrame({
        'zaehlstelle': df['zaehlstelle'],
        'weekday': df['datum'].dt.dayofweek,
        # A missing count in one direction is treated as 0 for the sum
        'total': df['richtung_1'].fillna(0) + df['richtung_2'].fillna(0),
    })

    weekday_totals = work.loc[work['weekday'] < 5].groupby('zaehlstelle')['total'].sum()  # Monday (0) to Friday (4)
    weekend_totals = work.loc[work['weekday'] >= 5].groupby('zaehlstelle')['total'].sum()  # Saturday (5) and Sunday (6)

    return weekday_totals, weekend_totals

# Accumulate per-station totals over all years, separately for weekdays and weekends
weekday_totals = {}
weekend_totals = {}

for year, df in dataframes_per_year.items():
    weekly_totals, weekend_totals_year = get_weekday_weekend_totals(df)
    # Iterating an empty Series yields nothing, so years without data need no special case
    for zaehlstelle, total in weekly_totals.items():
        weekday_totals[zaehlstelle] = weekday_totals.get(zaehlstelle, 0) + total
    for zaehlstelle, total in weekend_totals_year.items():
        weekend_totals[zaehlstelle] = weekend_totals.get(zaehlstelle, 0) + total

# Align both totals on the station name. Plotting two independent Series by
# position would pair up the wrong stations (or fail on a length mismatch)
# as soon as a station appears in only one of them.
station_totals = pd.DataFrame({
    'Weekdays': pd.Series(weekday_totals, dtype='float64'),
    'Weekends': pd.Series(weekend_totals, dtype='float64'),
}).fillna(0)  # a station absent from one group has counted nothing there
weekday_totals_series = station_totals['Weekdays']
weekend_totals_series = station_totals['Weekends']

# Plot the weekday versus weekend totals for each counting station
# NOTE(review): these are absolute sums over five weekdays versus two weekend
# days; a per-day average may be the fairer comparison - confirm intent.
plt.figure(figsize=(14, 7))
bar_width = 0.35
index = range(len(station_totals))

plt.bar(index, weekday_totals_series, bar_width, label='Weekdays', color='skyblue')
plt.bar([i + bar_width for i in index], weekend_totals_series, bar_width, label='Weekends', color='lightcoral')

plt.xlabel('Counting Station')
plt.ylabel('Total Number of Cyclists')
plt.title('Total Number of Cyclists per Counting Station: Weekdays vs Weekends')
plt.xticks([i + bar_width / 2 for i in index], station_totals.index, rotation=90)
plt.legend()
plt.show()

## Welche Jahre waren die Jahre, in denen in München am meisten Fahrrad gefahren wurde?

In [None]:
# Run every yearly frame through preprocess_df so the count columns are numeric
dataframes_per_year = {
    yr: preprocess_df(frame) for yr, frame in dataframes_per_year.items()
}

# Sum 'gesamt' per year, leaving out years that have no rows
total_cyclists_per_year = {}
for yr, frame in dataframes_per_year.items():
    if frame.empty:
        continue
    total_cyclists_per_year[yr] = frame['gesamt'].dropna().sum()

# Rank the years from busiest to quietest
unsorted_totals = pd.Series(total_cyclists_per_year)
total_cyclists_series = unsorted_totals.sort_values(ascending=False)

# Keep the first five entries of the ranking (adjust the slice for more or fewer)
top_years = total_cyclists_series.index[:5]
top_values = total_cyclists_series.values[:5]

# Report the busiest years
print("Years with the highest cycling activity in Munich:")
for year, total in zip(top_years, top_values):
    print(f"{year}: {total:.0f} cyclists")

# Bar chart of all yearly totals, in ranking order
year_labels = total_cyclists_series.index.astype(str)
plt.figure(figsize=(10, 6))
plt.bar(year_labels, total_cyclists_series.values, color='skyblue')
plt.title('Total Number of Cyclists per Year (2020-2024)')
plt.xlabel('Year')
plt.ylabel('Total Number of Cyclists')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()