In [1]:
# Notwendige Bibliotheken importieren
import pandas as pd
import requests
import random
import datetime

## Laden des Datensatzes

Jetzt laden wir unseren Datensatz. Wir gehen davon aus, dass der Datensatz eine CSV-Datei mit Verkaufsdaten enthält.

Fetch Public and public Parking Data from the APIs

In [2]:
# API URL for Freiburg
api_url_freiburg = "https://api.parkendd.de/Freiburg"

# Fetch the data from the WFS API
response_freiburg = requests.get(api_url_freiburg)

if response_freiburg.status_code == 200:
    public_parking_data = response_freiburg.json()

    # Extract relevant data
    parking_data_public = []
    for site in public_parking_data['lots']:

        if site.get("purpose") and site.get("purpose") != "CAR":
            continue
        
        coords = site.get("coords", {})
        parking_data_public.append({
            "public_id": site.get("id"),
            "name": site.get("name"),
            "address": site.get("address"),
            "latitude": coords.get("lat"),
            "longitude": coords.get("lng"),
            "price_per_hour": random.choice([3.80, 3.20, 1.60]),
            "capacity": site.get("total"),
            "available_space": site.get("free"),
            "opening_time": random.choice(["Weekdays 9AM-11PM", "Weekdays 9AM-7PM", "Weekdays 6AM-6PM", "Monday-Sunday 9AM-11PM"])
        })
    
    # Convert to DataFrame
    df_freiburg = pd.DataFrame(parking_data_public)
    print("Public Parking Data:", df_freiburg.head())
else:
    print("Failed to fetch MobiData BW parking data.")

Public Parking Data:                    public_id                name address   latitude  \
0   freiburgzurunterfuehrung    Zur Unterführung      P5  48.001286   
1  freiburgzentrumoberwiehre  Zentrum Oberwiehre     P21  47.987991   
2         freiburgmartinstor          Martinstor     P18  47.992506   
3        freiburglandratsamt         Landratsamt     P14  48.000059   
4        freiburgkonzerthaus         Konzerthaus      P2  47.995290   

   longitude  price_per_hour  capacity  available_space  \
0   7.844732             3.2       130               42   
1   7.871473             3.2       303              296   
2   7.847365             3.2       140              113   
3   7.857081             1.6       220              124   
4   7.840836             3.2       478              293   

             opening_time  
0       Weekdays 9AM-11PM  
1       Weekdays 9AM-11PM  
2        Weekdays 9AM-7PM  
3        Weekdays 6AM-6PM  
4  Monday-Sunday 9AM-11PM  


In [3]:
# URL for MobiData BW API (public parking data)
api_url_gebündelte_parkplätze_parkbauten = "https://api.mobidata-bw.de/park-api/api/public/v3/parking-sites"

# Fetch data from MobiData BW API
response_gebündelte_parkplätze_parkbauten = requests.get(api_url_gebündelte_parkplätze_parkbauten)

# Check the response and parse the JSON data
if response_gebündelte_parkplätze_parkbauten.status_code == 200:
    public_parking_data = response_gebündelte_parkplätze_parkbauten.json()
    
    # Extract relevant data (you can modify based on the actual response structure)
    parking_data_public = []
    for site in public_parking_data['items']:
        
        if site.get("purpose") and site.get("purpose") != "CAR":
            continue

        latitude = float(site.get("lat", 0)) 
        longitude = float(site.get("lon", 0))  
        
        if not (47.52 <= latitude <= 49.79 and 7.50 <= longitude <= 10.49):
            continue
        
        parking_data_public.append({
            "public_id": site.get("id"),
            "name": site.get("name"),
            "address": site.get("address"),
            "latitude": latitude,
            "longitude": longitude,
            "price_per_hour": random.choice([3.80, 3.20, 1.60]),
            "capacity": site.get("capacity"),
            "available_space": site.get("realtime_free_capacity"),
            "opening_time": random.choice(["Weekdays 9AM-11PM", "Weekdays 9AM-7PM", "Weekdays 6AM-6PM", "Monday-Sunday 9AM-11PM"])
        })
    
    # Convert to DataFrame
    df_public_parking = pd.DataFrame(parking_data_public)
    print("Public Parking Data:", df_public_parking.head())
else:
    print("Failed to fetch MobiData BW parking data.")


Public Parking Data:    public_id                         name  \
0        887  Listplatz 1 / Bahnhofstraße   
1        888          Obere Wässere 3 - 7   
2        757                Bahnhofstraße   
3        758                Am Südbahnhof   
4        759          Tannenberger Straße   

                                   address   latitude  longitude  \
0  Listplatz 1 / Bahnhofstraße, Reutlingen  48.495983   9.210331   
1          Obere Wässere 3 - 7, Reutlingen  48.488955   9.216701   
2                Bahnhofstraße, Reutlingen  48.497575   9.211077   
3                Am Südbahnhof, Reutlingen  48.482762   9.229821   
4          Tannenberger Straße, Reutlingen  48.511669   9.204728   

   price_per_hour  capacity  available_space      opening_time  
0             3.8     198.0              NaN  Weekdays 6AM-6PM  
1             3.2     181.0              NaN  Weekdays 6AM-6PM  
2             3.2     128.0              NaN  Weekdays 6AM-6PM  
3             3.2       NaN            

In [4]:
df_freiburg

Unnamed: 0,public_id,name,address,latitude,longitude,price_per_hour,capacity,available_space,opening_time
0,freiburgzurunterfuehrung,Zur Unterführung,P5,48.001286,7.844732,3.2,130,42,Weekdays 9AM-11PM
1,freiburgzentrumoberwiehre,Zentrum Oberwiehre,P21,47.987991,7.871473,3.2,303,296,Weekdays 9AM-11PM
2,freiburgmartinstor,Martinstor,P18,47.992506,7.847365,3.2,140,113,Weekdays 9AM-7PM
3,freiburglandratsamt,Landratsamt,P14,48.000059,7.857081,1.6,220,124,Weekdays 6AM-6PM
4,freiburgkonzerthaus,Konzerthaus,P2,47.99529,7.840836,3.2,478,293,Monday-Sunday 9AM-11PM
5,freiburgrotteck,Rotteck,P10,47.995801,7.846298,1.6,310,283,Weekdays 6AM-6PM
6,freiburgwestarkaden,Westarkaden,P22,48.00642,7.827394,3.8,750,380,Weekdays 9AM-11PM
7,freiburgkollegiengebaeude,Kollegiengebäude,P19,47.993431,7.847403,3.2,315,209,Monday-Sunday 9AM-11PM
8,freiburgschwabentor,Schwabentor,P16,47.991264,7.85631,1.6,239,149,Monday-Sunday 9AM-11PM
9,freiburgschwarzwaldcity,Schwarzwald City,P9,47.997206,7.851119,3.8,471,0,Weekdays 9AM-7PM


In [5]:
df_public_parking

Unnamed: 0,public_id,name,address,latitude,longitude,price_per_hour,capacity,available_space,opening_time
0,887,Listplatz 1 / Bahnhofstraße,"Listplatz 1 / Bahnhofstraße, Reutlingen",48.495983,9.210331,3.8,198.0,,Weekdays 6AM-6PM
1,888,Obere Wässere 3 - 7,"Obere Wässere 3 - 7, Reutlingen",48.488955,9.216701,3.2,181.0,,Weekdays 6AM-6PM
2,757,Bahnhofstraße,"Bahnhofstraße, Reutlingen",48.497575,9.211077,3.2,128.0,,Weekdays 6AM-6PM
3,758,Am Südbahnhof,"Am Südbahnhof, Reutlingen",48.482762,9.229821,3.2,,,Weekdays 6AM-6PM
4,759,Tannenberger Straße,"Tannenberger Straße, Reutlingen",48.511669,9.204728,1.6,46.0,,Weekdays 9AM-7PM
...,...,...,...,...,...,...,...,...,...
3336,414,Parkgarage campus.guest,"Universitätsstraße 28-34, 70569 Stuttgart",48.745988,9.109418,1.6,119.0,,Weekdays 6AM-6PM
3337,445,Parkplatz P6 Hermann-Herder-Straße,"Hermann-Herder-Straße 7, 79104 Freiburg",48.002846,7.850061,3.8,7.0,,Weekdays 6AM-6PM
3338,17131,Parkplatz P3 Fröbelstraße,"Fröbelstraße, 71634 Ludwigsburg",48.906314,9.182597,3.2,323.0,,Weekdays 9AM-7PM
3339,17734,Parkplatz,Crailsheim,49.137865,10.067532,3.8,94.0,,Monday-Sunday 9AM-11PM


Data Cleaning

In [6]:
# Clean the Freiburg parking data
df_freiburg_cleaned = df_freiburg.dropna(subset=["public_id", "name", "address", "latitude", "longitude", "price_per_hour", "capacity"])  

# Clean the public parking data
df_public_parking_cleaned = df_public_parking.dropna(subset=["public_id", "name", "address", "latitude", "longitude", "price_per_hour", "capacity"]) 

# Function to fill NaN in 'available_space' with half of 'capacity'
def fill_available_space_with_half_capacity(df):
    df['available_space'] = df['available_space'].fillna(df['capacity'] / 2)
    df['available_space'] = df['available_space'].astype(int)  # Ensure integer values
    return df

# Apply the function to both datasets
df_freiburg_cleaned = fill_available_space_with_half_capacity(df_freiburg_cleaned)
df_public_parking_cleaned = fill_available_space_with_half_capacity(df_public_parking_cleaned)

# Remove duplicates based on parking ID
df_freiburg_cleaned = df_freiburg_cleaned.drop_duplicates(subset=["public_id"])
df_public_parking_cleaned = df_public_parking_cleaned.drop_duplicates(subset=["public_id"])

df_public_parking_cleaned


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['available_space'] = df['available_space'].fillna(df['capacity'] / 2)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['available_space'] = df['available_space'].astype(int)  # Ensure integer values


Unnamed: 0,public_id,name,address,latitude,longitude,price_per_hour,capacity,available_space,opening_time
0,887,Listplatz 1 / Bahnhofstraße,"Listplatz 1 / Bahnhofstraße, Reutlingen",48.495983,9.210331,3.8,198.0,99,Weekdays 6AM-6PM
1,888,Obere Wässere 3 - 7,"Obere Wässere 3 - 7, Reutlingen",48.488955,9.216701,3.2,181.0,90,Weekdays 6AM-6PM
2,757,Bahnhofstraße,"Bahnhofstraße, Reutlingen",48.497575,9.211077,3.2,128.0,64,Weekdays 6AM-6PM
4,759,Tannenberger Straße,"Tannenberger Straße, Reutlingen",48.511669,9.204728,1.6,46.0,23,Weekdays 9AM-7PM
5,483,AQUAtoll P1,"Wilfenseeweg, 74172 Neckarsulm",49.190045,9.244872,1.6,203.0,101,Monday-Sunday 9AM-11PM
...,...,...,...,...,...,...,...,...,...
3335,394,Parkgarage Uni/ Schloss,"Zufahrt über Bismarckstraße / Schloss, 68161 M...",49.484971,8.459501,3.2,701.0,50,Weekdays 9AM-7PM
3336,414,Parkgarage campus.guest,"Universitätsstraße 28-34, 70569 Stuttgart",48.745988,9.109418,1.6,119.0,59,Weekdays 6AM-6PM
3337,445,Parkplatz P6 Hermann-Herder-Straße,"Hermann-Herder-Straße 7, 79104 Freiburg",48.002846,7.850061,3.8,7.0,3,Weekdays 6AM-6PM
3338,17131,Parkplatz P3 Fröbelstraße,"Fröbelstraße, 71634 Ludwigsburg",48.906314,9.182597,3.2,323.0,161,Weekdays 9AM-7PM


Data Integration

In [7]:
# Ensure latitude and longitude columns are numeric and consistent
df_freiburg_cleaned["latitude"] = pd.to_numeric(df_freiburg_cleaned["latitude"], errors="coerce")
df_freiburg_cleaned["longitude"] = pd.to_numeric(df_freiburg_cleaned["longitude"], errors="coerce")

df_public_parking_cleaned["latitude"] = pd.to_numeric(df_public_parking_cleaned["latitude"], errors="coerce")
df_public_parking_cleaned["longitude"] = pd.to_numeric(df_public_parking_cleaned["longitude"], errors="coerce")

# Drop rows with NaN in latitude or longitude
df_freiburg_cleaned = df_freiburg_cleaned.dropna(subset=["latitude", "longitude"])
df_public_parking_cleaned = df_public_parking_cleaned.dropna(subset=["latitude", "longitude"])

# Round latitude and longitude to the same precision for matching
df_freiburg_cleaned["latitude"] = df_freiburg_cleaned["latitude"].round(6)
df_freiburg_cleaned["longitude"] = df_freiburg_cleaned["longitude"].round(6)

df_public_parking_cleaned["latitude"] = df_public_parking_cleaned["latitude"].round(6)
df_public_parking_cleaned["longitude"] = df_public_parking_cleaned["longitude"].round(6)

# Merge the DataFrames, prioritizing df_public_parking_cleaned data
df_combined = pd.merge(
    df_freiburg_cleaned,
    df_public_parking_cleaned,
    on=["latitude", "longitude"],
    how="outer",
    suffixes=("_freiburg", "_public")
)

# Prioritize data from df_public_parking_cleaned
for column in df_public_parking_cleaned.columns:
    if column not in ["latitude", "longitude"]:  # Exclude keys used for merging
        df_combined[column] = df_combined[column + "_public"].combine_first(df_combined[column + "_freiburg"])

# Drop redundant columns
columns_to_drop = [col for col in df_combined.columns if col.endswith("_freiburg") or col.endswith("_public")]
df_combined = df_combined.drop(columns=columns_to_drop)

df_combined



Unnamed: 0,latitude,longitude,public_id,name,address,price_per_hour,capacity,available_space,opening_time
0,47.520513,8.585892,9352.0,Embrach-Rorbas,"Bahnstrasse 9, 8424 Embrach-Rorbas",1.6,28.0,14.0,Weekdays 9AM-11PM
1,47.522619,7.689327,9354.0,Pratteln,"Güterstrasse 19, 4133 Pratteln",3.8,58.0,29.0,Monday-Sunday 9AM-11PM
2,47.523924,8.537127,19807.0,Bülach,"Schaffhauserstrasse 100, 8180 Bülach",1.6,234.0,117.0,Weekdays 9AM-11PM
3,47.524211,9.202017,8993.0,Kradolf,"Hauptstrasse 57, 9214 Kradolf",1.6,16.0,8.0,Weekdays 6AM-6PM
4,47.525776,8.775885,9116.0,Wiesendangen,"Alte Frauenfelderstrasse 3, 8542 Wiesendangen",3.8,35.0,17.0,Weekdays 9AM-11PM
...,...,...,...,...,...,...,...,...,...
2528,49.762093,9.512663,17773.0,Parkplatz,Wertheim (TBB),3.8,9.0,4.0,Weekdays 9AM-11PM
2529,49.762799,9.512175,17771.0,Parkplatz,Wertheim (TBB),1.6,3.0,1.0,Monday-Sunday 9AM-11PM
2530,49.785998,9.501370,17777.0,Parkplatz,Bestenheid,3.8,14.0,7.0,Weekdays 9AM-11PM
2531,49.786031,9.500884,17776.0,Parkplatz,Bestenheid,3.8,11.0,5.0,Weekdays 6AM-6PM


In [8]:
# Define desired column order
desired_columns = [
    "public_id", "name", "address", "latitude", "longitude", "price_per_hour", "capacity", "available_space", "opening_time"
]

# Reorder the columns in the combined DataFrame
df_combined = df_combined[[col for col in desired_columns if col in df_combined.columns]]

df_combined['public_id'] = df_combined['public_id'].apply(lambda x: f"PUB{random.randint(10000, 99999)}")

df_combined

Unnamed: 0,public_id,name,address,latitude,longitude,price_per_hour,capacity,available_space,opening_time
0,PUB96035,Embrach-Rorbas,"Bahnstrasse 9, 8424 Embrach-Rorbas",47.520513,8.585892,1.6,28.0,14.0,Weekdays 9AM-11PM
1,PUB51970,Pratteln,"Güterstrasse 19, 4133 Pratteln",47.522619,7.689327,3.8,58.0,29.0,Monday-Sunday 9AM-11PM
2,PUB81196,Bülach,"Schaffhauserstrasse 100, 8180 Bülach",47.523924,8.537127,1.6,234.0,117.0,Weekdays 9AM-11PM
3,PUB59245,Kradolf,"Hauptstrasse 57, 9214 Kradolf",47.524211,9.202017,1.6,16.0,8.0,Weekdays 6AM-6PM
4,PUB31037,Wiesendangen,"Alte Frauenfelderstrasse 3, 8542 Wiesendangen",47.525776,8.775885,3.8,35.0,17.0,Weekdays 9AM-11PM
...,...,...,...,...,...,...,...,...,...
2528,PUB84573,Parkplatz,Wertheim (TBB),49.762093,9.512663,3.8,9.0,4.0,Weekdays 9AM-11PM
2529,PUB40647,Parkplatz,Wertheim (TBB),49.762799,9.512175,1.6,3.0,1.0,Monday-Sunday 9AM-11PM
2530,PUB21395,Parkplatz,Bestenheid,49.785998,9.501370,3.8,14.0,7.0,Weekdays 9AM-11PM
2531,PUB21501,Parkplatz,Bestenheid,49.786031,9.500884,3.8,11.0,5.0,Weekdays 6AM-6PM


In [9]:
df_combined.to_json('public_parking.json', orient='records', indent=4)

print("DataFrame has been saved to 'public_parking.json'")

DataFrame has been saved to 'public_parking.json'
