# Apartement pice prediction

## Import libraries

In [27]:
import gradio as gr
import pickle
import numpy as np
import pandas as pd
from geopy.distance import geodesic
from geopy.geocoders import Nominatim

## Import data

In [28]:
# Load the data
df_bfs_data = pd.read_csv("bfs_municipality_and_tax_data.csv", sep=',', encoding='utf-8')
df_apartments = pd.read_csv("apartments_data_enriched_lat_lon_combined.csv", sep=',', encoding='utf-8')

df_bfs_data['tax_income'] = df_bfs_data['tax_income'].astype(str).str.replace("'", "").astype(float)

## Locations & Train stations

In [29]:
locations = {
    "Zürich": 261,
    "Kloten": 62,
    "Uster": 198,
    "Illnau-Effretikon": 296,
    "Feuerthalen": 27,
    "Pfäffikon": 177,
    "Ottenbach": 11,
    "Dübendorf": 191,
    "Richterswil": 138,
    "Maur": 195,
    "Embrach": 56,
    "Bülach": 53,
    "Winterthur": 230,
    "Oetwil am See": 157,
    "Russikon": 178,
    "Obfelden": 10,
    "Wald (ZH)": 120,
    "Niederweningen": 91,
    "Dällikon": 84,
    "Buchs (ZH)": 83,
    "Rüti (ZH)": 118,
    "Hittnau": 173,
    "Bassersdorf": 52,
    "Glattfelden": 58,
    "Opfikon": 66,
    "Hinwil": 117,
    "Regensberg": 95,
    "Langnau am Albis": 136,
    "Dietikon": 243,
    "Erlenbach (ZH)": 151,
    "Kappel am Albis": 6,
    "Stäfa": 158,
    "Zell (ZH)": 231,
    "Turbenthal": 228,
    "Oberglatt": 92,
    "Winkel": 72,
    "Volketswil": 199,
    "Kilchberg (ZH)": 135,
    "Wetzikon (ZH)": 121,
    "Zumikon": 160,
    "Weisslingen": 180,
    "Elsau": 219,
    "Hettlingen": 221,
    "Rüschlikon": 139,
    "Stallikon": 13,
    "Dielsdorf": 86,
    "Wallisellen": 69,
    "Dietlikon": 54,
    "Meilen": 156,
    "Wangen-Brüttisellen": 200,
    "Flaach": 28,
    "Regensdorf": 96,
    "Niederhasli": 90,
    "Bauma": 297,
    "Aesch (ZH)": 241,
    "Schlieren": 247,
    "Dürnten": 113,
    "Unterengstringen": 249,
    "Gossau (ZH)": 115,
    "Oberengstringen": 245,
    "Schleinikon": 98,
    "Aeugst am Albis": 1,
    "Rheinau": 38,
    "Höri": 60,
    "Rickenbach (ZH)": 225,
    "Rafz": 67,
    "Adliswil": 131,
    "Zollikon": 161,
    "Urdorf": 250,
    "Hombrechtikon": 153,
    "Birmensdorf (ZH)": 242,
    "Fehraltorf": 172,
    "Weiach": 102,
    "Männedorf": 155,
    "Küsnacht (ZH)": 154,
    "Hausen am Albis": 4,
    "Hochfelden": 59,
    "Fällanden": 193,
    "Greifensee": 194,
    "Mönchaltorf": 196,
    "Dägerlen": 214,
    "Thalheim an der Thur": 39,
    "Uetikon am See": 159,
    "Seuzach": 227,
    "Uitikon": 248,
    "Affoltern am Albis": 2,
    "Geroldswil": 244,
    "Niederglatt": 89,
    "Thalwil": 141,
    "Rorbas": 68,
    "Pfungen": 224,
    "Weiningen (ZH)": 251,
    "Bubikon": 112,
    "Neftenbach": 223,
    "Mettmenstetten": 9,
    "Otelfingen": 94,
    "Flurlingen": 29,
    "Stadel": 100,
    "Grüningen": 116,
    "Henggart": 31,
    "Dachsen": 25,
    "Bonstetten": 3,
    "Bachenbülach": 51,
    "Horgen": 295
}

train_stations = {
    "Zürich HB": (47.378177, 8.540192),
    "Zürich Oerlikon": (47.4116, 8.5446),
    "Zürich Altstetten": (47.3913, 8.4850),
    "Winterthur": (47.4998, 8.7257),
    "Dietikon": (47.4052, 8.4009),
    "Uster": (47.3476, 8.7207),
    "Dübendorf": (47.3978, 8.6189),
    "Wetzikon": (47.3275, 8.7976),
    "Bülach": (47.5202, 8.5385),
    "Meilen": (47.2699, 8.6458),
    "Thalwil": (47.2911, 8.5647),
    "Horgen": (47.2597, 8.5975)
}

## Functions

In [30]:
# Funktion zur Geocodierung (Adresse zu Koordinaten)
def get_coordinates_from_address(address):
    geolocator = Nominatim(user_agent="geoapiExercises")
    location = geolocator.geocode(address)
    if location:
        return location.latitude, location.longitude
    else:
        return None, None
    

# Funktion zur Berechnung der Distanz zum nächsten Bahnhof
def get_nearest_station_distance(lat, lon):
    return min([geodesic((lat, lon), coords).km for coords in train_stations.values()])

# Falls 'distance_to_train_station' nicht existiert, berechne sie
if 'lat' in df_apartments.columns and 'lon' in df_apartments.columns:
    df_apartments['distance_to_train_station'] = df_apartments.apply(lambda row: get_nearest_station_distance(row['lat'], row['lon']), axis=1)
else:
    raise KeyError("Latitude und Longitude Spalten fehlen in den Daten. Kann 'distance_to_train_station' nicht berechnen.")


## Train model

In [31]:
# Neues Modell trainieren
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

features = ['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income', 'distance_to_train_station']
target = 'price'

df_apartments = df_apartments.dropna(subset=features + [target])
X = df_apartments[features]
y = df_apartments[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
random_forest_model = RandomForestRegressor(n_estimators=100, random_state=42)
random_forest_model.fit(X_train, y_train)

## Save model

In [32]:
import pickle

# save model to file
model_filename = "random_forest_regression_appartmentprice_prediction.pkl"
with open(model_filename, mode="bw") as f:
    pickle.dump(random_forest_model, file=f)

## Core prediction function

In [33]:
# Vorhersage-Funktion
def predict_apartment(rooms, area, town, address):
    lat, lon = get_coordinates_from_address(address)
    if lat is None or lon is None:
        return "Ungültige Adresse"
    
    bfs_number = df_bfs_data[df_bfs_data['bfs_name'] == town]['bfs_number'].values
    if len(bfs_number) == 0:
        return "Ungültige Stadtwahl"
    bfs_number = bfs_number[0]
    
    df = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number].copy()
    df.reset_index(inplace=True)
    
    if len(df) != 1:  # if there are more than two records with the same bfs_number return -1
        return -1
    
    df.loc[0, 'rooms'] = rooms
    df.loc[0, 'area'] = area
    df.loc[0, 'distance_to_train_station'] = get_nearest_station_distance(lat, lon)
    
    prediction = random_forest_model.predict(df[['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income', 'distance_to_train_station']])
    return np.round(prediction[0], 0)

## Test prediction function

In [34]:
# Test prediction function
predict_apartment(3, 100, "Zürich", "Bahnhofstrasse 1, Zürich")

GeocoderInsufficientPrivileges: Non-successful status code 403

## Create Gradio UI

In [None]:
# Gradio UI erstellen
iface = gr.Interface(
    fn=predict_apartment,
    inputs=[
        gr.Number(label="Anzahl Zimmer"),
        gr.Number(label="Wohnfläche (m²)"),
        gr.Dropdown(choices=list(df_bfs_data['bfs_name'].unique()), label="Stadt"),
        gr.Textbox(label="Adresse")
    ],
    outputs=[gr.Number(label="Geschätzter Mietpreis (CHF)")],
    title="Wohnungsmietpreis Vorhersage",
    description="Dieses Tool schätzt den Mietpreis einer Wohnung basierend auf Standort, Größe, Anzahl Zimmer und Entfernung zum nächsten Bahnhof.",
    examples=[[3.5, 80, "Zürich", "Bahnhofstrasse 1, Zürich"], [2.5, 60, "Winterthur", "Technikumstrasse 10, Winterthur"]]
)

iface.launch()

* Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.


