# Analysing Homegate data

A playground to transform and analyse scraped Homegate data.

## 1. Load data

In [1]:
from frictionless import Resource
# Descriptor of the scraped Homegate listings.
RESOURCE_DESCRIPTOR = "data/homegate.resource.yaml"

res = Resource(RESOURCE_DESCRIPTOR)

# Preview the first eight rows of the resource as a petl table.
listings_view = res.to_petl()
listings_view.display(limit=8)

id,object_ref,category,date,type,floor,rent,rent_add,rent_net,rooms,area,year_built,street_number,city,zip
3001314971,ig6dw.46r3z,rent,2021-09-30T14:31:08.316264,,4.0,520,,,,19.0,,Blumenbergplatz 5,St. Gallen,9000
3001087436,146.1.10.800384.d8ee1213-8011-11eb-8e73-005056bdbc06,rent,2021-09-30T14:31:08.376172,,3.0,550,,,,,,Linsebühlstrasse 18,St. Gallen,9000
3001237053,152801.01.00402,rent,2021-09-30T14:31:08.396096,,4.0,530,,,,28.0,1962.0,Vonwilstr. 15,St. Gallen,9000
3001051234,10738.10738.1012,rent,2021-09-30T14:31:08.452852,,1.0,600,,,,,,Brauerstrasse 3,St. Gallen,9000
3000763958,StGallen,rent,2021-09-30T14:31:08.540689,,,550,,,,,,Schützengasse,St. Gallen,9000
3001381704,51219.01.6001,rent,2021-09-30T14:31:08.595315,,5.0,100,,,,,1905.0,Burggraben 26,St. Gallen,9000
3001292565,Linsen68.16841,rent,2021-09-30T14:31:08.617437,,1.0,475,,,,,,Linsenbühlstrasse 68,St. Gallen,9000
3001209766,050-07.07.830758.b9c75053-b227-11eb-820b-005056bdbc06,rent,2021-09-30T14:31:08.636397,,4.0,1850,,,,101.0,2000.0,Oberer Graben 37,St. Gallen,9000


## 2. Geocode addresses

Using swisstopo's geocoder, translate address strings like `Fähnernstrasse 3 9000 St. Gallen` into a `WKT Point` and an `EGID`.

In [2]:
from urllib import parse
import requests
from frictionless.plugins.json import JsonDialect
from frictionless import Resource

def geo_code_swisstopo(addressString=None, attribute="geom", timeout=10):
    """Geocode an address string with the swisstopo SearchServer API.

    Only the first (most likely) search result is used.

    Args:
        addressString: Free-text address, e.g. "Fähnernstrasse 3 9000 St. Gallen".
        attribute: Which value to return for the best match:
            "geom" -> WKT point string "POINT (<lon> <lat>)",
            "egid" -> the match's ``featureId`` (used as EGID in this notebook).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The requested value, or None when the address is empty, the service
        does not answer with HTTP 200, nothing matches, or ``attribute`` is
        not one of the supported names.

    Raises:
        requests.exceptions.RequestException: On network failures, including
            a timeout. Without a timeout a single stalled connection would
            block the whole geocoding run.
    """
    # An empty/None address would otherwise be sent as the literal text "None".
    if not addressString:
        return None

    params = parse.urlencode(dict(
        type="locations",
        searchText=addressString,  # "Fähnernstrasse 3 9000 St. Gallen"
    ))
    url = f"https://api3.geo.admin.ch/rest/services/ech/SearchServer?{params}"

    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        return None

    results = res.json().get("results") or []
    if not results:
        return None

    likely_match = results[0]["attrs"]
    if attribute == "geom":
        # WKT axis order is x y, i.e. longitude first.
        return f"POINT ({likely_match['lon']} {likely_match['lat']})"
    if attribute == "egid":
        return likely_match["featureId"]
    return None

# Smoke-test the geocoder on a known address, once per supported attribute.
sample_address = "Fähnernstrasse 3 9000 St. Gallen"
sample_geom = geo_code_swisstopo(sample_address, "geom")
sample_egid = geo_code_swisstopo(sample_address, "egid")
print(f"GEOM: {sample_geom}, EGID: {sample_egid}")

GEOM: POINT (9.362982749938965 47.41105270385742), EGID: 1071458_0


In [3]:
from frictionless import transform, steps, Resource

def _full_address(row):
    """Build the address string handed to the geocoder from a listing row."""
    return f"{row['street_number']}, {row['zip']} {row['city']}"


def _lookup_geom(row):
    """Geocode the row's address to a WKT point (None when nothing is found)."""
    return geo_code_swisstopo(row["address"], "geom")


def _lookup_egid(row):
    """Geocode the row's address to the matched feature id."""
    return geo_code_swisstopo(row["address"], "egid")


# Step order matters: the CSV step comes before the rows without a geometry
# are filtered out, the GeoJSON step comes after.
geocoding_steps = [
    steps.table_normalize(),
    steps.field_add(name="address", function=_full_address),
    steps.table_normalize(),
    steps.field_add(name="_geom", function=_lookup_geom),
    steps.field_add(name="EGID", function=_lookup_egid),
    steps.table_normalize(),
    steps.table_write(path="data/homegate-geocoded.csv"),
    steps.row_filter(formula="_geom is not None"),
    steps.table_write(path="data/homegate-geocoded.geojson"),
]

out = transform("data/homegate.resource.yaml", steps=geocoding_steps)

out.to_petl()

id,object_ref,category,date,type,floor,rent,rent_add,rent_net,rooms,area,year_built,street_number,city,zip,address,_geom,EGID
3000883405,114,rent,2021-09-30 14:31:13.513433,,4,855,,,,60.0,,Zürcher Strasse 70,St. Gallen,9000,"Zürcher Strasse 70, 9000 St. Gallen",POINT (9.35018253326416 47.41749572753906),1070320_0
3001167228,23037.02.430010,rent,2021-09-30 14:31:13.617607,,3,1307,,,,93.0,,Langgasse 155,St. Gallen,9008,"Langgasse 155, 9008 St. Gallen",POINT (9.397712707519531 47.44934844970703),1075536_0
3001016630,23036.02.410020,rent,2021-09-30 14:31:13.639333,,1,1290,,,,94.0,,Langgasse 165,St. Gallen,9008,"Langgasse 165, 9008 St. Gallen",POINT (9.39797592163086 47.45053482055664),2363507_0
3001154674,23037.02.400020,rent,2021-09-30 14:31:13.660529,,GF,1350,,,,98.0,,Langgasse 155,St. Gallen,9008,"Langgasse 155, 9008 St. Gallen",POINT (9.397712707519531 47.44934844970703),1075536_0
3001183467,9008,rent,2021-09-30 14:31:13.696220,,GF,1440,,,,,,Goethestrasse 24,St. Gallen,9008,"Goethestrasse 24, 9008 St. Gallen",POINT (9.381213188171387 47.43441390991211),1073168_0


## 3. Basic statistics

In [4]:
from frictionless import transform, steps, Resource
from statistics import mean, median

# Mean rent per square metre for each zip code, most expensive first.
avg = transform(
    "data/homegate-geocoded.csv",
    steps=[
        # Drop listings without an area. Compare by value: `is not ''` tests
        # object identity and only filters correctly when the empty cell
        # happens to be the very same string object as the literal.
        steps.row_filter(formula="area != ''"),
        # Ignore listings of 20 m² or less.
        steps.row_filter(formula="int(area) > 20"),
        steps.table_normalize(),
        steps.field_add(name="price_per_sqm", type="number", formula="int(rent) / int(area)"),
        steps.table_aggregate(group_name="zip", aggregation={"mean": ("price_per_sqm", mean)}),
        steps.table_normalize(),
        steps.row_sort(field_names=["mean"], reverse=True),
    ]
)

avg.to_petl().display(limit=20)

zip,mean
9011,19.912557860030965
9014,18.648775906376255
9015,18.55923119775
9000,18.558260824277088
9012,17.463714734368953
9016,17.11492787250551
9008,17.03641524671511
9010,16.949081180170214


In [5]:
# Median rent per square metre for each zip code, most expensive first.
avg = transform(
    "data/homegate-geocoded.csv",
    steps=[
        # Drop listings without an area. Compare by value: `is not ''` tests
        # object identity and only filters correctly when the empty cell
        # happens to be the very same string object as the literal.
        steps.row_filter(formula="area != ''"),
        # Ignore listings of 20 m² or less.
        steps.row_filter(formula="int(area) > 20"),
        steps.table_normalize(),
        steps.field_add(name="price_per_sqm", type="number", formula="int(rent) / int(area)"),
        steps.table_aggregate(group_name="zip", aggregation={"median": ("price_per_sqm", median)}),
        steps.table_normalize(),
        steps.row_sort(field_names=["median"], reverse=True),
    ]
)

avg.to_petl().display(limit=20)

zip,median
9011,19.88571428571429
9000,17.72222222222222
9012,17.636363636363637
9014,17.51185322613894
9016,17.4025974025974
9015,17.25
9010,16.8577648766328
9008,16.53488372093023
