# Analysing Homegate data

A playground to transform and analyse scraped Homegate data.

## 1. Load data

In [None]:
from frictionless import Resource
# Open the scraped listings through their resource descriptor and show the
# first rows as a quick sanity check.
res = Resource("data/homegate.resource.yaml")
preview = res.to_petl()
preview.display(limit=8)

## 2. Geocode addresses

Using swisstopo's geocoder, translate address strings like `Fähnernstrasse 3 9000 St. Gallen` into a `WKT Point` and an `EGID`.

In [None]:
from urllib import parse
import requests
from frictionless.plugins.json import JsonDialect
from frictionless import Resource

def geo_code_swisstopo(addressString=None, attribute="geom", timeout=10):
    """Geocode an address string with the geo.admin.ch SearchServer API.

    Sends ``addressString`` as a ``locations`` search and reads the ``attrs``
    of the first result in the response.

    Args:
        addressString: Free-text address, e.g.
            "Fähnernstrasse 3 9000 St. Gallen".
        attribute: ``"geom"`` returns a ``POINT (lon lat)`` string built from
            the match's ``lon`` and ``lat`` attributes; ``"egid"`` returns the
            match's ``featureId``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The requested value, or ``None`` when the address is empty, the
        attribute is unknown, the response status is not 200, or the search
        returned no results.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Nothing to look up: do not send an empty (or literal "None") query.
    if not addressString:
        return None
    # Unknown attributes can never yield a value, so skip the network call.
    if attribute not in ("geom", "egid"):
        return None

    params = parse.urlencode(dict(
        type="locations",
        searchText=addressString,  # "Fähnernstrasse 3 9000 St. Gallen"
    ))
    url = f"https://api3.geo.admin.ch/rest/services/ech/SearchServer?{params}"

    # Without a timeout a single stalled connection blocks the whole run.
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        return None

    results = res.json().get("results")
    if not results:
        return None

    # The first result is taken as the most likely match.
    likely_match = results[0]["attrs"]
    if attribute == "geom":
        return f"POINT ({likely_match['lon']} {likely_match['lat']})"
    return likely_match["featureId"]

# Smoke test: geocode one known address for both supported attributes.
sample_address = "Fähnernstrasse 3 9000 St. Gallen"
sample_geom = geo_code_swisstopo(sample_address, "geom")
sample_egid = geo_code_swisstopo(sample_address, "egid")
print(f"GEOM: {sample_geom}, EGID: {sample_egid}")

In [None]:
from frictionless import transform, steps, Resource

# Row-level helpers used by the `field_add` steps below.
def _row_address(row):
    """Build the address string from the street_number, zip and city cells."""
    return f"{row['street_number']}, {row['zip']} {row['city']}"


def _row_geom(row):
    """Geocode the row's address to a WKT point string."""
    return geo_code_swisstopo(row["address"], "geom")


def _row_egid(row):
    """Geocode the row's address to its EGID."""
    return geo_code_swisstopo(row["address"], "egid")


# The CSV write step comes before the row filter, so it is placed ahead of
# dropping rows without a geometry; the GeoJSON write step comes after it.
geocoding_steps = [
    steps.table_normalize(),
    steps.field_add(name="address", function=_row_address),
    steps.table_normalize(),
    steps.field_add(name="_geom", function=_row_geom),
    steps.field_add(name="EGID", function=_row_egid),
    steps.table_normalize(),
    steps.table_write(path="data/homegate-geocoded.csv"),
    steps.row_filter(formula="_geom is not None"),
    steps.table_write(path="data/homegate-geocoded.geojson"),
]

out = transform("data/homegate.resource.yaml", steps=geocoding_steps)

out.to_petl()

## 3. Basic statistics

In [None]:
from frictionless import transform, steps, Resource
from statistics import mean, median

# Mean rent per square metre for each zip code, highest first.
avg = transform(
    "data/homegate-geocoded.csv",
    steps=[
        # Skip listings without an area. Compare by value: `is not ''` tests
        # object identity and only works while every empty cell happens to be
        # the same string object.
        steps.row_filter(formula="area != ''"),
        # Ignore listings of 20 square units or less.
        steps.row_filter(formula="int(area) > 20"),
        steps.table_normalize(),
        steps.field_add(name="price_per_sqm", type="number", formula="int(rent) / int(area)"),
        steps.table_aggregate(group_name="zip", aggregation={"mean": ("price_per_sqm", mean)}),
        steps.table_normalize(),
        steps.row_sort(field_names=["mean"], reverse=True),
    ]
)

avg.to_petl().display(limit=20)

In [None]:
# Median rent per square metre for each zip code, highest first.
# NOTE: this rebinds `avg`, which held the mean table in the previous cell.
avg = transform(
    "data/homegate-geocoded.csv",
    steps=[
        # Skip listings without an area. Compare by value: `is not ''` tests
        # object identity and only works while every empty cell happens to be
        # the same string object.
        steps.row_filter(formula="area != ''"),
        # Ignore listings of 20 square units or less.
        steps.row_filter(formula="int(area) > 20"),
        steps.table_normalize(),
        steps.field_add(name="price_per_sqm", type="number", formula="int(rent) / int(area)"),
        steps.table_aggregate(group_name="zip", aggregation={"median": ("price_per_sqm", median)}),
        steps.table_normalize(),
        steps.row_sort(field_names=["median"], reverse=True),
    ]
)

avg.to_petl().display(limit=20)

## 4. Styling data

In [28]:
from frictionless import transform, steps, Resource

# Price classes for `price_per_sqm`: (lower bound, upper bound, class key).
# `category_mapping` treats the lower bound as inclusive and the upper bound
# as exclusive.
categories = [
    {"from": lower, "to": upper, "cat": key}
    for lower, upper, key in [
        (0, 10, "a"),
        (10, 14, "b"),
        (14, 18, "c"),
        (18, 22, "d"),
        (22, 24, "e"),
        (24, 35, "f"),
    ]
]

# Marker style per class key. Only the fill colour differs between classes;
# every class gets its own dict so the entries can be changed independently.
colors = {
    key: {
        "fillColor": fill,
        "fillOpacity": 0.8,
        "color": "#ffffff",
        "weight": 1.5,
        "opacity": 0.9,
        "radius": 45,
    }
    for key, fill in {
        "a": "#0028b8",
        "b": "#455db5",
        "c": "#8a93b2",
        "d": "#bfa98f",
        "e": "#df9848",
        "f": "#ff8800",
    }.items()
}

def category_mapping(value, mappings):
    """Return the category whose range contains ``value``.

    Args:
        value: Number to classify. ``None`` (no value available) is accepted
            and never matches a category.
        mappings: Iterable of dicts with the keys ``"from"`` (inclusive lower
            bound), ``"to"`` (exclusive upper bound) and ``"cat"`` (category
            key). Entries missing any of these keys are skipped.

    Returns:
        The ``"cat"`` of the first mapping with ``from <= value < to``, or
        ``None`` when nothing matches.
    """
    # A missing value cannot be compared against the bounds.
    if value is None:
        return None
    required_keys = {"from", "to", "cat"}
    for mapping in mappings or ():
        if not required_keys <= mapping.keys():
            continue
        if mapping["from"] <= value < mapping["to"]:
            return mapping["cat"]
    return None

def color_mapping(value, mapping, key):
    """Look up one style property for a category.

    Returns ``mapping[value][key]``, or ``None`` when ``value`` is not a key
    of ``mapping`` or ``key`` is not present in that category's style dict.
    """
    if value not in mapping:
        return None
    style = mapping[value]
    return style[key] if key in style.keys() else None


# Add a price class and the matching marker style columns to every listing
# and write the result to data/homegate-styled.csv.
styled = transform(
    "data/homegate-geocoded.csv",
    steps=[
        # Skip listings without an area. Compare by value: `is not ''` tests
        # object identity and only works while every empty cell happens to be
        # the same string object.
        steps.row_filter(formula="area != ''"),
        # Ignore listings of 20 square units or less.
        steps.row_filter(formula="int(area) > 20"),
        steps.table_normalize(),
        steps.field_add(name="price_per_sqm", type="number", formula="int(rent) / int(area)"),
        steps.table_normalize(),
        # Price class key from `categories`; None when no range matches.
        steps.field_add(name="cat", type="string", function=lambda x: category_mapping(x["price_per_sqm"], categories)),
        steps.table_normalize(),
        # One column per style property, looked up in `colors` by class key.
        steps.field_add(name="fillColor", type="string", function=lambda x: color_mapping(x["cat"], colors, "fillColor")),
        steps.field_add(name="fillOpacity", type="number", function=lambda x: color_mapping(x["cat"], colors, "fillOpacity")),
        steps.field_add(name="color", type="string", function=lambda x: color_mapping(x["cat"], colors, "color")),
        steps.field_add(name="weight", type="number", function=lambda x: color_mapping(x["cat"], colors, "weight")),
        steps.field_add(name="opacity", type="number", function=lambda x: color_mapping(x["cat"], colors, "opacity")),
        steps.field_add(name="radius", type="integer", function=lambda x: color_mapping(x["cat"], colors, "radius")),
        steps.table_normalize(),
        steps.table_write(path="data/homegate-styled.csv"),
    ]
)

styled.to_petl()

id,object_ref,category,date,type,floor,rent,rent_add,rent_net,rooms,area,year_built,street_number,city,zip,address,_geom,EGID,price_per_sqm,cat,fillColor,fillOpacity,color,weight,opacity,radius
3001284021,A8365,rent,2021-09-30 14:31:14,,,1005,,,,39,,Bürglistrasse 2,St. Gallen,9000,"Bürglistrasse 2, 9000 St. Gallen",POINT (9.38341999053955 47.42958450317383),10729150,25.76923076923077,f,#ff8800,0.8,#ffffff,1.5,0.9,45
3001055869,Bedastrasse.11.10,rent,2021-09-30 14:31:14,,3,990,,,,45,,Bedastrasse 11,St. Gallen,9000,"Bedastrasse 11, 9000 St. Gallen",POINT (9.390883445739746 47.42866134643555),10760060,22.0,e,#df9848,0.8,#ffffff,1.5,0.9,45
3001328645,37011.01.420030,rent,2021-09-30 14:31:14,,2,960,,,,60,1920.0,Badstrasse 8,St. Gallen,9000,"Badstrasse 8, 9000 St. Gallen",POINT (9.352869987487793 47.41908645629883),10703560,16.0,c,#8a93b2,0.8,#ffffff,1.5,0.9,45
3001328644,37011.01.410030,rent,2021-09-30 14:31:14,,1,960,,,,60,1920.0,Badstrasse 8,St. Gallen,9000,"Badstrasse 8, 9000 St. Gallen",POINT (9.352869987487793 47.41908645629883),10703560,16.0,c,#8a93b2,0.8,#ffffff,1.5,0.9,45
3001327306,(6653123),rent,2021-09-30 14:31:14,,GF,1140,160.0,980.0,,48,,Glaserstrasse 3A,St. Gallen,9000,"Glaserstrasse 3A, 9000 St. Gallen",POINT (9.354249954223633 47.41887664794922),10702640,23.75,e,#df9848,0.8,#ffffff,1.5,0.9,45


## 5. Build a package

In [32]:
import json
from frictionless import Resource, Package

## Setup data

# Re-read the styled CSV, declare `_geom` as a plain string field and export
# the table as GeoJSON. `transform`, `steps` and `styled` are not defined in
# this cell; they come from the earlier cells of the notebook.
data = transform(
    Resource("data/homegate-styled.csv"),
    steps=[
        steps.table_normalize(),
        steps.field_update(name="_geom", type="string"),
        steps.table_normalize(),
    ],
)
data.write(path="data/homegate-styled.geojson")

# Turn the `styled` resource into the package's "data" layer and inline the
# exported GeoJSON as its data.
styled.name = "data"
styled["mediatype"] = "application/vnd.simplestyle-extended"
# Read explicitly as UTF-8: the listings contain non-ASCII text (e.g. "ü"),
# and the platform default encoding is not UTF-8 everywhere.
with open("data/homegate-styled.geojson", encoding="utf-8") as json_file:
    styled.data = json.load(json_file)

# Base map layer: a Mapbox style.
background_map = {
    "name": "mapbox-background",
    "path": "mapbox://styles/gemeindescan/ckc4sha4310d21iszp8ri17u2",
    "mediatype": "application/vnd.mapbox-vector-tile",
}

# Overlay layer: WMS layer "ch.bafu.laerm-strassenlaerm_tag" from
# wms.geo.admin.ch, drawn half transparent.
wms_noise = {
    "name": "wms-street-day-noise",
    "path": "https://wms.geo.admin.ch",
    "mediatype": "application/vnd.wms",
    "parameters": {
        "format": "image/png",
        "transparent": True,
        "layers": "ch.bafu.laerm-strassenlaerm_tag",
        "opacity": 0.5,
    },
}

## Build package

# Layers bundled into the snapshot, in the order they are listed in the package.
pkg_resources = [styled, wms_noise, background_map]

# Attribution entries for the listing data and the base map.
pkg_sources = [
    {"url": "https://homegate.ch", "title": "Homegate"},
    {
        "title": "Karte: Mapbox, © OpenStreetMap",
        "url": "https://www.openstreetmap.org/copyright",
    },
]

pkg = Package(
    name="01-rent-prices",
    resources=pkg_resources,
    sources=pkg_sources,
)

# Legend entries for the rent-price classes: (fill colour, label). Apart from
# these two values all entries share the same marker style.
price_classes = [
    ("#0028b8", "0-10 CHF/qm"),
    ("#455db5", "10-14 CHF/qm"),
    ("#8a93b2", "14-18 CHF/qm"),
    ("#bfa98f", "18-22 CHF/qm"),
    ("#df9848", "22-24 CHF/qm"),
    ("#ff8800", "24-34 CHF/qm"),
]
price_legend = [
    {
        "fillColor": fill,
        "fillOpacity": 0.8,
        "strokeColor": "#ffffff",
        "strokeOpacity": 0.9,
        "strokeWidth": 1.5,
        "size": 1,
        "shape": "circle",
        "primary": True,
        "label": label,
    }
    for fill, label in price_classes
]

# Single map view that stacks the three layers by resource name.
map_view = {
    "name": "mapview",
    "resources": [
        "mapbox-background",
        "wms-street-day-noise",
        "data",
    ],
    "spec": {
        "attribution": "",
        "bounds": [
            "geo:47.40515506820663,9.321516279706524",
            "geo:47.44307355684985,9.395247605270359",
        ],
        "title": "Mietpreise und Lärmimmissionen Tag",
        "description": "Mietpreise Stadt St. Gallen, Sample: Ende September 2021 und Lärm Immissionen Strasse Tag.",
        "legend": price_legend,
    },
    "specType": "gemeindescanSnapshot",
}
pkg["views"] = [map_view]

# Serialise the finished package as the snapshot file.
with open("snapshots/01-rent-prices.json", "w") as snapshot_file:
    json.dump(pkg, snapshot_file, indent=2)

In [34]:
import json
from frictionless import Resource, Package

## Setup data

# Re-read the styled CSV, declare `_geom` as a plain string field and export
# the table as GeoJSON. `transform`, `steps` and `styled` are not defined in
# this cell; they come from the earlier cells of the notebook.
data = transform(
    Resource("data/homegate-styled.csv"),
    steps=[
        steps.table_normalize(),
        steps.field_update(name="_geom", type="string"),
        steps.table_normalize(),
    ],
)
data.write(path="data/homegate-styled.geojson")

# Turn the `styled` resource into the package's "data" layer and inline the
# exported GeoJSON as its data.
styled.name = "data"
styled["mediatype"] = "application/vnd.simplestyle-extended"
# Read explicitly as UTF-8: the listings contain non-ASCII text (e.g. "ü"),
# and the platform default encoding is not UTF-8 everywhere.
with open("data/homegate-styled.geojson", encoding="utf-8") as json_file:
    styled.data = json.load(json_file)

# Base map layer: a Mapbox style.
background_map = {
    "name": "mapbox-background",
    "path": "mapbox://styles/gemeindescan/ckc4sha4310d21iszp8ri17u2",
    "mediatype": "application/vnd.mapbox-vector-tile",
}

# Overlay layer: WMS layer "ch.are.bauzonen" from wms.geo.admin.ch, drawn
# half transparent.
wms_bauzonen = {
    "name": "wms-bauzonen",
    "path": "https://wms.geo.admin.ch",
    "mediatype": "application/vnd.wms",
    "parameters": {
        "format": "image/png",
        "transparent": True,
        "layers": "ch.are.bauzonen",
        "opacity": 0.5,
    },
}

## Build package

# Layers bundled into the snapshot, in the order they are listed in the package.
pkg_resources = [styled, wms_bauzonen, background_map]

# Attribution entries for the listing data and the base map.
pkg_sources = [
    {"url": "https://homegate.ch", "title": "Homegate"},
    {
        "title": "Karte: Mapbox, © OpenStreetMap",
        "url": "https://www.openstreetmap.org/copyright",
    },
]

pkg = Package(
    name="02-rent-bauzonen",
    resources=pkg_resources,
    sources=pkg_sources,
)

# Legend entries for the rent-price classes: (fill colour, label). Apart from
# these two values all entries share the same marker style.
price_classes = [
    ("#0028b8", "0-10 CHF/qm"),
    ("#455db5", "10-14 CHF/qm"),
    ("#8a93b2", "14-18 CHF/qm"),
    ("#bfa98f", "18-22 CHF/qm"),
    ("#df9848", "22-24 CHF/qm"),
    ("#ff8800", "24-34 CHF/qm"),
]
price_legend = [
    {
        "fillColor": fill,
        "fillOpacity": 0.8,
        "strokeColor": "#ffffff",
        "strokeOpacity": 0.9,
        "strokeWidth": 1.5,
        "size": 1,
        "shape": "circle",
        "primary": True,
        "label": label,
    }
    for fill, label in price_classes
]

# Legend entries for the building-zone overlay: (label, primary flag, fill
# colour). The remaining style values are the same for every zone type.
zone_classes = [
    ("Wohnzonen", True, "#F3AA3C"),
    ("Arbeitszonen", True, "#C49BF9"),
    ("Mischzonen", True, "#E9A8A1"),
    ("Zentrumszonen", True, "#D3B39C"),
    ("Zonen für öffentliche Nutzungen", False, "#8C8C8C"),
    ("eingeschränkte Bauzonen", False, "#A1FC5C"),
    ("Tourismus- und Freizeitzonen", False, "#F29EF9"),
    ("Verkehrszonen innerhalb der Bauzonen", False, "#CCCCCC"),
    ("weitere Bauzonen", False, "#DFD4FC"),
]
zone_legend = [
    {
        "label": label,
        "size": 1,
        "shape": "circle",
        "primary": primary,
        "fillColor": fill,
        "fillOpacity": 0.5,
        "strokeColor": "#222",
        "strokeWidth": 1,
        "strokeOpacity": 0,
    }
    for label, primary, fill in zone_classes
]

# Single map view that stacks the three layers by resource name; price
# classes come first in the legend, followed by the zone types.
map_view = {
    "name": "mapview",
    "resources": [
        "mapbox-background",
        "wms-bauzonen",
        "data",
    ],
    "spec": {
        "attribution": "",
        "bounds": [
            "geo:47.40515506820663,9.321516279706524",
            "geo:47.44307355684985,9.395247605270359",
        ],
        "title": "Mietpreise und Bauzonen",
        "description": "Mietpreise Stadt St. Gallen, Sample: Ende September 2021 und Bauzonen.",
        "legend": price_legend + zone_legend,
    },
    "specType": "gemeindescanSnapshot",
}
pkg["views"] = [map_view]

# Serialise the finished package as the snapshot file.
with open("snapshots/02-rent-bauzonen.json", "w") as snapshot_file:
    json.dump(pkg, snapshot_file, indent=2)