# 03 Preprocess Septima

Cut all Septima data down to region excerpts, saved into `../data/processed/{regionname}/{datatype}/{datasetname}.gpkg`, where `datatype` ∈ {point, line, poly} and `datasetname` ranges over all Septima datasets.

This assumes that all data sets from [Septima](https://septima.dk/rida-web/) have been downloaded and saved locally to `data/septima/` (with the same file structure and names as on the website).

In [1]:
# basic stuff
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import pickle

# gis stuff
os.environ['USE_PYGEOS'] = '0' # pygeos/shapely2.0/osmnx conflict solving
import geopandas as gpd
import contextily as cx
from shapely.geometry import Point, LineString
import momepy

# network stuff
import networkx as nx
import osmnx as ox

**Import region boundaries and create subfolders**

In [2]:
# Study regions; each one has a boundary file at
# ../data/processed/{region}/region.gpkg
regions = ["stevns", "skjern", "aabenraa"]

# Region name -> boundary geometry (the geometry stored in row 0 of the
# region file).
# NOTE(review): only the bare geometry is kept, so the CRS of the region file
# is not carried along; the covered_by filters in the cells below presumably
# rely on the Septima layers sharing that CRS -- confirm.
polydict = {
    region: gpd.read_file(f"../data/processed/{region}/region.gpkg").loc[0, "geometry"]
    for region in regions
}

In [11]:
# Create one output subfolder per geometry type inside every region folder
# (existing folders are left untouched).
for region in regions:
    for datatype in ("poly", "line", "point"):
        os.makedirs(f"../data/processed/{region}/{datatype}/", exist_ok=True)

***
# Cut all polygon data

**Arealanvendelse**

In [80]:
# Land-use polygons: drop the columns that are not written to the processed
# file and split multi-part geometries into single parts.
gdf = (
    gpd.read_file("../data/septima/Arealanvendelse/land_anvendelse/land_anvendelse.shp")
    .drop(columns=["tmp_id", "kilde"])
    .explode(index_parts=False)
)

for region in regions:
    # Keep only the parts that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/poly/areal.gpkg", index=False)

del gdf, gdf_cut, inside

**Ejerskab**

In [74]:
# Ownership (Ejerskab) polygons: drop the columns that are not written to the
# processed file and split multi-part geometries into single parts.
gdf = (
    gpd.read_file("../data/septima/Ejerskab/land_ejerskab/land_ejerskab.shp")
    .drop(columns=["tmp_id", "gruppe", "kilde", "importeret"])
    .explode(index_parts=False)
)

for region in regions:
    # Keep only the parts that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/poly/ejerskab.gpkg", index=False)

del gdf, gdf_cut, inside

**Landskaber**

In [83]:
# Protected nature polygons (land_beskyttnatur_flade): drop the columns that
# are not written to the processed file and split multi-part geometries.
gdf = (
    gpd.read_file("../data/septima/Landskaber og beskyttet natur/land_beskyttnatur_flade/land_beskyttnatur_flade.shp")
    .drop(columns=["tmp_id", "refid", "type", "kilde", "importeret"])
    .explode(index_parts=False)
)

for region in regions:
    # Keep only the parts that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/poly/beskyttet-natur.gpkg", index=False)

del gdf, gdf_cut, inside

In [92]:
# Nature park polygons (land_frednatpark): drop the columns that are not
# written to the processed file and split multi-part geometries.
gdf = (
    gpd.read_file("../data/septima/Landskaber og beskyttet natur/land_frednatpark/land_frednatpark.shp")
    .drop(columns=["tmp_id", "refid", "kilde", "importeret"])
    .explode(index_parts=False)
)

for region in regions:
    # Keep only the parts that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/poly/natpark.gpkg", index=False)

del gdf, gdf_cut, inside

In [103]:
# land_vaerdifuld polygons: drop the columns that are not written to the
# processed file and split multi-part geometries into single parts.
gdf = (
    gpd.read_file("../data/septima/Landskaber og beskyttet natur/land_vaerdifuld/land_vaerdifuld.shp")
    .drop(columns=["tmp_id", "refid", "cvrname", "komnr"])
    .explode(index_parts=False)
)

for region in regions:
    # Keep only the parts that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/poly/vaerdifuld.gpkg", index=False)

del gdf, gdf_cut, inside


**Merge *beskyttet natur* and *naturparken***

In [97]:
# For every region, dissolve the nature-park and protected-nature polygons
# into one layer and save it as nature.gpkg (one row per disjoint part).
for region in regions:
    gdf1 = gpd.read_file(f"../data/processed/{region}/poly/natpark.gpkg")[["geometry"]]
    gdf2 = gpd.read_file(f"../data/processed/{region}/poly/beskyttet-natur.gpkg")

    # A CRS mismatch used to skip the region silently, leaving nature.gpkg
    # missing (or stale from an earlier run). Reproject instead; to_crs raises
    # if either layer has no CRS, so a broken input fails loudly.
    if gdf1.crs != gdf2.crs:
        gdf2 = gdf2.to_crs(gdf1.crs)

    # Dissolve each layer on its own, then union the two results
    poly1 = gdf1.unary_union
    poly2 = gdf2.unary_union
    merged = gpd.GeoSeries([poly1, poly2], crs=gdf1.crs).unary_union

    # explode turns a multi-part union back into one row per polygon
    gdf = gpd.GeoDataFrame({"geometry": [merged]}, crs=gdf1.crs).explode(index_parts=False)
    gdf.to_file(f"../data/processed/{region}/poly/nature.gpkg", index=False)
    del gdf, gdf1, gdf2, poly1, poly2, merged

***
# Cut all point data

**Attraktioner**

In [165]:
# land_besoegs layer: keep only type + geometry and record the source layer
# name in a land_type column.
beso = gpd.read_file("../data/septima/Attraktioner/land_besoegs/land_besoegs.shp")
beso = beso[["type", "geometry"]].assign(land_type="besoegs")

In [166]:
# land_fortid layer: keep only type + geometry and record the source layer
# name in a land_type column.
fort = gpd.read_file("../data/septima/Attraktioner/land_fortid/land_fortid.shp")
fort = fort[["type", "geometry"]].assign(land_type="fortid")

In [167]:
# land_landemaerke layer: keep only type + geometry and record the source
# layer name in a land_type column.
merk = gpd.read_file("../data/septima/Attraktioner/land_landemaerke/land_landemaerke.shp")
merk = merk[["type", "geometry"]].assign(land_type="landemaerke")

In [169]:
# land_udflugt layer: keep only type + geometry and record the source layer
# name in a land_type column.
udfl = gpd.read_file("../data/septima/Attraktioner/land_udflugt/land_udflugt.shp")
udfl = udfl[["type", "geometry"]].assign(land_type="udflugt")

In [170]:
# Stack the four attraction layers, split multi-part geometries into single
# parts and renumber the rows from 0.
gdf = (
    pd.concat([beso, fort, merk, udfl])
    .explode(index_parts=False)
    .reset_index(drop=True)
)

In [173]:
# Write the attractions that lie within each region boundary
for region in regions:
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/point/attraktioner.gpkg", index=False)
del gdf, gdf_cut, inside

**Faciliteter**

In [156]:
# facilit_indkoeb layer: keep only type + geometry and record the source
# layer name in a facility_type column.
indkoeb = gpd.read_file("../data/septima/Faciliteter/facilit_indkoeb/facilit_indkoeb.shp")
indkoeb = indkoeb[["type", "geometry"]].assign(facility_type="indkoeb")

In [157]:
# facilit_overnatning layer: keep only type + geometry and record the source
# layer name in a facility_type column.
overnat = gpd.read_file("../data/septima/Faciliteter/facilit_overnatning/facilit_overnatning.shp")
overnat = overnat[["type", "geometry"]].assign(facility_type="overnatning")

In [158]:
# facilit_rasteplads layer: keep only type + geometry and record the source
# layer name in a facility_type column.
raste = gpd.read_file("../data/septima/Faciliteter/facilit_rasteplads/facilit_rasteplads.shp")
raste = raste[["type", "geometry"]].assign(facility_type="rasteplads")

In [159]:
# facilit_service layer: keep only type + geometry and record the source
# layer name in a facility_type column.
serv = gpd.read_file("../data/septima/Faciliteter/facilit_service/facilit_service.shp")
serv = serv[["type", "geometry"]].assign(facility_type="service")

In [163]:
# Stack the four facility layers, split multi-part geometries into single
# parts and renumber the rows from 0.
gdf = (
    pd.concat([serv, raste, overnat, indkoeb])
    .explode(index_parts=False)
    .reset_index(drop=True)
)

In [164]:
# Write the facilities that lie within each region boundary
for region in regions:
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/point/faciliteter.gpkg", index=False)
del gdf, gdf_cut, inside

***
# Cut all line data

**Hastighedsgrænser**

In [None]:
# DATA ISSUE: the speed-limit (Hastighedsgrænser) layer is not cut in this notebook.
# TODO: record what the problem with the source data is.

**Beskyttet natur linje**

In [14]:
# Protected-nature line features (land_beskyttnatur_linje): keep type +
# geometry and split multi-part geometries into single parts.
# NOTE(review): the output file is called vandlob.gpkg -- presumably these
# lines are watercourses; confirm.
gdf = gpd.read_file("../data/septima/Landskaber og beskyttet natur/land_beskyttnatur_linje/land_beskyttnatur_linje.shp")
gdf = gdf[["type", "geometry"]].explode(index_parts=False)

for region in regions:
    # Keep only the lines that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/line/vandlob.gpkg", index=False)
del gdf, gdf_cut, inside

**Skiltede ruter**

In [27]:
### Signposted cycle routes (rute_cykel)

# Only the geometry column is kept (a GeoSeries), split into single parts
gdf = gpd.read_file("../data/septima/Skiltede ruter/rute_cykel/rute_cykel.shp")
gdf = gdf["geometry"].explode(index_parts=False)

for region in regions:
    # Keep only the lines that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/line/cykelrute.gpkg", index=False)
del gdf, gdf_cut, inside

In [31]:
### Signposted hiking routes (rute_vandre)

# Only the geometry column is kept (a GeoSeries), split into single parts
gdf = gpd.read_file("../data/septima/Skiltede ruter/rute_vandre/rute_vandre.shp")
gdf = gdf["geometry"].explode(index_parts=False)

for region in regions:
    # Keep only the lines that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/line/vandrerute.gpkg", index=False)
del gdf, gdf_cut, inside

In [56]:
### Other signposted routes (rute_anden), saved as margrute

# Only the geometry column is kept (a GeoSeries), split into single parts
gdf = gpd.read_file("../data/septima/Skiltede ruter/rute_anden/rute_anden.shp")
gdf = gdf["geometry"].explode(index_parts=False)

for region in regions:
    # Keep only the lines that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/line/margrute.gpkg", index=False)
del gdf, gdf_cut, inside

**Vejmyndighed**

In [12]:
# Road authority lines (vej_myndighed): keep type + geometry and split
# multi-part geometries into single parts.
gdf = gpd.read_file("../data/septima/Vejmyndighed/vej_myndighed/vej_myndighed.shp")
gdf = gdf[["type", "geometry"]].explode(index_parts=False)

for region in regions:
    # Keep only the lines that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/line/myndighed.gpkg", index=False)
del gdf, gdf_cut, inside

**Vejtyper**

In [57]:
# Road type lines (vej_type): keep gruppe, type + geometry and split
# multi-part geometries into single parts.
gdf = gpd.read_file("../data/septima/Vejtyper/vej_type/vej_type.shp")
gdf = gdf[["gruppe", "type", "geometry"]].explode(index_parts=False)

for region in regions:
    # Keep only the lines that lie entirely within the region boundary
    inside = gdf.covered_by(polydict[region])
    gdf_cut = gdf[inside].reset_index(drop=True)
    gdf_cut.to_file(f"../data/processed/{region}/line/vejtype.gpkg", index=False)
del gdf, gdf_cut, inside

**Vej basislinje**
Appears to be the same network as `vej_type` but without the classification (unconfirmed); it is not processed here.

In [59]:
# Intentionally disabled: vej_basislinje is not cut or saved in this notebook.
# gdf = gpd.read_file("../data/septima/Vejtyper/vej_basislinje/vej_basislinje.shp")