In [1]:
import bs4
import requests
import json 
import re

import pandas as pd
import numpy as np

from pymongo import MongoClient

from geopy.distance import geodesic
import osmnx as ox
import networkx as nx
import geopy
import geopandas as gpd
from shapely.geometry import Point

In [2]:
def set_up_mongo(client_str,database_str,collection_str):
    """Return a handle to a MongoDB collection, creating it first if it is absent.

    Parameters
    ----------
    client_str : connection string handed to ``MongoClient``.
    database_str : name of the database to open on that client.
    collection_str : name of the collection to return.

    Returns
    -------
    The collection object obtained by indexing the database with
    ``collection_str``.
    """
    database = MongoClient(client_str)[database_str]

    # explicitly create the collection when the database does not list it yet
    collection_is_missing = collection_str not in database.list_collection_names()
    if collection_is_missing:
        database.create_collection(collection_str)

    return database[collection_str]

In [3]:
# handle to the index collection that receives one document per protected-area uuid
collection = set_up_mongo(
    client_str='mongodb://localhost:27017',
    database_str='webscraping_dataLabKiel',
    collection_str='digitized_planet_indices',
)

## Digitize the Planet

- comprehensive collection of digitized protected areas and regulations for recreational use of nature
- https://content.digitizetheplanet.org/en/

Since a lot of information is missing on this page, we first collect only the UUIDs; they can then be used to query the details of each protected area.

## Get all UUIDs (needed to fetch more info about the areas) and write them into the index collection

In [24]:
# Crawl the paginated protected-area listing and register every area's uuid in
# the index collection. Each new index document starts with "collected": False.
url = "https://content.digitizetheplanet.org/api/protectedarea/?format=json&page=1"

# Follow the "next" links until the API reports no further page. A page is
# processed *before* its link is followed, so the final page (whose "next" is
# None) is included without hard-coding its page number.
while url is not None:
    page = requests.get(url, timeout=30)  # bound the wait on a hanging server
    page.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
    response = page.json()

    for e in response["results"]:
        try:
            uuid = e["uuid"]
            date_mod = e["date_modified"]
        except (KeyError, TypeError):
            # entry lacks the fields we index on -> skip it and keep crawling
            continue

        # Upsert keyed on uuid: a new area is inserted, an already indexed one
        # (including its "collected" flag) is left untouched, so re-running
        # this cell does not create duplicate index entries.
        collection.update_one(
            {"uuid": uuid},
            {"$setOnInsert": {"date_modified": date_mod, "collected": False}},
            upsert=True,
        )

    url = response["next"]

---

# Now collect info about parks in Schleswig-Holstein

In [None]:
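# First attempt: filter by state == "Schleswig-Holstein" or by a name containing "Schleswig-Holstein" -> only 17 results.
# Current approach: keep the areas whose centroid lies inside the Schleswig-Holstein boundary geometry.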

In [71]:
# index collection (uuids + "collected" flag) and the collection receiving the area details
collection_indices = set_up_mongo(
    client_str='mongodb://localhost:27017',
    database_str='webscraping_dataLabKiel',
    collection_str='digitized_planet_indices',
)
collection_data = set_up_mongo(
    client_str='mongodb://localhost:27017',
    database_str='webscraping_dataLabKiel',
    collection_str='digitized_planet_v2',
)

In [76]:
# Fetch the detail record of every not-yet-collected area and store the ones
# whose centroid lies inside the Schleswig-Holstein boundary.
schleswig_holstein = gpd.read_file("sh_boundaries.geojson")

# Response fields copied under the same key into the stored document.
DETAIL_FIELDS = (
    "states",
    "countries",
    "districts",
    "organization",
    "category",
    "rules",
    "date_modified",
    "name",
    "osm_id",
    "geometry",
    "centroid",
    "area",
)

# Read the pending index entries up front: the loop below updates the same
# collection and performs one slow HTTP request per entry, so it should not
# depend on a cursor that stays open for the whole run.
pending = list(collection_indices.find({"collected": False}))

for x in pending:
    uuid = x["uuid"]
    obj_id = x["_id"]

    url = f"https://content.digitizetheplanet.org/api/protectedarea/{uuid}/"
    reply = requests.get(url, timeout=30)  # bound the wait on a hanging server
    if not reply.ok:
        # report and move on, so one failing uuid does not abort the whole run
        print(f"skipping {uuid}: HTTP {reply.status_code}")
        continue
    response = reply.json()

    # The original code only guarded against an empty list here (presumably
    # what the API returns for some ids -- TODO confirm). Also skip any reply
    # that is not a record with a centroid, instead of crashing on it.
    if not isinstance(response, dict) or not response.get("centroid"):
        continue

    centroid = Point(response["centroid"]["coordinates"])
    if not schleswig_holstein.geometry.contains(centroid).any():
        # NOTE(review): areas outside the boundary keep "collected": False and
        # are therefore requested again on every run of this cell.
        continue

    data_obj = {field: response[field] for field in DETAIL_FIELDS}
    # these two fields are stored under shorter names than the API uses
    data_obj["api_url"] = response["dtp_api_url"]
    data_obj["geometry_source"] = response["geometry_source_organization"]

    collection_data.insert_one(data_obj)
    # flag the index entry only after the details were stored
    collection_indices.update_one({"_id":obj_id},{"$set":{"collected":True}})


## Reshape the data into a nicer schema

In [12]:
# handle to the collection holding the detail documents of the protected areas
collection_data = set_up_mongo(
    client_str='mongodb://localhost:27017',
    database_str='webscraping_dataLabKiel',
    collection_str='digitized_planet_v2',
)

In [11]:
# Copy each area's centroid coordinates into flat "lon" / "lat" fields.
# Only the centroid is requested from MongoDB; the (large) "geometry" field is
# not needed for this step.
for e in collection_data.find({}, {"centroid": 1}):
    doc_id = e["_id"]  # deliberately not `id`: that would shadow the builtin in later cells
    coordinates = e["centroid"]["coordinates"]
    lon = coordinates[0]
    lat = coordinates[1]
    collection_data.update_one({"_id": doc_id}, {"$set": {"lon": lon, "lat": lat}})

In [16]:
# print the raw coordinate arrays of every stored area geometry
# NOTE(review): this prints the full arrays, so the cell output gets very long
for area in collection_data.find():
    geometry = area["geometry"]
    print(geometry["coordinates"])

[[[[9.95935650568003, 54.305757078962], [9.96049308761556, 54.3057480042106], [9.96078808017067, 54.3067655146191], [9.96085212295574, 54.3072049180047], [9.96089381438935, 54.3074219034638], [9.96097084347314, 54.3076902530261], [9.96101034757858, 54.3078278765794], [9.96125981838612, 54.3081593127689], [9.96155120622528, 54.30846671364], [9.96225773799935, 54.3082948649486], [9.96325460188799, 54.3083850927689], [9.96410418190873, 54.3076828968886], [9.96402289650042, 54.3075403931435], [9.96392682894396, 54.3072994217433], [9.96380599034406, 54.3071039747183], [9.96381501986911, 54.3069377049176], [9.96371008705029, 54.3065218101521], [9.96493807236852, 54.3063016488809], [9.96616226546868, 54.30604596649], [9.96639281174597, 54.3062481870957], [9.96681010550392, 54.3063017493557], [9.9670521078266, 54.3063509255172], [9.96711177951314, 54.3064957389594], [9.96708164505006, 54.3067348258744], [9.96708389945199, 54.3068308430188], [9.96726917864559, 54.3068533612336], [9.968232964915