In [225]:
from abc import ABC, abstractmethod
from typing import Any
from pymongo import MongoClient
from geopy.distance import geodesic
import folium
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import shape, Point
from shapely import from_wkt
import json
from thefuzz import fuzz
from thefuzz import process

# 1 data base handlers

In [226]:
class DBHandlers(ABC):

    @abstractmethod 
    def connect_db(self,Client,client_str,db_str,coll_str): 
        client = Client(client_str) #connect to mongodb client
        db = client[db_str] #connect to database
        existing_collections = db.list_collection_names() #check that dbs collections
        if coll_str not in existing_collections:
            db.create_collection(coll_str) #create collection if needed
        self.collection = db[coll_str] #connect to collection

    @abstractmethod
    def design_query_dict(self):
        query_terms = [{},{}] # {"$or":[{},{}]} returns everything
        return query_terms


    @abstractmethod
    def retrieve_data(self):
        self.data = [elem for elem in self.collection.find(self.query_dict)]

    @abstractmethod
    def check_click_radius(self,click_point,radius,data):
        in_radius = []
        for e in data:
            try:
                #try like this because of naming irregularities with yp and osm data
                try:
                    lat, lon = e["lat"],e["lon"]
                except KeyError:
                    lat, lon = e["latitude"],e["longitude"]
                other_point = (lat,lon)
                dist = geodesic(click_point, other_point).km
                if dist < radius:
                    in_radius.append(e)
            except:
                pass
        self.results = in_radius

    @abstractmethod
    def click_polygon(self,click_point, data):
        #get adminlevel=6 boundaries data
        df = pd.read_csv("sh_boundaries_6.csv")
        df = df[["name","geometry"]]
        df["geometry"] = df["geometry"].apply(from_wkt)
        gdf = gpd.GeoDataFrame(df, geometry='geometry')
        gdf.set_crs("EPSG:4326",inplace=True)

        #reorder for check
        click_point = [click_point]
        reordered_point = [(lon, lat) for lat, lon in click_point]
        reordered_point = Point(reordered_point)

        #check for intersection
        poly = df[df["geometry"].apply(lambda x: x.contains(reordered_point))]
        
        self.results = []
        for e in data:
            if "lon" in e and "lat" in e and e["lon"] is not None and e["lat"] is not None:
                reordered_coords = Point(e["lon"],e["lat"])
                if any(poly["geometry"].apply(lambda x: x.contains(reordered_coords))):
                    self.results.append(e)
        return poly


    @abstractmethod
    def no_duplicates(self,dict_list,new_dicts,key):
        for e in new_dicts:
            #if the item to add doesnt have the relevant key, put it into results list
            if key not in e:
                dict_list.append(e)
                continue
            #if its false that the new item has the same value as an already collected dict, add new item 
            if not any(e.get(key) == d.get(key) for d in dict_list):
                dict_list.append(e)
        return dict_list

    @abstractmethod
    def popupStr_generator(self, df_row):
        pass

    @abstractmethod
    def tooltip_generator(self,df_row):
        pass

    @abstractmethod
    def gestalte_map(self):
        pass

    @abstractmethod
    def orderly_output(self):
        pass



## OSM

In [227]:
class OSM_queryer(DBHandlers):

    def connect_db(self,Client=MongoClient,client_str='mongodb://localhost:27017', db_str='webscraping_dataLabKiel', coll_str='osm_pois'):
        return super().connect_db(Client,client_str,db_str,coll_str)


    def design_query_dict(self,input):
        query_terms = []

        if "what" in input.keys():
            for val in input["what"]:
                query_terms.append({"name":{ "$regex": rf"^{val}", "$options": "i" }})
                query_terms.append({"amenity": { "$regex": rf"^{val}", "$options": "i" }})

        if "all" in input.keys():
            query_terms = super().design_query_dict()

        #for returning nothing
        if query_terms == []:
            query_terms.append({"_id":"thisisanimpossibleid"}) #so that if nothing is entered nothing will be returned instead of everything
                

        self.query_dict = {"$or":query_terms}


    def retrieve_data(self, **kwargs):
        return super().retrieve_data()


    def check_click_radius(self, click_point, radius, data):
        return super().check_click_radius(click_point, radius, data)
    

    def no_duplicates(self, dict_list, new_dicts, key):
        return super().no_duplicates(dict_list, new_dicts, key)


    def popupStr_generator(self, df_row):
        try:
            name = f"<b>{df_row['name']}</b><br><br>"
            amenity = f"<i>amenity:</i>: {df_row['amenity']}"
            pps = name + amenity
            return pps
        except:
            pass

    def tooltip_generator(self,row):
        tts = ""
        try:
            tts = row['name']
            return tts
        except:
            pass
        return tts

    def gestalte_map(self,map,data):
        poi_df = pd.DataFrame(data) 
        try:    
            poi_df["lat"] = pd.to_numeric(poi_df["lat"])
            poi_df["lon"] = pd.to_numeric(poi_df["lon"])
            #poi_df.dropna(subset=["lat","lon"],inplace=True)

            for _, row in poi_df.iterrows():
                folium.Marker(
                    location=[row['lat'], row['lon']],
                    popup=self.popupStr_generator(row),
                    tooltip=self.tooltip_generator(row),
                    icon=folium.Icon(color="gray")
                ).add_to(map)
            return map
        except:
            #print("no map available due to e.g. naming errors")
            return map
        
    def display_output(self,map,data):
        self.gestalte_map(map=map,data=data)

    def click_polygon(self, click_point, data):
        return super().click_polygon(click_point, data)
    
    def plot_polygon(self,polygons,map):
        #add county area
        gdf = gpd.GeoDataFrame(polygons, geometry='geometry')
        gdf.set_crs(epsg=4326, inplace=True)
        for _,row in gdf.iterrows():
            sim_geo = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
            geo_j = sim_geo.to_json()
            geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "lightgray"})
            folium.Popup(row["name"]).add_to(geo_j)
            geo_j.add_to(map)

        #add rent markers
        self.display_output(map=map,data=self.results)

        return map


    def orderly_output(self, data):
        output = []
        for df_row in data:
            doc = {"name":df_row['name'], 
                   "amenity":df_row['amenity']}
            output.append(doc)
        return output 
            

## YELLOW PAGES

In [228]:
class YP_queryer(DBHandlers):

    def connect_db(self,Client=MongoClient,client_str='mongodb://localhost:27017', db_str='sh_data_collection', coll_str='yp_kiel'):
        return super().connect_db(Client,client_str,db_str,coll_str)

    def design_query_dict(self, input):
        query_terms = []

        if "what" in input.keys():
                for val in input["what"]:
                        query_terms.append({"name":{ "$regex": rf"^{val}", "$options": "i" }})
                        query_terms.append({"keywords":{ "$regex": rf"^{val}", "$options": "i" }})

        if "all" in input.keys():
              query_terms = super().design_query_dict()

        if query_terms == []:
                query_terms.append({"_id":"thisisanimpossibleid"}) #so that if nothing is entered nothing will be returned instead of everything
                
        self.query_dict = {"$or":query_terms}
    
    def retrieve_data(self):
        self.data = [elem for elem in self.collection.find(self.query_dict)]


    #add this!!!
    def is_open(self):
        #self.data if time.now is within opening hours would be cool!
        pass


    def check_click_radius(self, click_point, radius, data):
         return super().check_click_radius(click_point, radius, data)
    
    
    def no_duplicates(self, dict_list, new_dicts, key):
         return super().no_duplicates(dict_list, new_dicts, key)


    # PLOTS

    def popupStr_generator(self, df_row):
        try:
            name = f"<b>{df_row['name']}</b><br><br>"
            link = f"<i>link:</i>: <a href='{df_row['sameAs']}'>{df_row['sameAs']}</a><br>"
            tel = f"<i>tel.:</i> {df_row['telephone']}<br>"
            address = df_row["address"]
            try:
                address = address["streetAddress"] + ", " + address["postalCode"] + ", " + address["addressLocality"]
            except:
                pass
            addr = f"<i>address:</i> {address}<br>"
            pps = name + link + tel + addr 
            return pps
        except:
            pass

    def tooltip_generator(self,row):
        tts = ""
        try:
            tts = row['name']
            return tts
        except:
            pass
        return tts

    def gestalte_map(self,map,data):
        poi_df = pd.DataFrame(data) 
        try:    
            poi_df["lat"] = pd.to_numeric(poi_df["lat"])
            poi_df["lon"] = pd.to_numeric(poi_df["lon"])
            #poi_df.dropna(subset=["lat","lon"],inplace=True)

            for _, row in poi_df.iterrows():
                folium.Marker(
                    location=[row['lat'], row['lon']],
                    popup=self.popupStr_generator(row),
                    tooltip=self.tooltip_generator(row),
                    icon=folium.Icon(color="beige")
                ).add_to(map)
            return map
        except:
            #print("no map available due to e.g. naming errors")
            return map
            
    def display_output(self,map,data):
        self.gestalte_map(map=map,data=data) 

    def click_polygon(self, click_point, data):
         return super().click_polygon(click_point, data)

    def plot_polygon(self,polygons,map):
        #add county area
        gdf = gpd.GeoDataFrame(polygons, geometry='geometry')
        gdf.set_crs(epsg=4326, inplace=True)
        for _,row in gdf.iterrows():
            sim_geo = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
            geo_j = sim_geo.to_json()
            geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "lightgrey"})
            folium.Popup(row["name"]).add_to(geo_j)
            geo_j.add_to(map)

        #add rent markers
        self.display_output(map=map,data=self.results)

        return map

    def orderly_output(self, data):
        output = []
        for df_row in data:
            address = df_row["address"]
            try:
                nice_address = address["streetAddress"] + ", " + address["postalCode"] + ", " + address["addressLocality"]
                address = f"<i>address:</i> {nice_address}<br>"
            except:
                pass

            doc = {"name":df_row['name'], 
                   "tel":df_row['telephone'],
                   "address":address,
                   "open":df_row["openingHours"]}
            output.append(doc)
        return output 
    
    

## RENT

In [229]:
class RENT_queryer(DBHandlers):

    def connect_db(self,Client=MongoClient,client_str='mongodb://localhost:27017', db_str='webscraping_dataLabKiel', coll_str='avg_rent'):
        return super().connect_db(Client,client_str,db_str,coll_str)

    def design_query_dict(self, input):
        query_terms = []

        if "rent" in input.keys():
                #query_terms = super().design_query_dict() #get everything
                query_terms = [{"collected":"True"}]

        if "all" in input.keys():
              #query_terms = super().design_query_dict()
              query_terms = [{"collected":"True"}]

        if query_terms == []:
                query_terms.append({"_id":"thisisanimpossibleid"}) #so that if nothing is entered nothing will be returned instead of everything
                
        self.query_dict = {"$or":query_terms}
    

    def retrieve_data(self):
        self.data = [elem for elem in self.collection.find(self.query_dict)]

    
    def check_click_radius(self, click_point, radius, data):
          return super().check_click_radius(click_point, radius, data)


    def no_duplicates(self, dict_list, new_dicts, key):
        return super().no_duplicates(dict_list, new_dicts, key)


    def tooltip_generator(self,row):
        tts = ""
        try: 
            tts = f"{row["average_rent"]}€ m²"
            return tts
        except:
            pass
        return tts

    def popupStr_generator(self,df_row):
        try:
            if df_row["Landkreis"] != []:
                name = f"<b>{df_row['Landkreis']}</b><br><br>"
            else:
                name = f"<b>{df_row['Stadt']}</b><br><br>"
            plz = f"<i>postcode:</i> {df_row['PLZ']}<br>"
            rent = f"<i>rent:</i> {df_row['average_rent']}€ per m²"
            pps = name + plz + rent
            return pps
        except:
            pass

        #so make sure data has no duplicates and all the naming is the same
    def gestalte_map(self,map,data):
        poi_df = pd.DataFrame(data) #compile business sample into df
        try:    
            median_rent = poi_df.average_rent.mean()
            poi_df["rent_ratio"] = poi_df.average_rent.apply(lambda x: "high" if x > median_rent else "low")
            high_rents = poi_df[poi_df["rent_ratio"]=="high"]
            for _, row in high_rents.iterrows():
                folium.Marker(
                    location=[row['lat'], row['lon']],
                    popup=self.popupStr_generator(row),
                    tooltip=self.tooltip_generator(row),
                    icon=folium.Icon(color="lightred")
                ).add_to(map)
            low_rents = poi_df[poi_df["rent_ratio"]=="low"]
            for _, row in low_rents.iterrows():
                folium.Marker(
                    location=[row['lat'], row['lon']],
                    popup=self.popupStr_generator(row),
                    tooltip=self.tooltip_generator(row),
                    icon=folium.Icon(color="lightblue")
                ).add_to(map)
            return map
        except:
            #print("no map available due to e.g. naming errors")
            return map

            
    def display_output(self,map,data):
        self.gestalte_map(map=map,data=data) #z is level of zoom

    def click_polygon(self, click_point, data):
        return super().click_polygon(click_point, data)
    
    def plot_polygon(self,polygons,map):
        #add county area
        gdf = gpd.GeoDataFrame(polygons, geometry='geometry')
        gdf.set_crs(epsg=4326, inplace=True)
        for _,row in gdf.iterrows():
            sim_geo = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
            geo_j = sim_geo.to_json()
            geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "lightgray"})
            folium.Popup(row["name"]).add_to(geo_j)
            geo_j.add_to(map)

        #add rent markers
        self.display_output(map=map,data=self.results)

        return map
        


    def orderly_output(self, data):
        output = []
        for df_row in data:
            doc = {"county":df_row["Landkreis"],
                   "city":df_row['Stadt'],
                   "postcode":df_row['PLZ'],
                   "avgerage rent":df_row['average_rent']}
            output.append(doc)
        return output 
    
    

## EVENTS

In [230]:
class EVENT_queryer(DBHandlers):

    def connect_db(self,Client=MongoClient,client_str='mongodb://localhost:27017', db_str='webscraping_dataLabKiel', coll_str='event_data'):
        return super().connect_db(Client,client_str,db_str,coll_str)

    def design_query_dict(self, input):
        query_terms = []

        if "what" in input.keys():
            for val in input["what"]:
                query_terms.append({"title":{ "$regex": rf"^{val}", "$options": "i" }})
                query_terms.append({"categories":{ "$regex": rf"^{val}", "$options": "i" }})

        if "event" in input.keys():
            if input["event"] == True:
                query_terms = super().design_query_dict()
            #else:
                #query_terms = query_terms = super().design_query_dict()
                #check if collected events are at the relevant time or sth

        if "all" in input.keys():
            query_terms = super().design_query_dict()

        if query_terms == []:
            query_terms.append({"_id":"thisisanimpossibleid"}) #so that if nothing is entered nothing will be returned instead of everything
                
        self.query_dict = {"$or":query_terms}
    

    def retrieve_data(self):
        self.data = [elem for elem in self.collection.find(self.query_dict)]

    
    def check_click_radius(self, click_point, radius, data):
        return super().check_click_radius(click_point, radius, data)


    def no_duplicates(self, dict_list, new_dicts, key):
        return super().no_duplicates(dict_list, new_dicts, key)

    # PLOTS
    def popupStr_generator(self, df_row):
        try:
            name = f"<b>{df_row['title']}</b><br><br>"
            link = f"<i>link:</i>: <a href='{df_row['source.url']}'>{df_row['source.url']}</a><br>"
            tel = f"<i>tel.:</i> {df_row['phone']}<br>"
            nice_addr = df_row["street"] + ", " + df_row["city"] + ", " + df_row["zip"]
            addr = f"<i>address:</i> {nice_addr}<br>"
            event_type = df_row['categories']
            separator = ", " 
            event_cat = separator.join(event_type)
            cat = f"<i>event type:</i> {event_cat}<br>"
            pps = name + link + tel + cat + addr 
            return pps
        except:
            pass

    def tooltip_generator(self,row):
        tts = ""
        try:
            tts = row['title']
            return tts
        except:
            pass
        return tts

    def gestalte_map(self,map,data):
        poi_df = pd.DataFrame(data) 
        try:    
            poi_df["lat"] = pd.to_numeric(poi_df["lat"])
            poi_df["lon"] = pd.to_numeric(poi_df["lon"])
            #poi_df.dropna(subset=["lat","lon"],inplace=True)

            for _, row in poi_df.iterrows():
                folium.Marker(
                    location=[row['lat'], row['lon']],
                    popup=self.popupStr_generator(row),
                    tooltip=self.tooltip_generator(row),
                    icon=folium.Icon(color="purple")
                ).add_to(map)
            return map
        except:
            #print("no map available due to e.g. naming errors")
            return map
            
    def display_output(self,map,data):
        self.gestalte_map(map=map,data=data) 


    def click_polygon(self, click_point, data):
        return super().click_polygon(click_point, data)
    
    def plot_polygon(self,polygons,map):
        #add county area
        gdf = gpd.GeoDataFrame(polygons, geometry='geometry')
        gdf.set_crs(epsg=4326, inplace=True)
        for _,row in gdf.iterrows():
            sim_geo = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
            geo_j = sim_geo.to_json()
            geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "lightgray"})
            folium.Popup(row["name"]).add_to(geo_j)
            geo_j.add_to(map)

        #add rent markers
        self.display_output(map=map,data=self.results)

        return map

    def orderly_output(self, data):
        output = []
        for df_row in data:
            nice_addr = df_row["street"] + ", " + df_row["city"] + ", " + df_row["zip"]
            doc = {"name":df_row["name"],
                   "tel":df_row['phone'],
                   "address":nice_addr,
                   "time":df_row['timeIntervals']}
            output.append(doc)
        return output 
    

## NATURAL AREAS

In [231]:
class DIGITIZEDPLANET_queryer(DBHandlers):


    #CONNECT TO DB - COLLECTION
    def connect_db(self,Client=MongoClient,client_str='mongodb://localhost:27017', db_str='webscraping_dataLabKiel', coll_str='digitized_planet_v2'):
        return super().connect_db(Client,client_str,db_str,coll_str)

    #GET QUERY DICTIONARY FOR SEARCH
    def design_query_dict(self, input):
        query_terms = []

        if "what" in input.keys():
            for e in input["what"]:
                if e in ["nature","Natur","natural area","protected area","naturschutzgebiet","park"]:
                    query_terms = super().design_query_dict()

        if "all" in input.keys():
            query_terms = super().design_query_dict()

        if query_terms == []:
            query_terms.append({"_id":"thisisanimpossibleid"}) #so that if nothing is entered nothing will be returned instead of everything
                
        self.query_dict = {"$or":query_terms}
    
    #RETRIEVE DATA BASED ON QUERY DICT
    def retrieve_data(self):
        self.data = [elem for elem in self.collection.find(self.query_dict)]

    
    #FILTER DATA BASED ON`RADIUS`
    def check_click_radius(self, click_point, radius, data):
        return super().check_click_radius(click_point, radius, data)
    

    def no_duplicates(self, dict_list, new_dicts, key):
        return super().no_duplicates(dict_list, new_dicts, key)


    # PLOTS
    def popupStr_generator(self, df_row):
        try:
            name = f"<b>{df_row['name']}</b><br><br>"
            area = f"<i>area in m²:</i> {df_row['area']}<br>"
            geom_s = f"<i>source:</i> {df_row['geometry_source']}<br>"
            pps = name + area + geom_s
            return pps
        except:
            pass

    def tooltip_generator(self,row):
        tts = ""
        try:
            tts = row['name']
            return tts
        except:
            pass
        return tts

    def gestalte_map(self,map,data):
        poi_df = pd.DataFrame(data) 
        try:    
            for _, row in poi_df.iterrows():
                folium.Marker(
                    location=[row['lat'], row['lon']],
                    popup=self.popupStr_generator(row),
                    tooltip=self.tooltip_generator(row),
                    icon=folium.Icon(color="darkblue")
                ).add_to(map)
            return map
        except:
            #print("no map available due to e.g. naming errors")
            return map
                    

    def click_polygon(self, click_point, data):
        return super().click_polygon(click_point, data)

    def plot_area_polygon(self,map,data):
        try:
            poi_df = pd.DataFrame(data)
            poi_df["geometry"] = poi_df['geometry'].apply(shape)
            gdf = gpd.GeoDataFrame(poi_df, geometry='geometry')
            gdf.set_crs(epsg=4326, inplace=True)
            for _,row in gdf.iterrows():
                sim_geo = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
                geo_j = sim_geo.to_json()
                geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "darkblue"})
                folium.Popup(self.popupStr_generator(row)).add_to(geo_j)
                geo_j.add_to(map)
            
            gdf = gdf.to_crs(epsg=2263)
            gdf["centroid"] = gdf.centroid.to_crs(epsg=4326)
            for _, row in gdf.iterrows():
                lat = row["centroid"].y
                lon = row["centroid"].x
                folium.Marker(
                    location=[lat, lon],
                    popup=self.popupStr_generator(row),
                    tooltip=self.tooltip_generator(row),
                    icon=folium.Icon(color="darkblue")
                ).add_to(map)
            return map
        except:
            return map

    def plot_polygon(self,polygons,map):
        #add county area
        gdf = gpd.GeoDataFrame(polygons, geometry='geometry')
        gdf.set_crs(epsg=4326, inplace=True)
        for _,row in gdf.iterrows():
            sim_geo = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
            geo_j = sim_geo.to_json()
            geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "lightgray"})
            folium.Popup(row["name"]).add_to(geo_j)
            geo_j.add_to(map)

        #add areas & markers
        self.plot_area_polygon(map=map,data=self.results)

        return map
    
    def display_output(self,map,data):
        self.plot_area_polygon(map=map,data=data) 
        #bzw self.gestalte_map(map=map,data=data) 


    def orderly_output(self, data):
        output = []
        for df_row in data:
            nice_addr = df_row["street"] + ", " + df_row["city"] + ", " + df_row["zip"]
            doc = {"name":df_row["name"],
                   "area":df_row['area'],
                   "source":df_row['geometry_source']}
            output.append(doc)
        return output 
    
    

# 2 output manager

## OUTPUT MANAGER

In [232]:
class OutputManager():

    #make function which integrated but doesnt return data from all data bases!!!
    def disambiguate(self,queriers):    
        threshold = 80 #of similarity, selected by just trying it out
        temp = []

        #go through the returned (no duplicate) result list of every querier
        for q in queriers:
            querier_results = q.results #get data for the querier
            q.results = [] #empty slate for querier.results

            #as long as theres still names to check
            while querier_results != []:
                elem = querier_results.pop()
                name = elem["name"]
                possible_double = [k for k in temp if fuzz.token_set_ratio(name,k.get("name")) >= threshold] #retrieve similar names from temp result list
                if len(possible_double) >= 1: #if some similar name(s) found
                    elem_loc = (elem["lat"],elem["lon"]) #get coordinates of original point which we want to add
                    the_same = False #assume they are not the same location
                    for pos_doub in possible_double:
                        pos_doub_loc = (pos_doub["lat"],pos_doub["lon"]) #get coordinates from matches from temporary result list
                        if geodesic(elem_loc,pos_doub_loc).km < 0.1: # if it turns out they are in same locations, so if they are more less 100m apart
                            the_same = True # flag: okay so actually we found a match, so something with a similar name less than 100 m away
                    if the_same==False: # only if we didnt find anything simlar at all
                        temp.append(elem) #append to temporary checking list
                        q.results.append(elem) #add to collection specific result bucket so we can pass it again to database handler for output
                else: #if we dont have anything similar (yet) just stick it in there
                    temp.append(elem)
                    q.results.append(elem)


    #return (filtered) search results 
    def initiate_search(self,Querier,query):
        q = Querier()
        q.connect_db()
        q.design_query_dict(query)
        q.retrieve_data()
        return q
    
    def unbounded_data(self,q,map,key):
        results = []
        q.results = q.no_duplicates(results,q.data,key)
        q.display_output(map=self.map,data=q.results)
        return map, q.results
    
    #get data within a certain radius
    def radius_data(self,q,map,point,radius,key):
        results = []
        q.check_click_radius(click_point=point,radius=radius, data=q.data) #yields q.results
        q.no_duplicates(results,q.results,key) #so also q.results
        q.display_output(map=self.map,data=q.results)
        return map, results

    #get data within polygon (== county)
    def polygon_data(self,q,map,point,key):
        results = []
        poly = q.click_polygon(click_point=point,data=q.data)
        q.no_duplicates(results,q.results,key)
        q.plot_polygon(poly,map)
        return map, results


    # area is radius (e.g. 0.5) or "polygon"
    def process_query_click(self, queryer_class, query, area, point, map_object, key):
        query_instance = self.initiate_search(queryer_class, query=query)
        if area == "polygon":
            return self.polygon_data(query_instance, map_object, point, key)
        elif isinstance(area, (int, float)): #is radius essentially
            return self.radius_data(query_instance, map_object, point, area, key)
        else:
            return self.unbounded_data(query_instance, map_object, key)


    def perform_search(self,**kwargs):
        
        # click + filter 
        # click + empty filter: all 
        # no click + filter (so not area specific, neither radius nor polygon!!!) -> area = None

        #if "query" in kwargs and "point" in kwargs and "area" in kwargs:

        # SET UP: get arguments
        if "query" in kwargs and "point" in kwargs and "area" in kwargs:
            query, point, area = kwargs["query"], kwargs["point"], kwargs["area"]
            self.map = folium.Map(location=point,zoom_start=8) #one output object of output manager
        elif "query" in kwargs:
            query, point, area = kwargs["query"], None, None
            self.map = folium.Map(location=(54.2194,9.6961),zoom_start=6) #one output object of output manager

        # output scaffolding
        self.printable_output = [] #one output object of output manager
        
        # INSTANTIATE QUERIERS; GET DATA; RETURN MAP AND RESULT LIST 
        self.map, self.osm_data = self.process_query_click(OSM_queryer, query=query, area=area, point=point, map_object=self.map, key="")
        self.map, self.yp_data = self.process_query_click(YP_queryer, query=query, area=area, point=point, map_object=self.map, key="name")
        self.map, self.event_data = self.process_query_click(EVENT_queryer, query=query, area=area, point=point, map_object=self.map, key="name")
        self.map, self.rent_data = self.process_query_click(RENT_queryer, query=query, area=area, point=point, map_object=self.map, key="PLZ")
        self.map, self.area_data = self.process_query_click(DIGITIZEDPLANET_queryer, query=query, area=area, point=point, map_object=self.map, key="name")

        # COMBINE ALL DATA
        self.data = self.yp_data + self.osm_data + self.event_data + self.rent_data + self.area_data

        # PRINTABLE OUTPUT
        #self.nice_data = 

        # IF 
        # wed need to only get data from each querier, do integration here, and then do the map ... maybe later
        # so we get non duplicate data from handlers, create a temporary list which would do the integration, but store the actual data we wanna use in collection specific
        # buckets, and then call on that data the plots again! we have all those functions, would just need to detangle the current function

        

            

# 3 experiments handlers

--- 

## Try it out: what can the datahandlers do?

---

### osm 1

In [233]:
click_point = (54.3233,10.1228)
radius = 5
i = {"what":["pub"]}

cosm = OSM_queryer()
cosm.connect_db()
cosm.collection

cosm.design_query_dict(i)
print(cosm.query_dict)

cosm.retrieve_data()
results = cosm.data
print(f"len of result pre range filter: {len(results)}")

cosm.check_click_radius(click_point=click_point,radius=radius,data=results)
results = cosm.results
print(f"len of result post range filter: {len(results)}")

results_2 = cosm.no_duplicates([],results,key="name")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
cosm.display_output(map=my_map,data=results)
#my_map

{'$or': [{'name': {'$regex': '^pub', '$options': 'i'}}, {'amenity': {'$regex': '^pub', '$options': 'i'}}]}
len of result pre range filter: 611
len of result post range filter: 58
len of result duplicate filter: 58


### osm 2

In [234]:
click_point = (54.43700, 11.12160)
i = {"what":["pub"]}

c = OSM_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

poly = c.click_polygon(click_point=click_point,data=results)
results = c.results
print(f"len of result post range filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
my_map = c.plot_polygon(poly,map=my_map)
#my_map

{'$or': [{'name': {'$regex': '^pub', '$options': 'i'}}, {'amenity': {'$regex': '^pub', '$options': 'i'}}]}
len of result pre range filter: 611
len of result post range filter: 41


### yellow pages 1

In [235]:
click_point = (54.3233,10.1228)
radius = 1
i = {"what":["friseur"]}


c = YP_queryer()
#c.connect_db(db_str="webscraping_dataLabKiel",coll_str="yellow_pages")
c.connect_db()
print(c.collection)

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

c.check_click_radius(click_point=click_point,radius=radius,data=results)
results = c.results
print(f"len of result post range filter: {len(results)}")

results = c.no_duplicates([],results,key="name")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
c.display_output(map=my_map,data=results)
#cosm.display_output(map=my_map,data=results_2)
my_map

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'sh_data_collection'), 'yp_kiel')
{'$or': [{'name': {'$regex': '^friseur', '$options': 'i'}}, {'keywords': {'$regex': '^friseur', '$options': 'i'}}]}
len of result pre range filter: 41
len of result post range filter: 12
len of result duplicate filter: 9


In [236]:
len(c.results)

12

In [237]:
len(c.data)

41

### yellow pages 2

In [238]:
click_point = (54.3233,10.1228)
i = {"what":["friseur"]}

c = YP_queryer()
#c.connect_db(db_str="webscraping_dataLabKiel",coll_str="yellow_pages")
c.connect_db()
print(c.collection)

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

poly = c.click_polygon(click_point=click_point,data=results)
results = c.results
print(f"len of result post range filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
my_map = c.plot_polygon(poly,map=my_map)
#my_map

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'sh_data_collection'), 'yp_kiel')
{'$or': [{'name': {'$regex': '^friseur', '$options': 'i'}}, {'keywords': {'$regex': '^friseur', '$options': 'i'}}]}
len of result pre range filter: 41
len of result post range filter: 33


### rent

In [239]:
click_point = (54.323334, 10.139444)
radius = 25
i = {"rent":True}

c = RENT_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

c.check_click_radius(click_point=click_point,radius=radius,data=results)
results = c.data
print(f"len of result post range filter: {len(results)}")

results = c.no_duplicates([],results,key="PLZ")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
my_map = c.display_output(map=my_map,data=results)
# my_map

{'$or': [{'collected': 'True'}]}
len of result pre range filter: 386
len of result post range filter: 386
len of result duplicate filter: 381


### rent 2

In [240]:
coordinates = [
    (54.51450, 8.86610),  # Nordfriesland
    (54.17230, 9.03890),  # Dithmarschen
    (54.36980, 9.73560),  # Rendsburg-Eckernförde
    (54.71020, 9.41230),  # Schleswig-Flensburg
    (54.20840, 10.41490), # Plön
    (54.17650, 10.93020), # Ostholstein
    (53.67620, 9.66230),  # Pinneberg
    (53.91540, 10.25580), # Segeberg
    (53.70010, 10.39460)  # Stormarn
]

In [241]:
click_point =  (54.17230, 9.03890)
i = {"rent":True}

c = RENT_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

poly = c.click_polygon(click_point=click_point,data=results)
results = c.results
print(f"len of result post range filter: {len(results)}")

results = c.no_duplicates([],results,key="PLZ")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
#c.plot_polygon(poly,map=my_map,data=results)

{'$or': [{'collected': 'True'}]}
len of result pre range filter: 386
len of result post range filter: 17
len of result duplicate filter: 17


### events 1

In [242]:
click_point = (54.323334, 10.139444)
radius = 10
i = {"what":["kultur","kunst","ausstellung"]}

c = EVENT_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

c.check_click_radius(click_point=click_point,radius=radius,data=results)
results = c.data
print(f"len of result post range filter: {len(results)}")

results = c.no_duplicates([],results,key="title")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
c.display_output(map=my_map,data=results)
#my_map

{'$or': [{'title': {'$regex': '^kultur', '$options': 'i'}}, {'categories': {'$regex': '^kultur', '$options': 'i'}}, {'title': {'$regex': '^kunst', '$options': 'i'}}, {'categories': {'$regex': '^kunst', '$options': 'i'}}, {'title': {'$regex': '^ausstellung', '$options': 'i'}}, {'categories': {'$regex': '^ausstellung', '$options': 'i'}}]}
len of result pre range filter: 1625
len of result post range filter: 1625
len of result duplicate filter: 330


### events 2

In [243]:
click_point = (54.323334, 10.139444)
i = {"what":["kunst","ausstellung"]}

c = EVENT_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

poly = c.click_polygon(click_point=click_point,data=results)
results = c.results
print(f"len of result post range filter: {len(results)}")

results = c.no_duplicates([],results,key="title")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
#c.plot_polygon(poly,map=my_map)

{'$or': [{'title': {'$regex': '^kunst', '$options': 'i'}}, {'categories': {'$regex': '^kunst', '$options': 'i'}}, {'title': {'$regex': '^ausstellung', '$options': 'i'}}, {'categories': {'$regex': '^ausstellung', '$options': 'i'}}]}
len of result pre range filter: 482
len of result post range filter: 349
len of result duplicate filter: 16


### natural areas 1

In [244]:
click_point = (54.323334, 10.139444)
radius = 19
i = {"all":True}

c = DIGITIZEDPLANET_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

c.check_click_radius(click_point=click_point,radius=radius,data=results)
results = c.results
print(f"len of result post range filter: {len(results)}")

results = c.no_duplicates([],results,key="name")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
c.display_output(map=my_map,data=results)
#my_map

{'$or': [{}, {}]}
len of result pre range filter: 17
len of result post range filter: 10
len of result duplicate filter: 10


### natural areas 2 & 3

In [245]:
click_point = (54.323334, 10.139444)
radius = 50
i = {"what":["protected area"]}

c = DIGITIZEDPLANET_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

c.check_click_radius(click_point=click_point,radius=radius,data=results)
results = c.results
print(f"len of result post range filter: {len(results)}")

results = c.no_duplicates([],results,key="name")
print(f"len of result duplicate filter: {len(results)}")

zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
my_map = c.plot_area_polygon(map=my_map,data=results)
#my_map

{'$or': [{}, {}]}
len of result pre range filter: 17
len of result post range filter: 17
len of result duplicate filter: 17


In [246]:
click_point = (54.36980, 9.73560)
i = {"what":["protected area"]}

c = DIGITIZEDPLANET_queryer()
c.connect_db()
c.collection

c.design_query_dict(i)
print(c.query_dict)

c.retrieve_data()
results = c.data
print(f"len of result pre range filter: {len(results)}")

poly = c.click_polygon(click_point=click_point,data=results)
results = c.results
print(f"len of result post polygon filter: {len(results)}")


zoom = 8
my_map = folium.Map(location=click_point,zoom_start=zoom)
my_map = c.plot_polygon(map=my_map,polygons=poly)
#my_map

{'$or': [{}, {}]}
len of result pre range filter: 17
len of result post polygon filter: 16


----

# 4 experiments output manager

this returns osm and yp  and event data, <br> - fix the regex expressions, cause 'asia' returns 19 results, but 'asian' nothing

In [254]:
click_point = (54.323334, 10.139444)
radius = 5
i = {"what":["asia","karaoke","music"]} 

om = OutputManager()
om.perform_search(query=i,point=click_point,area=radius)
print(len(om.data))
om.map

25


show rent with area

In [248]:
click_point = (54.17230, 9.03890)
radius = 5
i = {"rent":True} 

om = OutputManager()
om.perform_search(query=i,point=click_point,area="polygon")
print(len(om.data))
om.map

17


does the all flag work?

In [249]:
click_point = (53.67620, 9.66230)
#radius = 5
i = {"all":True}

om = OutputManager()
om.perform_search(query=i,point=click_point,area="polygon")
print(f"we found {len(om.data)} matches for your query!") #but they are double still
#om.map

we found 5467 matches for your query!


just filter no click, does it work?

In [250]:
i = {"what":["ferry", "fähre"]} #fähre adds like 10 search results

om = OutputManager()
om.perform_search(query=i)
print(f"we found {len(om.data)} matches for your query!") #but they are double still
om.map

we found 167 matches for your query!


so look for bestattungen with parking. in one region / in one radius circle!
- and no event? just look for event true and then consider yourself?

In [251]:
click_point = (54.323334, 10.139444)
radius = 1
i = {"what":["bestattung","bestattungen","undertaker","parkplatz","parking","parken","fussball"]} #"event"=True

om = OutputManager()
om.perform_search(query=i,point=click_point,area=radius)
print(f"we found {len(om.data)} matches for your query!") #but they are double still
om.map

we found 29 matches for your query!


just get like something of everything around kiel

In [252]:
click_point = (54.323334, 10.139444)
radius = 20
i = {"what":["nature","outdoor","ramen","weihnachten"]} 

om = OutputManager()
om.perform_search(query=i,point=click_point,area=radius)
print(f"we found {len(om.data)} matches for your query!") #but they are double still
om.map

we found 25 matches for your query!


now get something of everything around plön? (no events and no nature cause no data for this location)

In [253]:
click_point = (54.20840, 10.41490) #plön
i = {"what":["segeln","sailing","wassersport","burger"]} 

om = OutputManager()
om.perform_search(query=i,point=click_point,area="polygon")
print(f"we found {len(om.data)} matches for your query!") #but they are double still
om.map

we found 7 matches for your query!


---