In [1]:
import sys

import geopandas as gpd
import pandas as pd
import requests

sys.path.append("../")

# from helper_functions import *
# from loading_data import *
from Code.helper_functions import initial_loading
from Code.helper_functions import analyze_candidate_solution
from Code.algorithms import random_start_hillclimber

POSTGRES password?[REDACTED]


### Locations per cluster
The first step is to slightly modify how the clusters are loaded, so that every cluster also carries its neighbourhood (buurt) code.

In [None]:
def load_api_data(prnt=False, timeout=60):
    """
    This function loads in information on the current composition of container
    clusters in Amsterdam. It uses the API from data.amsterdam.nl (available at
    'https://api.data.amsterdam.nl/vsd/afvalclusters') and follows the 'next'
    link of every page until there is none left. Only clusters without an end
    date (i.e. clusters that are still active) are kept.

    Parameters:
    - prnt: when True, print every page URL before it is requested (progress
    monitoring).
    - timeout: number of seconds to wait for each page before giving up.

    Returns:
    - df with one row per active cluster and the columns cluster_x, cluster_y
    (coordinates rounded to ints), aantal_per_fractie, volume_per_fractie
    (as delivered by the API), street_name, buurt, wijk (first three characters
    of buurt) and stadsdeel (first character of buurt).

    Raises:
    - requests.HTTPError when a page does not return a successful status code.
    - KeyError when a page or cluster misses one of the expected fields.
    """
    columns = ['cluster_x', 'cluster_y', 'aantal_per_fractie',
               'volume_per_fractie', 'street_name', 'buurt']
    records = []

    link = 'https://api.data.amsterdam.nl/vsd/afvalclusters'

    while link is not None:
        if prnt:  # Can be used for some kind of monitoring of progress
            print(link)
        response = requests.get(link, timeout=timeout)
        # Fail loudly on an error page instead of tripping over its JSON later
        response.raise_for_status()
        output = response.json()

        for result in output['results']:
            if result['cluster_datum_einde_cluster'] is not None:
                continue  # Cluster has an end date, so it is no longer active
            coordinates = result['cluster_geometrie']['coordinates']
            records.append((
                coordinates[0],
                coordinates[1],
                result['cluster_fractie_aantal'],
                result['cluster_fractie_volume'],
                result['bag_adres_openbare_ruimte_naam'],
                result['gbd_buurt_code'],
            ))

        # The last page has no usable 'next' link: the entry can be missing,
        # None, or carry an empty href. Every other error is left to surface.
        next_page = (output.get('_links') or {}).get('next') or {}
        link = next_page.get('href') or None

    df_clusters = pd.DataFrame(records, columns=columns)
    # Transform coordinates of clusters to ints, as this eases the later join
    for axis in ('cluster_x', 'cluster_y'):
        df_clusters[axis] = df_clusters[axis].astype('float').round(0).astype('int')
    df_clusters['wijk'] = df_clusters['buurt'].str[:3]
    df_clusters['stadsdeel'] = df_clusters['buurt'].str[0]
    return df_clusters

In [None]:
def load_geodata_containers(subsectie=None):
    """
    Read the household-waste collection shapefile and return the geometries of
    the areas where general waste has to be brought to a container (as opposed
    to being collected from the sidewalk). These areas are used to narrow the
    address POI's down to the ones relevant for optimization.

    Parameters:
    - subsectie: optional stadsdeel code; when given (and non-empty) only
    areas whose 'sdcode' equals it are returned. This can be used for partial
    optimization.

    Returns:
    - List of polygons making up the area of centralized garbage collection
    """
    service_areas = gpd.read_file('../data/Inzameling_huisvuil_100220.shp')

    # The double space in the text below is part of the value in the shapefile
    keep = service_areas['aanbiedwij'] == 'Breng uw restafval  naar een container voor restafval.'
    if subsectie:
        keep = keep & (service_areas['sdcode'] == subsectie)

    return [polygon for polygon in service_areas[keep].geometry]

In [None]:
def get_db_afvalcluster_info():
    """
    Function that loads in the POI relation data on the garbage clusters from
    the Postgres database and modifies the resulting dataframe in a way that
    makes it usable for future analysis.

    The key column 'bk_afv_rel_nodes_poi' is split on '~'; its first four parts
    are stored as cluster_x and cluster_y (rounded to ints), type and bag.

    Returns:
    - pandas DataFrame containing all information from the database and also the
    added coordinates for the clusters, the type of POI and the bag field
    """
    db_df = get_dataframe("""SELECT *
                             FROM proj_afval_netwerk.afv_rel_nodes_poi
                             """)
    # Split the key once and reuse the parts for every derived column
    key_parts = db_df['bk_afv_rel_nodes_poi'].str.split('~')
    db_df['cluster_x'] = key_parts.str[0].astype('float').round(0).astype('int')
    db_df['cluster_y'] = key_parts.str[1].astype('float').round(0).astype('int')
    db_df['type'] = key_parts.str[2]
    db_df['bag'] = key_parts.str[3]
    return db_df

In [None]:
def create_all_households(rel_poi_df, subsectie=None):
    """
    Function that creates a dataframe containing all households as rows.

    Every row of rel_poi_df whose 'type' is not 'afval_cluster' is treated as a
    household. For each of them address_in_service_area is evaluated against
    the container service-area polygons (optionally limited to one stadsdeel).

    Parameters:
    - rel_poi_df: DataFrame with at least the columns 'type', 's1_afv_nodes',
    'cluster_x' and 'cluster_y'. It is not modified.
    - subsectie: optional stadsdeel code passed on to load_geodata_containers.

    Returns:
    - new DataFrame with the columns s1_afv_nodes, cluster_x, cluster_y and
    uses_container (the result of address_in_service_area per household)
    """
    polygon_list = load_geodata_containers(subsectie=subsectie)

    is_household = rel_poi_df['type'] != 'afval_cluster'
    # Explicit copy: the new column must not be written to a slice of the input
    all_households = rel_poi_df.loc[
        is_household, ['s1_afv_nodes', 'cluster_x', 'cluster_y']
    ].copy()

    # Iterating over the two coordinate columns also works for an empty
    # selection, where a row-wise apply would not yield a single column.
    all_households['uses_container'] = [
        address_in_service_area(x, y, polygon_list=polygon_list)
        for x, y in zip(all_households['cluster_x'], all_households['cluster_y'])
    ]
    return all_households

In [None]:
# Load the active clusters from the API, including their buurt, wijk and
# stadsdeel codes. The loader defined in this notebook is `load_api_data`.
df_clusters = load_api_data()

In [None]:
# Number of loaded clusters per stadsdeel.
df_clusters['stadsdeel'].value_counts()
# Stadsdeel Zuid-Oost (T) has 423 clusters. This is the stadsdeel to be optimized.

In [None]:
# Load the POI relation table from the database, with coordinates, type and
# bag split out of the key column.
rel_poi_df = get_db_afvalcluster_info()

In [None]:
# Household table; `uses_container` is evaluated against the service-area
# polygons of stadsdeel 'T' only.
all_households = create_all_households(rel_poi_df, subsectie='T')

In [None]:
# Combine the database POIs with the cluster data from the API.
# NOTE(review): join_api_db is neither defined nor imported in the cells
# visible here — confirm where it comes from before a fresh-kernel run.
joined = join_api_db(rel_poi_df, df_clusters)

In [None]:
# Spread the per-cluster container counts over one column per fraction plus a
# total; containers_per_cluster yields the six values in exactly this order.
(joined['rest'], joined['plastic'], joined['papier'],
 joined['glas'], joined['textiel'], joined['totaal']) = zip(
    *joined['aantal_per_fractie'].apply(containers_per_cluster)
)

In [None]:
# Load the distance matrix used by the analysis below.
# NOTE(review): distance_matrix_with_counts is neither defined nor imported in
# the cells visible here — confirm where it comes from.
df_afstandn2 = distance_matrix_with_counts()

In [None]:
# Load the data and analyze the current situation for every stadsdeel in turn.
# analyze_candidate_solution comes from Code.helper_functions and has to be
# imported in the import cell at the top of the notebook, otherwise this cell
# fails on a fresh kernel.
# NOTE(review): every iteration overwrites the previous results, so only the
# values for the last stadsdeel ('B') remain afterwards — confirm this is intended.
for stadsdeel in ['T', 'M', 'N', 'A', 'K', 'E', 'F', 'B']:
    print(stadsdeel)
    all_households, rel_poi_df, joined, df_afstandn2 = initial_loading(use_count=True, subsectie=stadsdeel)
    joined_cluster_distance, good_result_rich, aansluitingen, avg_distance, penalties = \
        analyze_candidate_solution(joined, all_households, rel_poi_df, df_afstandn2, clean=True, use_count=True)

In [2]:
# Load all four working tables in one call. Called without arguments, the
# recorded run asked for its settings interactively (see the prompts in the
# output of this cell).
all_households, rel_poi_df, joined, df_afstandn2 = initial_loading()

Do you want to use addresses instead of clusters?True
What stadsdeel do you want to make as a subsection (optional parameter)?T
API data loaded
DB relation POIs loaded
Table all households created
API and DB joined
containers per cluster determined


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  verblijfsobjecten['bag'] = verblijfsobjecten['split'].apply(lambda x: x[3]).astype('int64')


distance matrix loaded


In [None]:
# Run the random-start hillclimber on the loaded tables. In the recorded run it
# prompted for its settings (iteration counts, saving, subset, parameter) and
# printed the average distance and penalties of every candidate.
hill_df, best_solution = random_start_hillclimber(joined, all_households, rel_poi_df, df_afstandn2)

How many random iterations?50
How many iterations hillclimber?1500
Do you want the results saved(True/False)?True
Do you want to only use a subset of data?True
Do you want to use addresses instead of clusters?True
What parameter to optimize on (score/penalties)?penalties
Average distance is : 249.6004611676881
Penalties are: 308.1859662117452
Average distance is : 256.8217469027376
Penalties are: 328.9011396366502
Average distance is : 263.61695525287126
Penalties are: 332.1797250318917
Average distance is : 252.70698264125934
Penalties are: 319.0010724920824
Average distance is : 258.29419267794844
Penalties are: 320.14774167768053
Average distance is : 268.02888272637875
Penalties are: 326.82923480174406
Average distance is : 264.6703402731018
Penalties are: 316.7889141021934
Average distance is : 257.57205462462105
Penalties are: 330.29514692837006
Average distance is : 251.40989216701954
Penalties are: 318.1099367570236
Average distance is : 250.19696566156182
Penalties are: 330.36

Average distance is : 261.24689914616897
Penalties are: 290.862285404865
290.862285404865 289.87964563895366
Average distance is : 260.8052428008205
Penalties are: 292.9338104590136
292.9338104590136 289.87964563895366
Average distance is : 261.11962935588184
Penalties are: 291.8858155614795
291.8858155614795 289.87964563895366
Average distance is : 260.61808412938416
Penalties are: 288.00171923942014
288.00171923942014 289.87964563895366
Average distance is : 260.9308502599863
Penalties are: 288.11734809230256
288.11734809230256 288.00171923942014
Average distance is : 260.7966867007399
Penalties are: 291.211646242919
291.211646242919 288.00171923942014
Average distance is : 260.8836088880811
Penalties are: 288.64778457180944
288.64778457180944 288.00171923942014
Average distance is : 260.00326172806876
Penalties are: 289.1144558897033
289.1144558897033 288.00171923942014
Average distance is : 262.429480058924
Penalties are: 287.2441583013995
287.2441583013995 288.00171923942014
Avera

Average distance is : 260.89620150955056
Penalties are: 272.3364086762745
272.3364086762745 269.67535981756083
Average distance is : 261.6192560037973
Penalties are: 268.2639067185938
268.2639067185938 269.67535981756083
Average distance is : 261.98476553870705
Penalties are: 268.98884927940685
268.98884927940685 268.2639067185938
Average distance is : 262.7236550278168
Penalties are: 268.07202476257913
268.07202476257913 268.2639067185938
Average distance is : 262.73816835049
Penalties are: 268.50791952682437
268.50791952682437 268.07202476257913
Average distance is : 262.34402423166506
Penalties are: 270.0592597775834
270.0592597775834 268.07202476257913
Average distance is : 262.286061496027
Penalties are: 267.46331516577806
267.46331516577806 268.07202476257913
Average distance is : 261.74173803749403
Penalties are: 265.031275658114
265.031275658114 267.46331516577806
Average distance is : 261.18368618612726
Penalties are: 267.5130627499167
267.5130627499167 265.031275658114
Averag

Average distance is : 265.52946414828114
Penalties are: 253.05337306314559
253.05337306314559 251.23207389203597
Average distance is : 266.25911369569087
Penalties are: 252.3219846092969
252.3219846092969 251.23207389203597
Average distance is : 266.25910244140005
Penalties are: 251.29849148200597
251.29849148200597 251.23207389203597
Average distance is : 264.7146880440711
Penalties are: 254.74142415028325
254.74142415028325 251.23207389203597
Average distance is : 265.3107775167109
Penalties are: 250.07229196517827
250.07229196517827 251.23207389203597
Average distance is : 263.1234484434044
Penalties are: 251.037595718094
251.037595718094 250.07229196517827
Average distance is : 265.32719730744805
Penalties are: 255.1019029073642
255.1019029073642 250.07229196517827
Average distance is : 265.3552122038679
Penalties are: 248.22548731672774
248.22548731672774 250.07229196517827
Average distance is : 265.07134096817794
Penalties are: 250.7861238545485
250.7861238545485 248.225487316727

Average distance is : 265.9432158201433
Penalties are: 238.02654854631788
238.02654854631788 237.52658736671106
Average distance is : 266.86069357024525
Penalties are: 237.95341961012994
237.95341961012994 237.52658736671106
Average distance is : 267.6710458961034
Penalties are: 239.27760563145617
239.27760563145617 237.52658736671106
Average distance is : 266.4695275301598
Penalties are: 240.71450456097966
240.71450456097966 237.52658736671106
Average distance is : 267.4210030259592
Penalties are: 240.514546901033
240.514546901033 237.52658736671106
Average distance is : 268.26609252500236
Penalties are: 240.0750158280573
240.0750158280573 237.52658736671106
Average distance is : 266.92716840982445
Penalties are: 252.52552656574036
252.52552656574036 237.52658736671106
Average distance is : 266.7655955830204
Penalties are: 237.75465690603127
237.75465690603127 237.52658736671106
Average distance is : 265.9774251744606
Penalties are: 236.47593127290904
236.47593127290904 237.5265873667

Average distance is : 268.1256884399552
Penalties are: 233.33487960263247
233.33487960263247 226.42298077724095
Average distance is : 269.7011162123907
Penalties are: 225.71737918610464
225.71737918610464 226.42298077724095
Average distance is : 269.575481931278
Penalties are: 226.961294026991
226.961294026991 225.71737918610464
Average distance is : 270.0689950522538
Penalties are: 232.64131526991
232.64131526991 225.71737918610464
Average distance is : 269.31468349037215
Penalties are: 230.18434271909368
230.18434271909368 225.71737918610464
Average distance is : 271.8122662116866
Penalties are: 227.51146528240588
227.51146528240588 225.71737918610464
Average distance is : 267.38205583729825
Penalties are: 235.88819653865377
235.88819653865377 225.71737918610464
Average distance is : 269.722813436206
Penalties are: 228.50859655114965
228.50859655114965 225.71737918610464
Average distance is : 269.6959327827557
Penalties are: 227.762295464012
227.762295464012 225.71737918610464
Averag

Average distance is : 266.6805850843712
Penalties are: 226.07161823142286
226.07161823142286 223.02880287820724
Average distance is : 269.140548663758
Penalties are: 223.26899400199932
223.26899400199932 223.02880287820724
Average distance is : 268.03898122182045
Penalties are: 229.5796683605465
229.5796683605465 223.02880287820724
Average distance is : 267.35915411204405
Penalties are: 227.55809028240583
227.55809028240583 223.02880287820724
Average distance is : 266.9931520232416
Penalties are: 226.71572380039984
226.71572380039984 223.02880287820724
Average distance is : 268.15102566448576
Penalties are: 227.2807677232589
227.2807677232589 223.02880287820724
Average distance is : 268.907723936327
Penalties are: 228.5833082382816
228.5833082382816 223.02880287820724
Average distance is : 267.72971841278485
Penalties are: 227.18096717760747
227.18096717760747 223.02880287820724
Average distance is : 268.08778339963976
Penalties are: 230.99540201182938
230.99540201182938 223.0288028782

In [None]:
from Code.algorithms import random_shuffling_clusters
from Code.algorithms import best_of_random
from Code.algorithms import hillclimber
from Code.helper_functions import analyze_candidate_solution

In [None]:
def _ask_bool(prompt):
    """
    Ask a True/False question on stdin and return the answer as a real bool.

    bool() on a string is True for every non-empty answer, so typing 'False'
    would count as True; the text is therefore interpreted explicitly. An empty
    answer counts as False.

    Raises:
    - ValueError when the answer is not a recognised true/false word.
    """
    answer = input(prompt).strip().lower()
    if answer in ('true', 't', 'yes', 'y', '1'):
        return True
    if answer in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise ValueError("Expected True or False, got %r" % answer)


i = int(input("How many random iterations?"))
j = int(input("How many iterations hillclimber?"))
to_save = _ask_bool("Do you want the results saved(True/False)?")
clean = _ask_bool("Do you want to only use a subset of data?")
use_count = _ask_bool("Do you want to use addresses instead of clusters?")
parameter = input("What parameter to optimize on (score/penalties)?")
print(i, j, to_save, clean, use_count, parameter)

# NOTE(review): i and j are asked for above but are not passed on; the first
# argument of hillclimber is the literal 10 — confirm whether j was intended.
hill_df, best_solution = hillclimber(10, joined, all_households,
        rel_poi_df, df_afstandn2, clean=clean, use_count=use_count,
        parameter=parameter, save=to_save)