In [None]:
import geopandas as gpd
import shapely
from getpass import getpass

import sys
import copy
sys.path.append("../Code")

from helper_functions import *
from loading_data import *
from algorithms import *

In [None]:
# Count the rows of rel_votpand_cluster_verblijfsobject per bk_votpand_cluster key.
# The unaliased COUNT(*) column is read back later under the name 'count'
# (presumably the database's default column name -- confirm for your backend).
dfob= get_dataframe("""
                    SELECT bk_votpand_cluster, COUNT(*)
                    FROM proj_afval_netwerk.rel_votpand_cluster_verblijfsobject
                    GROUP BY bk_votpand_cluster
                    """)

In [None]:
# Unpack the '~'-separated bk_votpand_cluster key: field 0 is the BAG id,
# fields 1 and 2 are coordinates that are rounded to whole integers.
dfob = dfob.assign(
    bag=lambda frame: frame['bk_votpand_cluster'].str.split('~').str[0].astype('int64'),
    x=lambda frame: frame['bk_votpand_cluster'].str.split('~').str[1].astype('float').round().astype('int'),
    y=lambda frame: frame['bk_votpand_cluster'].str.split('~').str[2].astype('float').round().astype('int'),
)

In [None]:
# Load the full node <-> POI relation table; its bk_afv_rel_nodes_poi key is
# unpacked in the next cell.
df_afstandn2 = get_dataframe("""
                                SELECT *
                                FROM proj_afval_netwerk.afv_rel_nodes_poi
                                """)

In [None]:
# Unpack the '~'-separated bk_afv_rel_nodes_poi key: x, y, type and (for
# non-cluster rows) the BAG id in field 3.
df_afstandn2['split'] = df_afstandn2['bk_afv_rel_nodes_poi'].str.split('~')
df_afstandn2['x'] = df_afstandn2['split'].str[0].astype('float').round().astype('int')
df_afstandn2['y'] = df_afstandn2['split'].str[1].astype('float').round().astype('int')
df_afstandn2['type'] = df_afstandn2['split'].str[2]

# Take an explicit copy of the filtered rows: assigning a new column to a
# boolean-mask slice triggers SettingWithCopyWarning and is ambiguous.
verblijfsobjecten = df_afstandn2[df_afstandn2['type'] != 'afval_cluster'].copy()
verblijfsobjecten['bag'] = verblijfsobjecten['split'].str[3].astype('int64')


In [None]:
# Match the per-key counts to the POI rows on (bag, x, y). The outer join keeps
# keys that exist on only one side; their missing columns become NaN.
temp = dfob.set_index(['bag', 'x', 'y']).join(verblijfsobjecten.set_index(['bag', 'x', 'y']), how='outer').reset_index()

In [None]:
# Distance matrix from the project helpers; the next cell uses its
# 'naar_s1_afv_nodes', 'van_s1_afv_nodes' and 'afstand' columns.
df_afstandn = get_distance_matrix()

In [None]:
# Attach the counts to the distance matrix via the node id. The joined index is
# named explicitly instead of relying on reset_index() producing a column that
# happens to be called 'index' (which only holds when the index is unnamed).
joined = (
    temp.set_index('s1_afv_nodes')
    .join(df_afstandn.set_index('naar_s1_afv_nodes'), how='outer')
    [['van_s1_afv_nodes', 'afstand', 'count']]
    .rename_axis('naar_s1_afv_nodes')
    .reset_index()
    [['van_s1_afv_nodes', 'naar_s1_afv_nodes', 'afstand', 'count']]
    .sort_values(by='afstand')
    .reset_index(drop=True)
    .dropna()  # drop rows that found no partner in either outer join
)

In [None]:
# Inspect the distance matrix with counts built in the previous cell.
joined

In [None]:
def distance_matrix_with_counts():
    """
    Build a distance matrix annotated with a count per address POI.

    Steps:
    1. Query the row count per ``bk_votpand_cluster`` key and unpack that
       '~'-separated key into ``bag``, ``x`` and ``y`` (coordinates rounded
       to integers).
    2. Query ``afv_rel_nodes_poi``, unpack its '~'-separated key into ``x``,
       ``y``, ``type`` and keep the rows whose type is not 'afval_cluster';
       for those rows field 3 of the key is used as ``bag``.
    3. Outer-join both tables on (bag, x, y) and outer-join the result with
       the distance matrix on the node id.

    Output:
    dataframe with the columns van_s1_afv_nodes, naar_s1_afv_nodes, afstand
    and count, sorted by afstand; rows with any missing value are dropped.
    """
    dfob = get_dataframe("""
                    SELECT bk_votpand_cluster, COUNT(*)
                    FROM proj_afval_netwerk.rel_votpand_cluster_verblijfsobject
                    GROUP BY bk_votpand_cluster
                    """)

    cluster_key = dfob['bk_votpand_cluster'].str.split('~')
    dfob['bag'] = cluster_key.str[0].astype('int64')
    dfob['x'] = cluster_key.str[1].astype('float').round().astype('int')
    dfob['y'] = cluster_key.str[2].astype('float').round().astype('int')

    df_afstandn2 = get_dataframe("""
                                SELECT *
                                FROM proj_afval_netwerk.afv_rel_nodes_poi
                                """)

    df_afstandn2['split'] = df_afstandn2['bk_afv_rel_nodes_poi'].str.split('~')
    df_afstandn2['x'] = df_afstandn2['split'].str[0].astype('float').round().astype('int')
    df_afstandn2['y'] = df_afstandn2['split'].str[1].astype('float').round().astype('int')
    df_afstandn2['type'] = df_afstandn2['split'].str[2]

    # Explicit copy: adding a column to a boolean-mask slice would otherwise
    # raise SettingWithCopyWarning and have ambiguous semantics.
    verblijfsobjecten = df_afstandn2[df_afstandn2['type'] != 'afval_cluster'].copy()
    verblijfsobjecten['bag'] = verblijfsobjecten['split'].str[3].astype('int64')

    temp = (
        dfob.set_index(['bag', 'x', 'y'])
        .join(verblijfsobjecten.set_index(['bag', 'x', 'y']), how='outer')
        .reset_index()
    )
    df_afstandn = get_distance_matrix()

    # Name the joined index explicitly rather than depending on reset_index()
    # emitting a column called 'index', which only happens for unnamed indexes.
    joined = (
        temp.set_index('s1_afv_nodes')
        .join(df_afstandn.set_index('naar_s1_afv_nodes'), how='outer')
        [['van_s1_afv_nodes', 'afstand', 'count']]
        .rename_axis('naar_s1_afv_nodes')
        .reset_index()
        [['van_s1_afv_nodes', 'naar_s1_afv_nodes', 'afstand', 'count']]
        .sort_values(by='afstand')
        .reset_index(drop=True)
        .dropna()  # rows without a match in either outer join carry NaN
    )

    return joined

In [None]:
# Rebuild `joined` through the function version of the cells above.
joined = distance_matrix_with_counts()

### Try out

In [None]:
# NOTE: this rebinds `joined` and `df_afstandn2`, replacing the frames that were
# built by the cells above with whatever initial_loading() returns.
all_households, rel_poi_df, joined, df_afstandn2 = initial_loading(use_count=True)

In [None]:
# Inspect the frame returned by initial_loading() under the name df_afstandn2.
df_afstandn2

In [None]:
# Attach the distance rows to every cluster via the node id. The index is named
# explicitly before reset_index(): whether a joined index keeps its name depends
# on the data and the pandas version, so renaming a column called 'index'
# afterwards can silently turn into a no-op.
joined_cluster_distance = (
    joined.set_index('s1_afv_nodes')
    .join(df_afstandn2.set_index('van_s1_afv_nodes'))
    .rename_axis('van_s1_afv_nodes')
    .reset_index()
)

In [None]:
# Compute the shortest distances per household row and keep only the rows that
# are flagged as using a container.
good_result_rich = (
    add_shortest_distances_to_all_households(all_households, joined_cluster_distance, count=True)
    .loc[lambda households: households['uses_container']]
)
good_result_rich

In [None]:
# Per-cluster connection table; calculate_penalties() below reads its
# '<fraction>_perc', 'poi_<fraction>' and '<fraction>' columns.
aansluitingen = create_aansluitingen(good_result_rich, joined_cluster_distance, use_count=True)

In [None]:

def calculate_penalties(good_result, aansluitingen, use_count=False):
    """
    Calculate the total weighted penalty of a candidate solution.

    Two kinds of constraint violations are penalised for every fraction:

    * distance: households whose nearest container of that fraction is
      further away than the allowed maximum contribute the excess distance;
    * capacity: clusters whose occupancy percentage exceeds 100 contribute
      the number of users above the capacity of their containers, scaled by
      a factor 1000.

    Both parts are divided by the total number of households and multiplied
    by the weight of the fraction.

    Input:
    dataframe good_result containing per address or address poi the distance
    to the nearest container for all fractions ('<fraction>_afstand') and,
    when use_count is True, a 'count' column with the number of households
    represented by the row.
    dataframe aansluitingen containing for all clusters the amount of
    containers per fraction ('<fraction>'), the amount of people using these
    containers ('poi_<fraction>') and the percentage of occupancy compared to
    the norm ('<fraction>_perc').
    use_count: when True every row of good_result is weighted by its 'count'
    and the normalisation uses the summed counts instead of the row count.

    Output:
    The sum of all different penalties as a single float
    """
    # (fraction, maximum distance, users per container, weight)
    fraction_rules = (
        ('rest', 100, 100, 0.35),
        ('plastic', 150, 200, 0.25),
        ('papier', 150, 200, 0.2),
        ('glas', 150, 200, 0.15),
        ('textiel', 300, 750, 0.05),
    )
    capacity_scale = 1000

    if use_count:
        total_households = good_result['count'].sum()
    else:
        total_households = good_result.shape[0]

    total_penalties = 0
    for fraction, max_distance, users_per_container, weight in fraction_rules:
        distance_column = fraction + '_afstand'

        # Distance violations: excess distance per offending row.
        too_far = good_result[good_result[distance_column] > max_distance]
        excess_distance = too_far[distance_column] - max_distance
        if use_count:
            # Weight the excess (not only the threshold) by the number of
            # households behind the row; otherwise the units do not match
            # and the penalty can even become negative.
            excess_distance = excess_distance * too_far['count']
        total_penalties += excess_distance.sum() / total_households * weight

        # Capacity violations: users above what the containers can serve.
        overloaded = aansluitingen[aansluitingen[fraction + '_perc'] > 100]
        overflow = overloaded['poi_' + fraction] - overloaded[fraction] * users_per_container
        total_penalties += overflow.sum() / total_households * weight * capacity_scale

    return total_penalties


In [None]:
# Total penalty of the current solution, weighting every row by its 'count'.
calculate_penalties(good_result_rich, aansluitingen, use_count=True)

### Initial test for correctly loading data using count per poi

In [None]:
# Reload all inputs so the test below starts from what initial_loading() returns
# rather than from frames modified by earlier cells.
all_households, rel_poi_df, joined, df_afstandn2 = initial_loading(use_count=True)

In [None]:
# Pick the best of a few random candidates, then refine it with the hillclimber.
# NOTE(review): on a top-to-bottom run the `best_of_random` used here is not the
# one defined in the cell below (that cell has not executed yet) -- presumably
# it is the version star-imported from `algorithms`; confirm which is intended.
joined_cluster_distance, good_result_rich, aansluitingen, avg_distance, penalties = \
best_of_random(2, joined, all_households, rel_poi_df, df_afstandn2, clean=True, use_count=True)
hill_dict, best_solution = hillclimber(10, joined_cluster_distance, all_households, rel_poi_df, df_afstandn2, clean=True, use_count=True)

In [None]:
def best_of_random(num_iterations, joined, all_households, rel_poi_df, df_afstandn2, clean=True, use_count=False):
    """
    Evaluate the given solution plus a number of randomly shuffled candidates
    and return the analysis of the one with the lowest penalties.

    The solution passed in is evaluated first, so it is returned when no
    candidate scores strictly lower. Each new candidate is produced by
    shuffling the best solution found so far; num_iterations sets how many
    candidates are tried. The result can serve as the starting point for an
    iterative optimisation such as a hillclimber.

    Returns the tuple (joined_cluster_distance, good_result_rich,
    aansluitingen, avg_distance, penalties) of the winning solution.
    """
    def evaluate(candidate):
        # Same analysis settings for the starting solution and every candidate.
        return analyze_candidate_solution(candidate, all_households, rel_poi_df, df_afstandn2, clean=clean, use_count=use_count)

    incumbent = joined
    joined_cluster_distance, good_result_rich, aansluitingen, avg_distance, penalties = evaluate(incumbent)

    for _ in range(num_iterations):
        challenger = random_shuffling_clusters(incumbent)
        challenger_distance, challenger_result, challenger_aansluitingen, challenger_avg, challenger_penalties = evaluate(challenger)
        if not challenger_penalties < penalties:
            continue
        # Strictly better: the challenger becomes the basis for later shuffles.
        incumbent = challenger
        joined_cluster_distance, good_result_rich, aansluitingen = challenger_distance, challenger_result, challenger_aansluitingen
        avg_distance, penalties = challenger_avg, challenger_penalties

    print('***************************************')
    print(avg_distance, penalties)
    return joined_cluster_distance, good_result_rich, aansluitingen, avg_distance, penalties
