In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import pandas as pd
import numpy as np
import pickle

from itertools import chain
from itertools import product
from itertools import groupby
from operator import itemgetter

import fiona
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from mpl_toolkits.basemap import Basemap
from shapely.geometry import Point, Polygon, MultiPoint
from descartes import PolygonPatch

import matplotlib.colors as mpl_colors
from random import randint
import time

from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the pickled inputs stored under data_routes_pickle/.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
routes_coord_cur = _load_pickle('data_routes_pickle/routes_coord_cur')

# The two matrices are wrapped in NumPy arrays after unpickling.
sim_matrix = np.array(_load_pickle('data_routes_pickle/sim_matrix_sim_segments_1_mod23'))
dist_matrix = np.array(_load_pickle('data_routes_pickle/dist_matrix'))

cluster_count_cur = _load_pickle('data_routes_pickle/cluster_count_cur')
idxs = _load_pickle('data_routes_pickle/idxs')

In [None]:
# Bind the name to an empty list first, so it exists even if the load below fails.
coord_list = []

# The table is unpickled with encoding='latin1'.
# NOTE(review): presumably the file was pickled under Python 2 -- confirm.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
processed_table_path = 'data_routes_pickle/processed_table'
with open(processed_table_path, 'rb') as processed_table_file:
    coord_list = pickle.load(processed_table_file, encoding='latin1')

def compute_distance(x, y, metric='l1'):
    """Return the distance between two coordinate vectors.

    Parameters
    ----------
    x, y : array-like
        Numeric sequences of the same shape; each is converted with
        ``np.array`` before subtraction.
    metric : str, optional
        Name of the metric. Only ``'l1'`` is implemented (for 1-D input this
        is the sum of absolute differences).

    Returns
    -------
    float
        NumPy scalar equal to ``np.linalg.norm(x - y, ord=1)``.

    Raises
    ------
    ValueError
        If ``metric`` is not a supported name. ``ValueError`` is a subclass
        of ``Exception``, so callers catching ``Exception`` are unaffected.

    Notes
    -----
    For 2-D input ``ord=1`` makes ``np.linalg.norm`` compute the matrix
    1-norm (largest absolute column sum) rather than the element-wise sum,
    so pass flat vectors.
    """
    # Reject unsupported metrics up front with a specific exception type.
    if metric != "l1":
        raise ValueError('Unknown metric')
    return np.linalg.norm(np.array(x) - np.array(y), ord=1)
        
# Build the symmetric matrix of pairwise L1 distances between all entries of
# coord_list. Only the upper triangle (diagonal included) is computed; every
# value is mirrored into the lower triangle.
number_of_paths = len(coord_list)
distance_matrix = np.zeros((number_of_paths, number_of_paths))
for row in range(number_of_paths):
    for col in range(row, number_of_paths):
        pair_distance = compute_distance(coord_list[row], coord_list[col], 'l1')
        distance_matrix[row, col] = distance_matrix[col, row] = pair_distance

In [None]:
# Display the pairwise distance matrix built in the previous cell.
distance_matrix

In [None]:
# Grid search over DBSCAN's ``eps`` (heat-map rows / y axis) and ``min_samples``
# (heat-map columns / x axis), clustering on the precomputed distance matrix.
epsilon_step = 0.2
min_sample_step = 1
epsilon_range = np.arange(0.2, 1.41, epsilon_step)  # y axis
min_sample_range = np.arange(2, 11, min_sample_step)  # x axis
grid_shape = (len(epsilon_range), len(min_sample_range))
quality_matrix = np.zeros(grid_shape)        # silhouette score per grid cell; 0 where not scored
cluster_count_matrix = np.zeros(grid_shape)  # number of distinct labels per grid cell

for min_sample_index, min_sample_size in enumerate(min_sample_range):
    for eps_index, eps in enumerate(epsilon_range):
        clustering_algorithm = DBSCAN(eps=eps, min_samples=min_sample_size, metric='precomputed')
        labels = clustering_algorithm.fit_predict(distance_matrix)
        # NOTE(review): DBSCAN marks noise with the label -1, so the noise group
        # is counted here as if it were a cluster -- confirm this is intended.
        label_count = len(set(labels))
        cluster_count_matrix[eps_index, min_sample_index] = label_count
        # silhouette_score needs at least 2 labels and fewer labels than samples.
        if 1 < label_count < len(labels):
            # Score with the same precomputed distances that drove the clustering,
            # instead of re-deriving Euclidean distances from the raw coordinates.
            quality_matrix[eps_index, min_sample_index] = silhouette_score(
                distance_matrix, labels, metric='precomputed')

# Plotting
# imshow's extent is (left, right, bottom, top). Row 0 (smallest eps) is drawn
# at the top, and the far edges are pushed out by one grid step so that every
# cell spans exactly one step.
heatmap_extent = [np.min(min_sample_range), np.max(min_sample_range) + min_sample_step,
                  np.max(epsilon_range) + epsilon_step, np.min(epsilon_range)]

plt.figure(figsize=(15, 5))
plt.imshow(quality_matrix, interpolation='none', aspect="auto", extent=heatmap_extent)
plt.clim(-1, 1)  # silhouette scores lie in [-1, 1]
plt.title('DBSCAN silhouette score')
plt.xlabel('min_samples')
plt.ylabel('eps')
plt.colorbar()

# ``alpha`` is not a savefig option (recent Matplotlib rejects unknown
# keywords with a TypeError), so it is no longer passed.
# NOTE(review): if a transparent background was wanted, pass transparent=True.
plt.savefig("dbscan_output/dbscan {}.png".format(int(time.time())), dpi=200)

plt.figure(figsize=(15, 5))
plt.imshow(cluster_count_matrix, interpolation='none', aspect="auto", extent=heatmap_extent)
plt.title('DBSCAN number of distinct labels (noise included)')
plt.xlabel('min_samples')
plt.ylabel('eps')
plt.colorbar()

In [None]:
# Grid search over DBSCAN's ``eps`` (heat-map rows / y axis) and ``leaf_size``
# (heat-map columns / x axis) with min_samples fixed at 5, clustering on the
# precomputed distance matrix.
epsilon_step = 0.25
leaf_size_step = 2
epsilon_range = np.arange(0.25, 1.55, epsilon_step)  # y
leaf_size_range = np.arange(1, 20, leaf_size_step)  # x
grid_shape = (len(epsilon_range), len(leaf_size_range))
quality_matrix = np.zeros(grid_shape)        # silhouette score per grid cell; 0 where not scored
cluster_count_matrix = np.zeros(grid_shape)  # number of distinct labels per grid cell

for leaf_size_index, leaf_size in enumerate(leaf_size_range):
    for eps_index, eps in enumerate(epsilon_range):
        # Pass leaf_size through so the sweep actually varies the parameter.
        # NOTE(review): per the scikit-learn docs leaf_size tunes the tree-based
        # neighbour search, so with metric='precomputed' the columns are
        # expected to come out identical -- confirm this sweep is still wanted.
        clustering_algorithm = DBSCAN(eps=eps, min_samples=5, metric='precomputed',
                                      leaf_size=int(leaf_size))
        labels = clustering_algorithm.fit_predict(distance_matrix)
        # NOTE(review): DBSCAN marks noise with the label -1, so the noise group
        # is counted here as if it were a cluster -- confirm this is intended.
        label_count = len(set(labels))
        cluster_count_matrix[eps_index, leaf_size_index] = label_count
        # silhouette_score needs at least 2 labels and fewer labels than samples.
        if 1 < label_count < len(labels):
            # Score with the same precomputed distances that drove the clustering,
            # instead of re-deriving Euclidean distances from the raw coordinates.
            quality_matrix[eps_index, leaf_size_index] = silhouette_score(
                distance_matrix, labels, metric='precomputed')

# Plotting
# imshow's extent is (left, right, bottom, top). Row 0 (smallest eps) is drawn
# at the top, and the far edges are pushed out by one grid step so that every
# cell spans exactly one step.
heatmap_extent = [np.min(leaf_size_range), np.max(leaf_size_range) + leaf_size_step,
                  np.max(epsilon_range) + epsilon_step, np.min(epsilon_range)]

plt.figure(figsize=(15, 5))
plt.imshow(quality_matrix, interpolation='none', aspect="auto", extent=heatmap_extent)
plt.title('DBSCAN silhouette score (min_samples=5)')
plt.xlabel('leaf_size')
plt.ylabel('eps')
plt.colorbar()
plt.figure(figsize=(15, 5))
plt.imshow(cluster_count_matrix, aspect="auto", interpolation='none', extent=heatmap_extent)
plt.title('DBSCAN number of distinct labels (noise included, min_samples=5)')
plt.xlabel('leaf_size')
plt.ylabel('eps')
plt.colorbar()