In [None]:
import numpy as np
import pandas as pd

In [None]:
from case_curves import *
from case_hours import plot_time, kmeans_parameters, measures_kmeans_range
from pract2_utils import *

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from collections import Counter

# Case 3

Accidentes relacionados con curvas con visibilidad restringida.

In [None]:
# Apply seaborn's "whitegrid" style to every figure drawn from here on.
sns.set_style("whitegrid")

In [None]:
# Read the 2013 accident records and lower-case every column name so the
# cells below can refer to columns with a single naming convention.
dgt_data = pd.read_csv("../data/accidentes_2013.csv")
dgt_data.columns = dgt_data.columns.str.lower()

In [None]:
# Bivariate histogram of `visibilidad_restringida` (x) against `tot_muertos` (y),
# drawn in one panel per value of `trazado_no_intersec`.
g = sns.FacetGrid(dgt_data, col="trazado_no_intersec", col_wrap=2)
g.map_dataframe(sns.histplot, x="visibilidad_restringida", y="tot_muertos")
g.set_titles(col_template="{col_name}")
# The facet variable already appears in each panel title; the x axis shows
# `visibilidad_restringida`, so label it with the variable actually plotted.
g.set_axis_labels("visibilidad_restringida", "tot_muertos")
g.savefig("figures/2_segmentation/case_3/eda_trazado_visibilidad_muertos.png")

In [None]:
# Bivariate histogram of `visibilidad_restringida` (x) against `tot_victimas` (y),
# drawn in one panel per value of `trazado_no_intersec`.
g = sns.FacetGrid(dgt_data, col="trazado_no_intersec", col_wrap=2)
g.map_dataframe(sns.histplot, x="visibilidad_restringida", y="tot_victimas")
g.set_titles(col_template="{col_name}")
# The facet variable already appears in each panel title; the x axis shows
# `visibilidad_restringida`, so label it with the variable actually plotted.
g.set_axis_labels("visibilidad_restringida", "tot_victimas")
g.savefig("figures/2_segmentation/case_3/eda_trazado_visibilidad_victimas.png")

In [None]:
# Build the working subset for case 3. `define_case_3` arrives through one of
# the star imports and takes no arguments, so it does not receive the
# `dgt_data` frame loaded above.
# NOTE(review): presumably it reads and filters the accident data itself —
# confirm it uses the same file and the same lower-cased column names.
case_3_subset = define_case_3()

In [None]:
# Preview the first rows of the case-3 subset.
case_3_subset.head()

In [None]:
# Size of the case-3 subset as (rows, columns).
case_3_subset.shape

In [None]:
# Most frequent values of `trazado_no_intersec` in the subset
# (value_counts orders by frequency; head keeps the first five).
case_3_subset['trazado_no_intersec'].value_counts().head()

In [None]:
# Most frequent values of `visibilidad_restringida` in the subset
# (value_counts orders by frequency; head keeps the first five).
case_3_subset['visibilidad_restringida'].value_counts().head()

In [None]:
# Columns of the subset used as clustering features and plotted in every
# pair plot / centroid figure for case 3.
atributos = [
    'tot_vehiculos_implicados',
    'tot_heridos_leves',
    'tot_heridos_graves',
    'tot_muertos',
    'tot_victimas',
]

In [None]:
# Exploratory pair plot of the clustering attributes, with a kernel density
# estimate on the diagonal; the figure is written next to the other case-3 plots.
hm = sns.pairplot(
    case_3_subset,
    vars=atributos,
    diag_kind="kde",
)
hm.savefig("figures/2_segmentation/case_3/eda.png")

In [None]:
# Restrict the subset to the clustering attributes via the project helper
# `to_matrix`.
# NOTE(review): presumably returns the selected columns as a numeric matrix — confirm.
case_3_data = to_matrix(case_3_subset, atributos)

In [None]:
# Normalised copy of the feature data; this is what every clustering
# algorithm below is fitted on.
# NOTE(review): the scaling scheme is defined inside `norm` — confirm which one.
case_3_data_norm = norm(case_3_data)

In [None]:
# Algorithm configurations compared for case 3, built by a project helper.
# NOTE(review): the order of this collection matters — the interpretation cells
# index the predictions by position (0 for k-means, 3 for agglomerative).
case_3_algorithms = definition_clusters_case_3(case_3_data_norm)

In [None]:
# Run every configured algorithm on the normalised data; the helper returns
# the predictions and a second value stored as the timings.
case_3_predictions, case_3_times = get_predictions(case_3_algorithms, case_3_data_norm)

In [None]:
# Quality measures for each algorithm's predictions, combined with the timings.
# NOTE(review): presumably the Calinski and Silhouette scores listed in the
# table columns of the next cell — confirm in `calcule_measures`.
case_3_measures = calcule_measures(case_3_data_norm, case_3_predictions, case_3_times)

In [None]:
# Print the comparison of all algorithms as LaTeX source, ready to paste into
# the report.
# NOTE(review): the meaning of the third argument (False) is defined by
# `latex_table` — confirm.
columns = ['Algoritmo', 'Clusters', 'Calinski', 'Silhouette', 'time(s)']
print(latex_table(case_3_measures, columns, False))

In [None]:
# Parameters of the first k-means sweep, passed unchanged to
# `kmeans_parameters` and `plot_time` in the next two cells.
# NOTE(review): presumably start, stop and step of the cluster-count range —
# confirm whether `b` is inclusive inside the helpers.
a, b, n = 1, 10, 1

In [None]:
# k-means sweep over the current a/b/n; the helper returns three results,
# stored as distortions, inertia and fitting times.
case_3_distortions, case_3_inertia, case_3_time_kmeans  = kmeans_parameters(case_3_data_norm, a, b, n)

In [None]:
# Elbow figure: inertia together with fitting time over the first sweep,
# written to kmeans_elbow.pdf. Must run before the wider sweep below, which
# overwrites `case_3_inertia` and `case_3_time_kmeans`.
plot_time(case_3_inertia, case_3_time_kmeans, "elbow", "time(s)", "figures/2_segmentation/case_3/kmeans_elbow.pdf", a, b, n)

In [None]:
# Parameters of the second, wider k-means sweep. These values stay in effect
# for every remaining sweep cell and for the index of the summary table.
a, b, n = 5, 50, 5

In [None]:
# Repeat the k-means sweep with the wider a/b/n. This rebinds the three
# result names, so the values from the first sweep are no longer available.
case_3_distortions, case_3_inertia, case_3_time_kmeans  = kmeans_parameters(case_3_data_norm, a, b, n)

In [None]:
# Distortions together with fitting time over the wider sweep.
plot_time(case_3_distortions, case_3_time_kmeans, "Distortions", "time(s)", "figures/2_segmentation/case_3/kmeans_distortions.pdf", a, b, n)

In [None]:
# Inertia together with fitting time over the wider sweep.
plot_time(case_3_inertia, case_3_time_kmeans, "Inertia", "time(s)", "figures/2_segmentation/case_3/kmeans_inertia.pdf", a, b, n)

In [None]:
# Silhouette and Calinski scores of k-means over the same a/b/n sweep.
case_3_silhouette_scores, case_3_calinski_scores = measures_kmeans_range(case_3_data_norm, a, b, n)

In [None]:
# Reuse `plot_time` to draw the two quality scores against each other's axis
# labels ("silhouette" / "calinski") instead of a score against time.
plot_time(case_3_silhouette_scores, case_3_calinski_scores, "silhouette", "calinski", "figures/2_segmentation/case_3/silhouette_calinski.pdf", a, b, n)

In [None]:
# Summary of the wider k-means sweep: one entry per table column, mapping the
# column title to the values collected for it.
case_3_table = {
    "Silhouette": case_3_silhouette_scores,
    "Calinski-Harabaz": case_3_calinski_scores,
    "time(s)": case_3_time_kmeans,
}

In [None]:
# Print the sweep summary as LaTeX source; the column names must match the
# keys of `case_3_table`.
columns = ['Silhouette','Calinski-Harabaz','time(s)']
# NOTE(review): range(a, b, n) excludes b — confirm the sweep helpers produce
# exactly one value per element of this index.
index=range(a,b,n)
print(latex_table_index(case_3_table, columns, index))

## Birch parameters

In [None]:
# Birch configurations built by a project helper.
# NOTE(review): 0.01 is presumably the fixed threshold while the number of
# clusters varies — confirm in `configuraciones_birch_clusters`.
config_birch_clusters = configuraciones_birch_clusters(0.01)

In [None]:
# Fit each Birch configuration on the normalised data and keep the timings.
prediction_birch_clusters, times_birch_clusters = get_predictions(config_birch_clusters, case_3_data_norm)

In [None]:
# Quality measures for the Birch configurations above.
measures_birch_clusters = calcule_measures(case_3_data_norm, prediction_birch_clusters, times_birch_clusters)

In [None]:
# Print the Birch (cluster-count) results as LaTeX source.
columns = ['Algoritmo', 'Clusters', 'Calinski', 'Silhouette', 'time(s)']
print(latex_table(measures_birch_clusters, columns, False))

In [None]:
# Birch threshold study: configurations built by a project helper.
# NOTE(review): 10 is presumably the fixed number of clusters while the
# threshold varies — confirm in `configuraciones_birch_threshold`.
config_birch_threshold = configuraciones_birch_threshold(10)

In [None]:
# Fit each Birch threshold configuration on the normalised data and keep the timings.
prediction_birch_threshold, times_birch_threshold = get_predictions(config_birch_threshold, case_3_data_norm)

In [None]:
# Quality measures for the Birch threshold configurations above.
measures_birch_threshold = calcule_measures(case_3_data_norm, prediction_birch_threshold, times_birch_threshold)

In [None]:
# Print the Birch (threshold) results as LaTeX source.
columns = ['Algoritmo', 'Clusters', 'Calinski', 'Silhouette', 'time(s)']
print(latex_table(measures_birch_threshold, columns, False))

## Meanshift

In [None]:
# MeanShift configurations built by a project helper that receives the data.
# NOTE(review): presumably the data is used to estimate bandwidths — confirm.
config_meanshift = configuraciones_meanshift(case_3_data_norm)

In [None]:
# Fit each MeanShift configuration on the normalised data and keep the timings.
predictions_meanshift, times_meanshift = get_predictions(config_meanshift, case_3_data_norm)

In [None]:
# Quality measures for the MeanShift configurations above.
measures_meanshift = calcule_measures(case_3_data_norm, predictions_meanshift, times_meanshift)

In [None]:
# Print the MeanShift results as LaTeX source.
columns = ['Algoritmo', 'Clusters', 'Calinski', 'Silhouette', 'time(s)']
print(latex_table(measures_meanshift, columns, False))

## Agglomerative

In [None]:
# Agglomerative configurations built by a project helper that receives the data.
# NOTE(review): the helper name mentions connectivity and the variable name
# mentions Ward — confirm which linkage/connectivity settings are actually produced.
config_ward = configuraciones_agglomerative_connectivity(case_3_data_norm)

In [None]:
# Fit each agglomerative configuration on the normalised data and keep the timings.
predictions_ward , times_ward = get_predictions(config_ward, case_3_data_norm)

In [None]:
# Quality measures for the agglomerative configurations above.
measures_ward = calcule_measures(case_3_data_norm, predictions_ward, times_ward)

In [None]:
# Print the agglomerative results as LaTeX source.
columns = ['Algoritmo', 'Clusters', 'Calinski', 'Silhouette', 'time(s)']
print(latex_table(measures_ward, columns, False))

## Interpretaciones

In [None]:
# k-means labels taken from the general comparison run: entry 0 of
# `case_3_predictions`, element 1 of that entry.
# NOTE(review): assumes `definition_clusters_case_3` lists k-means first and
# that each entry is a (name, labels)-like pair — TODO confirm.
kmeans_label = case_3_predictions[0][1]

In [None]:
# Number of samples assigned to each k-means label.
Counter(kmeans_label)

In [None]:
# Project helper `pairplot` (from a star import, not `sns.pairplot`): receives
# the subset, the attributes, the output path and the k-means labels.
# NOTE(review): presumably colours the points by label and saves the figure — confirm.
pairplot(case_3_subset, atributos, "figures/2_segmentation/case_3/pairplot_kmeans.png", kmeans_label)

In [None]:
# Centroids of the k-means clusters, computed as the per-cluster mean of the
# normalised attributes.
df_kmeans_centroids = pd.DataFrame(case_3_data_norm)
df_kmeans_centroids.columns = atributos
# Attach the label of each sample so the rows can be grouped by cluster.
df_kmeans_centroids['cluster'] = kmeans_label
# The name is rebound here: from this line on it holds one row per cluster
# (index = cluster label), no longer one row per sample.
df_kmeans_centroids = df_kmeans_centroids.groupby('cluster').mean()

In [None]:
# Draw the k-means centroids against the normalised data.
# NOTE(review): the meaning of the trailing 0.0 is defined by
# `visualize_centroids` — confirm.
visualize_centroids(df_kmeans_centroids.values, case_3_data_norm, "figures/2_segmentation/case_3/centroids_kmeans_norm.pdf", atributos, 0.0)

In [None]:
# Same centroids, this time passed together with the un-normalised data.
# NOTE(review): the centroids are means of the normalised data; confirm that
# `visualize_centroids` rescales them using `case_3_data`, otherwise the
# figure mixes two scales.
visualize_centroids(df_kmeans_centroids.values, case_3_data, "figures/2_segmentation/case_3/centroids_kmeans.pdf", atributos, 0.0)

In [None]:
# Agglomerative labels taken from the general comparison run: entry 3 of
# `case_3_predictions`, element 1 of that entry.
# NOTE(review): assumes the agglomerative algorithm is the fourth one listed by
# `definition_clusters_case_3` — TODO confirm.
agglomerative_labels = case_3_predictions[3][1]

In [None]:
# Number of samples assigned to each agglomerative label.
Counter(agglomerative_labels)

In [None]:
# Project helper `pairplot` (from a star import, not `sns.pairplot`): receives
# the subset, the attributes, the output path and the agglomerative labels.
# NOTE(review): presumably colours the points by label and saves the figure — confirm.
pairplot(case_3_subset, atributos, "figures/2_segmentation/case_3/pairplot_agglomerative.png", agglomerative_labels)

In [None]:
# "Centroids" of the agglomerative clusters, computed as the per-cluster mean
# of the normalised attributes.
df_agglomerative_centroids = pd.DataFrame(case_3_data_norm)
df_agglomerative_centroids.columns = atributos
# Attach the label of each sample so the rows can be grouped by cluster.
df_agglomerative_centroids['cluster'] = agglomerative_labels
# The name is rebound here: from this line on it holds one row per cluster
# (index = cluster label), no longer one row per sample.
df_agglomerative_centroids = df_agglomerative_centroids.groupby('cluster').mean()

In [None]:
# Draw the agglomerative cluster means against the normalised data.
# NOTE(review): the meaning of the trailing 0.0 is defined by
# `visualize_centroids` — confirm.
visualize_centroids(df_agglomerative_centroids.values, case_3_data_norm, "figures/2_segmentation/case_3/centroids_agglomerative_norm.pdf", atributos, 0.0)

In [None]:
# Same cluster means, this time passed together with the un-normalised data.
# NOTE(review): the means come from the normalised data; confirm that
# `visualize_centroids` rescales them using `case_3_data`, otherwise the
# figure mixes two scales.
visualize_centroids(df_agglomerative_centroids.values, case_3_data, "figures/2_segmentation/case_3/centroids_agglomerative.pdf", atributos, 0.0)

In [None]:
# Heat map of the clustering attributes with a row dendrogram built with Ward
# linkage; columns are left in their given order (no column clustering) and
# row tick labels are hidden.
dendograma_subset = case_3_subset.reindex(columns=atributos)
hm = sns.clustermap(
    dendograma_subset,
    method='ward',
    col_cluster=False,
    figsize=(20, 10),
    cmap="YlGnBu",
    yticklabels=False,
)
hm.savefig("figures/2_segmentation/case_3/clustermap.pdf")