In [None]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import ticker
from matplotlib.collections import LineCollection
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

from db_utils import get_rect_to_rect_data

In [None]:
# Files passed to get_rect_to_rect_data as `files_to_exclude`.
# NOTE(review): the name suggests these are pedestrian recordings — confirm.
# 'VM2_421371629' used to be listed twice; the redundant entry was dropped.
ped_files = [
    'VM2_-2112701535',
    'VM2_-217686115',
    'VM2_-1247665811',
    'VM2_-104300786',
    'VM2_330973206',
    'VM2_1100569031',
    'VM2_421371629',
    'VM2_1476499235',
    'VM2_-1451152685',
    'VM2_-1523872256',
]

In [None]:
# Rectangles handed to get_rect_to_rect_data as the start and end areas.
# NOTE(review): the values look like (lon_min, lat_min, lon_max, lat_max) —
# confirm against db_utils.
start_rect_coords = (
    13.416257, 52.521664,
    13.416462, 52.521793,
)
end_rect_coords = (
    13.416026, 52.522284,
    13.416231, 52.522413,
)

# Timestamp forwarded to the loader as `end_date`.
end_date_str = '2021-01-01 00:00:00'
end_date = datetime.fromisoformat(end_date_str)

df_simra = get_rect_to_rect_data(
    start_rect_coords,
    end_rect_coords,
    end_date=end_date,
    files_to_exclude=ped_files,
)

In [None]:
# TODO: filtering needed!
# Inspect the samples whose velocity is zero or missing; they are still part
# of `df_simra` when it is used further down.
velo_zero_or_missing = (df_simra.velo == 0) | df_simra.velo.isna()

# A notebook cell only renders its last bare expression, so both tables are
# shown explicitly (`display` is provided by the IPython/Jupyter kernel).
display(df_simra[velo_zero_or_missing])

# Same rows, restricted to those that nevertheless report a non-zero distance.
display(df_simra[velo_zero_or_missing & (df_simra.dist != 0.0)])

In [None]:
# Collapse the samples to one row per file: mean velocity and summed distance.
df_simra_grouped = df_simra.groupby('filename').agg(
    velo=('velo', 'mean'),
    dist=('dist', 'sum'),
)
df_simra_grouped

In [None]:
# Cluster the files by their summed distance and show the clusters on a strip plot.
N_CLUSTERS = 2
# n_init is pinned explicitly: its default differs between scikit-learn
# releases, and together with random_state this keeps the result reproducible.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=0)

# KMeans expects a 2-D (n_samples, n_features) matrix, hence the single column.
dists = df_simra_grouped['dist'].to_numpy().reshape(-1, 1)
dist_cluster_labels = kmeans.fit_predict(dists)
dist_cluster_centers = kmeans.cluster_centers_

# seaborn's `x` takes a flat vector, so the (n, 1) data is flattened per cluster.
fig, ax = plt.subplots()
for cluster_id, cluster_color in enumerate(('blue', 'orange')):
    in_cluster = dist_cluster_labels == cluster_id
    sns.stripplot(x=dists[in_cluster].ravel(), color=cluster_color, ax=ax)
# Cluster centres: larger red markers, drawn without jitter.
sns.stripplot(x=dist_cluster_centers.ravel(), color='red', size=10, jitter=False, ax=ax)
ax.set_xlabel('Sum of dist per file')
plt.show()

In [None]:
# Cluster the files by their mean velocity and show the clusters on a strip plot.
N_CLUSTERS = 2
# n_init is pinned explicitly: its default differs between scikit-learn
# releases, and together with random_state this keeps the result reproducible.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=0)

# KMeans expects a 2-D (n_samples, n_features) matrix, hence the single column.
velos = df_simra_grouped['velo'].to_numpy().reshape(-1, 1)
velo_cluster_labels = kmeans.fit_predict(velos)
velo_cluster_centers = kmeans.cluster_centers_

# seaborn's `x` takes a flat vector, so the (n, 1) data is flattened per cluster.
fig, ax = plt.subplots()
for cluster_id, cluster_color in enumerate(('blue', 'orange')):
    in_cluster = velo_cluster_labels == cluster_id
    sns.stripplot(x=velos[in_cluster].ravel(), color=cluster_color, ax=ax)
# Cluster centres: larger red markers, drawn without jitter.
sns.stripplot(x=velo_cluster_centers.ravel(), color='red', size=10, jitter=False, ax=ax)
ax.set_xlabel('Mean velo per file')
plt.show()

In [None]:
# Draw every file's track as a line whose segments are coloured by velocity.
fig, ax = plt.subplots(figsize=(12, 12))
ax.set_aspect('equal')

# One norm shared by all tracks so that colours are comparable between them.
norm = plt.Normalize(df_simra.velo.min(), df_simra.velo.max())

# Iterate the groupby directly instead of binding it to `df_simra_grouped`:
# that name holds the per-file aggregate used by the clustering cells, and
# overwriting it with a GroupBy object breaks them when they are re-run.
for _, df_ride in df_simra.groupby('filename'):
    # (n, 1, 2) array of points -> (n - 1, 2, 2) array of consecutive segments.
    points = df_ride[['lon', 'lat']].to_numpy().reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    lc = LineCollection(segments, cmap='viridis', norm=norm)
    # One colour value per segment: the velocity at the segment's start point.
    lc.set_array(df_ride.velo.to_numpy()[:-1])
    lc.set_linewidth(2)
    ax.add_collection(lc)

# add_collection does not rescale the axes, so the limits are set by hand.
ax.set_xlim(df_simra.lon.min(), df_simra.lon.max())
ax.set_ylim(df_simra.lat.min(), df_simra.lat.max())

# Every collection shares `norm` and the colormap, so the last one created
# can stand in for all of them when building the colorbar.
axcb = fig.colorbar(lc, ax=ax, shrink=0.8, pad=0.1)
axcb.set_label('Velocity in m/s')

# Four evenly spaced ticks per axis, printed in full rather than as an offset.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(ticker.LinearLocator(4))
    axis.set_major_formatter(ticker.ScalarFormatter(useOffset=False))
ax.set_xlabel('Longitude in decimal degrees')
ax.set_ylabel('Latitude in decimal degrees')

plt.show()

In [None]:
# Cluster the files on (velo, dist) jointly, after min-max scaling each feature.
N_CLUSTERS = 2
# n_init is pinned explicitly: its default differs between scikit-learn
# releases, and together with random_state this keeps the result reproducible.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=0)

# Both inputs are (n_files, 1) columns built from the same grouped frame.
assert velos.shape == dists.shape, (velos.shape, dists.shape)

# MinMaxScaler rescales every column independently, so scaling the stacked
# matrix equals scaling each column on its own. `velos` and `dists` are left
# untouched, which keeps this cell safe to re-run, and `scaler` retains the
# fit of both features.
velo_dist_raw = np.hstack([velos, dists])
scaler = MinMaxScaler()
velo_dist_combined = scaler.fit_transform(velo_dist_raw)

velo_dist_cluster_labels = kmeans.fit_predict(velo_dist_combined)
velo_dist_cluster_centers = kmeans.cluster_centers_

colors = ['blue' if label == 0 else 'orange' for label in velo_dist_cluster_labels]
fig, ax = plt.subplots()
ax.scatter(velo_dist_combined[:, 0], velo_dist_combined[:, 1], c=colors)
ax.scatter(velo_dist_cluster_centers[:, 0], velo_dist_cluster_centers[:, 1], color='red')
ax.set_xlabel('Mean velo per file (min-max scaled)')
ax.set_ylabel('Sum of dist per file (min-max scaled)')
plt.show()
