In [1]:
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only use the first GPU
    try:
        tf.config.set_visible_devices(gpus[1], 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)
from tensorflow import keras
from tensorflow.keras.layers import LSTM, RepeatVector, GRU, Embedding
from tensorflow.keras.optimizers import Adam
import tensorflow_probability as tfp
import numpy as np
import os
from ast import literal_eval as make_tuple
from scipy.sparse import csr_matrix
import pandas as pd
import skmob
from skmob.tessellation import tilers

2023-03-14 08:36:43.853836: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-14 08:36:45.356143: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-14 08:36:45.357463: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-14 08:36:45.358975: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least on

4 Physical GPUs, 1 Logical GPU


In [2]:
points = pd.read_pickle('../data/freemove/freemove_point_geographical_context.pickle')
points['lat'] = points.geometry.apply(lambda x: x.y)
points['lng'] = points.geometry.apply(lambda x: x.x)

In [3]:
# Filter trajectories that lie outside of berlin

tessellation = tilers.tiler.get("squared", base_shape='Berlin, Germany', meters=500)

tdf = skmob.TrajDataFrame(points)
mapped = tdf.mapping(tessellation, remove_na=True)

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  tile_ids = gpd.sjoin(gdf, tessellation, how=how, op='within')[[constants.TILE_ID]]


In [4]:
filtered_indices = set(points.index).difference(mapped.index)

In [5]:
drop_traj_ids = []
for i, point in points.iterrows():
    if i in filtered_indices:
        drop_traj_ids.append(point.TRIP_ID)

In [6]:
len(drop_traj_ids) == len(points) - len(mapped)

True

In [7]:
points = points.query('TRIP_ID not in @drop_traj_ids').reset_index(drop=True).copy()

In [8]:
# Filter users with less than 5 trips

# Count the number of unique values for each ID
unique_counts = points.groupby('PERSON_ID')['TRIP_ID'].nunique()
# Filter out the IDs with less than 5 unique values
filtered_ids = unique_counts[unique_counts >= 5].index.tolist()
points = points[points['PERSON_ID'].isin(filtered_ids)].reset_index(drop=True)

## Geo Context

In [9]:
max_points = points.groupby('TRIP_ID').count()['PERSON_ID'].max()
geographical_context_dim = len(points['geographical_context'].iloc[0])

X_geo_context = np.zeros((points.TRIP_ID.nunique(), max_points, geographical_context_dim))
Y = np.zeros((points.TRIP_ID.nunique(),))

In [10]:
# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    # get the trajectory id
    traj_id = traj[0]

    # get the user id
    user_id = traj[1]['PERSON_ID'].iloc[0]

    Y[index] = user_id

    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_geo_context[index, idx] = point['geographical_context']

In [11]:
print(X_geo_context.shape, X_geo_context[0])

(1250, 36, 10) [[0.36 0.22 0.04 0.17 0.46 0.55 0.01 0.3  1.   0.02]
 [0.83 0.35 0.05 0.4  0.99 0.84 0.02 0.27 1.   0.01]
 [0.18 0.27 0.03 0.09 0.49 0.27 0.16 0.21 1.   0.01]
 [0.27 0.4  0.07 0.16 0.45 0.53 0.14 0.19 1.   0.  ]
 [0.26 0.39 0.08 0.15 0.43 0.55 0.25 0.2  1.   0.  ]
 [0.18 0.27 0.02 0.09 0.51 0.25 0.2  0.2  1.   0.01]
 [0.46 0.29 0.04 0.15 0.75 0.47 0.1  0.25 1.   0.03]
 [0.44 0.32 0.06 0.12 0.76 0.45 0.16 0.3  1.   0.04]
 [0.44 0.32 0.06 0.12 0.77 0.45 0.16 0.3  1.   0.05]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.  

## Geo type

In [12]:
from sklearn.preprocessing import OneHotEncoder

In [13]:
geo_type = np.asarray(points.geographical_context.apply(lambda x: np.argmax(x)).tolist()).reshape(-1,1)

In [14]:
enc_geo_type = OneHotEncoder(handle_unknown='ignore')

enc_geo_type.fit(geo_type)
enc_geo_type.categories_

[array([0, 1, 4, 5, 6, 7, 8])]

In [15]:
points['geo_type'] = enc_geo_type.transform(geo_type).toarray().tolist()

In [16]:
geo_type_dim = len(points['geo_type'].iloc[0])


X_geo_type = np.zeros((points.TRIP_ID.nunique(), max_points, geo_type_dim))

# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_geo_type[index, idx, :] = point['geo_type']

In [17]:
print(X_geo_type.shape, X_geo_type[0])

(1250, 36, 7) [[0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


## Geohash

In [18]:
import geohash

In [19]:
points['bin_geohash'] = points.geometry.apply(lambda x: geohash.bin_geohash(x.y, x.x, precision=8))

In [20]:
geohash_dim = len(points['bin_geohash'].iloc[0])


X_geohash = np.zeros((points.TRIP_ID.nunique(), max_points, geohash_dim))

# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_geohash[index, idx, :] = point['bin_geohash']

In [21]:
print(X_geohash.shape, X_geohash[0])

(1250, 36, 40) [[1. 1. 0. ... 1. 0. 0.]
 [1. 1. 0. ... 0. 0. 1.]
 [1. 1. 0. ... 1. 0. 1.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


## Time

In [22]:
enc_time = OneHotEncoder(handle_unknown='ignore')

In [23]:
# get hour from TIME column and reshape to array wiht one feature
hour = np.asarray(points.TIME.dt.hour).reshape(-1,1)

In [24]:
enc_time.fit(hour)
enc_time.categories_

[array([ 0,  1,  2,  3,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23])]

In [25]:
points['hour'] = enc_time.transform(hour).toarray().tolist()

In [26]:
hour_dim = len(points['hour'].iloc[0])


X_hour = np.zeros((points.TRIP_ID.nunique(), max_points, hour_dim))

# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_hour[index, idx, :] = point['hour']

In [27]:
print(X_hour.shape, X_hour[0])

(1250, 36, 23) [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

## Merge Inputs

In [29]:
from tensorflow.keras.layers import Concatenate

keys = ['bin_geohash', 'hour', 'geographical_context', 'geo_type']

X = [X_geohash, X_hour, X_geo_context, X_geo_type]

vocab_size = []
X = Concatenate(axis=2)(X)


## Autoencoder

In [30]:
from tensorflow.keras.layers import LSTM, RepeatVector, GRU, Embedding, Dense, TimeDistributed, Lambda, Bidirectional, Masking
from keras.initializers import he_uniform
from keras.regularizers import l1
from attention import Attention


In [32]:
latent_dim = 500
EMBEDDER_SIZE = 150
timesteps = int(max_points)

# input_geohash = keras.Input(shape=(timesteps,geohash_dim))
# input_hour = keras.Input(shape=(timesteps,hour_dim))
# input_geo_context = keras.Input(shape=(timesteps,geographical_context_dim))
# input_geo_type = keras.Input(shape=(timesteps,geo_type_dim))

# inputs = [input_geohash, input_hour, input_geo_context, input_geo_type]
# hidden_input = Concatenate(axis=2)(inputs)

inputs = keras.Input(shape=(timesteps,geohash_dim+hour_dim+geographical_context_dim+geo_type_dim))

# masked = Masking(mask_value=0.,
#                 input_shape=(timesteps, features))(inputs)

# e_geohash = Embedding(geohash_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_geohash')(input_geohash)
# e_hour = Embedding(hour_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_hour')(input_hour)
# e_geo_context = Embedding(geographical_context_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_geo_context')(input_geo_context)
# e_geo_type = Embedding(geo_type_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_geo_type')(input_geo_type)

# e_geohash = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_geohash)
# e_hour = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_hour)
# e_geo_context = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_geo_context)
# e_geo_type = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_geo_type)

# embeddings = [e_geohash, e_hour, e_geo_context, e_geo_type]
# hidden_input = Concatenate(axis=2)(embeddings)

encoded = Dense(units=128, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(inputs)
encoded = Dense(units=256, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(encoded)

encoded = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))(encoded)
encoded = Bidirectional(GRU(latent_dim, return_sequences=False, recurrent_regularizer=l1(0.02)))(encoded)

# encoded = Attention(units=500)(encoded)

z_mean = Dense(latent_dim)(encoded)
z_log_sigma = Dense(latent_dim)(encoded)

decoded = RepeatVector(timesteps)(encoded)
decoded = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))(decoded)
decoded = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))(decoded)

decoded = Dense(units=256, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(decoded)
decoded = Dense(units=128, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(decoded)

output_geohash = TimeDistributed(Dense(geohash_dim, kernel_initializer=he_uniform(), activation='sigmoid'), name='output_geohash')(decoded)
output_hour = TimeDistributed(Dense(hour_dim, kernel_initializer=he_uniform(), activation='softmax'), name='output_hour')(decoded)
output_geo_context = TimeDistributed(Dense(geographical_context_dim, kernel_initializer=he_uniform(), activation='tanh'), name='output_geo_context')(decoded)
output_geo_type = TimeDistributed(Dense(geo_type_dim, kernel_initializer=he_uniform(), activation='softmax'), name='output_geo_type')(decoded)
outputs = [output_geohash, output_hour, output_geo_context, output_geo_type]

outputs = Concatenate(axis=2)(outputs)

# d_4 = Dense(units=2000, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(decoded)
# d_5 = Dense(units=500, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(d_4)
# d_6 = Dense(units=500, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(d_5)

sequence_autoencoder = keras.Model(inputs, outputs)
encoder = keras.Model(inputs, encoded)
decoder = keras.Model(encoded, outputs)

opt = Adam(learning_rate=0.001)


# sequence_autoencoder.compile(optimizer='adam', 
#                              metrics=['accuracy'],
#                             loss={'output_geohash': 'binary_crossentropy', 'output_hour': 'categorical_crossentropy', 'output_geo_context': 'mse', 'output_geo_type': 'categorical_crossentropy'})

sequence_autoencoder.compile(optimizer='adam', 
                             metrics=['accuracy'],
                            loss='mse')


In [33]:
sequence_autoencoder.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 36, 80)]     0           []                               
                                                                                                  
 dense_1 (Dense)                (None, 36, 128)      10368       ['input_2[0][0]']                
                                                                                                  
 dense_2 (Dense)                (None, 36, 256)      33024       ['dense_1[0][0]']                
                                                                                                  
 bidirectional (Bidirectional)  (None, 36, 1000)     2274000     ['dense_2[0][0]']                
                                                                                              

In [34]:
sequence_autoencoder.fit(X, X,
                epochs=500,
                batch_size=256,
                shuffle=True)

Epoch 1/500


2023-03-14 08:37:59.195884: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-03-14 08:37:59.763821: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-03-14 08:38:00.038044: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f0f51576980 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-03-14 08:38:00.038089: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA A100-PCIE-40GB, Compute Capability 8.0
2023-03-14 08:38:00.046615: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-03-14 08:38:00.188704: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of th

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 7

Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/

Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 

Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 

Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 

Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 

Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.History at 0x7f29d6be56d0>

In [39]:
import sys
sys.path.append('..')
import attack
import geopandas as gp
gdf = gp.read_file('../data/freemove/raw_full.geojson', crs='EPSG:4326')
gdf = gdf[gdf['TRIP_ID'].isin(points.TRIP_ID)]
true_mapping = gdf[['TRIP_ID', 'PERSON_ID']].sort_values('TRIP_ID').drop_duplicates().reset_index(drop=True)

print('Number of persons in data', points.PERSON_ID.nunique())

Number of persons in data 54


In [40]:
# Evaluate base line hdbscan
y = attack.getGroundTruth(true_mapping)
import hdbscan
from sklearn import metrics
from sklearn.cluster import KMeans, DBSCAN

pred = encoder.predict(X)

h_dbscan = hdbscan.HDBSCAN()
h_dbscan.fit(pred)

kmeans = KMeans(n_clusters=54, random_state=0, n_init=20).fit(pred)

dbscan = DBSCAN(eps=0.5, min_samples=10).fit(pred)

attack.evaluate(h_dbscan.labels_, true_mapping)
attack.evaluate(kmeans.labels_, true_mapping)
attack.evaluate(dbscan.labels_, true_mapping)

Homogeneity: 0.252
Completeness: 0.589
V-measure: 0.353
Rand index: 0.530
ARI: 0.009
MI: 0.954
NMI: 0.353
AMI: 0.238
Cluster accuracy: 0.244
Homogeneity: 0.540
Completeness: 0.524
V-measure: 0.532
Rand index: 0.964
ARI: 0.199
MI: 2.046
NMI: 0.532
AMI: 0.379
Cluster accuracy: 0.336
Homogeneity: 0.000
Completeness: 1.000
V-measure: 0.000
Rand index: 0.025
ARI: 0.000
MI: 0.000
NMI: 0.000
AMI: 0.000
Cluster accuracy: 0.060


In [252]:
sequence_autoencoder.predict(X)

2023-03-13 16:00:01.705914: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "NVIDIA A100-PCIE-40GB" frequency: 1410 num_cores: 108 environment { key: "architecture" value: "8.0" } environment { key: "cuda" value: "11020" } environment { key: "cudnn" value: "8100" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 41943040 shared_memory_size_per_multiprocessor: 167936 memory_size: 40267612160 bandwidth: 1555200000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }




[array([[[9.9998212e-01, 9.9996865e-01, 3.0526273e-06, ...,
          2.8011220e-02, 8.2141507e-01, 9.8367494e-01],
         [9.9999475e-01, 9.9999261e-01, 1.6640771e-06, ...,
          4.3803542e-03, 9.8835200e-01, 4.7826632e-03],
         [9.9999762e-01, 9.9999750e-01, 8.8590264e-07, ...,
          4.7581547e-01, 9.9295312e-01, 7.3154890e-03],
         ...,
         [6.9164475e-06, 6.6300845e-06, 3.1118915e-08, ...,
          2.6860985e-09, 9.6402125e-08, 7.1761697e-08],
         [4.3214168e-06, 4.3096052e-06, 5.6993724e-08, ...,
          5.8852581e-09, 1.2842621e-07, 7.1494739e-08],
         [2.2446129e-06, 2.3877401e-06, 1.4143866e-07, ...,
          8.8572589e-09, 1.1038742e-06, 4.4416911e-08]],
 
        [[9.9997056e-01, 9.9998796e-01, 4.5588133e-09, ...,
          6.6519165e-01, 4.7724582e-03, 9.9798751e-01],
         [9.9999869e-01, 9.9999797e-01, 7.3203069e-11, ...,
          6.8302907e-02, 9.9137950e-01, 8.5939103e-01],
         [9.9969101e-01, 9.9964547e-01, 9.8956789e-09, 