In [2]:
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only use the first GPU
    try:
        tf.config.set_visible_devices(gpus[1], 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)
from tensorflow import keras
from tensorflow.keras.layers import LSTM, RepeatVector, GRU, Embedding
from tensorflow.keras.optimizers import Adam
import tensorflow_probability as tfp
import numpy as np
import os
from ast import literal_eval as make_tuple
from scipy.sparse import csr_matrix
import pandas as pd
import skmob
from skmob.tessellation import tilers

2023-03-22 18:28:31.920158: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-22 18:28:33.411017: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-22 18:28:33.412196: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-22 18:28:33.413761: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least on

4 Physical GPUs, 1 Logical GPU


In [3]:
points = pd.read_pickle('../data/freemove/freemove_point_geographical_context.pickle')
points['lat'] = points.geometry.apply(lambda x: x.y)
points['lng'] = points.geometry.apply(lambda x: x.x)

In [4]:
points

Unnamed: 0,TRIP_ID,PERSON_ID,TIME,geometry,geographical_context,lat,lng
0,978933,17246,2022-10-21 14:59:24,POINT (13.31753 52.53094),"[0.36, 0.22, 0.04, 0.17, 0.46, 0.55, 0.01, 0.3...",52.530942,13.317532
1,978933,17246,2022-10-21 15:02:10,POINT (13.32791 52.53281),"[0.83, 0.35, 0.05, 0.4, 0.99, 0.84, 0.02, 0.27...",52.532806,13.327908
2,978933,17246,2022-10-21 15:10:39,POINT (13.36288 52.53582),"[0.18, 0.27, 0.03, 0.09, 0.49, 0.27, 0.16, 0.2...",52.535821,13.362884
3,978933,17246,2022-10-21 15:13:28,POINT (13.36931 52.52905),"[0.27, 0.4, 0.07, 0.16, 0.45, 0.53, 0.14, 0.19...",52.529052,13.369314
4,978933,17246,2022-10-21 15:16:03,POINT (13.36997 52.52837),"[0.26, 0.39, 0.08, 0.15, 0.43, 0.55, 0.25, 0.2...",52.528374,13.369971
...,...,...,...,...,...,...,...
12346,1015191,16370,2022-11-17 22:09:33,POINT (13.36541 52.47671),"[0.09, 0.26, 0.01, 0.03, 0.28, 0.34, 0.02, 0.0...",52.476712,13.365414
12347,1015191,16370,2022-11-17 22:15:28,POINT (13.35614 52.46070),"[0.02, 0.48, 0.0, 0.02, 1.0, 0.34, 0.15, 0.02,...",52.460702,13.356144
12348,1015191,16370,2022-11-17 22:17:07,POINT (13.35593 52.45973),"[0.02, 0.59, 0.0, 0.03, 1.0, 0.34, 0.22, 0.02,...",52.459734,13.355926
12349,1015191,16370,2022-11-17 22:19:27,POINT (13.35338 52.44808),"[0.29, 0.39, 0.0, 0.07, 0.3, 0.22, 0.16, 0.05,...",52.448084,13.353378


## Filter trajectories that lie outside of berlin


In [5]:
tessellation = tilers.tiler.get("squared", base_shape='Berlin, Germany', meters=500)

tdf = skmob.TrajDataFrame(points)
mapped = tdf.mapping(tessellation, remove_na=True)

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  tile_ids = gpd.sjoin(gdf, tessellation, how=how, op='within')[[constants.TILE_ID]]


In [6]:
filtered_indices = set(points.index).difference(mapped.index)

In [7]:
drop_traj_ids = []
for i, point in points.iterrows():
    if i in filtered_indices:
        drop_traj_ids.append(point.TRIP_ID)

In [8]:
len(drop_traj_ids) == len(points) - len(mapped)

True

In [9]:
points = points.query('TRIP_ID not in @drop_traj_ids').reset_index(drop=True).copy()

## Filter users with less than n trips


In [10]:
n = 1

# Count the number of unique values for each ID
unique_counts = points.groupby('PERSON_ID')['TRIP_ID'].nunique()
# Filter out the IDs with less than n unique values
filtered_ids = unique_counts[unique_counts >= n].index.tolist()
points = points[points['PERSON_ID'].isin(filtered_ids)].reset_index(drop=True)

## Geo Context

In [11]:
max_points = points.groupby('TRIP_ID').count()['PERSON_ID'].max()
geographical_context_dim = len(points['geographical_context'].iloc[0])

X_geo_context = np.zeros((points.TRIP_ID.nunique(), max_points, geographical_context_dim))
Y = np.zeros((points.TRIP_ID.nunique(),))

In [12]:
# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    # get the trajectory id
    traj_id = traj[0]

    # get the user id
    user_id = traj[1]['PERSON_ID'].iloc[0]

    Y[index] = user_id

    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_geo_context[index, idx] = point['geographical_context']

In [13]:
print(X_geo_context.shape, X_geo_context[0])

(1294, 36, 10) [[0.36 0.22 0.04 0.17 0.46 0.55 0.01 0.3  1.   0.02]
 [0.83 0.35 0.05 0.4  0.99 0.84 0.02 0.27 1.   0.01]
 [0.18 0.27 0.03 0.09 0.49 0.27 0.16 0.21 1.   0.01]
 [0.27 0.4  0.07 0.16 0.45 0.53 0.14 0.19 1.   0.  ]
 [0.26 0.39 0.08 0.15 0.43 0.55 0.25 0.2  1.   0.  ]
 [0.18 0.27 0.02 0.09 0.51 0.25 0.2  0.2  1.   0.01]
 [0.46 0.29 0.04 0.15 0.75 0.47 0.1  0.25 1.   0.03]
 [0.44 0.32 0.06 0.12 0.76 0.45 0.16 0.3  1.   0.04]
 [0.44 0.32 0.06 0.12 0.77 0.45 0.16 0.3  1.   0.05]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.  

## Geo type

In [14]:
from sklearn.preprocessing import OneHotEncoder

In [15]:
geo_type = np.asarray(points.geographical_context.apply(lambda x: np.argmax(x)).tolist()).reshape(-1,1)

In [16]:
enc_geo_type = OneHotEncoder(handle_unknown='ignore')

enc_geo_type.fit(geo_type)
enc_geo_type.categories_

[array([0, 1, 4, 5, 6, 7, 8])]

In [17]:
points['geo_type'] = enc_geo_type.transform(geo_type).toarray().tolist()

In [18]:
geo_type_dim = len(points['geo_type'].iloc[0])


X_geo_type = np.zeros((points.TRIP_ID.nunique(), max_points, geo_type_dim))

# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_geo_type[index, idx, :] = point['geo_type']

In [19]:
print(X_geo_type.shape, X_geo_type[0])

(1294, 36, 7) [[0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


## Geohash

In [20]:
import geohash

In [21]:
points['bin_geohash'] = points.geometry.apply(lambda x: geohash.bin_geohash(x.y, x.x, precision=8))

In [22]:
geohash_dim = len(points['bin_geohash'].iloc[0])


X_geohash = np.zeros((points.TRIP_ID.nunique(), max_points, geohash_dim))

# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_geohash[index, idx, :] = point['bin_geohash']

In [23]:
print(X_geohash.shape, X_geohash[0])

(1294, 36, 40) [[1. 1. 0. ... 1. 0. 0.]
 [1. 1. 0. ... 0. 0. 1.]
 [1. 1. 0. ... 1. 0. 1.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


## Time

In [24]:
enc_time = OneHotEncoder(handle_unknown='ignore')

In [25]:
# get hour from TIME column and reshape to array wiht one feature
hour = np.asarray(points.TIME.dt.hour).reshape(-1,1)

In [26]:
enc_time.fit(hour)
enc_time.categories_

[array([ 0,  1,  2,  3,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23])]

In [27]:
points['hour'] = enc_time.transform(hour).toarray().tolist()

In [28]:
hour_dim = len(points['hour'].iloc[0])


X_hour = np.zeros((points.TRIP_ID.nunique(), max_points, hour_dim))

# convert points into numpy array for each trajectory
for index, traj in enumerate(points.groupby('TRIP_ID')):    
    for idx, point in traj[1].reset_index(drop=True).iterrows():
        X_hour[index, idx, :] = point['hour']

In [29]:
print(X_hour.shape, X_hour[0])

(1294, 36, 23) [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

## Merge Inputs

In [30]:
from tensorflow.keras.layers import Concatenate

keys = ['bin_geohash', 'hour', 'geographical_context', 'geo_type']

X = [X_geohash, X_hour, X_geo_context, X_geo_type]

vocab_size = []
X = Concatenate(axis=2)(X)


In [33]:
np.save('X.npy', X)


NameError: name 'true_mapping' is not defined

## Autoencoder

In [42]:
from tensorflow.keras.layers import LSTM, RepeatVector, GRU, Embedding, Dense, TimeDistributed, Lambda, Bidirectional, Masking
from keras.initializers import he_uniform
from keras.regularizers import l1
from attention import Attention


In [92]:
latent_dim = 500
EMBEDDER_SIZE = 150
timesteps = int(max_points)

# input_geohash = keras.Input(shape=(timesteps,geohash_dim))
# input_hour = keras.Input(shape=(timesteps,hour_dim))
# input_geo_context = keras.Input(shape=(timesteps,geographical_context_dim))
# input_geo_type = keras.Input(shape=(timesteps,geo_type_dim))

# inputs = [input_geohash, input_hour, input_geo_context, input_geo_type]
# hidden_input = Concatenate(axis=2)(inputs)

inputs = keras.Input(shape=(timesteps,geohash_dim+hour_dim+geographical_context_dim+geo_type_dim))

# masked = Masking(mask_value=0.,
#                 input_shape=(timesteps, features))(inputs)

# e_geohash = Embedding(geohash_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_geohash')(input_geohash)
# e_hour = Embedding(hour_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_hour')(input_hour)
# e_geo_context = Embedding(geographical_context_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_geo_context')(input_geo_context)
# e_geo_type = Embedding(geo_type_dim,
#                       EMBEDDER_SIZE,
#                       input_length=timesteps,
#                       name='e_geo_type')(input_geo_type)

# e_geohash = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_geohash)
# e_hour = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_hour)
# e_geo_context = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_geo_context)
# e_geo_type = Dense(units=EMBEDDER_SIZE, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(input_geo_type)

# embeddings = [e_geohash, e_hour, e_geo_context, e_geo_type]
# hidden_input = Concatenate(axis=2)(embeddings)

encoded = Dense(units=128, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(inputs)
encoded = Dense(units=256, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(encoded)

encoded = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))(encoded)
encoded = Bidirectional(GRU(latent_dim, return_sequences=False, recurrent_regularizer=l1(0.02)))(encoded)

# encoded = Attention(units=500)(encoded)

z_mean = Dense(latent_dim)(encoded)
z_log_sigma = Dense(latent_dim)(encoded)

decoded = RepeatVector(timesteps)(encoded)
decoded = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))(decoded)
decoded = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))(decoded)

decoded = Dense(units=256, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(decoded)
decoded = Dense(units=128, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(decoded)

output_geohash = TimeDistributed(Dense(geohash_dim, kernel_initializer=he_uniform(), activation='sigmoid'), name='output_geohash')(decoded)
output_hour = TimeDistributed(Dense(hour_dim, kernel_initializer=he_uniform(), activation='softmax'), name='output_hour')(decoded)
output_geo_context = TimeDistributed(Dense(geographical_context_dim, kernel_initializer=he_uniform(), activation='tanh'), name='output_geo_context')(decoded)
output_geo_type = TimeDistributed(Dense(geo_type_dim, kernel_initializer=he_uniform(), activation='softmax'), name='output_geo_type')(decoded)
outputs = [output_geohash, output_hour, output_geo_context, output_geo_type]

outputs = Concatenate(axis=2)(outputs)

# d_4 = Dense(units=2000, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(decoded)
# d_5 = Dense(units=500, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(d_4)
# d_6 = Dense(units=500, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))(d_5)

sequence_autoencoder = keras.Model(inputs, outputs)
encoder = keras.Model(inputs, encoded)
decoder = keras.Model(encoded, outputs)

opt = Adam(learning_rate=0.001)


# sequence_autoencoder.compile(optimizer='adam', 
#                              metrics=['accuracy'],
#                             loss={'output_geohash': 'binary_crossentropy', 'output_hour': 'categorical_crossentropy', 'output_geo_context': 'mse', 'output_geo_type': 'categorical_crossentropy'})

sequence_autoencoder.compile(optimizer='adam', 
                             metrics=['accuracy'],
                            loss='mse')

In [87]:
sequence_autoencoder.summary()

Model: "model_20"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_7 (InputLayer)           [(None, 36, 80)]     0           []                               
                                                                                                  
 dense_30 (Dense)               (None, 36, 128)      10368       ['input_7[0][0]']                
                                                                                                  
 dense_31 (Dense)               (None, 36, 256)      33024       ['dense_30[0][0]']               
                                                                                                  
 bidirectional_12 (Bidirectiona  (None, 36, 1000)    2274000     ['dense_31[0][0]']               
 l)                                                                                        

In [63]:
sequence_autoencoder.fit(X, X,
                epochs=500,
                batch_size=256,
                shuffle=True)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/

Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 

Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 

Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 

Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 

Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.History at 0x7f1698aa66d0>

## Evaluation & Simple Clustering

In [34]:
import sys
sys.path.append('..')
import attack
import geopandas as gp
gdf = gp.read_file('../data/freemove/raw_full.geojson', crs='EPSG:4326')
gdf = gdf[gdf['TRIP_ID'].isin(points.TRIP_ID)]
true_mapping = gdf[['TRIP_ID', 'PERSON_ID']].sort_values('TRIP_ID').drop_duplicates().reset_index(drop=True)

np.save('Y.npy', attack.getGroundTruth(true_mapping))

print('Number of persons in data', points.PERSON_ID.nunique())

Number of persons in data 72


In [89]:
# Evaluate base line hdbscan
y = attack.getGroundTruth(true_mapping)
import hdbscan
from sklearn import metrics
from sklearn.cluster import KMeans, DBSCAN

pred = encoder.predict(X)

h_dbscan = hdbscan.HDBSCAN()
h_dbscan.fit(pred)

kmeans = KMeans(n_clusters=72, random_state=0, n_init=20).fit(pred)

dbscan = DBSCAN(eps=0.5, min_samples=10).fit(pred)

attack.evaluate(h_dbscan.labels_, true_mapping)
attack.evaluate(kmeans.labels_, true_mapping)
attack.evaluate(dbscan.labels_, true_mapping)

Homogeneity: 0.022
Completeness: 0.234
V-measure: 0.040
Rand index: 0.207
ARI: 0.001
MI: 0.085
NMI: 0.040
AMI: 0.021
Cluster accuracy: 0.068
Homogeneity: 0.536
Completeness: 0.497
V-measure: 0.516
Rand index: 0.967
ARI: 0.142
MI: 2.090
NMI: 0.516
AMI: 0.306
Cluster accuracy: 0.278
Homogeneity: 0.000
Completeness: 1.000
V-measure: 0.000
Rand index: 0.024
ARI: 0.000
MI: 0.000
NMI: 0.000
AMI: 0.000
Cluster accuracy: 0.058


## DETECT Joint Optimization

In [67]:
from keras.layers import Dense, LSTM, RepeatVector, GRU, Bidirectional, InputSpec
import keras.backend as K


class ClusteringLayer(keras.layers.Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.

    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
        alpha: degrees of freedom parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
         Measure the similarity between embedded point z_i and centroid µ_j.
                 q_ij = 1/(1+dist(x_i, µ_j)^2), then normalize it.
                 q_ij can be interpreted as the probability of assigning sample i to cluster j.
                 (i.e., a soft assignment)
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1.
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [68]:
def target_distribution(q):
    weight = q ** 2 / q.sum(0)
    return (weight.T / weight.sum(1)).T

In [69]:
loss = 0
index = 0
maxiter = 8000
update_interval = 140
index_array = np.arange(X.shape[0])

batch_size = 256

tol = 0.001 # tolerance threshold to stop training

# Simple (First train AE then refine Clustering with KLD loss)

In [83]:
# Initialize Clustering Layer KMEANS
from sklearn.cluster import KMeans, DBSCAN

N_CLUSTERS = 72

# Build clustering model
clustering_layer = ClusteringLayer(N_CLUSTERS, name='clustering')(encoder.output)
model = keras.Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=opt, loss='kld')

# Initialize cluster centers using k-means.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=20)
y_pred = kmeans.fit_predict(encoder.predict(X))
y_pred_last = np.copy(y_pred)
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

# Get labels (ground truth)
y = attack.getGroundTruth(true_mapping)





In [71]:
from sklearn import metrics

for ite in range(int(maxiter)):
    if ite % update_interval == 0:
        q = model.predict(X, verbose=0)
        p = target_distribution(q)  # update the auxiliary target distribution p

        # evaluate the clustering performance
        y_pred = q.argmax(1)
        if y is not None:
            acc = np.round(attack.cluster_acc(y, y_pred), 5)
            nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
            ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
            loss = np.round(loss, 5)
            print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)

        # check stop criterion - model convergence
        delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
        y_pred_last = np.copy(y_pred)
        if ite > 0 and delta_label < tol:
            print('delta_label ', delta_label, '< tol ', tol)
            print('Reached tolerance threshold. Stopping training.')
            break
    idx = index_array[index * batch_size: min((index+1) * batch_size, X.shape[0])]
    loss = model.train_on_batch(x=tf.gather(X, indices=idx), y=p[idx])
    index = index + 1 if (index + 1) * batch_size <= X.shape[0] else 0

Iter 0: acc = 0.28825, nmi = 0.53513, ari = 0.15959  ; loss= 0
Iter 140: acc = 0.29289, nmi = 0.53364, ari = 0.15715  ; loss= 4.98642
Iter 280: acc = 0.29598, nmi = 0.53360, ari = 0.15579  ; loss= 5.7236
Iter 420: acc = 0.28516, nmi = 0.52341, ari = 0.14236  ; loss= 6.825
Iter 560: acc = 0.28748, nmi = 0.52328, ari = 0.14179  ; loss= 7.2276
Iter 700: acc = 0.29134, nmi = 0.52612, ari = 0.14461  ; loss= 7.11497
Iter 840: acc = 0.29134, nmi = 0.52285, ari = 0.14414  ; loss= 6.56727
Iter 980: acc = 0.29134, nmi = 0.52282, ari = 0.14444  ; loss= 6.23553
Iter 1120: acc = 0.29057, nmi = 0.52283, ari = 0.14461  ; loss= 7.44327
Iter 1260: acc = 0.29212, nmi = 0.52328, ari = 0.14568  ; loss= 6.62974
Iter 1400: acc = 0.29212, nmi = 0.52262, ari = 0.14502  ; loss= 5.9629
Iter 1540: acc = 0.29212, nmi = 0.52307, ari = 0.14430  ; loss= 6.27522
Iter 1680: acc = 0.28825, nmi = 0.52183, ari = 0.14330  ; loss= 6.64078
Iter 1820: acc = 0.29134, nmi = 0.52427, ari = 0.14427  ; loss= 7.33977
Iter 1960: ac

# Advanced (Train AE and Clustering Simultaneously)

**Important:** Do not forget to reinitialize the AE before running the code below

In [90]:
from tensorflow.keras.optimizers import SGD

N_CLUSTERS = 72

# (re)initialize clustering layer
clustering_layer = ClusteringLayer(N_CLUSTERS, name='clustering')(encoder.output)

# Train AE and clustering layer at the same time
model = keras.Model(inputs=inputs,
            outputs=[clustering_layer, sequence_autoencoder.output])
pretrain_optimizer = SGD(learning_rate=0.01, momentum=0.9)

model.compile(loss=['kld', 'mse'], loss_weights=[0.1, 1], optimizer='adam')

In [91]:
for ite in range(int(maxiter)):
    if ite % update_interval == 0:
        q, _  = model.predict(X, verbose=0)
        p = target_distribution(q)  # update the auxiliary target distribution p

        # evaluate the clustering performance
        y_pred = q.argmax(1)
        if y is not None:
            acc = np.round(attack.cluster_acc(y, y_pred), 5)
            nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
            ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
            loss = np.round(loss, 5)
            print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)

        # check stop criterion
        delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
        y_pred_last = np.copy(y_pred)
        if ite > 0 and delta_label < tol:
            print('delta_label ', delta_label, '< tol ', tol)
            print('Reached tolerance threshold. Stopping training.')
            break
    idx = index_array[index * batch_size: min((index+1) * batch_size, X.shape[0])]
    loss = model.train_on_batch(x=tf.gather(X, indices=idx), y=[p[idx], tf.gather(X, indices=idx)])
    index = index + 1 if (index + 1) * batch_size <= X.shape[0] else 0

Iter 0: acc = 0.10046, nmi = 0.14096, ari = 0.02553  ; loss= [14.77079  0.49049  0.02395]
Iter 140: acc = 0.08578, nmi = 0.10227, ari = 0.00949  ; loss= [1.503691e+01 7.000000e-05 2.427000e-02]
Iter 280: acc = 0.10046, nmi = 0.15124, ari = 0.02319  ; loss= [14.88898  0.       0.02138]
Iter 420: acc = 0.10896, nmi = 0.17563, ari = 0.03253  ; loss= [1.484727e+01 1.000000e-05 2.219000e-02]
Iter 560: acc = 0.13833, nmi = 0.23023, ari = 0.04790  ; loss= [1.498289e+01 1.000000e-05 1.916000e-02]
Iter 700: acc = 0.19474, nmi = 0.32634, ari = 0.07177  ; loss= [1.481378e+01 3.000000e-05 1.844000e-02]
Iter 840: acc = 0.20711, nmi = 0.37116, ari = 0.08635  ; loss= [1.501879e+01 5.000000e-05 1.900000e-02]
Iter 980: acc = 0.20015, nmi = 0.36085, ari = 0.07436  ; loss= [1.491256e+01 1.800000e-04 1.703000e-02]
Iter 1120: acc = 0.20170, nmi = 0.37403, ari = 0.06870  ; loss= [1.487374e+01 4.000000e-04 1.658000e-02]
Iter 1260: acc = 0.21870, nmi = 0.40721, ari = 0.08320  ; loss= [1.483837e+01 2.600000e-0

## DC-GMM

In [145]:
from scipy.sparse import csr_matrix
import scipy.sparse
import random

class DataGenerator():
    'Generates data for Keras'

    def __init__(self, X, Y, alpha=1000, batch_size=100, num_constrains=0, q=0, ml=0, shuffle=True, l=0):
        'Initialization'
        self.batch_size = batch_size
        self.alpha = alpha
        self.q = q
        self.num_constrains = num_constrains
        self.ml = ml
        self.X = X
        if l == 0:
            self.l = len(Y)
        else:
            self.l = l
        self.Y = Y
        self.W, self.ml_ind1, self.ml_ind2, self.cl_ind1, self.cl_ind2 = self.get_W()
        print(self.W.shape)
        print(self.W)
        print(self.ml_ind1)
        print(self.W.data)
        self.ind1 = np.concatenate([self.ml_ind1,self.cl_ind1])
        self.ind2 = np.concatenate([self.ml_ind2,self.cl_ind2])
        self.indexes = np.arange(len(self.Y))
        self.ind_constr = np.arange(self.num_constrains)
        self.shuffle = shuffle

    def transitive_closure(self, ml_ind1, ml_ind2, cl_ind1, cl_ind2, n):
        """
        This function calculate the total transtive closure for must-links and the full entailment
        for cannot-links.

        # Arguments
            ml_ind1, ml_ind2 = instances within a pair of must-link constraints
            cl_ind1, cl_ind2 = instances within a pair of cannot-link constraints
            n = total training instance number
        # Return
            transtive closure (must-links)
            entailment of cannot-links
        """
        ml_graph = dict()
        cl_graph = dict()
        for i in range(n):
            ml_graph[i] = set()
            cl_graph[i] = set()

        def add_both(d, i, j):
            d[i].add(j)
            d[j].add(i)

        for (i, j) in zip(ml_ind1, ml_ind2):
            add_both(ml_graph, i, j)

        def dfs(i, graph, visited, component):
            visited[i] = True
            for j in graph[i]:
                if not visited[j]:
                    dfs(j, graph, visited, component)
            component.append(i)

        visited = [False] * n
        for i in range(n):
            if not visited[i]:
                component = []
                dfs(i, ml_graph, visited, component)
                for x1 in component:
                    for x2 in component:
                        if x1 != x2:
                            ml_graph[x1].add(x2)
        for (i, j) in zip(cl_ind1, cl_ind2):
            add_both(cl_graph, i, j)
            for y in ml_graph[j]:
                add_both(cl_graph, i, y)
            for x in ml_graph[i]:
                add_both(cl_graph, x, j)
                for y in ml_graph[j]:
                    add_both(cl_graph, x, y)
        ml_res_set = set()
        cl_res_set = set()
        for i in ml_graph:
            for j in ml_graph[i]:
                if j != i and j in cl_graph[i]:
                    raise Exception('inconsistent constraints between %d and %d' % (i, j))
                if i <= j:
                    ml_res_set.add((i, j))
                else:
                    ml_res_set.add((j, i))
        for i in cl_graph:
            for j in cl_graph[i]:
                if i <= j:
                    cl_res_set.add((i, j))
                else:
                    cl_res_set.add((j, i))
        ml_res1, ml_res2 = [], []
        cl_res1, cl_res2 = [], []
        for (x, y) in ml_res_set:
            ml_res1.append(x)
            ml_res2.append(y)
        for (x, y) in cl_res_set:
            cl_res1.append(x)
            cl_res2.append(y)
        return np.array(ml_res1), np.array(ml_res2), np.array(cl_res1), np.array(cl_res2)

    def generate_random_pair(self, y, num, q):
        """
        Generate random pairwise constraints.
        """
        ml_ind1, ml_ind2 = [], []
        cl_ind1, cl_ind2 = [], []
        while num > 0:
            tmp1 = random.randint(0, self.l - 1)
            tmp2 = random.randint(0, self.l - 1)
            ii = np.random.uniform(0,1)
            if tmp1 == tmp2:
                continue
            # If the samples belong to the same cluster in fact
            if y[tmp1] == y[tmp2]:
                # Append must-link constraints for unflipped constrains (>q)
                if ii >= q:
                    ml_ind1.append(tmp1)
                    ml_ind2.append(tmp2)
                else:
                    cl_ind1.append(tmp1)
                    cl_ind2.append(tmp2)
            
            else:
                # If not, append cannot-link constraints for unflipped constrains (>q)
                if ii >= q:
                    cl_ind1.append(tmp1)
                    cl_ind2.append(tmp2)
                else:
                    ml_ind1.append(tmp1)
                    ml_ind2.append(tmp2)
            num -= 1
        return np.array(ml_ind1), np.array(ml_ind2), np.array(cl_ind1), np.array(cl_ind2)

    def generate_random_pair_ml(self, y, num):
        """
        Generate random pairwise constraints.
        """
        ml_ind1, ml_ind2 = [], []
        cl_ind1, cl_ind2 = [], []
        while num > 0:
            tmp1 = random.randint(0, y.shape[0] - 1)
            tmp2 = random.randint(0, y.shape[0] - 1)
            ii = np.random.uniform(0,1)
            if tmp1 == tmp2:
                continue
            if y[tmp1] == y[tmp2]:
                ml_ind1.append(tmp1)
                ml_ind2.append(tmp2)
                num -= 1
        return np.array(ml_ind1), np.array(ml_ind2), np.array(cl_ind1), np.array(cl_ind2)

    def generate_random_pair_cl(self, y, num):
        """
        Generate random pairwise constraints.
        """
        ml_ind1, ml_ind2 = [], []
        cl_ind1, cl_ind2 = [], []
        while num > 0:
            tmp1 = random.randint(0, y.shape[0] - 1)
            tmp2 = random.randint(0, y.shape[0] - 1)
            ii = np.random.uniform(0,1)
            if tmp1 == tmp2:
                continue
            if y[tmp1] != y[tmp2]:
                cl_ind1.append(tmp1)
                cl_ind2.append(tmp2)
                num -= 1
        return np.array(ml_ind1), np.array(ml_ind2), np.array(cl_ind1), np.array(cl_ind2)

    def get_W(self):
        if self.ml==0:
            ml_ind1, ml_ind2, cl_ind1, cl_ind2 = self.generate_random_pair(self.Y, self.num_constrains, self.q)
            if self.q == 0:
                ml_ind1, ml_ind2, cl_ind1, cl_ind2 = self.transitive_closure(ml_ind1, ml_ind2, cl_ind1, cl_ind2, self.X.shape[0])
        elif self.ml == 1:
            ml_ind1, ml_ind2, cl_ind1, cl_ind2 = self.generate_random_pair_ml(self.Y, self.num_constrains)
        elif self.ml == -1:
            ml_ind1, ml_ind2, cl_ind1, cl_ind2 = self.generate_random_pair_cl(self.Y, self.num_constrains)
        print("\nNumber of ml constraints: %d, cl constraints: %d.\n " % (len(ml_ind1), len(cl_ind1)))
        
        #W = np.zeros([len(self.X), len(self.X)])
        #for i in range(len(ml_ind1)):
        #    W[ml_ind1[i], ml_ind2[i]] = 1
        #    W[ml_ind2[i], ml_ind1[i]] = 1
        #for i in range(len(cl_ind1)):
        #    W[cl_ind1[i], cl_ind2[i]] = -1
        #    W[cl_ind2[i], cl_ind1[i]] = -1
        #W = csr_matrix(W)

        #if self.num_constrains > 0:
        if False:
            ml_ind1= np.load("source/data1_pos.npy")
            ml_ind2= np.load("source/data2_pos.npy")
            cl_ind1=np.load("source/data1_neg.npy")
            cl_ind2= np.load("source/data2_neg.npy")

        ind1 = np.concatenate([ml_ind1, ml_ind2, cl_ind1, cl_ind2])
        ind2 = np.concatenate([ml_ind2, ml_ind1, cl_ind2, cl_ind1])
        data = np.concatenate([np.ones(len(ml_ind1)*2), np.ones(len(cl_ind1)*2)*-1])
        W = csr_matrix((data, (ind1, ind2)), shape=(len(self.X), len(self.X)))
        W = W.tanh().rint()
        return W, ml_ind1, ml_ind2, cl_ind1, cl_ind2

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.X) / self.batch_size))
    
    def gen(self):
        while True:
            np.random.shuffle(self.indexes)
            np.random.shuffle(self.ind_constr)
            for index in range(int(len(self.X)/ self.batch_size)):
                indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
                X = tf.gather(self.X, indices=indexes)
                Y = tf.gather(self.Y, indices=indexes)
#                 X = self.X[indexes]
#                 Y = self.Y[indexes]
                W = self.W[indexes][:, indexes]* self.alpha
                ind1, ind2 = csr_matrix_indices(W)
                data = W.data
                yield (X, (ind1, ind2, data)), {"output_1": X, "output_4": Y}
            for index in range(self.num_constrains// self.batch_size):
                indexes = self.ind_constr[index * self.batch_size//2:(index + 1) * self.batch_size//2]
                indexes = np.concatenate([self.ind1[indexes], self.ind2[indexes]])
                np.random.shuffle(indexes)
                X = tf.gather(self.X, indices=indexes)
                Y = tf.gather(self.Y, indices=indexes)
#                 X = self.X[indexes]
#                 Y = self.Y[indexes]
                W = self.W[indexes][:, indexes]* self.alpha
                ind1, ind2 = csr_matrix_indices(W)
                data = W.data
                #W = W.toarray()
                yield (X, (ind1,ind2, data)), {"output_1": X, "output_4": Y}


class Encoder(tf.keras.layers.Layer):
    def __init__(self, encoded_size):
        super(Encoder, self).__init__(name='encoder')
        self.dense1 = Dense(units=128, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))
        self.dense2 = Dense(units=256, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))
        
        self.rnn1 = Bidirectional(GRU(encoded_size, return_sequences=True, recurrent_regularizer=l1(0.02)))
        self.rnn2 = Bidirectional(GRU(encoded_size, return_sequences=False, recurrent_regularizer=l1(0.02)))
        
        
        self.mu = Dense(encoded_size, activation=None)
        self.sigma = Dense(encoded_size, activation=None)

    def call(self, inputs):
        encoded = self.dense1(inputs)
        encoded = self.dense2(encoded)

        encoded = self.rnn1(encoded)
        encoded = self.rnn2(encoded)

        mu = self.mu(encoded)
        sigma = self.sigma(encoded)
        
        return mu, sigma


class Decoder(tf.keras.layers.Layer):
    def __init__(self, latent_dim, timesteps):
        super(Decoder, self).__init__(name='dec')
        self.repeat_vector = RepeatVector(timesteps)
        
        self.rnn1 = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))
        self.rnn2 = Bidirectional(GRU(latent_dim, return_sequences=True, recurrent_regularizer=l1(0.02)))

        self.dense1 = Dense(units=256, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))
        self.dense2 = Dense(units=128, activation='relu', use_bias=True, kernel_initializer=he_uniform(seed=1))

        self.output_geohash = TimeDistributed(Dense(geohash_dim, kernel_initializer=he_uniform(), activation='sigmoid'), name='output_geohash')
        self.output_hour = TimeDistributed(Dense(hour_dim, kernel_initializer=he_uniform(), activation='softmax'), name='output_hour')
        self.output_geo_context = TimeDistributed(Dense(geographical_context_dim, kernel_initializer=he_uniform(), activation='tanh'), name='output_geo_context')
        self.output_geo_type = TimeDistributed(Dense(geo_type_dim, kernel_initializer=he_uniform(), activation='softmax'), name='output_geo_type')

        self.concat = Concatenate(axis=2)

    def call(self, inputs):
        x = self.repeat_vector(inputs)
        x = self.rnn1(x)
        x = self.rnn2(x)
        x = self.dense1(x)
        x = self.dense2(x)
        
        output_geohash = self.output_geohash(x)
        output_hour = self.output_hour(x)
        output_geo_context = self.output_geo_context(x)
        output_geo_type = self.output_geo_type(x)
        outputs = self.concat([output_geohash, output_hour, output_geo_context, output_geo_type])
        return outputs

class AE(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.encoder = Encoder(500)
        self.decoder = Decoder(timesteps=int(max_points))
        
    def call(self, inputs):
        encoded = self.encoder(inputs)
        decoded = self.decoder(encoded)
        return decoded
    

tfd = tfp.distributions    
class DCGMM(tf.keras.Model):
    def __init__(self):
        super(DCGMM, self).__init__(name="DCGMM")#, dynamic=True)
        self.encoded_size = 500
        self.num_clusters = 72

        self.encoder = Encoder(self.encoded_size)
        self.decoder = Decoder(self.encoded_size, timesteps=int(max_points))

        self.c_mu = tf.Variable(tf.ones([self.num_clusters, self.encoded_size]), name="mu")
        self.log_c_sigma = tf.Variable(tf.ones([self.num_clusters, self.encoded_size]), name="sigma")
        self.prior = tf.constant(tf.ones([self.num_clusters]) * (
                1 / self.num_clusters))  # tf.Variable(tf.ones([self.num_clusters]), name="prior")

    def call(self, inputs, training=True):
        inputs, W = inputs
        z_mu, log_z_sigma = self.encoder(inputs)
        z = tfd.MultivariateNormalDiag(loc=z_mu, scale_diag=tf.math.sqrt(tf.math.exp(log_z_sigma)))
        z_sample = z.sample()

        log_z_sigma_tile = tf.expand_dims(log_z_sigma, axis=-2)
        c = tf.constant([1, self.num_clusters, 1], tf.int32)
        log_z_sigma_tile = tf.tile(log_z_sigma_tile, c)

        z_mu_tile = tf.expand_dims(z_mu, axis=-2)
        c = tf.constant([1, self.num_clusters, 1], tf.int32)
        z_mu_tile = tf.tile(z_mu_tile, c)

        c_sigma = tf.math.exp(self.log_c_sigma)
        p_z_c = tf.stack([tf.math.log(
            tfd.MultivariateNormalDiag(loc=self.c_mu[i, :], scale_diag=tf.math.sqrt(c_sigma[i, :])).prob(
                z_sample) + 1e-30) for i in range(self.num_clusters)], axis=-1)

        prior = self.prior

        p_c_z = tf.math.log(prior + tf.keras.backend.epsilon()) + p_z_c

        norm_s = tf.math.log(1e-30 + tf.math.reduce_sum(tf.math.exp(p_c_z), axis=-1, keepdims=True))
        c = tf.constant([1, self.num_clusters], tf.int32)
        norm = tf.tile(norm_s, c)
        p_c_z = tf.math.exp(p_c_z - norm)

        loss_1a = tf.math.log(c_sigma + tf.keras.backend.epsilon())

        loss_1b = tf.math.exp(log_z_sigma_tile) / (c_sigma + tf.keras.backend.epsilon())

        loss_1c = tf.math.square(z_mu_tile - self.c_mu) / (c_sigma + tf.keras.backend.epsilon())

        loss_1d = self.encoded_size * tf.math.log(tf.keras.backend.constant(2 * np.pi))

        loss_1a = tf.multiply(p_c_z, tf.math.reduce_sum(loss_1a, axis=-1))
        loss_1b = tf.multiply(p_c_z, tf.math.reduce_sum(loss_1b, axis=-1))
        loss_1c = tf.multiply(p_c_z, tf.math.reduce_sum(loss_1c, axis=-1))
        loss_1d = tf.multiply(p_c_z, loss_1d)

        loss_1a = 1 / 2 * tf.reduce_sum(loss_1a, axis=-1)
        loss_1b = 1 / 2 * tf.reduce_sum(loss_1b, axis=-1)
        loss_1c = 1 / 2 * tf.reduce_sum(loss_1c, axis=-1)
        loss_1d = 1 / 2 * tf.reduce_sum(loss_1d, axis=-1)

        loss_2a = - tf.math.reduce_sum(tf.math.xlogy(p_c_z, prior), axis=-1)

        if training:
            ind1, ind2, data = W
            ind1 = tf.reshape(ind1, [-1])
            ind2 = tf.reshape(ind2, [-1])
            data = tf.reshape(data, [-1])
            ind = tf.stack([ind1, ind2], axis=0)
            ind = tf.transpose(ind)
            ind = tf.dtypes.cast(ind, tf.int64)
            W_sparse = tf.SparseTensor(indices=ind, values=data, dense_shape=[len(inputs), len(inputs)])
            W_sparse = tf.sparse.expand_dims(W_sparse, axis=-1)
            W_tile = tf.sparse.concat(-1, [W_sparse] * self.num_clusters)
            mul = W_tile.__mul__(p_c_z)
            sum_j = tf.sparse.reduce_sum(mul, axis=-2)
            loss_2a_constrain = - tf.math.reduce_sum(tf.multiply(p_c_z, sum_j), axis=-1)

            self.add_loss(tf.math.reduce_mean(loss_2a_constrain))
            self.add_metric(loss_2a_constrain, name='loss_2a_c', aggregation="mean")

        loss_2b = tf.math.reduce_sum(tf.math.xlogy(p_c_z, p_c_z), axis=-1)

        loss_3 = - 1 / 2 * tf.reduce_sum(log_z_sigma + 1, axis=-1)

        self.add_loss(tf.math.reduce_mean(loss_1a))
        self.add_loss(tf.math.reduce_mean(loss_1b))
        self.add_loss(tf.math.reduce_mean(loss_1c))
        self.add_loss(tf.math.reduce_mean(loss_1d))
        self.add_loss(tf.math.reduce_mean(loss_2a))
        self.add_loss(tf.math.reduce_mean(loss_2b))
        self.add_loss(tf.math.reduce_mean(loss_3))
        self.add_metric(loss_1a, name='loss_1a', aggregation="mean")
        self.add_metric(loss_1b, name='loss_1b', aggregation="mean")
        self.add_metric(loss_1c, name='loss_1c', aggregation="mean")
        self.add_metric(loss_1d, name='loss_1d', aggregation="mean")
        self.add_metric(loss_2a, name='loss_2a', aggregation="mean")
        self.add_metric(loss_2b, name='loss_2b', aggregation="mean")
        self.add_metric(loss_3, name='loss_3', aggregation="mean")


        dec = self.decoder(z_sample)
        return dec, z_sample, p_z_c, p_c_z


In [146]:
def loss_DCGMM_freemove(inp, x_decoded_mean):
    x = inp
    loss = 80 * tf.keras.losses.MeanSquaredError()(x, x_decoded_mean)
    return loss

def accuracy_metric(inp, p_c_z):
    y = inp
    y_pred = tf.math.argmax(p_c_z, axis=-1)
    return tf.numpy_function(attack.cluster_acc, [y, y_pred], tf.int64)

def csr_matrix_indices(S):
    """
    Return a list of the indices of nonzero entries of a csr_matrix S
    """
    major_dim, minor_dim = S.shape
    minor_indices = S.indices

    major_indices = np.empty(len(minor_indices), dtype=S.indices.dtype)
    scipy.sparse._sparsetools.expandptr(major_dim, S.indptr, major_indices)

    return major_indices, minor_indices

In [118]:
ae = AE()
ae.compile(optimizer='adam', metrics=['accuracy'], loss='mse')

In [119]:
ae.fit(X, X, epochs=500, batch_size=256, shuffle=True)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/

Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 

Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500

In [150]:
dcgmm = DCGMM()

In [153]:
inputs = keras.Input(shape=(int(max_points),geohash_dim+hour_dim+geographical_context_dim+geo_type_dim))

e1 = dcgmm.encoder.dense1(inputs)
e2 = dcgmm.encoder.dense2(e1)
e3 = dcgmm.encoder.rnn1(e2)
e4 = dcgmm.encoder.rnn2(e3)

z = dcgmm.encoder.mu(e4)

d1 = dcgmm.decoder.repeat_vector(z)
d2 = dcgmm.decoder.rnn1(d1)
d3 = dcgmm.decoder.rnn2(d2)
d4 = dcgmm.decoder.dense1(d3)
d5 = dcgmm.decoder.dense2(d4)

d_geohash = dcgmm.decoder.output_geohash(d5)
d_hour = dcgmm.decoder.output_hour(d5)
d_geo_context = dcgmm.decoder.output_geo_context(d5)
d_geo_type = dcgmm.decoder.output_geo_type(d5)
dec = dcgmm.decoder.concat([d_geohash, d_hour, d_geo_context, d_geo_type])

autoencoder = keras.Model(inputs=inputs, outputs=dec)

autoencoder.summary()


Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_13 (InputLayer)          [(None, 36, 80)]     0           []                               
                                                                                                  
 dense_164 (Dense)              (None, 36, 128)      10368       ['input_13[0][0]']               
                                                                                                  
 dense_165 (Dense)              (None, 36, 256)      33024       ['dense_164[1][0]']              
                                                                                                  
 bidirectional_66 (Bidirectiona  (None, 36, 1000)    2274000     ['dense_165[1][0]']              
 l)                                                                                        

In [154]:
autoencoder.compile(optimizer='adam', 
                             metrics=['accuracy'],
                            loss='mse')

autoencoder.fit(X, X,
                epochs=500,
                batch_size=256,
                shuffle=True)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155

Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 

Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 

Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 

Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 

Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.History at 0x7f9e625712e0>

In [155]:
batch_size = 32
num_epochs = 500



# data loaders
Y = attack.getGroundTruth(true_mapping)
gen = DataGenerator(X, Y, num_constrains=12000, alpha=10000, q=0, batch_size=batch_size, ml=0)
train_gen = gen.gen()

X_test = X[-200:]
Y_test = Y[-200:]

test_gen = DataGenerator(X_test, Y_test, batch_size=batch_size).gen()




optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001, decay=0.00001)
dcgmm.compile(optimizer, loss={"output_1": loss_DCGMM_freemove}, metrics={"output_4": accuracy_metric})


dcgmm.fit(train_gen, validation_data=test_gen, steps_per_epoch=int(len(Y)/batch_size), validation_steps=len(Y_test)//batch_size, epochs=num_epochs)



Number of ml constraints: 1050, cl constraints: 54465.
 
(1294, 1294)
  (0, 14)	-1.0
  (0, 26)	-1.0
  (0, 59)	-1.0
  (0, 235)	-1.0
  (0, 258)	-1.0
  (0, 296)	-1.0
  (0, 344)	-1.0
  (0, 374)	-1.0
  (0, 405)	-1.0
  (0, 465)	-1.0
  (0, 515)	-1.0
  (0, 516)	-1.0
  (0, 651)	-1.0
  (0, 654)	-1.0
  (0, 732)	-1.0
  (0, 738)	-1.0
  (0, 783)	-1.0
  (0, 806)	-1.0
  (0, 819)	-1.0
  (0, 852)	-1.0
  (0, 871)	-1.0
  (0, 890)	-1.0
  (0, 893)	-1.0
  (0, 960)	-1.0
  (0, 1012)	-1.0
  :	:
  (1293, 864)	-1.0
  (1293, 866)	-1.0
  (1293, 887)	-1.0
  (1293, 891)	-1.0
  (1293, 902)	-1.0
  (1293, 953)	-1.0
  (1293, 997)	-1.0
  (1293, 999)	-1.0
  (1293, 1000)	-1.0
  (1293, 1011)	-1.0
  (1293, 1036)	-1.0
  (1293, 1069)	-1.0
  (1293, 1077)	-1.0
  (1293, 1089)	-1.0
  (1293, 1112)	-1.0
  (1293, 1115)	-1.0
  (1293, 1120)	-1.0
  (1293, 1140)	-1.0
  (1293, 1142)	-1.0
  (1293, 1146)	-1.0
  (1293, 1149)	-1.0
  (1293, 1160)	-1.0
  (1293, 1170)	-1.0
  (1293, 1173)	-1.0
  (1293, 1238)	-1.0
[115 929 528 ... 359 728 522]
[-1

Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500


Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500


Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500


Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500


Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500


Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500


Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500


Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
 5/40 [==>...........................] - ETA: 2s - loss: -269.2925 - output_1_loss: 3.2510 - output_4_accuracy_metric: 0.2562 - loss_1a: 702.8470 - loss_1b: 249.3369 - loss_1c: 0.1474 - loss_1d: 229.7353 - loss_2a: 2.1383 - loss_2b: -2.4849 - loss_3: -1827.1348 - loss_2a_c: 360.6791