In [1]:
import numpy as np
import os
import pickle

In [2]:
import gymnasium as gym
import matplotlib.pyplot as plt
from scripts.buffer_utils import LoadSequenceDataset
%matplotlib inline

[31mML_LOGGER_USER is not set. This is required for online usage.[0m


In [3]:
from scripts.buffer_utils import OptimalBuffer

In [4]:
from sklearn.cluster import KMeans

In [5]:
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
from sklearn.metrics import mutual_info_score, adjusted_rand_score

In [6]:
import statistics
from statistics import mode

In [43]:
import random

In [7]:
env_name = 'PointMaze_Medium-v3'
data_path = "/common/users/cc1547/dataset/rainbow/stitching_maze/"

if "AntMaze_UMaze-v4" in env_name:
    dataset_name = "antmaze-umaze-v0"
elif "AntMaze_Medium-v4" in env_name:
    dataset_name = "antmaze-medium-v0"
elif "AntMaze_Large-v4" in env_name:
    dataset_name = "antmaze-large-v0"
elif "PointMaze_UMaze-v3" in env_name:
    dataset_name = "pointmaze-umaze-v1"
elif "PointMaze_Medium-v3" in env_name:
    dataset_name = "pointmaze-medium-v1"
elif "PointMaze_Large-v3" in env_name:
    dataset_name = "pointmaze-large-v1"

data_file = os.path.join(data_path, dataset_name + ".pkl")

In [8]:
with open(data_file, 'rb') as f:
    data = pickle.load(f)

In [9]:
data['observations']['observation'].shape

(1000000, 4)

In [11]:
env_dataset = LoadSequenceDataset(env_name = env_name, data_file=data_file)


number of offline data is 1000000

False


In [12]:
horizon = 80
data_buffer  = OptimalBuffer(horizon=horizon)
for i in range(env_dataset.num_traj): 
    info = env_dataset.get_full_info_traj(i)  
    data_buffer.insert_traj(info) 

In [13]:
training_split = 0.9

In [14]:
data_num = len(data_buffer.info)
data_idx = np.arange(data_num)
np.random.shuffle(data_idx)

In [15]:
train_num = int(data_num * training_split)
train_idx = data_idx[:train_num]
test_idx = data_idx[train_num:]

### Using whole sequence 

In [16]:
train_seq_x = []
train_seq_y = []
for idx in train_idx:
    train_seq_x.append(data_buffer.info[idx]['obs'].reshape(-1))
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

test_seq_x = []
test_seq_y = []
for idx in test_idx:
    test_seq_x.append(data_buffer.info[idx]['obs'].reshape(-1))
    test_seq_y.append(data_buffer.info[idx]['region_idx'][0])

In [17]:
train_seq_x = np.array(train_seq_x)
train_seq_y = np.array(train_seq_y)

test_seq_x = np.array(test_seq_x)
test_seq_y = np.array(test_seq_y)

In [18]:
kmeans = KMeans(n_clusters=5, n_init='auto').fit(train_seq_x)

In [19]:
silhouette = silhouette_score(train_seq_x, kmeans.labels_)
db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
mi = mutual_info_score(train_seq_y, kmeans.labels_)

In [20]:
print(f"Silhouette Score: {silhouette:.2f}")
print(f"Davies-Bouldin Index: {db_index:.2f}")
print(f"Calinski-Harabasz Index: {ch_index:.2f}")
print(f"Adjusted Rand Index: {ari:.2f}")
print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.26
Davies-Bouldin Index: 1.53
Calinski-Harabasz Index: 38819.70
Adjusted Rand Index: 0.24
Mutual Information (MI): 0.54


In [61]:
n_clusters = [3, 5, 10, 20,40]

train_seq_x = []
train_seq_y = []
for idx in range(len(data_buffer.info)):

    train_seq_x.append(data_buffer.info[idx]['obs'].reshape(-1))
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

for n in n_clusters:
    kmeans = KMeans(n_clusters=n, n_init='auto').fit(train_seq_x)
    silhouette = silhouette_score(train_seq_x, kmeans.labels_)
    db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
    ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
    ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
    mi = mutual_info_score(train_seq_y, kmeans.labels_)
    
    print(" ============= clustering with whole sequence ============ ")

    print(f"Silhouette Score: {silhouette:.2f}")
    print(f"Davies-Bouldin Index: {db_index:.2f}")
    print(f"Calinski-Harabasz Index: {ch_index:.2f}")
    print(f"Adjusted Rand Index: {ari:.2f}")
    print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.22
Davies-Bouldin Index: 1.61
Calinski-Harabasz Index: 44250.58
Adjusted Rand Index: 0.16
Mutual Information (MI): 0.39
Silhouette Score: 0.25
Davies-Bouldin Index: 1.51
Calinski-Harabasz Index: 43750.91
Adjusted Rand Index: 0.25
Mutual Information (MI): 0.58
Silhouette Score: 0.34
Davies-Bouldin Index: 1.29
Calinski-Harabasz Index: 44322.99
Adjusted Rand Index: 0.32
Mutual Information (MI): 0.97
Silhouette Score: 0.41
Davies-Bouldin Index: 1.04
Calinski-Harabasz Index: 46741.22
Adjusted Rand Index: 0.28
Mutual Information (MI): 1.33
Silhouette Score: 0.41
Davies-Bouldin Index: 0.95
Calinski-Harabasz Index: 49085.61
Adjusted Rand Index: 0.18
Mutual Information (MI): 1.47


### Using the two end states

In [21]:
train_seq_x = []
train_seq_y = []
for idx in train_idx:
    end_states = np.array([data_buffer.info[idx]['obs'][0], data_buffer.info[idx]['obs'][1]])
    train_seq_x.append(end_states.reshape(-1))
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

test_seq_x = []
test_seq_y = []
for idx in test_idx:
    end_states = np.array([data_buffer.info[idx]['obs'][0], data_buffer.info[idx]['obs'][1]])
    test_seq_x.append(end_states.reshape(-1))
    test_seq_y.append(data_buffer.info[idx]['region_idx'][0])

In [22]:
train_seq_x = np.array(train_seq_x)
train_seq_y = np.array(train_seq_y)

test_seq_x = np.array(test_seq_x)
test_seq_y = np.array(test_seq_y)

In [23]:
kmeans = KMeans(n_clusters=5, n_init='auto').fit(train_seq_x)

In [24]:
silhouette = silhouette_score(train_seq_x, kmeans.labels_)
db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
mi = mutual_info_score(train_seq_y, kmeans.labels_)

print(f"Silhouette Score: {silhouette:.2f}")
print(f"Davies-Bouldin Index: {db_index:.2f}")
print(f"Calinski-Harabasz Index: {ch_index:.2f}")
print(f"Adjusted Rand Index: {ari:.2f}")
print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.32
Davies-Bouldin Index: 1.17
Calinski-Harabasz Index: 66713.66
Adjusted Rand Index: 0.20
Mutual Information (MI): 0.49


In [62]:
n_clusters = [3, 5, 10, 20,40]

train_seq_x = []
train_seq_y = []
for idx in range(len(data_buffer.info)):

    end_states = np.array([data_buffer.info[idx]['obs'][0], data_buffer.info[idx]['obs'][-1]])
    train_seq_x.append(end_states.reshape(-1))
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

for n in n_clusters:
    kmeans = KMeans(n_clusters=n, n_init='auto').fit(train_seq_x)
    silhouette = silhouette_score(train_seq_x, kmeans.labels_)
    db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
    ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
    ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
    mi = mutual_info_score(train_seq_y, kmeans.labels_)
    
    print(" ============= clustering with two end states ============ ")

    print(f"Silhouette Score: {silhouette:.2f}")
    print(f"Davies-Bouldin Index: {db_index:.2f}")
    print(f"Calinski-Harabasz Index: {ch_index:.2f}")
    print(f"Adjusted Rand Index: {ari:.2f}")
    print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.26
Davies-Bouldin Index: 1.51
Calinski-Harabasz Index: 59271.35
Adjusted Rand Index: 0.09
Mutual Information (MI): 0.17
Silhouette Score: 0.28
Davies-Bouldin Index: 1.36
Calinski-Harabasz Index: 54989.31
Adjusted Rand Index: 0.16
Mutual Information (MI): 0.40
Silhouette Score: 0.33
Davies-Bouldin Index: 1.20
Calinski-Harabasz Index: 51240.55
Adjusted Rand Index: 0.32
Mutual Information (MI): 0.99
Silhouette Score: 0.36
Davies-Bouldin Index: 1.08
Calinski-Harabasz Index: 46366.14
Adjusted Rand Index: 0.24
Mutual Information (MI): 1.25
Silhouette Score: 0.37
Davies-Bouldin Index: 1.07
Calinski-Harabasz Index: 41512.60
Adjusted Rand Index: 0.18
Mutual Information (MI): 1.40


### Using mean of sequence

In [25]:
train_seq_x = []
train_seq_y = []
for idx in train_idx:
    mean_states = data_buffer.info[idx]['obs'].mean(axis=0)
    train_seq_x.append(mean_states)
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

test_seq_x = []
test_seq_y = []
for idx in test_idx:
    mean_states = data_buffer.info[idx]['obs'].mean(axis=0)
    test_seq_x.append(mean_states)
    test_seq_y.append(data_buffer.info[idx]['region_idx'][0])

In [26]:
train_seq_x = np.array(train_seq_x)
train_seq_y = np.array(train_seq_y)

test_seq_x = np.array(test_seq_x)
test_seq_y = np.array(test_seq_y)

In [27]:
kmeans = KMeans(n_clusters=5, n_init='auto').fit(train_seq_x)

In [28]:
silhouette = silhouette_score(train_seq_x, kmeans.labels_)
db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
mi = mutual_info_score(train_seq_y, kmeans.labels_)

print(f"Silhouette Score: {silhouette:.2f}")
print(f"Davies-Bouldin Index: {db_index:.2f}")
print(f"Calinski-Harabasz Index: {ch_index:.2f}")
print(f"Adjusted Rand Index: {ari:.2f}")
print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.41
Davies-Bouldin Index: 1.01
Calinski-Harabasz Index: 80256.69
Adjusted Rand Index: 0.34
Mutual Information (MI): 0.75


In [63]:
n_clusters = [3, 5, 10, 20,40]

train_seq_x = []
train_seq_y = []
for idx in range(len(data_buffer.info)):

    mean_states = data_buffer.info[idx]['obs'].mean(axis=0)
    train_seq_x.append(mean_states)
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

for n in n_clusters:
    kmeans = KMeans(n_clusters=n, n_init='auto').fit(train_seq_x)
    silhouette = silhouette_score(train_seq_x, kmeans.labels_)
    db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
    ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
    ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
    mi = mutual_info_score(train_seq_y, kmeans.labels_)
    
    print(" ============= clustering with sequence mean ============ ")

    print(f"Silhouette Score: {silhouette:.2f}")
    print(f"Davies-Bouldin Index: {db_index:.2f}")
    print(f"Calinski-Harabasz Index: {ch_index:.2f}")
    print(f"Adjusted Rand Index: {ari:.2f}")
    print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.31
Davies-Bouldin Index: 1.42
Calinski-Harabasz Index: 72173.56
Adjusted Rand Index: 0.09
Mutual Information (MI): 0.21
Silhouette Score: 0.40
Davies-Bouldin Index: 1.08
Calinski-Harabasz Index: 82868.36
Adjusted Rand Index: 0.33
Mutual Information (MI): 0.76
Silhouette Score: 0.53
Davies-Bouldin Index: 0.76
Calinski-Harabasz Index: 125073.14
Adjusted Rand Index: 0.47
Mutual Information (MI): 1.31
Silhouette Score: 0.61
Davies-Bouldin Index: 0.62
Calinski-Harabasz Index: 232399.08
Adjusted Rand Index: 0.34
Mutual Information (MI): 1.48
Silhouette Score: 0.52
Davies-Bouldin Index: 0.69
Calinski-Harabasz Index: 331424.93
Adjusted Rand Index: 0.20
Mutual Information (MI): 1.48


### Using Mean of two end states

In [29]:
train_seq_x = []
train_seq_y = []
for idx in train_idx:
    end_states = np.array([data_buffer.info[idx]['obs'][0], data_buffer.info[idx]['obs'][1]])
    train_seq_x.append(end_states.mean(axis=0))
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

test_seq_x = []
test_seq_y = []
for idx in test_idx:
    end_states = np.array([data_buffer.info[idx]['obs'][0], data_buffer.info[idx]['obs'][1]])
    test_seq_x.append(end_states.mean(axis=0))
    test_seq_y.append(data_buffer.info[idx]['region_idx'][0])

In [30]:
train_seq_x = np.array(train_seq_x)
train_seq_y = np.array(train_seq_y)

test_seq_x = np.array(test_seq_x)
test_seq_y = np.array(test_seq_y)

In [31]:
kmeans = KMeans(n_clusters=5, n_init='auto').fit(train_seq_x)

In [32]:
silhouette = silhouette_score(train_seq_x, kmeans.labels_)
db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
mi = mutual_info_score(train_seq_y, kmeans.labels_)

print(f"Silhouette Score: {silhouette:.2f}")
print(f"Davies-Bouldin Index: {db_index:.2f}")
print(f"Calinski-Harabasz Index: {ch_index:.2f}")
print(f"Adjusted Rand Index: {ari:.2f}")
print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.32
Davies-Bouldin Index: 1.17
Calinski-Harabasz Index: 66894.40
Adjusted Rand Index: 0.20
Mutual Information (MI): 0.49


In [64]:
n_clusters = [3, 5, 10, 20,40]

train_seq_x = []
train_seq_y = []
for idx in range(len(data_buffer.info)):

    mean_states = np.array([data_buffer.info[idx]['obs'][0], data_buffer.info[idx]['obs'][-1]])
    train_seq_x.append(mean_states.mean(axis=0))
    train_seq_y.append(data_buffer.info[idx]['region_idx'][0])

for n in n_clusters:
    kmeans = KMeans(n_clusters=n, n_init='auto').fit(train_seq_x)
    silhouette = silhouette_score(train_seq_x, kmeans.labels_)
    db_index = davies_bouldin_score(train_seq_x, kmeans.labels_)
    ch_index = calinski_harabasz_score(train_seq_x, kmeans.labels_)
    ari = adjusted_rand_score(train_seq_y, kmeans.labels_)
    mi = mutual_info_score(train_seq_y, kmeans.labels_)
    
    print(" ============= clustering with center of two end states ============ ")

    print(f"Silhouette Score: {silhouette:.2f}")
    print(f"Davies-Bouldin Index: {db_index:.2f}")
    print(f"Calinski-Harabasz Index: {ch_index:.2f}")
    print(f"Adjusted Rand Index: {ari:.2f}")
    print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.28
Davies-Bouldin Index: 1.42
Calinski-Harabasz Index: 74891.67
Adjusted Rand Index: 0.13
Mutual Information (MI): 0.28
Silhouette Score: 0.29
Davies-Bouldin Index: 1.28
Calinski-Harabasz Index: 69712.94
Adjusted Rand Index: 0.24
Mutual Information (MI): 0.56
Silhouette Score: 0.38
Davies-Bouldin Index: 0.97
Calinski-Harabasz Index: 71567.66
Adjusted Rand Index: 0.40
Mutual Information (MI): 1.23
Silhouette Score: 0.41
Davies-Bouldin Index: 0.92
Calinski-Harabasz Index: 81223.76
Adjusted Rand Index: 0.28
Mutual Information (MI): 1.42
Silhouette Score: 0.37
Davies-Bouldin Index: 0.97
Calinski-Harabasz Index: 77925.70
Adjusted Rand Index: 0.15
Mutual Information (MI): 1.45


### Using vote from each state

In [33]:
train_seq_x = []
train_seq_y = []
for idx in train_idx:
    train_seq_x.append(data_buffer.info[idx]['obs'])
    train_seq_y.append(data_buffer.info[idx]['region_idx'])

test_seq_x = []
test_seq_y = []
for idx in test_idx:
    test_seq_x.append(data_buffer.info[idx]['obs'])
    test_seq_y.append(data_buffer.info[idx]['region_idx'])

In [34]:
train_seq_x = np.array(train_seq_x)
N, H, C = train_seq_x.shape
train_seq_x = np.array(train_seq_x).reshape(-1, 4)
train_seq_y = np.array(train_seq_y).reshape(-1)

In [35]:
kmeans = KMeans(n_clusters=5, n_init='auto').fit(train_seq_x)

In [36]:
train_pred_y = kmeans.labels_

In [37]:
train_pred_y = train_pred_y.reshape(N, H)

In [38]:
vote_y = []
for i in range(N):
    vote_ = mode(train_pred_y[i])
    vote_y.append(np.ones(H, dtype=np.int32) * vote_)
vote_y = np.array(vote_y).reshape(-1)

In [57]:
select_data_idx = np.array(random.sample(range(N*H), N))

In [58]:
select_x = train_seq_x[select_data_idx]
select_y = vote_y[select_data_idx]
select_gt_y = train_seq_y[select_data_idx]

In [59]:
silhouette = silhouette_score(select_x, select_y)
db_index = davies_bouldin_score(select_x, select_y)
ch_index = calinski_harabasz_score(select_x, select_y)
ari = adjusted_rand_score(select_gt_y, select_y)
mi = mutual_info_score(select_gt_y, select_y)

print(f"Silhouette Score: {silhouette:.2f}")
print(f"Davies-Bouldin Index: {db_index:.2f}")
print(f"Calinski-Harabasz Index: {ch_index:.2f}")
print(f"Adjusted Rand Index: {ari:.2f}")
print(f"Mutual Information (MI): {mi:.2f}")

Silhouette Score: 0.10
Davies-Bouldin Index: 2.09
Calinski-Harabasz Index: 21991.72
Adjusted Rand Index: 0.15
Mutual Information (MI): 0.39


In [68]:
n_clusters = [3, 5, 10, 20,40]

train_seq_x = []
train_seq_y = []
for idx in range(len(data_buffer.info)):

    train_seq_x.append(data_buffer.info[idx]['obs'])
    train_seq_y.append(data_buffer.info[idx]['region_idx'])
    
train_seq_x = np.array(train_seq_x)
N, H, C = train_seq_x.shape
train_seq_x = train_seq_x.reshape(N*H, C)
train_seq_y = np.array(train_seq_y).reshape(-1)

for n in n_clusters:
    kmeans = KMeans(n_clusters=n, n_init='auto').fit(train_seq_x)

    train_pred_y = kmeans.labels_
    train_pred_y = train_pred_y.reshape(N, H)
    
    vote_y = []
    for i in range(N):
        vote_ = mode(train_pred_y[i])
        vote_y.append(np.ones(H, dtype=np.int32) * vote_)
    vote_y = np.array(vote_y).reshape(-1)

    select_data_idx = np.array(random.sample(range(N*H), N))
    select_x = train_seq_x[select_data_idx]
    select_y = vote_y[select_data_idx]
    select_gt_y = train_seq_y[select_data_idx]
    
    silhouette = silhouette_score(select_x, select_y)
    db_index = davies_bouldin_score(select_x, select_y)
    ch_index = calinski_harabasz_score(select_x, select_y)
    ari = adjusted_rand_score(select_gt_y, select_y)
    mi = mutual_info_score(select_gt_y, select_y)
    
    print(" ============= clustering with voting from each state ============ ")

    print(f"Silhouette Score: {silhouette:.2f}")
    print(f"Davies-Bouldin Index: {db_index:.2f}")
    print(f"Calinski-Harabasz Index: {ch_index:.2f}")
    print(f"Adjusted Rand Index: {ari:.2f}")
    print(f"Mutual Information (MI): {mi:.2f}")


Silhouette Score: 0.12
Davies-Bouldin Index: 2.08
Calinski-Harabasz Index: 28819.95
Adjusted Rand Index: 0.05
Mutual Information (MI): 0.12
Silhouette Score: 0.09
Davies-Bouldin Index: 2.28
Calinski-Harabasz Index: 25824.45
Adjusted Rand Index: 0.15
Mutual Information (MI): 0.40
Silhouette Score: 0.10
Davies-Bouldin Index: 1.78
Calinski-Harabasz Index: 20748.71
Adjusted Rand Index: 0.31
Mutual Information (MI): 1.00
Silhouette Score: -0.04
Davies-Bouldin Index: 5.01
Calinski-Harabasz Index: 11921.40
Adjusted Rand Index: 0.27
Mutual Information (MI): 1.19
Silhouette Score: -0.07
Davies-Bouldin Index: 12.15
Calinski-Harabasz Index: 7387.94
Adjusted Rand Index: 0.23
Mutual Information (MI): 1.42


In [67]:
select_x.shape

(167330, 4)