In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="3"

repo_path = os.getenv('MMWAVE_PATH')
import sys
sys.path.append(os.path.join(repo_path, 'models'))
from utils import *
from resnet_amca import ResNetAMCA, AM_logits
import tensorflow as tf
import numpy as np
import argparse
import contextlib
import inspect
import shutil
import yaml
import h5py
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE

In [None]:
@tf.function
def test_step(images):
    """Run the global ``model`` in inference mode on a batch of images.

    Args:
        images: batch passed straight to ``model``.

    Returns:
        A ``(probabilities, embeddings)`` pair: the softmax of the first model
        output and the second model output unchanged.
    """
    class_logits, embeddings = model(images, training=False)
    probabilities = tf.nn.softmax(class_logits)
    return probabilities, embeddings

In [None]:
# Number of leading days of each dataset used for training; these must match
# the run whose checkpoint is loaded, because the source count is baked into
# the checkpoint directory name below.
train_source_days = 1
train_server_days = 0
train_conference_days = 0
train_source_unlabeled_days = 0

# Root folder holding the *.h5 datasets.
dataset_path = "/home/kjakkala/mmwave/data"

# Checkpoint directory of the source-only run.
checkpoint_path = (
    "/home/kjakkala/mmwave/logs/FinalExp/Source/"
    "epochs:1000-init_lr:0.001-num_features:128-model_filters:64-"
    "activation_fn:selu-batch_size:64-num_classes:10-"
    "train_source_days:{}-anneal:4-s:10-m:0.1-ca:0.001-"
    "notes:AMCABaseline/checkpoints"
).format(train_source_days)
# Alternative: checkpoint of the run that also used labeled target data.
#checkpoint_path="/home/kjakkala/mmwave/logs/FinalExp/SourceTargetLabeled/epochs:1000-init_lr:0.001-num_features:128-model_filters:64-activation_fn:selu-batch_size:64-num_classes:10-train_source_days:{}-train_server_days:{}-train_conference_days:{}-anneal:4-s:10-m:0.1-ca:0.001-notes:AMCABaseline/checkpoints".format(train_source_days, train_server_days, train_conference_days)

In [None]:
def get_trg_data(filename, src_classes, train_trg_days, test_all=False):
    """Load a target-domain dataset and split it by day into train and test sets.

    Column 0 of the label array read from ``filename`` is used as an index into
    the file's own class list, and column 1 as the day index. Labels are
    re-indexed to positions in ``src_classes`` and one-hot encoded.

    Args:
        filename: path handed to ``get_h5dataset``.
        src_classes: list of source-domain class names; defines the one-hot
            width and the index of every class.
        train_trg_days: samples with day index below this value form the
            training split.
        test_all: when True the test split contains every day (including the
            training days); otherwise only days >= 3.

    Returns:
        ``(X_train, y_train, y_train_day, X_test, y_test, y_test_day)`` where
        the ``X_*`` arrays are float32, the ``y_*`` arrays are uint8 one-hot
        matrices of shape ``(n, len(src_classes))`` -- also when ``n == 0`` --
        and the ``*_day`` arrays hold the day index of each sample.

    Raises:
        ValueError: from ``list.index`` if a class in the file is not present
            in ``src_classes``.
    """
    X_data_trg, y_data_trg, trg_classes = get_h5dataset(filename)

    one_hot = np.eye(len(src_classes))

    def encode(trg_labels):
        # Map file-local class indices to source class indices, then one-hot.
        # The explicit integer dtype keeps an empty selection usable as an
        # index, so empty splits still come back as (0, num_classes).
        src_indices = np.array(
            [src_classes.index(trg_classes[lbl]) for lbl in trg_labels],
            dtype=np.intp)
        return one_hot[src_indices]

    day_ids = y_data_trg[:, 1]

    # Training split: the first `train_trg_days` days.
    train_mask = day_ids < train_trg_days
    X_train_trg = X_data_trg[train_mask]
    y_train_trg = encode(y_data_trg[train_mask, 0])
    y_train_trg_day = day_ids[train_mask]

    # Test split: everything, or only the days from the hard-coded cut-off on.
    test_days = 0 if test_all else 3
    test_mask = day_ids >= test_days
    X_test_trg = X_data_trg[test_mask]
    y_test_trg = encode(y_data_trg[test_mask, 0])
    y_test_trg_day = day_ids[test_mask]

    if X_train_trg.shape[0] != 0:
        # Statistics returned for the training split are reused on the test split.
        X_train_trg, trg_mean = mean_center(X_train_trg)
        X_train_trg, trg_min, trg_ptp = normalize(X_train_trg)

        X_test_trg, _ = mean_center(X_test_trg, trg_mean)
        X_test_trg, _, _ = normalize(X_test_trg, trg_min, trg_ptp)
    else:
        # No training data: the helpers are called without reference statistics.
        X_test_trg, _ = mean_center(X_test_trg)
        X_test_trg, _, _ = normalize(X_test_trg)

    X_train_trg = X_train_trg.astype(np.float32)
    y_train_trg = y_train_trg.astype(np.uint8)
    X_test_trg = X_test_trg.astype(np.float32)
    y_test_trg = y_test_trg.astype(np.uint8)

    return X_train_trg, y_train_trg, y_train_trg_day, X_test_trg, y_test_trg, y_test_trg_day

In [None]:
# ---- Source domain: load, balance, split by day ----
X_data, y_data, classes = get_h5dataset(
    os.path.join(dataset_path, 'source_data.h5'))
X_data, y_data = balance_dataset(X_data,
                                 y_data,
                                 num_days=10,
                                 num_classes=len(classes),
                                 max_samples_per_class=95)

# y_data[:, 0] is used as the class index, y_data[:, 1] as the day index.
# Each day-range mask is computed once and reused for images, labels and days.
one_hot = np.eye(len(classes))
day_ids = y_data[:, 1]
first_test_day = train_source_days + train_source_unlabeled_days
src_mask = day_ids < train_source_days
trg_train_mask = np.logical_and(day_ids >= train_source_days,
                                day_ids < first_test_day)
trg_test_mask = day_ids >= first_test_day

# Labeled source days: stratified 90/10 train/test split.
X_src = X_data[src_mask]
y_src = one_hot[y_data[src_mask, 0]]
y_src_day = day_ids[src_mask]
(X_train_src, X_test_src,
 y_train_src, y_test_src,
 y_train_src_day, y_test_src_day) = train_test_split(
     X_src, y_src, y_src_day, stratify=y_src, test_size=0.10, random_state=42)

# Later source days: optional unlabeled training days, then the test days.
X_train_trg = X_data[trg_train_mask]
y_train_trg = one_hot[y_data[trg_train_mask, 0]]

X_test_trg = X_data[trg_test_mask]
y_test_trg = one_hot[y_data[trg_test_mask, 0]]
y_test_trg_day = day_ids[trg_test_mask]

# Release the full dataset and the split temporaries.
del X_src, y_src, X_data, y_data
del one_hot, day_ids, src_mask, trg_train_mask, trg_test_mask

# ---- Mean center and normalize ----
# Statistics come from the source training split and are reused on its test split.
X_train_src, src_mean = mean_center(X_train_src)
X_train_src, src_min, src_ptp = normalize(X_train_src)

X_test_src, _ = mean_center(X_test_src, src_mean)
X_test_src, _, _ = normalize(X_test_src, src_min, src_ptp)

if X_train_trg.shape[0] != 0:
    X_train_trg, trg_mean = mean_center(X_train_trg)
    X_train_trg, trg_min, trg_ptp = normalize(X_train_trg)

    X_test_trg, _ = mean_center(X_test_trg, trg_mean)
    X_test_trg, _, _ = normalize(X_test_trg, trg_min, trg_ptp)
else:
    # No unlabeled later days: fall back to the source training statistics.
    X_test_trg, _ = mean_center(X_test_trg, src_mean)
    X_test_trg, _, _ = normalize(X_test_trg, src_min, src_ptp)

X_train_src = X_train_src.astype(np.float32)
y_train_src = y_train_src.astype(np.uint8)
X_test_src = X_test_src.astype(np.float32)
y_test_src = y_test_src.astype(np.uint8)
X_train_trg = X_train_trg.astype(np.float32)
y_train_trg = y_train_trg.astype(np.uint8)
X_test_trg = X_test_trg.astype(np.float32)
y_test_trg = y_test_trg.astype(np.uint8)

# ---- Target domains (test split covers every day because test_all=True) ----
X_train_conf, y_train_conf, y_train_conf_day, X_test_conf, y_test_conf, y_test_conf_day = get_trg_data(
    os.path.join(dataset_path, 'target_conf_data.h5'), classes,
    train_conference_days, test_all=True)
X_train_server, y_train_server, y_train_server_day, X_test_server, y_test_server, y_test_server_day = get_trg_data(
    os.path.join(dataset_path, 'target_server_data.h5'), classes,
    train_server_days, test_all=True)
# The office data is never trained on, so only its test split is kept.
_, _, _, X_data_office, y_data_office, y_data_office_day = get_trg_data(
    os.path.join(dataset_path, 'target_office_data.h5'), classes,
    0, test_all=True)

print("Final shapes: ")
print(" Train Src:   ", X_train_src.shape, y_train_src.shape, "\n",
      "Test Src:    ", X_test_src.shape, y_test_src.shape, "\n",
      "Train Trg:   ", X_train_trg.shape, y_train_trg.shape, "\n",
      "Test Trg:    ", X_test_trg.shape, y_test_trg.shape)
print(" Train Conf:  ", X_train_conf.shape, y_train_conf.shape, "\n",
      "Test Conf:   ", X_test_conf.shape, y_test_conf.shape, "\n",
      "Train Server:", X_train_server.shape, y_train_server.shape, "\n",
      "Test Server: ", X_test_server.shape, y_test_server.shape, "\n",
      "Test office: ", X_data_office.shape, y_data_office.shape)

In [None]:
# Rebuild the network; the values mirror the hyper-parameters listed in the
# checkpoint directory name (presumably 10 = num_classes, 128 = num_features).
model = ResNetAMCA(10, 128, num_filters=64, activation='selu', ca_decay=1e-3)

# Only the model is tracked, so only its variables are restored.
ckpt = tf.train.Checkpoint(model=model)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

latest_checkpoint = ckpt_manager.latest_checkpoint
ckpt.restore(latest_checkpoint)
# NOTE(review): a missing checkpoint is only reported; the cells below then run
# on a model that was not restored from any checkpoint.
if not latest_checkpoint:
    print("No checkpoint !!!!")

In [None]:
print('Train')

# Integer class id of every source training sample (computed once).
train_labels = np.argmax(y_train_src, axis=-1)

train_data = []   # every per-sample embedding, in class order
train_embds = []  # one mean embedding (centroid) per class
for label in range(10):
    class_embds = []

    # Samples are embedded one at a time; `embd` has a leading batch axis of 1,
    # so extend() stores the single embedding vector.
    for image in X_train_src[train_labels == label]:
        _, embd = test_step(tf.expand_dims(image, axis=0))
        class_embds.extend(embd)
        train_data.extend(embd)

    # Disabled: also fold the labeled target training samples into the centroid.
    # for image in X_train_server[np.argmax(y_train_server, axis=-1)==label]:
    #     _, embd = test_step(tf.expand_dims(image, axis=0))
    #     class_embds.extend(embd)
    #
    # for image in X_train_conf[np.argmax(y_train_conf, axis=-1)==label]:
    #     _, embd = test_step(tf.expand_dims(image, axis=0))
    #     class_embds.extend(embd)

    train_embds.append(np.mean(class_embds, axis=0))

train_embds = np.array(train_embds)
print(train_embds.shape)

In [None]:
def _evaluate_day(images, onehot_labels, day_ids, day, metric, all_embds,
                  num_classes=10):
    """Embed every sample recorded on ``day`` and print that day's accuracy.

    Args:
        images, onehot_labels, day_ids: parallel arrays of one dataset split.
        day: day index to select.
        metric: Keras accuracy metric; reset here, then updated per sample.
        all_embds: list that receives every per-sample embedding.
        num_classes: number of classes to iterate over.

    Returns:
        List with one mean embedding per class. A class without samples on
        ``day`` yields whatever ``np.mean`` returns for an empty list.
    """
    metric.reset_states()
    class_ids = np.argmax(onehot_labels, axis=-1)
    class_means = []
    for label in range(num_classes):
        # One-hot target with a leading batch axis so it lines up with `pred`.
        target = np.eye(num_classes)[[label]]
        label_embds = []
        for image in images[np.logical_and(day_ids == day, class_ids == label)]:
            pred, embd = test_step(tf.expand_dims(image, axis=0))
            label_embds.append(embd)
            all_embds.extend(embd)
            # Keras metrics take (y_true, y_pred).
            metric.update_state(target, pred)
        class_means.append(np.mean(label_embds, axis=0).squeeze())
    print(float(metric.result()))
    return class_means


def _evaluate_domain(title, day_splits, centroids, metric):
    """Report per-day accuracy and per-day distance to the training centroids.

    Args:
        title: heading printed before the results.
        day_splits: list whose entry ``d`` is the ``(images, onehot_labels,
            day_ids)`` triple from which day ``d`` is evaluated.
        centroids: per-class mean training embeddings.
        metric: Keras accuracy metric shared across days.

    Returns:
        ``(day_means, all_embds)``: array of per-day, per-class mean embeddings
        and the list of every per-sample embedding.
    """
    print(title)
    all_embds = []
    day_means = np.array([
        _evaluate_day(images, onehot_labels, day_ids, day, metric, all_embds,
                      num_classes=len(centroids))
        for day, (images, onehot_labels, day_ids) in enumerate(day_splits)
    ])

    # Per day: sum over classes of the L2 distance to the training centroid.
    print()
    for day in range(len(day_splits)):
        distances = [
            np.linalg.norm(day_means[day][label] - centroids[label])
            for label in range(len(centroids))
        ]
        print(np.sum(distances))
    return day_means, all_embds


acc = tf.keras.metrics.CategoricalAccuracy()

# Write the report to "<run dir>/accuracies". stdout is redirected only for the
# duration of this block, and the file is closed afterwards, so later cells
# print to the notebook again.
accuracies_path = checkpoint_path.replace("checkpoints", "accuracies")
with open(accuracies_path, 'w') as report_file, \
        contextlib.redirect_stdout(report_file):
    server_embds, server_data = _evaluate_domain(
        '\nServer',
        [(X_test_server, y_test_server, y_test_server_day)] * 5,
        train_embds, acc)

    conf_embds, conf_data = _evaluate_domain(
        '\nConf',
        [(X_test_conf, y_test_conf, y_test_conf_day)] * 5,
        train_embds, acc)

    office_embds, office_data = _evaluate_domain(
        '\nOffice',
        [(X_data_office, y_data_office, y_data_office_day)] * 5,
        train_embds, acc)

    # Source domain, 10 days: the training days are scored on the held-out
    # source test split, the remaining days on the later-day source split.
    source_splits = (
        [(X_test_src, y_test_src, y_test_src_day)] * train_source_days +
        [(X_test_trg, y_test_trg, y_test_trg_day)] * (10 - train_source_days))
    source_embds, source_data = _evaluate_domain(
        '\nSource', source_splits, train_embds, acc)

In [None]:
# Stack the per-sample embeddings of all domains (training first, then server,
# conference, office and source test days) into one matrix.
all_embeddings = np.concatenate(
    [train_data, server_data, conf_data, office_data, source_data], axis=0)

# t-SNE is stochastic; a fixed seed makes the saved projection reproducible.
X_embedded = TSNE(n_components=2, random_state=42).fit_transform(all_embeddings)

# np.save appends ".npy" to the target path.
np.save(checkpoint_path.replace("checkpoints", "t-SNE"), X_embedded)