In [1]:
import torch
import tqdm
import time
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt

from dgcca.dgcca import DGCCA
from dgcca.anomaly_detection import CcaAnomalyDetector
from utils.load_datasets import load_data
from utils.load_models import load_models

ImportError: numpy.core.multiarray failed to import

In [2]:
# Report whether PyTorch can see a CUDA device. The config cell below makes the
# same call to choose between 'cuda:0' and 'cpu'.
torch.cuda.is_available()

  return torch._C._cuda_getDeviceCount() > 0


False

In [7]:
### Config

learning_rate = 0.001
batch_size = 128
name = 'ds_test'       # prefix of the cached feature / checkpoint files under `output`
load_features = True   # True: read cached embeddings from disk instead of recomputing them
load_dgcca = True      # True: restore the DGCCA checkpoint before training (nothing is saved)

### mm-fit network parameters
num_epochs = 25
eval_every = 1
early_stop = 200
checkpoint = 200
weights_path = ''
conv_layers = 3
kernel_size = 11
kernel_stride = 2
f_in = 768 # number of input units in the first FC layer
layers = 3 # number of FC layers
hidden_units = 100 # number of hidden units
dropout = 0.0
output = 'output/' # path to output folder

### mm-fit dataset parameters
# The dataset location is machine specific; allow overriding it through the
# MMFIT_DATA_PATH environment variable while keeping the original default.
data_path = os.environ.get('MMFIT_DATA_PATH', "/mnt/fastdata/aca18hgw/mm-fit")
num_classes = 11
window_stride = 0.2
window_length = 5
sampling_rate = 50
skeleton_sampling_rate = 30
target_sensor_sampling_rate = 50
grouped = 'GGN'
unseen_test_set = False

# set devices
parallel = False   # becomes True only when more than one GPU is visible
workers = 0        # forwarded to load_data as `workers`
if torch.cuda.is_available():
    device = 'cuda:0'
    if torch.cuda.device_count() > 1:
        print('Using {} GPUs'.format(torch.cuda.device_count()))
        parallel = True
    workers = torch.cuda.device_count() * 4
else:
    device = 'cpu'

# exist_ok avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(output, exist_ok=True)

In [8]:
## define datasets
# Only ID '01' is active. The other IDs are kept here for reference:
# '02', '03', '04', '06', '07', '08', '16', '17', '18'
IDs = ['01']

# Fraction of the data assigned to each split; the key order is relied upon
# wherever the splits are zipped with the loaders.
dataset_split = dict(
    train_cca=0.5,
    train_detector=0.2,
    val=0.15,
    test=0.15,
)

# All modalities available in MM-Fit
MODALITIES = [
    'sw_l_acc', 'sw_l_gyr', 'sw_l_hr',
    'sw_r_acc', 'sw_r_gyr', 'sw_r_hr',
    'sp_l_acc', 'sp_l_gyr', 'sp_l_mag',
    'sp_r_acc', 'sp_r_gyr', 'sp_r_mag',
    'eb_l_acc', 'eb_l_gyr',
    'pose_2d', 'pose_3d',
]
# We use a subset of all modalities in this demo.
MODALITIES_SUBSET = [
    'sw_l_acc', 'sw_l_gyr',
    'sw_r_acc', 'sw_r_gyr',
    'sp_r_acc', 'sp_r_gyr',
    'eb_l_acc', 'eb_l_gyr',
    'pose_3d',
]

In [13]:
# Load the per-modality models in BOTH branches: `reduce_data` in the
# evaluation cells looks them up even when the embeddings come from the cache.
models = load_models(modalities=MODALITIES_SUBSET, device=device)
for modality, model in models.items():
    if parallel:
        # `nn` is never imported in this notebook; use the qualified name.
        models[modality] = torch.nn.DataParallel(model)
    models[modality].eval()

# Maps split name -> {modality: 2-D float64 tensor, one flattened embedding per row}.
datasets = {}
if load_features:
    for split_name in dataset_split.keys():
        # NOTE: pickle.load can execute arbitrary code. Only load cache files
        # that were written by this notebook.
        with open('output/{}_{}.pkl'.format(name, split_name), 'rb') as f:
            datasets[split_name] = pickle.load(f)
else:
    dataloaders = load_data(MODALITIES_SUBSET, data_path, ids=IDs, splits=[val for val in dataset_split.values()], loader=True, batch_size=batch_size, window_stride=window_stride, window_length=window_length, skeleton_sampling_rate=skeleton_sampling_rate, target_sensor_sampling_rate=target_sensor_sampling_rate, workers=workers)

    all_embeddings = []
    for split_name, loader in zip(dataset_split.keys(), dataloaders):
        embeddings = {modality: [] for modality in MODALITIES_SUBSET}

        # Inference only: no autograd graph is needed while embedding.
        with torch.no_grad():
            for batch, _, _ in tqdm.tqdm(loader, desc='Embedding {} dataset'.format(split_name)):
                for modality in MODALITIES_SUBSET:
                    embeddings[modality].append(models[modality](batch[modality]).detach())

        # Concatenate the per-batch outputs and flatten everything but the first axis.
        for modality, chunks in embeddings.items():
            cat = torch.cat(chunks).double()
            embeddings[modality] = cat.reshape((cat.shape[0], -1))

        # Cache the split so later runs can set load_features = True.
        with open('output/{}_{}.pkl'.format(name, split_name), 'wb') as f:
            pickle.dump(embeddings, f, protocol=4)

        all_embeddings.append(embeddings)
        # Store directly under the split name. The previous
        # `for name, data in zip(...)` loop overwrote the config variable
        # `name`, which later cells use to build the checkpoint path.
        datasets[split_name] = embeddings

    # Flattened embedding width per modality, taken from the first split.
    shapes = {modality: all_embeddings[0][modality].shape[1] for modality in MODALITIES_SUBSET}

Dataset splits: [36503, 14600, 10950, 10950]


Embedding train_cca dataset: 100%|██████████| 286/286 [13:47<00:00,  2.19s/it]
Embedding train_detector dataset: 100%|██████████| 115/115 [05:31<00:00,  2.08s/it]
Embedding val dataset: 100%|██████████| 86/86 [04:08<00:00,  2.50s/it]
Embedding test dataset: 100%|██████████| 86/86 [04:09<00:00,  2.54s/it]


In [15]:
# Remove the 'pose_3d' embeddings from every split so DGCCA is trained on the
# remaining modalities only.
# pop(..., None) instead of `del` keeps the cell idempotent: re-running it (or
# loading a cache that no longer contains 'pose_3d') is a no-op, not a KeyError.
for split in datasets.keys():
    datasets[split].pop('pose_3d', None)

In [17]:
hidden_layers = [256, 512]   # appended after each view's input width in `layer_sizes`
cca_input_dim = 64           # second positional argument of DGCCA(...)
cca_lr = 1e-2
cca_epochs = 3
cca_dim = 10
cca_truncparam = 1000        # forwarded to dgcca.train as `cca_hidden_dim`
cca_window_size = 150        # detector window; also the batch size of the evaluation loaders

# One size list per view: [embedding width] followed by the shared hidden sizes.
# A comprehension is used on purpose: the previous `for layers in layer_sizes`
# loop overwrote the config-cell variable `layers` (number of FC layers).
layer_sizes = [[embedding.shape[1]] + hidden_layers for embedding in datasets['train_cca'].values()]

In [18]:
# Build the DGCCA model, optionally restoring a checkpoint, then train it on the
# 'train_cca' split. Training runs in both cases; the checkpoint is written only
# when it was not loaded.
dgcca_checkpoint = 'output/{}_dgcca.pth'.format(name)
train_views = list(datasets['train_cca'].values())

dgcca = DGCCA(layer_sizes, cca_input_dim, device=device)
if load_dgcca:
    dgcca.load_checkpoint(dgcca_checkpoint)

dgcca.train(
    train_views,
    cca_epochs,
    lr=cca_lr,
    cca_dim=cca_dim,
    cca_hidden_dim=cca_truncparam,
    incremental=False,
)

if not load_dgcca:
    dgcca.save_checkpoint(dgcca_checkpoint)

Epoch [1/3], Loss: -65.5386: 100%|██████████| 286/286 [00:53<00:00,  5.43it/s]
Epoch [2/3], Loss: -69.7972: 100%|██████████| 286/286 [00:52<00:00,  5.46it/s]
Epoch [3/3], Loss: -70.0739: 100%|██████████| 286/286 [00:52<00:00,  5.50it/s]
Embedding training set:: 100%|██████████| 286/286 [00:06<00:00, 46.09it/s]


Decomposed data matrix for view 0
Decomposed data matrix for view 1
Decomposed data matrix for view 2
Decomposed data matrix for view 3
Decomposed data matrix for view 4
Decomposed data matrix for view 5
Decomposed data matrix for view 6
Decomposed data matrix for view 7
Decomposed M_tilde / solved for G
Solved for U in view 0
Solved for U in view 1
Solved for U in view 2
Solved for U in view 3
Solved for U in view 4
Solved for U in view 5
Solved for U in view 6
Solved for U in view 7


In [20]:
# Wrap the trained DGCCA model in an anomaly detector.
detector = CcaAnomalyDetector(dgcca)

# False: fit the detector on the 'train_detector' split.
# True: reuse thresholds previously stored in output/adthresh.npy.
load_detector = False

if not load_detector:
    detector_views = list(datasets['train_detector'].values())
    fig = detector.train(detector_views, stride=10, window=cca_window_size, plot=True)
else:
    detector.thresholds = np.load('output/adthresh.npy')
    detector.classifier = detector.threshold_classifier

Generating noise...


AttributeError: module 'numpy.random' has no attribute 'default_rng'

In [54]:
def evaluate_single(data, labels, grace=0):
    """Score one detector prediction against per-modality boolean labels.

    Args:
        data: Per-modality inputs handed unchanged to ``detector.detect_anomalies``.
        labels: Boolean array with one entry per modality.
        grace: Forwarded to ``detector.detect_anomalies``.

    Returns:
        Tuple ``(tp, tn, fp, fn)`` where "positive" means a prediction of True:
        tp/tn count entries where prediction and label agree (True/False),
        fp/fn count entries where they disagree (prediction True/False).
    """
    predictions = detector.detect_anomalies(data, grace=grace)

    agree = labels == predictions
    disagree = labels != predictions
    predicted_true = predictions == True
    predicted_false = predictions == False

    true_pos = (agree & predicted_true).sum()
    true_neg = (agree & predicted_false).sum()
    false_pos = (disagree & predicted_true).sum()
    false_neg = (disagree & predicted_false).sum()
    return (true_pos, true_neg, false_pos, false_neg)

def reduce_data(data):
    """Embed a raw batch with the per-modality models.

    Args:
        data: Mapping from modality name to that modality's input batch.

    Returns:
        List with one detached float64 tensor per entry of ``MODALITIES_SUBSET``
        (same order), each reshaped to ``(first_dim, -1)``.
    """
    flattened = []
    for modality in MODALITIES_SUBSET:
        embedding = models[modality](data[modality]).detach().double()
        # Keep the leading axis and collapse everything else.
        flattened.append(embedding.reshape((embedding.shape[0], -1)))
    return flattened

def noise_like(data, rng=None):
    """Draw Gaussian noise with the shape, mean and standard deviation of ``data``.

    Args:
        data: Tensor whose overall mean/std and shape parameterise the noise.
        rng: Optional ``numpy.random.Generator``. Pass a seeded generator for
            reproducible noise; a fresh unseeded one is created when omitted.

    Returns:
        A float64 tensor of ``data.shape`` placed on ``data.device``.
    """
    if rng is None:
        rng = np.random.default_rng()
    mean = data.mean().item()
    std = data.std().item()
    samples = rng.normal(mean, std, tuple(data.shape))
    # Create the noise on the same device as the tensor it replaces; a CPU
    # tensor mixed with CUDA views would fail downstream.
    return torch.tensor(samples, device=data.device)

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN (without a warning) if the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def evaluate(loader, n=10, corrupted=1, grace=0):
    """Corrupt random modalities with noise and accumulate detector scores.

    For each of ``n`` batches the data is embedded with ``reduce_data``, the last
    embedded modality is dropped, ``corrupted`` of the remaining modalities are
    replaced by ``noise_like`` output (label False, untouched ones keep label
    True) and the result is scored with ``evaluate_single``.

    Args:
        loader: Iterable yielding ``(data, labels, reps)`` batches.
        n: Number of batches to evaluate.
        corrupted: Number of modalities replaced by noise per batch.
        grace: Forwarded to ``evaluate_single``.

    Returns:
        Length-4 float array with the summed ``[tp, tn, fp, fn]`` counts.
    """
    results = np.zeros((4))
    rng = np.random.default_rng()
    # Iterate the loader once and advance it, instead of rebuilding the iterator
    # per sample (which re-reads the first batch of an unshuffled loader and
    # restarts its workers every time).
    batches = iter(loader)
    with tqdm.tqdm(total=n) as eval_bar:
        for i in range(n):
            try:
                data, _, _ = next(batches)
            except StopIteration:
                # Loader exhausted before n samples: start another pass.
                batches = iter(loader)
                data, _, _ = next(batches)
            data = reduce_data(data)
            data = data[0:-1]
            labels = np.array([True] * len(data))
            for modality in rng.choice(len(data), size=corrupted, replace=False):
                data[modality] = noise_like(data[modality])
                labels[modality] = False
            results += np.array(evaluate_single(data, labels, grace=grace))
            tp, tn, fp, fn = results
            eval_bar.update(1)
            eval_bar.set_description('Sample [{}/{}] | Accuracy: {:.1%} | Precision: {:.1%} | Recall: {:.1%}'.format(i+1, n, _safe_ratio(tp + tn, results.sum()), _safe_ratio(tp, tp + fp), _safe_ratio(tp, tp + fn)))
    return results

In [44]:
# Build the loaders used for evaluation; batch_size is cca_window_size, the same
# window the detector is trained with.
# NOTE(review): TRAIN_W_IDs, VAL_W_IDs and TEST_W_IDs are not defined in any
# visible cell, and `data` is not a dataset path here (the feature cell passes
# `data_path` as the second argument). This cell looks stale (execution count 44
# sits between cells 54 and 55) and will presumably fail on Restart & Run All —
# TODO confirm load_data's current signature and align this call with the
# `ids=` / `splits=` form used in the feature cell.
train_loader, _, test_loader = load_data(MODALITIES_SUBSET, data, train_ids=TRAIN_W_IDs, val_ids = VAL_W_IDs, test_ids=TEST_W_IDs, loader=True, batch_size=cca_window_size, window_stride=window_stride, window_length=window_length, skeleton_sampling_rate=skeleton_sampling_rate, target_sensor_sampling_rate=target_sensor_sampling_rate, workers=workers)

In [55]:
# Score the detector on 30 test batches with 2 noise-corrupted modalities each;
# the displayed array holds the summed [tp, tn, fp, fn] counts.
results = evaluate(test_loader, n=30, corrupted=2, grace=2)
results

Sample [30/30] | Accuracy: 82.5% | Precision: 100.0% | Recall: 76.7%: 100%|██████████| 30/30 [04:38<00:00,  9.28s/it]


array([138.,  60.,   0.,  42.])