In [None]:
import torch
import torch.nn as nn
from torch import optim
import torchvision.datasets as dset
import torchvision.transforms as transforms

from utils import device_setting, seed_torch
from data_manager import AnomalyDataManager
from model import ResNet50MetricModel, MLP
from metric_layer import ArcMarginProduct#, MetricModel
from resnet import ResNet18
# from trainer import Trainer

import matplotlib.pyplot as plt

from sklearn.manifold import TSNE
import numpy as np
import matplotlib

import copy

In [None]:
def calc_cosine_similarity(np_array_a, np_array_b):
    """Pairwise cosine similarity between the rows of two 2-D arrays.

    Args:
        np_array_a: array of shape (n, d); each row is one vector.
        np_array_b: array of shape (m, d); each row is one vector.

    Returns:
        Array of shape (n, m) whose entry (i, j) is the cosine similarity
        between row i of ``np_array_a`` and row j of ``np_array_b``.
        A small epsilon is added to every norm, so all-zero rows give a
        similarity of 0 instead of a division by zero.
    """
    eps = 1e-9

    # Euclidean length of every row, kept as column vectors of shape (n, 1) / (m, 1).
    row_len_a = (np_array_a * np_array_a).sum(1, keepdims=True) ** (0.5)
    row_len_b = (np_array_b * np_array_b).sum(1, keepdims=True) ** (0.5)

    # Raw dot products between every pair of rows, shape (n, m).
    dot_products = np_array_a @ np_array_b.T

    # Scale rows by |a_i| and columns by |b_j| (the transposed norm is (1, m)).
    return dot_products / (row_len_a + eps) / (row_len_b.T + eps)

In [None]:
def one_array_cosine_similarity(np_array_a, np_array_b):
    """Cosine similarity between two arrays, each treated as a single vector.

    The norms are taken over *all* elements of each input (``sum()`` without
    an axis), so for 1-D inputs the result is the ordinary scalar cosine
    similarity. For 2-D inputs the numerator is the matrix product
    ``np_array_a @ np_array_b.T`` while both norms remain scalars.

    Args:
        np_array_a: NumPy array (typically 1-D).
        np_array_b: NumPy array with a trailing dimension matching ``np_array_a``.

    Returns:
        ``(a @ b.T) / (|a| + eps) / (|b| + eps)`` with ``eps = 1e-9`` guarding
        against division by zero.
    """
    eps = 1e-9
    dis = np_array_a @ np_array_b.T
    norm_a = (np_array_a * np_array_a).sum() ** (0.5)
    norm_b = (np_array_b * np_array_b).sum() ** (0.5)
    # Both norms are scalars, so no transpose is needed on norm_b.
    return dis / (norm_a + eps) / (norm_b + eps)

In [None]:
class MetricModel(nn.Module):
    """Feature extractor plus metric-learning head with centroid-based inference.

    ``forward`` is the training path (features -> metric layer logits).
    ``set_center_of_classes`` computes one mean feature vector per class, and
    ``inference`` scores inputs by cosine similarity to those class means.

    Args:
        model: module mapping an input batch to feature vectors.
        metric_layer: module called as ``metric_layer(feature, label)``.
        num_classes: number of classes for which centroids are kept.
    """

    def __init__(self, model, metric_layer, num_classes):
        super(MetricModel, self).__init__()
        self.model = model
        self.metric_layer = metric_layer
        self.num_classes = num_classes

    def forward(self, input, label):
        """Return ``(feature, output)`` where ``output`` comes from the metric layer."""
        feature = self.model(input)
        output = self.metric_layer(feature, label)

        return feature, output

    def set_center_of_classes(self, train_dataloader, feature_dim, device):
        """Compute the mean feature vector (centroid) of every class.

        Features are grouped by the *ground-truth* label of each sample, and
        the same labels provide the divisor, so every centroid is a true mean.
        (Grouping by the metric layer's predictions while dividing by
        ground-truth counts does not yield a mean.)

        Sets:
            self.cog_sum: list of per-class feature sums (float64 arrays).
            self.cog_list: list of per-class centroids; a class without any
                sample keeps an all-zero vector instead of dividing by zero.

        Args:
            train_dataloader: iterable yielding ``(input, label)`` tensor batches
                with integer labels in ``[0, num_classes)``.
            feature_dim: length of the feature vectors produced by ``self.model``.
            device: device the inputs are moved to before the forward pass.
        """
        feature_sums = [np.zeros(feature_dim, dtype=np.float64) for _ in range(self.num_classes)]
        num_data_list = [0] * self.num_classes

        # Centroids are compared against features produced by `inference`, so
        # extract them in eval mode; each sub-module's own mode is restored after.
        saved_modes = [(module, module.training) for module in self.modules()]
        self.eval()
        try:
            # No gradients are needed; this avoids building an autograd graph per batch.
            with torch.no_grad():
                for input, label in train_dataloader:
                    feature = self.model(input.to(device))
                    feature_cpu = feature.detach().cpu().numpy()
                    for i, lbl in enumerate(label.detach().cpu().tolist()):
                        feature_sums[lbl] += feature_cpu[i]
                        num_data_list[lbl] += 1
        finally:
            for module, mode in saved_modes:
                module.training = mode

        print(num_data_list)
        self.cog_sum = copy.deepcopy(feature_sums)
        self.cog_list = [
            total / count if count > 0 else total.copy()
            for total, count in zip(feature_sums, num_data_list)
        ]

    def inference(self, input):
        """Score a batch against the class centroids.

        Requires ``set_center_of_classes`` to have been called first.

        Args:
            input: batch accepted by ``self.model``.

        Returns:
            CPU tensor of shape (batch, num_classes); entry (x, y) is the cosine
            similarity between the feature of sample x and the centroid of
            class y (a similarity score, not a normalised probability).
        """
        with torch.no_grad():
            feature = self.model(input)
        prob_classes = calc_cosine_similarity(feature.detach().cpu().numpy(), np.array(self.cog_list))

        return torch.from_numpy(prob_classes).clone()

In [None]:
# ---- Experiment configuration ----

# Data source
dataset = 'kdd'
data_dir = '../data'

# Optimisation settings
batch_size, lr, momentum, max_epoch = 64, 0.001, 0.9, 200
criterion = nn.CrossEntropyLoss()

# Length of the feature vectors used by the model and the metric layer
feature_dim = 25

# Compute device
gpu = 0
device = device_setting(gpu=gpu)

# Dataset wrapper
data_manager = AnomalyDataManager(
    dataset=dataset,
    data_dir=data_dir,
    trans=None,
    anomaly_label=9,
    data_num=2000,
)

In [None]:
# Query the class count and build the dataloaders from the data manager.
# `dataloader_dict` is indexed with 'train' and 'test' by the evaluation cells.
num_classes = data_manager.get_num_classes()
dataloader_dict = data_manager.build_dataloader(batch_size)

In [None]:
# Seed before constructing any module (presumably seeds the torch RNGs — confirm in utils).
seed_torch(0)
model = MLP(in_features=data_manager.input_dim, out_features=feature_dim)
metric_layer = ArcMarginProduct(in_features=feature_dim, out_features=num_classes, s=30.0, m=0.50, easy_margin=False)
optimizer = optim.Adam(list(model.parameters()) + list(metric_layer.parameters()), lr=lr)
criterion = nn.CrossEntropyLoss()

load_epoch = 20
model_path = '../models/kdd/model_epoch_{}.pth'.format(load_epoch)
# Load onto the CPU first so a checkpoint saved on a GPU can also be restored
# on a machine without one; the model is moved to `device` right below.
model.load_state_dict(torch.load(model_path, map_location='cpu'))
# NOTE(review): only `model` weights are restored here; `metric_layer` keeps
# its freshly initialised parameters.

model = model.to(device)
metric_layer = metric_layer.to(device)
metric_model = MetricModel(model, metric_layer, num_classes)

In [None]:
# Compute the per-class feature centroids from the training split.
metric_model.set_center_of_classes(
    train_dataloader=dataloader_dict['train'],
    feature_dim=feature_dim,
    device=device,
)


In [None]:
# Hard-coded list of 23 per-class counts, used below as divisors for
# `metric_model.cog_sum`.
# NOTE(review): presumably the number of training samples per class — confirm
# against the dataset rather than relying on these literals.
data_num = [67343, 41214, 890, 3599, 2931, 892, 1493, 3633, 2646, 201, 956, 53, 8, 7, 10, 30, 11, 20, 4, 18, 9, 2, 3]

In [None]:
# Divide the raw feature sum of class `i` by its hard-coded count
# (presumably a manual cross-check against `metric_model.cog_list[i]`).
i = 0
metric_model.cog_sum[i] / data_num[i]

In [None]:
# Average each class centroid over its feature dimensions (one value per class).
np.mean(np.asarray(metric_model.cog_list), axis=1)

In [None]:
# Project the class centroids to 2-D with t-SNE.
centroids = np.asarray(metric_model.cog_list)

# scikit-learn requires perplexity < n_samples, and there is only one sample
# per class here, so clamp the usual value of 30 to the number of centroids.
tsne_perplexity = min(30, len(centroids) - 1)

# The iteration count is left at its default of 1000: the keyword is `n_iter`
# in older scikit-learn releases and `max_iter` in newer ones.
tsne = TSNE(n_components=2, random_state=0, perplexity=tsne_perplexity)

X_embedded = tsne.fit_transform(centroids)

In [None]:
# plt.figure(figsize = (30, 30))
# # lbl = dataset_dict['train'][:][1][i]
# plt.scatter(X_embedded[:, 0], 
#             X_embedded[:, 1],
#             c=range(len(X_embedded)), 
#             cmap=plt.cm.jet, 
#             edgecolor='none', 
#             # label = lbl,
#             # alpha=0.7,
#             s=100)

# plt.colorbar(aspect=40, pad=0.08, orientation='vertical')

In [None]:

# Evaluate centroid-based classification accuracy on the training split.
acc = 0
metric_model.eval()
pred_hist = [0] * num_classes  # number of predictions made for each class index

for data, label in dataloader_dict['train']:
    data = data.to(device)
    probs = metric_model.inference(data)
    # The predicted class is the centroid with the highest cosine similarity.
    _, pred_label = torch.max(probs, 1)
    for cls in pred_label.tolist():
        pred_hist[cls] += 1
    # `inference` returns a CPU tensor, so compare against CPU labels.
    acc += torch.sum(pred_label == label.cpu()).item()

acc = acc / len(dataloader_dict['train'].dataset)
# '{:.4f}' prints four decimals ('{:4f}' only set a minimum field width).
print('Train accuracy: {:.4f}'.format(acc))


In [None]:

# Evaluate on the test split, with and without similarity-based anomaly rejection.
# Predictions whose best cosine similarity falls below this value are marked -1.
ANOMALY_THRESHOLD = 0.9

metric_model.eval()
normal_acc = 0
anomaly_detection_acc = 0
for data, label in dataloader_dict['test']:
    data = data.to(device)
    label = label.cpu()  # `inference` returns CPU tensors
    # Disabled in the original: anomalies absent from training (labels 22 and
    # above) were to be relabelled as the "other" class -1 via `label > 21`.
    # NOTE(review): while that remapping is off, a prediction of -1 can only
    # count as correct if the dataset itself already uses -1 as a label.
    probs = metric_model.inference(data)
    pred_values, pred_label = torch.max(probs, 1)
    normal_acc += torch.sum(pred_label == label).item()

    # Reject low-similarity predictions as anomalies. The original also
    # accumulated `label_mask.sum()` here, but `label_mask` is never defined
    # (NameError on a fresh kernel) and the counter was never read.
    anomaly_mask = pred_values < ANOMALY_THRESHOLD
    pred_label[anomaly_mask] = -1
    anomaly_detection_acc += torch.sum(pred_label == label).item()

num_test_samples = len(dataloader_dict['test'].dataset)
normal_acc = normal_acc / num_test_samples
anomaly_detection_acc = anomaly_detection_acc / num_test_samples
# '{:.4f}' prints four decimals ('{:4f}' only set a minimum field width).
print('Test Normal accuracy: {:.4f}'.format(normal_acc))
print('Test Anomaly Detection accuracy: {:.4f}'.format(anomaly_detection_acc))

In [None]:
# Binary evaluation on the training split: only normal vs. anomalous is judged.
# Class 0 counts as normal; every other class index is collapsed to "anomalous".
acc = 0
metric_model.eval()

for data, label in dataloader_dict['train']:
    data = data.to(device)
    probs = metric_model.inference(data)
    _, pred_label = torch.max(probs, 1)
    # Vectorised collapse to a boolean "is anomalous" flag; the batch label
    # tensor is no longer modified in place.
    pred_is_anomaly = pred_label != 0
    true_is_anomaly = label.cpu() != 0
    acc += torch.sum(pred_is_anomaly == true_is_anomaly).item()

acc = acc / len(dataloader_dict['train'].dataset)
# '{:.4f}' prints four decimals ('{:4f}' only set a minimum field width).
print('Train accuracy: {:.4f}'.format(acc))

In [None]:
# Binary evaluation on the test split: only normal vs. anomalous is judged.
# Class 0 counts as normal; every other class index is collapsed to "anomalous".
acc = 0
metric_model.eval()

for data, label in dataloader_dict['test']:
    data = data.to(device)
    probs = metric_model.inference(data)
    _, pred_label = torch.max(probs, 1)
    # Vectorised collapse to a boolean "is anomalous" flag; the batch label
    # tensor is no longer modified in place.
    pred_is_anomaly = pred_label != 0
    true_is_anomaly = label.cpu() != 0
    acc += torch.sum(pred_is_anomaly == true_is_anomaly).item()

acc = acc / len(dataloader_dict['test'].dataset)
# This cell evaluates the test split, so report it as test accuracy
# (the message previously said "Train").
print('Test accuracy: {:.4f}'.format(acc))