# Knowledge distillation

## 1) UC1: Anomaly Detection


In [8]:
import os
os.chdir('/home/benfenati/code/tle-supervised/')

In [9]:
import torch
import pandas as pd
import numpy as np

from Algorithms.models_audio_mae_evaluate import audioMae_vit_base_evaluate

import torch.nn as nn
import torch.optim as optim

from utils import *
from util.engine_pretrain import evaluate
from plot_anomaly import compute_threshold_accuracy

import datetime

from Datasets.AnomalyDetection_SS335.get_dataset import get_dataset as get_dataset_ss335

In [16]:
device = "cuda:2"
dir = "/home/benfenati/code/Datasets/SHM/AnomalyDetection_SS335/"
window_size = 1190
lr = 0.25e-2
total_epochs = 10

### Teacher model

In [11]:
device = torch.device(device)
teacher = audioMae_vit_base_evaluate(norm_pix_loss=False)
teacher.to(device)
# checkpoint = torch.load(f"/home/benfenati/code/tle-supervised/checkpoints/checkpoint-pretrain_all-200.pth", map_location='cpu')
checkpoint = torch.load(f"/home/benfenati/code/tle-supervised/Results/checkpoints/  .pth", map_location='cpu')
checkpoint_model = checkpoint['model']
msg = teacher.load_state_dict(checkpoint_model, strict=False)

params, size = get_model_info(teacher)
print("N. params = {}; Size = {:.3f}".format(params, size))

N. params = 36116189; Size = 137.772


### Student model

In [17]:
embed_dim = 384 # 384, 768(original)
decoder_embed_dim = 256 # 512(original)
student = audioMae_vit_base_evaluate(embed_dim=embed_dim, decoder_embed_dim=decoder_embed_dim, norm_pix_loss=False)
student.to(device)

params, size = get_model_info(student)
print("N. params = {}; Size = {:.3f}".format(params, size))

N. params = 9179101; Size = 35.015


### Training

In [18]:
starting_date = datetime.date(2019,5,22) 
num_days = 7
print("Creating Training Dataset")
dataset = get_dataset_ss335(dir, starting_date, num_days, sensor = 'S6.1.3', time_frequency = "frequency", windowLength = window_size)
sampler_train = torch.utils.data.RandomSampler(dataset)
data_loader_train = torch.utils.data.DataLoader(
    dataset, sampler=sampler_train,
    batch_size=64,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True,
)
device = torch.device(device)
torch.manual_seed(0)
np.random.seed(0)

optimizer = optim.Adam(student.parameters(), lr=0.001, weight_decay=1e-6)
loss_fn_1 = nn.L1Loss()
loss_fn_2 = nn.L1Loss()
loss_fn_3 = nn.MSELoss()

teacher.eval()

b = 0.5

best_loss = 100000000
best_epoch = 0

for epoch in range(total_epochs):

    student.train()
    train_loss = 0
    counter = 0
    for samples, targets in data_loader_train:
        samples = samples.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
    
        
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            loss_student, pred_student, _ = student(samples, mask_ratio=0.8)

        with torch.no_grad() and torch.cuda.amp.autocast():
            teacher.eval()
            loss_teacher, pred_teacher, _ = teacher(samples, mask_ratio=0.8)
        
        
        loss_1 = loss_student
        loss_2 = loss_fn_3(pred_student, pred_teacher)
        
        loss = b*loss_1 + (1-b)*loss_2

        loss.backward()
        optimizer.step()

        train_loss += loss_student.item()
        # train_loss += loss.item()
        counter +=1

Creating Training Dataset
Loading AnomalyDetection dataset 

### Testing

In [19]:
model_to_evaluate = student
who = "student"

### Creating Testing Dataset for Normal Data
starting_date = datetime.date(2019,5,10)
num_days = 4
print("Creating Testing Dataset -- Normal")
dataset = get_dataset_ss335(dir, starting_date, num_days, sensor = 'S6.1.3', time_frequency = "frequency", windowLength = window_size)
data_loader_test_normal = torch.utils.data.DataLoader(
    dataset, shuffle=False,
    batch_size=1,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True,
)
losses_normal = evaluate(data_loader_test_normal, model_to_evaluate, device)
df = pd.DataFrame.from_dict(losses_normal)
df.to_csv(f'Results/masked_{window_size}samples_normal_{who}.csv', index = False, header = True)
    
### Creating Testing Dataset for Anomaly Data
starting_date = datetime.date(2019,4,17) 
num_days = 4
print("Creating Testing Dataset -- Anomaly")
dataset = get_dataset_ss335(dir, starting_date, num_days, sensor = 'S6.1.3', time_frequency = "frequency", windowLength = window_size)
data_loader_test_anomaly = torch.utils.data.DataLoader(
    dataset, shuffle=False,
    batch_size=1,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True,
)
losses_anomaly = evaluate(data_loader_test_anomaly, model_to_evaluate, device)
df = pd.DataFrame.from_dict(losses_anomaly)
df.to_csv(f'Results/masked_{window_size}samples_anomaly_{who}.csv', index = False, header = True)

directory = "/home/benfenati/code/tle-supervised/Results/"
acc_enc = []
sens_enc = []
spec_enc = []

for dim_filtering in [15,30,60,120, 240]:
        print(f"Dim {dim_filtering}")
        print(f"Autoencoder")
        data_normal = pd.read_csv(directory + f"masked_{window_size}samples_normal_{who}.csv")
        data_anomaly = pd.read_csv(directory + f"masked_{window_size}samples_anomaly_{who}.csv")
        spec, sens, acc = compute_threshold_accuracy(data_anomaly.values, data_normal.values, None, min, max, only_acc = 1, dim_filtering = dim_filtering)
        acc_enc.append(acc*100)
        sens_enc.append(sens*100)
        spec_enc.append(spec*100)

Creating Testing Dataset -- Normal
Loading AnomalyDetection dataset Test:  [   0/5279]  eta: 0:08:57  loss: 0.0024 (0.0024)  mae1: 0.0024 (0.0024)  time: 0.1019  data: 0.0781  max mem: 0
Test:  [  10/5279]  eta: 0:01:09  loss: 0.0025 (0.0027)  mae1: 0.0025 (0.0027)  time: 0.0132  data: 0.0072  max mem: 0
Test:  [  20/5279]  eta: 0:00:48  loss: 0.0026 (0.0026)  mae1: 0.0026 (0.0026)  time: 0.0047  data: 0.0001  max mem: 0
Test:  [  30/5279]  eta: 0:00:41  loss: 0.0025 (0.0026)  mae1: 0.0025 (0.0026)  time: 0.0050  data: 0.0001  max mem: 0
Test:  [  40/5279]  eta: 0:00:37  loss: 0.0025 (0.0025)  mae1: 0.0025 (0.0025)  time: 0.0050  data: 0.0001  max mem: 0
Test:  [  50/5279]  eta: 0:00:35  loss: 0.0025 (0.0025)  mae1: 0.0025 (0.0025)  time: 0.0050  data: 0.0001  max mem: 0
Test:  [  60/5279]  eta: 0:00:33  loss: 0.0025 (0.0025)  mae1: 0.0025 (0.0025)  time: 0.0050  data: 0.0001  max mem: 0
Test:  [  70/5279]  eta: 0:00:32  loss: 0.0025 (0.0025)  mae1: 0.0025 (0.0025)  time: 0.0049  data:

## 2) UC2: TLE on Roccaprebalza

In [1]:
import os
import torch
import numpy as np

# from Algorithms.models_audio_mae_regression import audioMae_vit_base_R
from Algorithms.models_audio_mae_regression_modified import audioMae_vit_base_R

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from time import time
from utils import *
import util.misc as misc
from util.misc import interpolate_pos_embed

import datetime
from Datasets.Vehicles_Roccaprebalza.get_dataset import get_dataset as get_dataset_roccaprebalza

from util.engine_pretrain import evaluate_finetune
from vehicles_roccaprebalza_example import compute_accuracy

  from .autonotebook import tqdm as notebook_tqdm


### Params

In [16]:
device = "cuda:2"
car = "y_camion" # y_car, y_camion
dir = "/home/benfenati/code/Datasets/SHM/Vehicles_Roccaprebalza/"

lr = 0.25e-5
total_epochs = 300

### Teacher model

In [17]:
teacher = audioMae_vit_base_R(norm_pix_loss=True, mask_ratio = 0.2)
teacher.to(device)
checkpoint = torch.load(f"/home/benfenati/code/tle-supervised/Results/checkpoints/checkpoint-pretrainig_all_{car}_roccaprebalza_finetune-500.pth", map_location='cpu')
checkpoint_model = checkpoint['model']
state_dict = teacher.state_dict()
msg = teacher.load_state_dict(checkpoint_model, strict=True)

params, size = get_model_info(teacher)
print("N. params = {}; Size = {:.3f}".format(params, size))

N. params = 25624626; Size = 97.750


### Student model

In [18]:
embed_dim = 384 # 768 (original)
decoder_embed_dim = 512 # 256, 512 (original)
student = audioMae_vit_base_R(embed_dim=embed_dim, decoder_embed_dim=decoder_embed_dim, 
                              norm_pix_loss=True, mask_ratio = 0.2)
student.to(device)
checkpoint = torch.load(f"/home/benfenati/code/tle-supervised/Results/checkpoints/checkpoint-student-pretrain_all-200.pth", map_location='cpu')
checkpoint_model = checkpoint['model']
state_dict = student.state_dict()
for k in ['head.weight', 'head.bias']:
    if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape:
        print(f"Removing key {k} from pretrained checkpoint")
        del checkpoint_model[k]
msg = student.load_state_dict(checkpoint_model, strict=False)
interpolate_pos_embed(student, checkpoint_model)

params, size = get_model_info(student)
print("N. params = {}; Size = {:.3f}".format(params, size))

N. params = 9326130; Size = 35.576


### Training

In [19]:
# Train
dataset_train, dataset_test = get_dataset_roccaprebalza(dir, window_sec_size = 60, shift_sec_size = 2, time_frequency = "frequency", car = car)
sampler_train = torch.utils.data.RandomSampler(dataset_train)
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, sampler=sampler_train,
    batch_size=8,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, shuffle=False,
    batch_size=1,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True,
    )

torch.manual_seed(0)
np.random.seed(0)

device = torch.device(device)
torch.manual_seed(0)
np.random.seed(0)

optimizer = optim.Adam(student.parameters(), lr=lr, weight_decay=1e-6)
loss_fn_1 = nn.L1Loss()
loss_fn_2 = nn.L1Loss()
loss_fn_3 = nn.MSELoss()

teacher.eval()

b = 0.5
g = 0.6667

best_loss = 100000000
best_epoch = 0

for epoch in range(total_epochs):

    student.train()
    train_loss = 0
    counter = 0
    for samples, targets in data_loader_train:
        samples = samples.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
    
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            middle_student, final_student = student(samples)

        with torch.no_grad() and torch.cuda.amp.autocast():
            teacher.eval()
            middle_teacher, final_teacher = teacher(samples)
        
        final_student = final_student.squeeze()
        final_teacher = final_teacher.squeeze()
        loss_1 = loss_fn_1(final_student, targets.float())
        loss_2 = loss_fn_2(final_student, final_teacher)


        loss_3 = loss_fn_3(middle_student, middle_teacher)
        
        loss = g*(b*loss_1 + (1-b)*loss_2) + (1-g)*loss_3
        # loss = b*loss_1 + (1-b)*loss_2

        loss.backward()
        optimizer.step()

        train_loss += loss_fn_1(final_student, targets).item()
        # train_loss += loss_1.item()
        counter +=1

Loading Roccaprebalza dataset
Loading Roccaprebalza dataset


  new_labels = pd.concat([new_labels, pd.DataFrame.from_dict(dict)])
  new_labels = pd.concat([new_labels, pd.DataFrame.from_dict(dict)])


### Testing

In [20]:
model_to_evaluate = student

dataset_train, dataset_test = get_dataset_roccaprebalza(dir, window_sec_size = 60, shift_sec_size = 2, time_frequency = "frequency", car = car)
sampler_test = torch.utils.data.RandomSampler(dataset_test)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, shuffle=False,
    batch_size=1,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True,
)

y_predicted, y_test = evaluate_finetune(data_loader_test, model_to_evaluate, device)
compute_accuracy(y_test, y_predicted)

Loading Roccaprebalza dataset
Loading Roccaprebalza dataset


  new_labels = pd.concat([new_labels, pd.DataFrame.from_dict(dict)])
  new_labels = pd.concat([new_labels, pd.DataFrame.from_dict(dict)])
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Test:  [  0/203]  eta: 0:00:25  loss: 0.0337 (0.0337)  mae1: 0.1836 (0.1836)  time: 0.1235  data: 0.1026  max mem: 0
Test:  [ 10/203]  eta: 0:00:03  loss: 0.0182 (0.0552)  mae1: 0.1348 (0.1831)  time: 0.0157  data: 0.0094  max mem: 0
Test:  [ 20/203]  eta: 0:00:01  loss: 0.0198 (0.1158)  mae1: 0.1406 (0.2568)  time: 0.0046  data: 0.0001  max mem: 0
Test:  [ 30/203]  eta: 0:00:01  loss: 0.0366 (0.1490)  mae1: 0.1914 (0.2831)  time: 0.0043  data: 0.0001  max mem: 0
Test:  [ 40/203]  eta: 0:00:01  loss: 0.0413 (0.1383)  mae1: 0.2031 (0.2786)  time: 0.0045  data: 0.0001  max mem: 0
Test:  [ 50/203]  eta: 0:00:01  loss: 0.0429 (0.1322)  mae1: 0.2070 (0.2710)  time: 0.0048  data: 0.0001  max mem: 0
Test:  [ 60/203]  eta: 0:00:00  loss: 0.0413 (0.1249)  mae1: 0.2031 (0.2678)  time: 0.0046  data: 0.0001  max mem: 0
Test:  [ 70/203]  eta: 0:00:00  loss: 0.0413 (0.1262)  mae1: 0.2031 (0.2715)  time: 0.0042  data: 0.0001  max mem: 0
Test:  [ 80/203]  eta: 0:00:00  loss: 0.1195 (0.1557)  mae1: 0.3

## UC3: TLE on Sacertis

In [1]:
import os
import torch
import numpy as np

# from Algorithms.models_audio_mae_regression import audioMae_vit_base_R
from Algorithms.models_audio_mae_regression_modified import audioMae_vit_base_R

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from time import time
from utils import *
import util.misc as misc
from util.misc import interpolate_pos_embed

import datetime
from Datasets.Vehicles_Sacertis.get_dataset import get_dataset as get_dataset_sacertis

from util.engine_pretrain import evaluate_finetune
from vehicles_roccaprebalza_example import compute_accuracy

  from .autonotebook import tqdm as notebook_tqdm


### Params

In [2]:
device = "cuda:0"
dir = "/home/benfenati/code/Datasets/SHM/Vehicles_Sacertis/"

lr = 0.25e-3
total_epochs = 10

### Teacher model

In [3]:
teacher = audioMae_vit_base_R(norm_pix_loss=True, mask_ratio = 0.2)
teacher.to(device)
checkpoint = torch.load(f"/home/benfenati/code/tle-supervised/Results/checkpoints/checkpoint-pretrainig_all_vehicles_sacertis_finetune-200.pth", map_location='cpu')
checkpoint_model = checkpoint['model']
state_dict = teacher.state_dict()
msg = teacher.load_state_dict(checkpoint_model, strict=True)

params, size = get_model_info(teacher)
print("N. params = {}; Size = {:.3f}".format(params, size))

N. params = 25624626; Size = 97.750


### Student model

In [4]:
embed_dim = 384 # 768 (original)
decoder_embed_dim = 512 # 256, 512 (original)
student = audioMae_vit_base_R(embed_dim=embed_dim, decoder_embed_dim=decoder_embed_dim, 
                              norm_pix_loss=True, mask_ratio = 0.2)
student.to(device)
checkpoint = torch.load(f"/home/benfenati/code/tle-supervised/Results/checkpoints/checkpoint-student-pretrain_all-200.pth", map_location='cpu')
checkpoint_model = checkpoint['model']
state_dict = student.state_dict()
for k in ['head.weight', 'head.bias']:
    if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape:
        print(f"Removing key {k} from pretrained checkpoint")
        del checkpoint_model[k]
msg = student.load_state_dict(checkpoint_model, strict=False)
interpolate_pos_embed(student, checkpoint_model)

params, size = get_model_info(student)
print("N. params = {}; Size = {:.3f}".format(params, size))

N. params = 9326130; Size = 35.576


### Training

In [5]:
# Train
dataset_train = get_dataset_sacertis(dir, False, True, False,  sensor = "None", time_frequency = "frequency")
sampler_train = torch.utils.data.RandomSampler(dataset_train)
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, sampler=sampler_train,
    batch_size=128,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True)

print("\nDone!")

device = torch.device(device)
torch.manual_seed(0)
np.random.seed(0)

optimizer = optim.Adam(student.parameters(), lr=lr, weight_decay=1e-6)
loss_fn_1 = nn.L1Loss()
loss_fn_2 = nn.L1Loss()
loss_fn_3 = nn.MSELoss()

teacher.eval()

b = 0.5
g = 0.6667

best_loss = 100000000
best_epoch = 0

for epoch in range(total_epochs):

    student.train()
    train_loss = 0
    counter = 0
    if counter != 0:
        print(f"Epoch {epoch} - Loss {train_loss/counter}")

    for samples, targets in data_loader_train:
        samples = samples.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
    
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            middle_student, final_student = student(samples)

        with torch.no_grad() and torch.cuda.amp.autocast():
            teacher.eval()
            middle_teacher, final_teacher = teacher(samples)
        
        final_student = final_student.squeeze()
        final_teacher = final_teacher.squeeze()
        loss_1 = loss_fn_1(final_student, targets.float())
        loss_2 = loss_fn_2(final_student, final_teacher)


        loss_3 = loss_fn_3(middle_student, middle_teacher)
        
        loss = g*(b*loss_1 + (1-b)*loss_2) + (1-g)*loss_3
        # loss = b*loss_1 + (1-b)*loss_2

        loss.backward()
        optimizer.step()

        train_loss += loss_fn_1(final_student, targets).item()
        # train_loss += loss_1.item()
        counter +=1

Loading Sacertis dataset Done!


### Testing

In [6]:
model_to_evaluate = teacher

dataset = get_dataset_sacertis(dir, False, False, True,  sensor = "None", time_frequency = "frequency")
data_loader_test = torch.utils.data.DataLoader(
    dataset, shuffle=False,
    batch_size=1,
    num_workers=1,
    pin_memory='store_true',
    drop_last=True,
)
y_predicted, y_test = evaluate_finetune(data_loader_test, model_to_evaluate, device)
compute_accuracy(y_test, y_predicted)

Loading Sacertis dataset 

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Test:  [    0/50000]  eta: 5:38:30  loss: 0.3023 (0.3023)  mae1: 0.5498 (0.5498)  time: 0.4062  data: 0.1726  max mem: 200
Test:  [   10/50000]  eta: 0:32:39  loss: 0.0742 (0.7733)  mae1: 0.2725 (0.6240)  time: 0.0392  data: 0.0158  max mem: 201
Test:  [   20/50000]  eta: 0:18:02  loss: 0.0690 (0.8153)  mae1: 0.2627 (0.6071)  time: 0.0024  data: 0.0001  max mem: 201
Test:  [   30/50000]  eta: 0:12:50  loss: 0.0928 (0.8660)  mae1: 0.3047 (0.6515)  time: 0.0023  data: 0.0001  max mem: 201
Test:  [   40/50000]  eta: 0:10:11  loss: 0.1846 (0.8621)  mae1: 0.4297 (0.6592)  time: 0.0023  data: 0.0001  max mem: 201
Test:  [   50/50000]  eta: 0:08:34  loss: 0.0333 (0.7360)  mae1: 0.1826 (0.5937)  time: 0.0023  data: 0.0001  max mem: 201
Test:  [   60/50000]  eta: 0:07:29  loss: 0.1020 (0.8380)  mae1: 0.3193 (0.6379)  time: 0.0024  data: 0.0001  max mem: 201
Test:  [   70/50000]  eta: 0:06:43  loss: 0.1320 (0.8398)  mae1: 0.3633 (0.6412)  time: 0.0024  data: 0.0001  max mem: 201
Test:  [   80/50