### Loading and Saving Room00 data

In [None]:
import pickle
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# NOTE(review): pickle.load can execute arbitrary code stored in the file,
# so these pickles must come from a trusted source.
# The context managers close the file handles as soon as loading is done.
with open("./data/continuous_sections_60_train.pickle", "rb") as train_file:
    train_data = pickle.load(train_file)
with open("./data/continuous_sections_60_test.pickle", "rb") as test_file:
    test_data = pickle.load(test_file)

In [None]:
# Tag every entry of train_data['room00'] with its position in the list
# (`frame_id`) and stack the entries into one long training frame.
train_frames = []
for frame_idx, frame_data in enumerate(train_data['room00']):
    # The id is written onto the loaded frame itself, as before.
    frame_data['frame_id'] = frame_idx
    train_frames.append(frame_data)

# Concatenate once: calling pd.concat inside the loop re-copies every row
# accumulated so far on each iteration.
new_train_data = pd.concat(train_frames) if train_frames else pd.DataFrame()

In [None]:
# Replace the per-frame row labels left over from the concat with one
# continuous 0..n-1 index (the old labels are discarded, not kept as a column).
new_train_data = new_train_data.reset_index(drop=True)

In [None]:
# Same stacking as for the training split: tag every entry of
# test_data['room00'] with its list position (`frame_id`) and concatenate.
test_frames = []
for frame_idx, frame_data in enumerate(test_data['room00']):
    # The id is written onto the loaded frame itself, as before.
    frame_data['frame_id'] = frame_idx
    test_frames.append(frame_data)

# Concatenate once instead of growing the frame inside the loop, then
# replace the per-frame row labels with one continuous 0..n-1 index.
new_test_data = pd.concat(test_frames) if test_frames else pd.DataFrame()
new_test_data.reset_index(drop=True, inplace=True)

In [None]:
# Persist both stacked frames as CSV files under ./data
# (to_csv is called with its defaults, exactly as before).
for _frame, _csv_path in (
    (new_train_data, "./data/train_room00.csv"),
    (new_test_data, "./data/test_room00.csv"),
):
    _frame.to_csv(_csv_path)

In [None]:
# Fit the standardisation statistics on the training frame only.
# `frame_id` is the bookkeeping column added while stacking the frames,
# so it is left out of the scaler.
from sklearn.preprocessing import StandardScaler

train_features = new_train_data.drop(columns=['frame_id'])
std = StandardScaler().fit(train_features)
std_X_train = std.transform(train_features)

### Preprocessing Data for 1D-CNN

In [None]:
def split_sequence(sequence, n_steps_in, out_steps, output_var):
    """Cut one frame into sliding input windows and look-ahead targets.

    Window ``i`` holds rows ``i .. i + n_steps_in - 1`` (by position) of every
    column except ``frame_id``. Its target is ``output_var`` taken
    ``out_steps`` rows after the last row of the window.

    Args:
        sequence: DataFrame holding a single frame. It must contain a
            ``frame_id`` column and ``output_var``. It is not modified.
        n_steps_in: Number of consecutive rows in each input window.
        out_steps: Offset, in rows, from the last window row to the target row.
        output_var: Column label (or list of labels) of the target.

    Returns:
        Tuple ``(X, y)`` of float32 arrays. ``X`` has shape
        ``(n_windows, n_steps_in, n_features)`` and ``y`` has ``n_windows``
        entries along its first axis. A frame that is too short for a single
        window yields ``X`` of shape ``(0, n_steps_in, n_features)``, so the
        result can still be concatenated with the windows of other frames.

    Raises:
        KeyError: If ``frame_id`` or ``output_var`` is not a column.
    """
    # drop() and reset_index() both return new objects, so the caller's frame
    # keeps its index and its `frame_id` column.
    frame = sequence.drop(columns=['frame_id']).reset_index(drop=True)
    features = frame.values
    targets = np.asarray(frame[output_var].values)

    # Position of the target that belongs to the very first window.
    first_out = (n_steps_in - 1) + out_steps
    n_windows = max(len(frame) - first_out, 0)

    if n_windows == 0:
        X = np.empty((0, n_steps_in, features.shape[1]), dtype=np.float32)
    else:
        X = np.stack(
            [features[start:start + n_steps_in] for start in range(n_windows)]
        ).astype(np.float32)
    y = targets[first_out:first_out + n_windows].astype(np.float32)

    return X, y

### Training and testing data processing for 1D-CNN

In [None]:
from tqdm.notebook import tqdm

# Window every training frame separately (windows never straddle two frames)
# and join the per-frame results with a single concatenate at the end.
# groupby walks the frames in ascending frame_id order, the same order
# np.unique produced, without re-scanning the whole table for every frame.
train_windows_X, train_windows_Y = [], []
for f_id, frame in tqdm(new_train_data.groupby('frame_id', sort=True)):
    # Pass a private copy: the helper is free to relabel or drop columns.
    frame_X, frame_y = split_sequence(frame.copy(), 5, 5, 'co2')
    if len(frame_X) == 0:
        # Frame too short to yield any window; nothing to add.
        continue
    train_windows_X.append(frame_X)
    train_windows_Y.append(frame_y)

seq_train_X = np.concatenate(train_windows_X)
seq_train_Y = np.concatenate(train_windows_Y)

In [None]:
# Window every test frame separately (windows never straddle two frames)
# and join the per-frame results with a single concatenate at the end.
# groupby walks the frames in ascending frame_id order, the same order
# np.unique produced, without re-scanning the whole table for every frame.
test_windows_X, test_windows_Y = [], []
for f_id, frame in tqdm(new_test_data.groupby('frame_id', sort=True)):
    # Pass a private copy: the helper is free to relabel or drop columns.
    frame_X, frame_y = split_sequence(frame.copy(), 5, 5, 'co2')
    if len(frame_X) == 0:
        # Frame too short to yield any window; nothing to add.
        continue
    test_windows_X.append(frame_X)
    test_windows_Y.append(frame_y)

seq_test_X = np.concatenate(test_windows_X)
seq_test_Y = np.concatenate(test_windows_Y)

In [None]:
# change the relationship type
# Concretely: exchange positions 1 and 2 along axis 1 of every window.
# The fancy-indexed right-hand side is a copy, so the swap is safe in place.
# NOTE(review): axis 1 is the row (time-step) axis of the windows built by
# split_sequence, so this swaps two time steps, not two feature columns.
# Confirm that is the intended effect.
seq_train_X[:, [1, 2], ...] = seq_train_X[:, [2, 1], ...]
seq_test_X[:, [1, 2], ...] = seq_test_X[:, [2, 1], ...]

### Creating the loader and the 1D-CNN model for the DL regression Problem

In [None]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Dataset of feature windows that are standardised when accessed.

    Args:
        scaler: Fitted object exposing ``mean_`` and ``var_`` (one entry per
            feature column), e.g. a fitted ``StandardScaler``.
        feature_values: Array of samples indexed along its first axis. The
            last axis must broadcast against ``scaler.mean_``.
        output_values: Targets, indexed like ``feature_values``.
    """

    def __init__(self, scaler, feature_values, output_values):
        self.mean = scaler.mean_
        std = np.sqrt(scaler.var_)
        # A column that was constant during fitting has zero variance.
        # Dividing by 0 would turn it into NaN/inf, so scale it by 1 instead.
        self.std = np.where(std == 0, 1.0, std)
        self.feature_values = feature_values
        self.output_values = output_values

    def __len__(self):
        """Return the number of samples."""
        return self.feature_values.shape[0]

    def __getitem__(self, idx):
        """Return ``(standardised features, raw target)`` for sample ``idx``."""
        feature_vals = (self.feature_values[idx] - self.mean) / self.std
        label = self.output_values[idx]

        return feature_vals, label
    
class OneDCNN(nn.Module):
    """Small 1D-CNN regressor: one pointwise conv followed by three dense layers.

    Args:
        nb_features: Number of input channels of ``conv1``.
        seq_len: Length of the input along its last axis. It sizes the first
            dense layer (``32 * seq_len`` inputs); the default of 5 gives the
            160-wide layer used previously.

    NOTE(review): ``nn.Conv1d`` treats axis 1 of the input as channels. The
    windows built by ``split_sequence`` are laid out as (rows, columns) and
    only fit here because both sizes are 5. Confirm the intended layout.
    """

    def __init__(self, nb_features, seq_len=5):
        super().__init__()
        self.nb_features = nb_features
        self.seq_len = seq_len
        self.conv1 = nn.Conv1d(self.nb_features, 32, kernel_size=1)
        # kernel_size=1 keeps the length unchanged, so flattening the conv
        # output gives 32 * seq_len values per sample.
        self.fc1 = nn.Linear(32 * seq_len, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, X):
        """Map a ``(batch, nb_features, seq_len)`` tensor to ``(batch, 1)``."""
        X = F.relu(self.conv1(X))
        X = torch.flatten(X, start_dim=1)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        # The final ReLU clamps every prediction to be non-negative.
        X = F.relu(self.fc3(X))

        return X

### Training of the CNN Model

In [None]:
def fit_model(cnn_model, train_loader, device, nb_epochs = 1):
    """Train ``cnn_model`` with Adam (lr 1e-3) against an MSE objective.

    The model is moved to ``device`` and put in training mode. Predictions
    are flattened to 1-D before being compared with the targets.

    Args:
        cnn_model: Module to optimise.
        train_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device on which the model and the batches are placed.
        nb_epochs: Number of passes over ``train_loader``.

    Returns:
        List with one loss value per processed batch, across all epochs.
    """
    adam = torch.optim.Adam(cnn_model.parameters(), lr=1e-3)
    mse = nn.MSELoss()
    cnn_model = cnn_model.to(device)
    cnn_model.train()

    loss_history = []
    for epoch in range(nb_epochs):
        set_losses = []
        for batch_idx, batch in enumerate(train_loader):
            inputs, targets = batch
            adam.zero_grad()

            predictions = cnn_model(inputs.float().to(device)).reshape(-1,)
            batch_loss = mse(predictions, targets.float().to(device))
            set_losses.append(batch_loss.detach().cpu().numpy())
            # Report the running mean loss of the epoch every 5000 batches.
            if batch_idx % 5000 == 0:
                print(f"Epoch: {epoch+1}/{nb_epochs}, Batch: {batch_idx}, Loss: {np.mean(set_losses)}")

            batch_loss.backward()
            adam.step()

        loss_history += set_losses

    return loss_history

def evaluate(cnn_model, test_loader, device):
    """Run ``cnn_model`` over ``test_loader`` without tracking gradients.

    Args:
        cnn_model: Module to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device the feature batches are moved to.

    Returns:
        ``(predicted_values, output_values)``: two flat lists holding one
        prediction and one target per sample, in loader order.
    """
    cnn_model.eval()

    predictions, targets = [], []
    with torch.no_grad():
        for features, labels in test_loader:
            batch_pred = cnn_model(features.float().to(device))
            predictions.extend(batch_pred.reshape(-1,).detach().cpu().numpy())
            targets.extend(labels.numpy())

    return predictions, targets

In [None]:
# Wrap the windowed arrays in datasets that standardise with the
# training-set scaler, then serve them in shuffled batches of 16.
training_dataset = CustomDataset(
    scaler=std, feature_values=seq_train_X, output_values=seq_train_Y
)
training_loader = DataLoader(dataset=training_dataset, batch_size=16, shuffle=True)

# The test loader is shuffled as well, so each pass over it sees the
# batches in a different order.
testing_dataset = CustomDataset(
    scaler=std, feature_values=seq_test_X, output_values=seq_test_Y
)
testing_loader = DataLoader(dataset=testing_dataset, batch_size=16, shuffle=True)


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Build the regressor for 5 input channels and train it for 15 epochs.
# `set_epoch_loss` receives the per-batch training losses.
cnn_model = OneDCNN(5)
set_epoch_loss = fit_model(
    cnn_model,
    training_loader,
    device,
    nb_epochs=15,
)

### Creating the dataset loader for AE model and the AE model

In [None]:
class AE_all(nn.Module):
    """Fully connected autoencoder with a 16-dimensional bottleneck.

    The encoder narrows ``input_values -> 64 -> 32 -> 16`` and the decoder
    mirrors it back to ``input_values``, with ReLU between the linear layers
    and no activation on either final layer.

    Args:
        input_values: Width of the input (and of the reconstruction).
    """

    def __init__(self, input_values):
        super().__init__()
        self.features = input_values

        encoder_layers = [
            nn.Linear(self.features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, self.features),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))
    
class ActivationsDataset(Dataset):
    """Dataset whose target is the sample itself (autoencoder training).

    Args:
        feature_values: Samples, indexed along their first axis.
    """

    def __init__(self, feature_values):
        self.features = feature_values

    def __len__(self):
        """Return the number of samples."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return sample ``idx`` twice: once as input, once as target."""
        return self.features[idx], self.features[idx]

def fit_ae_model(ae_model, train_loader, device, nb_epochs = 1):
    """Train ``ae_model`` with Adam (lr 1e-3) against an MSE objective.

    The model is moved to ``device`` and put in training mode. The model
    output is compared with the batch target as-is, without reshaping.

    Args:
        ae_model: Module to optimise.
        train_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device on which the model and the batches are placed.
        nb_epochs: Number of passes over ``train_loader``.

    Returns:
        List with one loss value per processed batch, across all epochs.
    """
    adam = torch.optim.Adam(ae_model.parameters(), lr=1e-3)
    mse = nn.MSELoss()
    ae_model = ae_model.to(device)
    ae_model.train()

    loss_history = []
    for epoch in range(nb_epochs):
        set_losses = []
        for batch_idx, batch in enumerate(train_loader):
            inputs, targets = batch
            adam.zero_grad()

            reconstruction = ae_model(inputs.float().to(device))
            batch_loss = mse(reconstruction, targets.float().to(device))
            set_losses.append(batch_loss.detach().cpu().numpy())
            # Report the running mean loss of the epoch every 5000 batches.
            if batch_idx % 5000 == 0:
                print(f"Epoch: {epoch+1}/{nb_epochs}, Batch: {batch_idx}, Loss: {np.mean(set_losses)}")

            batch_loss.backward()
            adam.step()

        loss_history += set_losses

    return loss_history

def evaluate_ae(ae_model, test_loader, device):
    """Reconstruct every batch of ``test_loader`` without tracking gradients.

    Args:
        ae_model: Module to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device the feature batches are moved to.

    Returns:
        ``(predicted_values, output_values)``: the reconstructions (on CPU)
        and the targets, each concatenated along dim 0 in loader order.
        Both are empty tensors when the loader yields nothing.
    """
    # Batches are collected and concatenated once at the end; calling
    # torch.cat inside the loop would re-copy all previous results each time.
    # Each list starts with the empty float tensor the accumulators used to
    # start from, so empty loaders and dtype promotion behave as before.
    reconstructions = [torch.Tensor()]
    targets = [torch.Tensor()]

    ae_model.eval()
    with torch.no_grad():
        for X_test, y_test in test_loader:
            pred_values = ae_model(X_test.float().to(device))
            reconstructions.append(pred_values.cpu().detach())
            targets.append(y_test)

    predicted_values = torch.cat(reconstructions, dim=0)
    output_values = torch.cat(targets, dim=0)

    return predicted_values, output_values

def evaluate_with_hooks(model, data_loader, activation_layer, device, nb_examples = 3000):
    """Run ``model`` over ``data_loader`` and record one child layer's outputs.

    A forward hook on the direct child named ``activation_layer`` captures
    that layer's output for every batch. The hook is removed again before
    returning, so repeated calls do not stack hooks on the model.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        data_loader: Iterable yielding ``(features, targets)`` batches.
        activation_layer: Name of a direct child of ``model``.
        device: Device the feature batches are moved to.
        nb_examples: Accepted for backward compatibility but not used;
            every batch of ``data_loader`` is processed.

    Returns:
        ``(set_activations, predicted_values, output_values)`` where
        ``set_activations`` is a list with one detached activation tensor per
        batch, ``predicted_values`` is a flat list with one prediction per
        sample, and ``output_values`` is a list with one target array per
        batch.

    Raises:
        KeyError: If ``model`` has no direct child named ``activation_layer``.
    """
    set_activations = []
    activation = {}
    predicted_values, output_values = [], []

    def hook(module, inputs, output):
        activation[activation_layer] = output.detach()

    children = dict(model.named_children())
    if activation_layer not in children:
        raise KeyError(f"model has no child layer named {activation_layer!r}")
    handle = children[activation_layer].register_forward_hook(hook)

    model.eval()
    try:
        # Inference only: no autograd graph is needed for captured outputs.
        with torch.no_grad():
            for X_test, y_test in tqdm(data_loader):
                X_test = X_test.float().to(device)
                output = model(X_test)
                predicted_values.extend(output.reshape(-1,).detach().cpu().numpy())
                output_values.append(y_test.detach().numpy())
                set_activations.append(activation[activation_layer])
    finally:
        # Always detach the hook, even if a forward pass raised.
        handle.remove()

    return set_activations, predicted_values, output_values

### Retrieving activations from the model

In [None]:
# Record the conv1 output of the trained CNN for every training batch.
activations_train, _, _ = evaluate_with_hooks(cnn_model, training_loader, 'conv1', device, nb_examples = 3000)

# Join the per-batch activations with one concatenation (growing a tensor
# inside a loop re-copies it every step), then flatten each sample to a vector.
tensor_activations_train = torch.cat([s.detach().cpu() for s in activations_train], dim = 0)
tensor_activations_train = tensor_activations_train.reshape(tensor_activations_train.shape[0], -1)

act_full_train = ActivationsDataset(tensor_activations_train)
act_full_train_loader = DataLoader(act_full_train, batch_size = 16, shuffle = True)

# Size the autoencoder from the flattened activation width instead of a
# literal (this is 160 for the 32-channel, length-5 conv1 output).
ae_model_full = AE_all(tensor_activations_train.shape[1])
set_ae_epochs = fit_ae_model(ae_model_full, act_full_train_loader, device, nb_epochs = 10)

### Drift Simulation

In [None]:
# Extract the flattened activations of the convolutional layer ('conv1')
def retrieve_activation(X_test, cnn_model):
    """Run one batch through ``cnn_model`` and capture the ``conv1`` output.

    Args:
        X_test: Input batch. It is cast to float and moved to the device of
            the model's parameters before the forward pass.
        cnn_model: Module with a direct child named ``conv1``; it is switched
            to eval mode.

    Returns:
        ``(set_features_test, cnn_output)``: the detached ``conv1`` output
        flattened to ``(batch, -1)``, and the model output for the batch.

    Raises:
        KeyError: If ``cnn_model`` has no direct child named ``conv1``.
    """
    activation_layer = 'conv1'
    captured = {}

    def hook(module, inputs, output):
        captured[activation_layer] = output.detach().clone()

    children = dict(cnn_model.named_children())
    if activation_layer not in children:
        raise KeyError(f"model has no child layer named {activation_layer!r}")

    # Follow the model's own device rather than relying on a global variable.
    first_param = next(cnn_model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    cnn_model.eval()
    handle = children[activation_layer].register_forward_hook(hook)
    try:
        # Inference only: the captured activation is detached anyway.
        with torch.no_grad():
            cnn_output = cnn_model(X_test.float().to(model_device))
    finally:
        # Always detach the hook, even if the forward pass raised.
        handle.remove()

    # The captured activation is already a tensor, so it is used directly
    # instead of being re-wrapped through the legacy torch.Tensor constructor.
    set_features_test = captured[activation_layer]
    set_features_test = set_features_test.reshape(set_features_test.shape[0], -1)

    return set_features_test, cnn_output

# Retrieve the latent (encoder) representation of the conv1 activations
def get_low_rank_encoded_values(X_test, cnn_model, ae_model):
    """Encode the ``conv1`` activations of a batch with an autoencoder.

    Args:
        X_test: Input batch for ``cnn_model``.
        cnn_model: CNN whose ``conv1`` activations are extracted.
        ae_model: Autoencoder exposing an ``encoder`` sub-module; it is
            switched to eval mode. This argument is now actually used;
            previously the global ``ae_model_full`` was used regardless of
            what was passed in.

    Returns:
        Encoder output for the flattened activations, on ``ae_model``'s device.
    """
    set_features_test, _ = retrieve_activation(X_test, cnn_model)

    # Place the activations wherever the autoencoder's parameters live.
    first_param = next(ae_model.parameters(), None)
    ae_device = first_param.device if first_param is not None else torch.device('cpu')

    ae_model.eval()
    with torch.no_grad():
        low_rank_space = ae_model.encoder(set_features_test.float().to(ae_device))

    return low_rank_space

# Drift simulation: feed clean batches, then batches with one drifted feature,
# and collect the autoencoder latent codes obtained after neutralising each
# feature in turn.
def run_experiment(f):
    """Simulate a drift on feature ``f`` over one pass of ``testing_loader``.

    Relies on the module-level ``testing_loader``, ``std``, ``cnn_model``,
    ``ae_model_full`` and ``device``. ``testing_loader`` is built with
    ``shuffle=True``, so every call sees the batches in a different order.

    Batches 0..119 are used unchanged. For batches 120..239 the column ``f``
    of every window is overwritten in place with one constant value before
    the batch is processed. The loop stops when batch index 240 is reached.

    Args:
        f: Index (along the last axis of a window) of the feature to drift.

    Returns:
        Tuple ``(drifted_encoded_values, set_encoded_values,
        cnn_model_output, orig_values)``:
        ``drifted_encoded_values`` is a list with one dict per drifted batch,
        mapping feature index ``0..4`` to the latent code obtained after
        zeroing that feature. ``set_encoded_values`` holds the latent codes
        of the clean batches concatenated along dim 0. ``cnn_model_output``
        is the flattened CNN prediction for every processed batch.
        ``orig_values`` holds the matching targets.
    """
    # NOTE(review): predicted_values, output_values and starting_value are
    # assigned but never read in this function.
    predicted_values, output_values = torch.Tensor(), torch.Tensor()
    cnn_model_output, orig_values = torch.Tensor(), torch.Tensor()
    starting_value = 16 * 120
    feature_to_replaced = f
    last_batch = []
    # explanation = torch.Tensor()
    set_encoded_values = torch.Tensor()
    drifted_encoded_values = []

    with torch.no_grad():
        for idx, (X_test, y_test) in tqdm(enumerate(testing_loader)):
            if idx < 120:
                # Clean phase: remember the most recent unmodified batch.
                last_batch = [X_test, y_test]

            else:
                # Drift phase: overwrite feature `f` of every window with a
                # constant, then standardise it with the scaler's mean and
                # standard deviation like the rest of the batch.
                # NOTE(review): the raw drift value is mean + 2 * sqrt(mean_);
                # if "mean + 2 standard deviations" was intended this should
                # use std.var_ instead - confirm before changing results.
                for idx_n in range(X_test.shape[0]):
                    replacement_values = torch.zeros_like(X_test[idx_n][:, feature_to_replaced]) + (std.mean_[feature_to_replaced] + 2 * np.sqrt(std.mean_[feature_to_replaced]))
                    replacement_values = (replacement_values - std.mean_[feature_to_replaced]) / (np.sqrt(std.var_[feature_to_replaced]))
                    X_test[idx_n][:, feature_to_replaced] = replacement_values.clone()

            # Stop before processing batch 240, i.e. after 120 drifted batches.
            if idx == 240:
                break

            # Forward the (possibly drifted) batch and keep prediction and target.
            set_features_test, cnn_output = retrieve_activation(X_test, cnn_model)
            orig_values = torch.cat((orig_values, y_test), dim = 0)
            cnn_model_output = torch.cat((cnn_model_output, cnn_output.reshape(-1,).detach().cpu()))
            
            if idx < 120: 
                # Clean phase: store the latent code of the batch as-is.
                ae_model_full.eval()
                with torch.no_grad():
                    set_features_test = set_features_test.float().to(device)
                    low_rank_space = ae_model_full.encoder(set_features_test)
                    set_encoded_values = torch.cat((set_encoded_values, low_rank_space.cpu().detach()), dim = 0)
            else:
                # Drift phase: for each of the 5 features, zero that feature in
                # a copy of the drifted batch and record the resulting latent
                # code. Batches are standardised by the dataset, so zero
                # presumably stands for the training mean of the feature.
                d_values = {}
                for i in range(5): # The feature values
                    new_X = X_test.clone()
                    # last_batch is only used as a shape/dtype template for
                    # the zero column.
                    instance_batch = last_batch[0]
                    for idx_n in range(X_test.shape[0]):
                        replacement_values = torch.zeros_like(instance_batch[idx_n][:, i])
                        new_X[idx_n][:, i] = replacement_values.clone()
                    d_values[i] = get_low_rank_encoded_values(new_X, cnn_model, ae_model_full).cpu().detach()
                drifted_encoded_values.append(d_values)
                
    return drifted_encoded_values, set_encoded_values, cnn_model_output, orig_values

In [None]:
# Draw a single batch from the (shuffled) test loader.
batch_iterator = iter(testing_loader)
X_test, y_test = next(batch_iterator)

In [None]:
# The training data representative: an all-zero batch with the shape and
# dtype of one test batch. Inputs are standardised by the dataset, so zero
# presumably corresponds to the training mean of every feature.
new_X = torch.zeros_like(X_test)

In [None]:
# The latent representative of the training data: push the all-zero batch
# through the CNN, flatten its conv1 activations and encode them.
set_encoded_values = torch.Tensor()
set_features_test, _ = retrieve_activation(new_X, cnn_model)

ae_model_full.eval()
with torch.no_grad():
    set_features_test = set_features_test.float().to(device)
    low_rank_space = ae_model_full.encoder(set_features_test)

# Keep the codes on the CPU; they are compared with scipy/numpy later on.
enc_vals = torch.cat([set_encoded_values, low_rank_space.detach().cpu()], dim=0)

In [None]:
# Simulation of the drifting feature 0 (temperature in our case)

# Results of the 30 repetitions, keyed by 'exp_1' .. 'exp_30'.
all_drift_enc_f0 = {}
all_nodrift_enc_f0 = {}
all_cnn_output_f0 = {}
all_orig_values_f0 = {}

for exp in tqdm(range(1, 31)):
    # Each repetition draws its own shuffled batches from the test loader.
    drift_enc, no_drift_enc, cnn_output, orig_values = run_experiment(0)
    all_drift_enc_f0[f'exp_{exp}'] = drift_enc
    all_nodrift_enc_f0[f'exp_{exp}'] = no_drift_enc
    all_cnn_output_f0[f'exp_{exp}'] = cnn_output
    all_orig_values_f0[f'exp_{exp}'] = orig_values

### Calculate Minkowski Distance and detection accuracy

In [None]:
from scipy.spatial.distance import minkowski

# set_dist_v[experiment][feature][batch] is the Minkowski distance (p = 16)
# between the latent training representative and the mean latent code of a
# drifted batch after zeroing `feature`.
set_dist_v = []

# The reference point is the same for every comparison: compute it once and
# hand scipy a plain numpy array.
reference_code = torch.mean(enc_vals, dim=0).numpy()

for exp in all_nodrift_enc_f0:
    exp_drift_values = all_drift_enc_f0[exp]
    set_encoded_mean = [
        [
            minkowski(
                reference_code,
                torch.mean(batch_codes[i], dim=0).numpy(),
                16,
            )
            for batch_codes in exp_drift_values
        ]
        for i in range(5)
    ]
    set_dist_v.append(set_encoded_mean)

In [None]:
# For every experiment, pick per batch the feature whose zeroing gives the
# smallest distance. The accuracy is the share of batches for which that
# feature is index 0.
set_dist_v = np.array(set_dist_v)
set_accuracies = [
    np.sum(np.argmin(exp_dist, axis=0) == 0) / exp_dist.shape[1]
    for exp_dist in set_dist_v
]
print(np.mean(set_accuracies), np.std(set_accuracies))

### An example of calculating average distance (this is with temperature) -- Equivalent to Figure 3

In [None]:
# Average the distances over the batches of each experiment, giving one row
# per experiment and one column per feature.
set_means_values = np.array(
    [np.mean(exp_dist, axis=1) for exp_dist in set_dist_v]
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style='darkgrid')

fig, ax = plt.subplots(figsize=(15, 9))

markers = ['x', 'o', '^', '*', '+']
set_features = ['temperature', 'humidity', 'pressure', 'CO2', 'PIR']

# One curve per feature: its mean distance across the experiments.
for idx, feature_curve in enumerate(set_means_values.T):
    ax.plot(
        feature_curve,
        marker=markers[idx],
        label=set_features[idx],
        markevery=1,
        ms=20,
    )

ax.tick_params(labelsize=35)
ax.legend(set_features, fontsize=25)
ax.set_xlabel("Experiment Number", fontsize=40)
ax.set_ylabel("Minkowski Distance", fontsize=40)
ax.set_title("Distance of features with drifting temperature", fontsize=40)