In [1]:
from captum.attr import IntegratedGradients, DeepLift

import random
import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.preprocessing import RobustScaler
from sklearn.utils import resample
from sklearn import cluster
from sklearn.ensemble import IsolationForest

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch

from wquantiles import quantile_1D

import pickle

import matplotlib.pyplot as plt

import pdb

class ConfigStruct:
    """Attribute-style view over a set of keyword settings.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name, e.g. ``ConfigStruct(epochs=3).epochs == 3``.
    """

    def __init__(self, **entries):
        # Copy each keyword argument onto the instance as an attribute.
        for key, value in entries.items():
            setattr(self, key, value)

In [2]:
# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

Using cuda


In [3]:
# Run settings. The boolean *_filter / *_test / feature_agglomeration flags
# switch the optional preprocessing cells further down on or off.
config = {
    "epochs": 100,
    "batch_size": 256,  # 2048
    "learning_rate": 0.008,  # 0.008
    "weight_decay": 1e-5,
    "dropout": 0.05,
    "shuffle": True,
    "test_size": 0.2,
    "split_seed": 42,
    "random_seed": 1234,
    "top10_apps_filter": False,
    "only_duplicates": False,
    "meancount75_filter": False,
    "starttime_filter": False,
    "isolation_forest_test": False,
    "feature_agglomeration": False,
    "feature_agglomeration_nclusters": 64,
    "stratified_split": False,
    "smooth_l1_loss_beta": 1,
}

In [4]:
# Wrap the settings dict so values can be read as attributes (config.epochs).
# This rebinds `config` from a dict to a ConfigStruct, so re-running this cell
# on its own fails: `**config` needs the dict produced by the previous cell.
config = ConfigStruct(**config)

In [5]:
# Trained network checkpoint.
MODEL_FILENAME = "Model_D_(finetuned)"
MODEL_DIR = "../models/"
MODEL_PATH = (Path(MODEL_DIR) / MODEL_FILENAME).with_suffix(".pth")

# Input dataset (CSV).
DATASET_DIR = "../data/"
DATASET_NAME = "theta_posix_with_apps_no_negative_outliers_no_time_witherrors"
DATASET_PATH = (Path(DATASET_DIR) / DATASET_NAME).with_suffix(".csv")

# Pickled preprocessing objects that belong to the checkpoint above.
PICKLE_DIR = "../models/pickle"
FEATUREAGGLO_NAME = "Model_D_(finetuned)_featureagglomeration"
FEATUREAGGLO_PATH = (Path(PICKLE_DIR) / FEATUREAGGLO_NAME).with_suffix(".pkl")
ROBUSTSCALER_NAME = "Model_D_(finetuned)_robustscaler"
ROBUSTSCALER_PATH = (Path(PICKLE_DIR) / ROBUSTSCALER_NAME).with_suffix(".pkl")
ISOLATIONFOREST_NAME = "Model_D_(finetuned)_isolationforest"
ISOLATIONFOREST_PATH = (Path(PICKLE_DIR) / ISOLATIONFOREST_NAME).with_suffix(".pkl")

In [6]:
# Load the dataset CSV located at DATASET_PATH into a DataFrame and preview
# its first rows. Later cells filter and strip columns from this same name.
df_theta_posix = pd.read_csv(DATASET_PATH)
df_theta_posix.head()

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,WRITE_4M_10M,WRITE_10M_100M,WRITE_100M_1G,WRITE_1G_PLUS,rank,POSIX_TOTAL_TIME,nprocs,exe,mean,error
0,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,29.684507,64,cp2k.psmp,31.913841,-2.229334
1,194,172,0,34,1499,6,54,0,0,0,...,0,0,0,0,0,28.155456,16,pw.x,11.403251,16.752206
2,46037,40869,0,4713059,1719073,1271774,5429,0,0,0,...,0,0,0,0,-1,71229.030892,128,train.x-2.0.3-ifort_intelmpi,,0.0
3,194,172,0,34,1492,6,54,0,0,0,...,0,0,0,0,2,1.70764,16,pw.x,6.519022,-4.811382
4,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,34.010366,64,cp2k.psmp,33.63173,0.378636


In [7]:
if config.only_duplicates:
    # Keep only the rows that carry a value in the "mean" column.
    has_mean = df_theta_posix["mean"].notna()
    df_theta_posix = df_theta_posix[has_mean]

In [8]:
if config.top10_apps_filter:
    # Restrict the data to the ten most frequent applications.
    # The application name is stored in the "exe" column (the column that is
    # later dropped as "application names"); the frame shows no "app" column,
    # so grouping on "app" would fail as soon as this flag is enabled.
    # value_counts() is already sorted by descending frequency.
    apps_count_series = df_theta_posix["exe"].value_counts()
    top10_apps = apps_count_series.index[:10]
    df_theta_posix = df_theta_posix[df_theta_posix["exe"].isin(top10_apps)]

In [9]:
if config.starttime_filter:
    # Keep the rows whose start time lies strictly below the first quartile,
    # then discard the start-time column so it is not used as a feature.
    start_time_cutoff = df_theta_posix["start_time_sec"].quantile(0.25)
    starts_early = df_theta_posix["start_time_sec"] < start_time_cutoff
    df_theta_posix = df_theta_posix.loc[starts_early].drop(columns=["start_time_sec"])

In [10]:
if config.meancount75_filter:
    # For every row: how many non-null "mean" values its "mean" group has
    # (rows with a NaN mean form their own group because dropna=False).
    mean_counts = (
        df_theta_posix
        .groupby("mean", dropna=False)["mean"]
        .transform("count")
    )
    # 75th percentile taken over the distinct group sizes, not over the rows.
    distinct_counts = pd.Series(mean_counts.unique())
    mean_counts_quantile = distinct_counts.quantile(0.75)
    # Keep the rows whose group size is above that threshold.
    kept_index = mean_counts[mean_counts > mean_counts_quantile].index
    df_theta_posix = df_theta_posix[df_theta_posix.index.isin(kept_index)]

In [11]:
# Display the frame as it stands after the optional filters above.
df_theta_posix

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,WRITE_4M_10M,WRITE_10M_100M,WRITE_100M_1G,WRITE_1G_PLUS,rank,POSIX_TOTAL_TIME,nprocs,exe,mean,error
0,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,29.684507,64,cp2k.psmp,31.913841,-2.229334
1,194,172,0,34,1499,6,54,0,0,0,...,0,0,0,0,0,28.155456,16,pw.x,11.403251,16.752206
2,46037,40869,0,4713059,1719073,1271774,5429,0,0,0,...,0,0,0,0,-1,71229.030892,128,train.x-2.0.3-ifort_intelmpi,,0.000000
3,194,172,0,34,1492,6,54,0,0,0,...,0,0,0,0,2,1.707640,16,pw.x,6.519022,-4.811382
4,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,34.010366,64,cp2k.psmp,33.631730,0.378636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218106,618,0,0,15040,10450,21,1555,0,0,0,...,0,0,0,0,-1,6.904750,64,vasp_ncl,,0.000000
218107,68,-22,-22,7829,0,0,116,0,0,-22,...,0,0,0,0,-1,1.194060,16,pw.x,1.221651,-0.027592
218108,34713,34116,0,8745,49849,240,504,-335,0,0,...,0,0,0,0,-1,220.660687,96,qmcpack,,0.000000
218109,68,-22,-22,7829,0,0,116,0,0,-22,...,0,0,0,0,-1,1.274516,16,pw.x,1.221651,0.052865


In [12]:
# The application-name column ("exe") is not a model input; remove it.
df_theta_posix = df_theta_posix.drop(columns=['exe'])
df_theta_posix.head()

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,WRITE_1M_4M,WRITE_4M_10M,WRITE_10M_100M,WRITE_100M_1G,WRITE_1G_PLUS,rank,POSIX_TOTAL_TIME,nprocs,mean,error
0,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,0,29.684507,64,31.913841,-2.229334
1,194,172,0,34,1499,6,54,0,0,0,...,0,0,0,0,0,0,28.155456,16,11.403251,16.752206
2,46037,40869,0,4713059,1719073,1271774,5429,0,0,0,...,500,0,0,0,0,-1,71229.030892,128,,0.0
3,194,172,0,34,1492,6,54,0,0,0,...,0,0,0,0,0,2,1.70764,16,6.519022,-4.811382
4,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,0,34.010366,64,33.63173,0.378636


In [13]:
# Separate the POSIX_TOTAL_TIME column from the input features; it is used
# as the target tensor further down. (An earlier comment called this
# "bandwidth", but the column removed here is POSIX_TOTAL_TIME.)
# pop() removes the column from df_theta_posix in place, so this cell
# cannot be re-run without reloading the data.
POSIX_TOTAL_TIME_df = df_theta_posix.pop('POSIX_TOTAL_TIME')
POSIX_TOTAL_TIME_df.head()

0       29.684507
1       28.155456
2    71229.030892
3        1.707640
4       34.010366
Name: POSIX_TOTAL_TIME, dtype: float64

In [14]:
# Pull the duplicate-set mean out of the feature frame (pop removes the
# column in place) and discard the "error" column as well.
dup_set_means_series = df_theta_posix.pop('mean')
df_theta_posix = df_theta_posix.drop(columns=["error"])
df_theta_posix.head()

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,WRITE_1K_10K,WRITE_10K_100K,WRITE_100K_1M,WRITE_1M_4M,WRITE_4M_10M,WRITE_10M_100M,WRITE_100M_1G,WRITE_1G_PLUS,rank,nprocs
0,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,0,0,0,0,64
1,194,172,0,34,1499,6,54,0,0,0,...,0,0,0,0,0,0,0,0,0,16
2,46037,40869,0,4713059,1719073,1271774,5429,0,0,0,...,3546,0,0,500,0,0,0,0,-1,128
3,194,172,0,34,1492,6,54,0,0,0,...,0,0,0,0,0,0,0,0,2,16
4,7891771,7861736,0,3,424661,60035,90055,0,0,0,...,0,0,0,0,0,0,0,0,0,64


In [15]:
# Fix seeds for reproducibility: seed Python, NumPy and PyTorch (CPU and
# every CUDA device) with the same configured value, in that order.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(config.random_seed)

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [16]:
if config.isolation_forest_test:
    # `outlier_labels` is not produced by any earlier cell, so on a fresh
    # kernel this branch used to stop with a NameError. Compute the labels
    # here from the pickled isolation forest that ISOLATIONFOREST_PATH points to.
    # NOTE(review): assumes the pickled estimator was fitted on the same raw
    # feature columns that df_theta_posix holds at this point -- confirm
    # against the training notebook.
    # SECURITY: pickle.load can execute arbitrary code; only load files you trust.
    with open(ISOLATIONFOREST_PATH, 'rb') as f:
        isolation_forest = pickle.load(f)
    # scikit-learn's IsolationForest.predict labels inliers +1 and outliers -1.
    outlier_labels = isolation_forest.predict(df_theta_posix)

    # Keep only the inliers, applying the same positional mask to the features,
    # the target and the duplicate-set means so that they stay aligned.
    df_theta_posix = df_theta_posix.reset_index()[outlier_labels == 1].drop(["index"],axis=1)
    POSIX_TOTAL_TIME_df = POSIX_TOTAL_TIME_df.reset_index()[outlier_labels == 1].drop(["index"],axis=1)
    dup_set_means_series = dup_set_means_series.reset_index()[outlier_labels == 1].drop(["index"],axis=1)

In [17]:
# Optionally reduce the raw features with the pickled feature-agglomeration
# transformer. The pickle is only opened when the option is enabled, so a
# missing file no longer breaks runs that do not use agglomeration.
if config.feature_agglomeration:
    # SECURITY: pickle.load can execute arbitrary code; only load files you trust.
    with open(FEATUREAGGLO_PATH,'rb') as f:
        agglo = pickle.load(f)
    theta_posix = agglo.transform(df_theta_posix)
else:
    # Keep the name defined so later references do not hit a NameError.
    agglo = None
    theta_posix = df_theta_posix

In [18]:
# Apply the scaler that was pickled alongside the model to the input features.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with ROBUSTSCALER_PATH.open('rb') as scaler_file:
    scaler = pickle.load(scaler_file)
theta_posix_scaled = scaler.transform(theta_posix)
theta_posix_scaled

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


array([[ 1.07328932e+01,  1.07072829e+01,  0.00000000e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-1.56821409e+03, -1.82439169e+03,  0.00000000e+00, ...,
         0.00000000e+00,  0.00000000e+00, -4.80000000e+01],
       [-1.55904182e+03, -1.81489192e+03,  0.00000000e+00, ...,
         0.00000000e+00, -1.00000000e+00,  6.40000000e+01],
       ...,
       [-1.56130752e+03, -1.81646825e+03,  0.00000000e+00, ...,
         0.00000000e+00, -1.00000000e+00,  3.20000000e+01],
       [-1.56823930e+03, -1.82443697e+03, -2.20000000e+01, ...,
         0.00000000e+00, -1.00000000e+00, -4.80000000e+01],
       [-1.56823609e+03, -1.82443721e+03, -2.30000000e+01, ...,
         0.00000000e+00, -1.00000000e+00, -4.80000000e+01]])

In [19]:
# Scaled features as a float32 tensor on the selected device.
tensor_X = torch.tensor(theta_posix_scaled, dtype=torch.float32).to(device)

# Target values as a float32 column vector (one row per sample).
target_values = POSIX_TOTAL_TIME_df.values
tensor_y = torch.tensor(target_values, dtype=torch.float32).view(-1, 1).to(device)

# Pair features with targets and serve them in config.batch_size batches.
test_dataset = TensorDataset(tensor_X, tensor_y)
test_dataloader = DataLoader(test_dataset, batch_size=config.batch_size)

In [20]:
# Input width: the number of agglomerated clusters when agglomeration is on,
# otherwise the 89 raw features.
if config.feature_agglomeration:
    n_input_features = config.feature_agglomeration_nclusters
else:
    n_input_features = 89

# Three hidden stages of Linear -> Dropout -> ReLU followed by a single-output
# Linear layer. Layers are created in the same order as before, so the
# positions inside the Sequential (0, 3, 6, 9 for the Linear layers) are unchanged.
layer_widths = [n_input_features, 512, 256, 128]
hidden_layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    hidden_layers.append(nn.Linear(fan_in, fan_out))
    hidden_layers.append(nn.Dropout(p=config.dropout))
    hidden_layers.append(nn.ReLU())

model = nn.Sequential(*hidden_layers, nn.Linear(layer_widths[-1], 1)).to(device)

In [21]:
# Read the checkpoint onto the active device and copy its weights into the model.
target_device = torch.device(device)
checkpoint = torch.load(MODEL_PATH, map_location=target_device)
model_weights = checkpoint['model_state_dict']
model.load_state_dict(model_weights)
# Switch to evaluation mode; eval() returns the model, which the cell displays.
model.eval()

Sequential(
  (0): Linear(in_features=89, out_features=512, bias=True)
  (1): Dropout(p=0.05, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=256, bias=True)
  (4): Dropout(p=0.05, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=256, out_features=128, bias=True)
  (7): Dropout(p=0.05, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=128, out_features=1, bias=True)
)

In [22]:
# Number of rows attributed per chunk, and the bounds of the first chunk.
stride = 30000
lower, upper = 0, stride

In [23]:
# Empty frames that accumulate the per-chunk attributions: one for
# Integrated Gradients (ig) and one for DeepLift (dl). The attribution loop
# appends to them with pd.concat.
df_ig_attr_annotated_full = pd.DataFrame([])
df_dl_attr_annotated_full = pd.DataFrame([])

In [24]:
# Captum attribution objects, both wrapping the loaded model; their
# .attribute() methods are called chunk by chunk in the loop below.
ig = IntegratedGradients(model)
deep_lift = DeepLift(model)

In [25]:
# Save the (unscaled) feature rows that the attributions refer to.
# Create the output directory first so the write does not fail on a fresh
# checkout where ./captum does not exist yet.
Path("./captum").mkdir(parents=True, exist_ok=True)

# reset_index() keeps the previous index as an "index" column in the CSV.
test_data = df_theta_posix.reset_index()
test_data.to_csv("./captum/Theta_captum_test_data.csv")

In [26]:
# Compute Integrated Gradients and DeepLift attributions for every row of
# tensor_X, `stride` rows at a time.
#
# Changes compared with the earlier while-loop:
#   * chunk bounds are derived from range(), so the cell no longer depends on
#     the `lower`/`upper` values left behind by another cell and can be re-run;
#   * the last (or only) chunk is clamped to the number of rows, so inputs
#     with fewer than `stride` rows no longer fail in a reshape;
#   * the results are rebuilt from scratch with a single pd.concat instead of
#     growing a frame inside the loop, so re-running does not duplicate rows;
#   * column labels match the width of the model input even when feature
#     agglomeration replaced the raw columns.
n_rows = tensor_X.shape[0]

if config.feature_agglomeration:
    # Agglomerated inputs no longer correspond to the raw column names.
    feature_names = [f"agglo_cluster_{i}" for i in range(tensor_X.shape[1])]
else:
    feature_names = list(df_theta_posix.columns)

ig_attr_chunks = []
dl_attr_chunks = []

for lower in range(0, n_rows, stride):
    upper = min(lower + stride, n_rows)
    print(f"lower {lower} to upper {upper}")
    ex = tensor_X[lower:upper]
    print(ex.shape)

    print("Integrated Gradients")
    ig_attr = ig.attribute(ex, n_steps=50)
    df_ig_attr_annotated_curr = pd.DataFrame(ig_attr.cpu().detach().numpy(), columns=feature_names)
    ig_attr_chunks.append(df_ig_attr_annotated_curr)

    print("Deep Lift")
    deep_lift_attr = deep_lift.attribute(ex)
    df_dl_attr_annotated_curr = pd.DataFrame(deep_lift_attr.cpu().detach().numpy(), columns=feature_names)
    dl_attr_chunks.append(df_dl_attr_annotated_curr)

if ig_attr_chunks:
    # As before, each chunk keeps its own 0..len-1 index; the save step
    # resets the index before writing.
    df_ig_attr_annotated_full = pd.concat(ig_attr_chunks)
    df_dl_attr_annotated_full = pd.concat(dl_attr_chunks)
else:
    # No rows to attribute: pd.concat([]) would raise, so return empty frames.
    df_ig_attr_annotated_full = pd.DataFrame(columns=feature_names)
    df_dl_attr_annotated_full = pd.DataFrame(columns=feature_names)

lower 0 to upper 30000
torch.Size([30000, 89])
Integrated Gradients
Deep Lift


  gradient_mask = apply_gradient_requirements(inputs_tuple)
               activations. The hooks and attributes will be removed
            after the attribution is finished
  return func(*args, **kwargs)


lower 30000 to upper 60000
torch.Size([30000, 89])
Integrated Gradients
Deep Lift
lower 60000 to upper 90000
torch.Size([30000, 89])
Integrated Gradients
Deep Lift
lower 90000 to upper 120000
torch.Size([30000, 89])
Integrated Gradients
Deep Lift
lower 120000 to upper 150000
torch.Size([30000, 89])
Integrated Gradients
Deep Lift
lower 150000 to upper 180000
torch.Size([30000, 89])
Integrated Gradients
Deep Lift
lower 180000 to upper 210000
torch.Size([30000, 89])
Integrated Gradients
Deep Lift
lower 210000 to upper 218111
torch.Size([8111, 89])
Integrated Gradients
Deep Lift


In [27]:
# Write both attribution tables to CSV with a fresh 0..n-1 row index.
for attr_frame, csv_path in (
    (df_ig_attr_annotated_full, "./captum/Theta_captum_ig_result.csv"),
    (df_dl_attr_annotated_full, "./captum/Theta_captum_dl_result.csv"),
):
    attr_frame.reset_index(drop=True).to_csv(csv_path)