In [1]:
import torch.nn as nn
import torch
from pathlib import Path
from captum.attr import IntegratedGradients, DeepLift
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import time

import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

import matplotlib.pyplot as plt

import pdb

In [2]:
# Hyper-parameters and data-handling switches shared by the cells below.
# Within this notebook only dropout, batch_size, shuffle, test_size,
# split_seed and stratified_split are read; the remaining keys are kept
# alongside them but are not referenced by any visible cell.
config = dict(
    epochs=30,
    batch_size=2048,
    learning_rate=0.008,
    weight_decay=1e-5,
    dropout=0.05,
    shuffle=True,
    test_size=0.2,
    split_seed=42,
    random_seed=1234,
    stratified_split=False,
    smooth_l1_loss_beta=1,
)

In [3]:
# Pick the compute device by priority: CUDA first, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [4]:
# Location of the input CSV; the ".csv" suffix is appended to the bare dataset name.
DATASET_DIR = r"../data/"
DATASET_NAME = "blue_waters_posix_with_paths_no_negative_outliers_no_time"
DATASET_PATH = (Path(DATASET_DIR) / DATASET_NAME).with_suffix(".csv")

# Location of the trained checkpoint that the model-loading cell looks for.
# NOTE(review): "../modelst" looks like a typo (perhaps "../models") — the
# recorded run never printed "Loading pretrained model...", which suggests no
# file was found at this path. Confirm the intended directory.
MODEL_DIR = r"../modelst"
MODEL_FILENAME = "Small_net_SpEC_sampling_no_IQR.tar"
MODEL_PATH = Path(MODEL_DIR) / MODEL_FILENAME

In [5]:
# Read the dataset and discard the "path" column, which is not used as a feature.
df_blue_waters_posix = pd.read_csv(DATASET_PATH).drop(columns=["path"])
df_blue_waters_posix.head()

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,WRITE_1M_4M,WRITE_4M_10M,WRITE_10M_100M,WRITE_100M_1G,WRITE_1G_PLUS,rank,POSIX_TOTAL_TIME,nprocs,exe,lustre
0,1280,-1281,-1281,1807,0,0,2561,0,0,-1281,...,0,0,0,0,0,184,0.970498,213,./Hsigma,1
1,168420,-5232,-5232,7505724,5401751,11345341,394840,0,1,-5232,...,0,0,0,0,0,0,147.540409,80,./SpEC,1
2,62567,-1477,-1477,2300215,441912,1214829,280660,0,1,-1477,...,0,0,0,0,0,-1,24.994056,48,./SpEC,1
3,2366,-2367,-2367,3342,0,0,4733,0,0,-2367,...,0,0,0,0,0,127,0.487127,394,./Hsigma,1
4,8709,-288,-288,1521073,6331589,2013343,28430,0,0,-288,...,756,0,0,0,0,-1,14389.735087,256,CCTM_v52_Linux2_x86_64intel,1


In [6]:
# Boolean mask of rows whose executable name (whitespace-stripped) is SpEC.
filter_spec = df_blue_waters_posix["exe"].str.strip().isin(["SpEC", "./SpEC"])
# Everything that is NOT a SpEC run.
df_blue_waters_posix_nospec = df_blue_waters_posix.loc[~filter_spec]
df_blue_waters_posix_nospec.head()

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,WRITE_1M_4M,WRITE_4M_10M,WRITE_10M_100M,WRITE_100M_1G,WRITE_1G_PLUS,rank,POSIX_TOTAL_TIME,nprocs,exe,lustre
0,1280,-1281,-1281,1807,0,0,2561,0,0,-1281,...,0,0,0,0,0,184,0.970498,213,./Hsigma,1
3,2366,-2367,-2367,3342,0,0,4733,0,0,-2367,...,0,0,0,0,0,127,0.487127,394,./Hsigma,1
4,8709,-288,-288,1521073,6331589,2013343,28430,0,0,-288,...,756,0,0,0,0,-1,14389.735087,256,CCTM_v52_Linux2_x86_64intel,1
5,8709,-288,-288,1516465,8334307,2006943,28430,0,0,-288,...,756,0,0,0,0,-1,14340.456718,256,CCTM_v52_Linux2_x86_64intel,1
6,8709,-288,-288,1511857,13971437,2006175,28430,0,0,-288,...,756,0,0,0,0,-1,15207.986212,256,CCTM_v52_Linux2_x86_64intel,1


In [7]:
# Reduce the SpEC runs to 50000 rows (the original note gives 326799 as the
# starting count) and stitch them back onto the non-SpEC rows.
# sklearn's resample() draws WITH replacement by default; that default is
# spelled out here so it is visible that the sample can contain duplicate rows.
# NOTE(review): duplicates can end up on both sides of the later train/test
# split — confirm this matches how the checkpoint's training data was built
# before changing it.
df_blue_waters_posix_spec = resample(
    df_blue_waters_posix.loc[filter_spec],
    replace=True,
    n_samples=50000,
    random_state=0,
)
df_blue_waters_posix = pd.concat(
    [df_blue_waters_posix_nospec, df_blue_waters_posix_spec]
)
df_blue_waters_posix.head()

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,WRITE_1M_4M,WRITE_4M_10M,WRITE_10M_100M,WRITE_100M_1G,WRITE_1G_PLUS,rank,POSIX_TOTAL_TIME,nprocs,exe,lustre
0,1280,-1281,-1281,1807,0,0,2561,0,0,-1281,...,0,0,0,0,0,184,0.970498,213,./Hsigma,1
3,2366,-2367,-2367,3342,0,0,4733,0,0,-2367,...,0,0,0,0,0,127,0.487127,394,./Hsigma,1
4,8709,-288,-288,1521073,6331589,2013343,28430,0,0,-288,...,756,0,0,0,0,-1,14389.735087,256,CCTM_v52_Linux2_x86_64intel,1
5,8709,-288,-288,1516465,8334307,2006943,28430,0,0,-288,...,756,0,0,0,0,-1,14340.456718,256,CCTM_v52_Linux2_x86_64intel,1
6,8709,-288,-288,1511857,13971437,2006175,28430,0,0,-288,...,756,0,0,0,0,-1,15207.986212,256,CCTM_v52_Linux2_x86_64intel,1


In [8]:
# The application name was only needed for the SpEC filter; remove it so that
# only the remaining columns are carried forward.
df_blue_waters_posix = df_blue_waters_posix.drop(columns=["exe"])

In [9]:
# Separate the regression target (POSIX_TOTAL_TIME) from the input features.
# pop() removes the column from df_blue_waters_posix in place, so running this
# cell a second time on the same frame raises KeyError.
POSIX_TOTAL_TIME_df = df_blue_waters_posix.pop('POSIX_TOTAL_TIME')

In [10]:
# Fully connected regressor: 90 inputs -> 1024 -> 512 -> 128 -> 1 output.
# Each hidden stage is Linear -> Dropout -> ReLU, in that order, so the module
# indices inside the Sequential (0..9) stay the same as before and remain
# compatible with state dicts saved from the same layout.
hidden_widths = [90, 1024, 512, 128]
layers = []
for in_features, out_features in zip(hidden_widths[:-1], hidden_widths[1:]):
    layers.append(nn.Linear(in_features, out_features))
    layers.append(nn.Dropout(p=config["dropout"]))
    layers.append(nn.ReLU())
layers.append(nn.Linear(hidden_widths[-1], 1))

model = nn.Sequential(*layers).to(device)

In [11]:
# Load previously trained weights if a checkpoint exists at MODEL_PATH.
#
# Only the model weights (and the stored epoch counter) are restored. The
# checkpoint's optimizer/scheduler state is deliberately not loaded: no
# `optimizer` or `scheduler` object is created anywhere in this notebook, so
# referencing them raised NameError as soon as a checkpoint was actually found,
# and neither is needed for attribution.
if Path(MODEL_PATH).is_file():
    print("Loading pretrained model...")

    checkpoint = torch.load(MODEL_PATH, map_location=torch.device(device))
    model.load_state_dict(checkpoint['model_state_dict'])
    model_epoch = checkpoint['epoch']
    print(f"Current epoch: {model_epoch}")
else:
    # Make the fallback visible: without a checkpoint every attribution below
    # is computed on randomly initialised weights.
    print(f"WARNING: no checkpoint found at {MODEL_PATH}; model keeps its random initial weights")

# Switch to inference mode so the Dropout layers are disabled.
model.eval()

Sequential(
  (0): Linear(in_features=90, out_features=1024, bias=True)
  (1): Dropout(p=0.05, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=1024, out_features=512, bias=True)
  (4): Dropout(p=0.05, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=512, out_features=128, bias=True)
  (7): Dropout(p=0.05, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=128, out_features=1, bias=True)
)

In [12]:
# Hold out config["test_size"] of the rows as the test set. The split is
# optionally stratified on the "nprocs" column when the config asks for it.
stratify_labels = (
    df_blue_waters_posix["nprocs"] if config["stratified_split"] else None
)
X_train, X_test, y_train, y_test = train_test_split(
    df_blue_waters_posix,
    POSIX_TOTAL_TIME_df,
    test_size=config["test_size"],
    random_state=config["split_seed"],
    stratify=stratify_labels,
)

In [13]:
# Fit the standardisation on the training features only, then apply it to them.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_train_scaled

array([[-0.01618075,  0.05062959,  0.05062959, ..., -0.04513269,
        -0.06855959,  0.13533436],
       [-0.01454276,  0.05060156,  0.05060156, ..., -0.04513269,
         0.27464501,  0.13533436],
       [-0.01502839,  0.03235012,  0.03235012, ..., -0.04513269,
        -0.02427513,  0.13533436],
       ...,
       [-0.00913502,  0.04258326,  0.04258326, ..., -0.04513269,
         0.00893822,  0.13533436],
       [-0.01493012,  0.02794846,  0.02794846, ..., -0.04513269,
        -0.05748848,  0.13533436],
       [-0.01585104,  0.04384488,  0.04384488, ..., -0.04513269,
        -0.05748848,  0.13533436]])

In [14]:
# Copy the scaled training features into a torch tensor and move it to the
# selected device; the bare name on the last line displays it.
tensor_X_train = torch.Tensor(X_train_scaled).to(device)
tensor_X_train

tensor([[-0.0162,  0.0506,  0.0506,  ..., -0.0451, -0.0686,  0.1353],
        [-0.0145,  0.0506,  0.0506,  ..., -0.0451,  0.2746,  0.1353],
        [-0.0150,  0.0324,  0.0324,  ..., -0.0451, -0.0243,  0.1353],
        ...,
        [-0.0091,  0.0426,  0.0426,  ..., -0.0451,  0.0089,  0.1353],
        [-0.0149,  0.0279,  0.0279,  ..., -0.0451, -0.0575,  0.1353],
        [-0.0159,  0.0438,  0.0438,  ..., -0.0451, -0.0575,  0.1353]],
       device='cuda:0')

In [15]:
# Training targets as a column tensor of shape (n_rows, 1) on the selected device.
tensor_y_train = torch.Tensor(y_train.values).reshape(-1, 1).to(device)
tensor_y_train

tensor([[1.1055e-01],
        [1.9792e+00],
        [7.6017e+00],
        ...,
        [1.7390e+04],
        [2.5100e+02],
        [3.8229e+00]], device='cuda:0')

In [16]:
# Pair each training feature row with its target so they can be indexed together.
training_dataset = TensorDataset(tensor_X_train, tensor_y_train)
training_dataset

<torch.utils.data.dataset.TensorDataset at 0x150facb3b8b0>

In [17]:
# Number of samples that go into the *training* subset of the train/validation
# split below (80% of training_dataset, truncated to an int). Despite its name,
# this is not a test-set size.
test_abs = int(len(training_dataset) * 0.8)
test_abs

339108

In [18]:
# Split the training data into train/validation subsets.
# The split is driven by a dedicated generator seeded from config["random_seed"]
# so that it is reproducible across kernel restarts; previously it drew from the
# unseeded global RNG and changed on every run.
train_subset, val_subset = random_split(
    training_dataset,
    [test_abs, len(training_dataset) - test_abs],
    generator=torch.Generator().manual_seed(config["random_seed"]),
)

In [19]:

# Both loaders share the batch size and shuffle flag taken from the config.
loader_options = {
    "batch_size": config["batch_size"],
    "shuffle": config["shuffle"],
}
training_dataloader = DataLoader(train_subset, **loader_options)
validation_dataloader = DataLoader(val_subset, **loader_options)

In [20]:
# Apply the scaler that was fitted on X_train to the held-out features
# (transform only; the scaler is not re-fitted here).
X_test_scaled = scaler.transform(X_test)
X_test_scaled

array([[-0.0160703 ,  0.04992869,  0.04992869, ..., -0.03823735,
        -0.07582501,  0.13533436],
       [-0.00913502,  0.04258326,  0.04258326, ..., -0.04513269,
         0.00893822,  0.13533436],
       [-0.0160703 ,  0.04992869,  0.04992869, ..., -0.01065596,
        -0.07582501,  0.13533436],
       ...,
       [-0.01611497,  0.05029316,  0.05029316, ..., -0.04513269,
        -0.07409515,  0.13533436],
       [-0.00913502,  0.04258326,  0.04258326, ..., -0.04513269,
         0.00893822,  0.13533436],
       [-0.01594524,  0.0490035 ,  0.0490035 , ..., -0.04513269,
        -0.06855959,  0.13533436]])

In [21]:
# Copy the scaled test features into a torch tensor and move it to the selected
# device; this tensor is what the attribution loop slices into chunks.
tensor_X_test = torch.Tensor(X_test_scaled).to(device)
tensor_X_test

tensor([[-0.0161,  0.0499,  0.0499,  ..., -0.0382, -0.0758,  0.1353],
        [-0.0091,  0.0426,  0.0426,  ..., -0.0451,  0.0089,  0.1353],
        [-0.0161,  0.0499,  0.0499,  ..., -0.0107, -0.0758,  0.1353],
        ...,
        [-0.0161,  0.0503,  0.0503,  ..., -0.0451, -0.0741,  0.1353],
        [-0.0091,  0.0426,  0.0426,  ..., -0.0451,  0.0089,  0.1353],
        [-0.0159,  0.0490,  0.0490,  ..., -0.0451, -0.0686,  0.1353]],
       device='cuda:0')

In [22]:
# Test targets as a column tensor of shape (n_rows, 1) on the selected device.
tensor_y_test = torch.Tensor(y_test.values).reshape(-1, 1).to(device)
tensor_y_test

tensor([[1.6721e-01],
        [1.7705e+04],
        [4.1766e-01],
        ...,
        [9.2079e+00],
        [1.4627e+04],
        [1.5255e+00]], device='cuda:0')

In [23]:
# Pair test features with targets and batch them with the configured batch size.
# No shuffle argument is passed here, unlike the train/validation loaders.
test_dataset = TensorDataset(tensor_X_test, tensor_y_test)
test_dataloader = DataLoader(test_dataset, batch_size=config["batch_size"])

In [24]:
# Chunking parameters for the attribution loop: the test tensor is processed in
# windows [lower, upper) that are `stride` rows wide, starting at row 0.
stride = 30000
lower, upper = 0, stride

In [25]:
# Empty accumulators for the per-feature attributions: one frame for Integrated
# Gradients results and one for DeepLift results.
df_ig_attr_annotated_full = pd.DataFrame([])
df_dl_attr_annotated_full = pd.DataFrame([])

In [26]:
# Captum attribution objects, both wrapping the same model instance.
ig = IntegratedGradients(model)
deep_lift = DeepLift(model)

In [27]:
# Persist the (unscaled) test features next to the attribution results.
# The output directory is created first so that a fresh checkout fails neither
# here nor in the later result-saving cell.
Path("./captum").mkdir(parents=True, exist_ok=True)

# reset_index() keeps the original row labels as an "index" column, so each
# saved row can still be matched to its row in the source dataframe.
test_data = X_test.reset_index()
test_data.to_csv("./captum/blue_waters_captum_test_data.csv")

In [28]:
# Compute Integrated Gradients and DeepLift attributions for the whole test set,
# `stride` rows at a time.
#
# The window bounds are derived from range() instead of mutable notebook state,
# so the cell can be re-run on its own and always recomputes both result frames
# from scratch. The last window is clamped to the data length, which also covers
# a test set smaller than one stride. Per-chunk frames are collected in lists
# and concatenated once at the end rather than on every iteration.
feature_names = list(df_blue_waters_posix.columns)
n_rows = len(X_test)

ig_chunks = []
dl_chunks = []
for lower in range(0, n_rows, stride):
    upper = min(lower + stride, n_rows)
    print(f"lower {lower} to upper {upper}")
    # The slice is already 2-D (rows x features); no reshape is required.
    ex = tensor_X_test[lower:upper]

    print("Integrated Gradients")
    ig_attr = ig.attribute(ex, n_steps=50)
    ig_chunks.append(
        pd.DataFrame(ig_attr.cpu().detach().numpy(), columns=feature_names)
    )

    print("Deep Lift")
    deep_lift_attr = deep_lift.attribute(ex)
    dl_chunks.append(
        pd.DataFrame(deep_lift_attr.cpu().detach().numpy(), columns=feature_names)
    )

# pd.concat() rejects an empty list, so fall back to an empty, correctly
# labelled frame when there was nothing to attribute.
df_ig_attr_annotated_full = (
    pd.concat(ig_chunks) if ig_chunks else pd.DataFrame(columns=feature_names)
)
df_dl_attr_annotated_full = (
    pd.concat(dl_chunks) if dl_chunks else pd.DataFrame(columns=feature_names)
)

lower 0 to upper 30000
Integrated Gradients
Deep Lift


  gradient_mask = apply_gradient_requirements(inputs_tuple)
               activations. The hooks and attributes will be removed
            after the attribution is finished
  return func(*args, **kwargs)


lower 30000 to upper 60000
Integrated Gradients
Deep Lift
lower 60000 to upper 90000
Integrated Gradients
Deep Lift
lower 90000 to upper 105972
Integrated Gradients
Deep Lift


In [29]:
# Write both attribution tables with a fresh 0..n-1 row index.
# reset_index(drop=True) discards the per-chunk index directly instead of first
# materialising it as an "index" column and then dropping a column by that name
# (which would hit a real feature column if one were ever called "index").
df_ig_attr_annotated_full.reset_index(drop=True).to_csv("./captum/blue_waters_captum_ig_result.csv")
df_dl_attr_annotated_full.reset_index(drop=True).to_csv("./captum/blue_waters_captum_dl_result.csv")