In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from collections import defaultdict
from typing import List, Dict
from typing import Tuple, Any
from torch.nn import MSELoss
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
from logger import init_logging
from base_module import MModule
from data import MDataset, Graph, load_graphs
from importlib import reload
from config import Config
import config
from data import MDataset, Graph, GraphNode, load_graphs, save_dataset_pkl, load_dataset_pkl, save_scalers_pkl, load_scalers_pkl
import data
from base_module import MModule, pad_np_vectors
import base_module
from executor import single_train_loop, nested_detach
import executor
from objects import ModelType
import objects
from metric import MetricUtil
import metric
from logger import init_logging, logging
import logger
import gcn
from gcn import GCNLayer

# Development aid: uncomment the matching reload(...) line to pick up edits made to a
# project module without restarting the kernel.
# reload(gcn)
# reload(config)
# reload(data)
# reload(base_module)
# reload(executor)
# reload(objects)
# reload(metric)
# reload(logger)
# NOTE(review): presumably configures the project's logging; implemented in logger.py (not visible here).
init_logging()

In [None]:
# Environment whose graphs/configs are used by default.
dataset_environment_str = "T4_CPUALL"
# Method prefix passed to save_scalers_pkl / load_scalers_pkl.
method_prefix = "GroupingBased"
# Model family used to pick an entry from `train_configs`.
model_type = ModelType.GCNGrouping
# Scaler class candidate (alternative: MinMaxScaler).
# NOTE(review): not referenced by any other visible cell - confirm it is still needed.
normalizer_cls = StandardScaler
# Forwarded as `use_dummy` when the training cell loads graphs.
dummy = False

In [None]:
# Notebook-level state read by preprocess_dataset / compute_evaluate_metrics.
# Both are assigned by the evaluation and training cells before those functions run:
#   scalers     -> (graph_feature_scaler, y_scaler)
#   eval_graphs -> graphs used for metric computation
scalers = eval_graphs = None

In [None]:
# Default per-model configurations, keyed by ModelType name.
# `dataset_subgraph_grouping_count` and `dataset_params` are consumed by init_dataset.
train_configs = {
    ModelType.MLPTestGrouping.name: Config.from_dict(dict(
        model="MLPTestGrouping",
        all_seed=42,
        dataset_environment_str=dataset_environment_str,
        dataset_subgraph_grouping_count=20,
        dataset_params=dict(
            duration_summed=False,
        ),
        dataset_dummy=False,
        batch_size=32,
        eval_steps=5000,
        learning_rate=1e-4,
        epochs=100,
        optimizer="Adam",
        meta_configs=dict(
            learning_rate=0.005,
            meta_learning_rate=0.001,
            meta_train_steps=1000,
            meta_fast_adaption_step=5,
            meta_dataset_train_environment_strs=[dataset_environment_str],
            meta_dataset_eval_environment_strs=[dataset_environment_str],
        ),
    )),
    ModelType.GCNGrouping.name: Config.from_dict(dict(
        model="GCNGrouping",
        dataset_environment_str=dataset_environment_str,
        dataset_subgraph_grouping_count=25,
        all_seed=42,
        dataset_params=dict(
            duration_summed=False,
        ),
        dataset_dummy=False,
        batch_size=32,
        eval_steps=25,
        learning_rate=1e-4,
        epochs=100,
        optimizer="Adam",
        meta_configs=dict(
            learning_rate=0.005,
            meta_learning_rate=0.001,
            meta_train_steps=1000,
            meta_fast_adaption_step=5,
            meta_dataset_train_environment_strs=[dataset_environment_str],
            meta_dataset_eval_environment_strs=[dataset_environment_str],
        ),
    )),
}

# Active configuration; read as a global by init_dataset, get_scalers and
# init_GCNGrouping_model.
conf: Config = train_configs[model_type.name]

In [None]:
def full_graph_feature(graph: Graph, subgraph_count: int = 10, dataset_params: "Dict | None" = None) -> Tuple[
        Dict[str, np.ndarray], Dict]:
    """Build the grouped feature matrix and adjacency matrix for one graph.

    The graph is split with ``graph.subgraphs(subgraph_count=...)``. Each
    subgraph contributes one row: the element-wise sum of its (padded) node
    feature vectors with the number of nodes appended as the last entry. An
    empty subgraph contributes ``np.zeros(1)``, which is padded together with
    the other rows by ``pad_np_vectors``.

    The adjacency matrix is a simple chain: group ``i`` is connected to group
    ``i + 1`` only. Real inter-group edges are not encoded.

    Args:
        graph: Graph to featurize.
        subgraph_count: Forwarded to ``graph.subgraphs``.
        dataset_params: Optional settings; only ``"mode"`` is read (default
            ``"complex"``) and forwarded to ``op.to_feature_array``. ``None``
            is treated as an empty dict.

    Returns:
        ``(x, y)`` where ``x`` holds ``x_graph_id``, ``x_feature_matrix`` and
        ``x_adjacency_matrix``, and ``y`` holds ``y_graph_id`` and
        ``y_graph_duration`` (a 1-tuple with ``graph.graph_duration``).
    """
    # Avoid a shared mutable default argument.
    params = {} if dataset_params is None else dataset_params
    feature_mode = params.get("mode", "complex")

    # The node-id -> group-index mapping returned alongside is not needed here.
    subgraphs, _ = graph.subgraphs(subgraph_count=subgraph_count)

    feature_rows = list()
    for subgraph in subgraphs:
        node_features = [
            np.array(node.op.to_feature_array(mode=feature_mode))
            for node in subgraph
        ]
        if len(node_features) == 0:
            # Placeholder row; widened to the common length by the padding below.
            feature_rows.append(np.zeros(1))
            continue
        summed = np.sum(pad_np_vectors(node_features), axis=0)
        feature_rows.append(np.append(summed, len(subgraph)))

    feature_matrix = np.array(pad_np_vectors(feature_rows))

    # Ones on the first superdiagonal: adj[i][i + 1] = 1 for consecutive groups.
    adj_matrix = np.eye(len(subgraphs), k=1)

    x = {
        "x_graph_id": graph.ID,
        "x_feature_matrix": feature_matrix,
        "x_adjacency_matrix": adj_matrix,
    }
    y = {
        "y_graph_id": graph.ID,
        "y_graph_duration": (graph.graph_duration,)
    }
    return x, y


def init_dataset(graphs: List[Graph]) -> MDataset:
    """Featurize every graph and pad all matrices to a common row width.

    Reads ``dataset_subgraph_grouping_count`` and ``dataset_params`` from the
    notebook-level ``conf``.

    Args:
        graphs: Graphs to convert.

    Returns:
        An ``MDataset`` built from the padded feature dicts and the label dicts.
    """
    samples = [
        full_graph_feature(graph,
                           subgraph_count=conf.dataset_subgraph_grouping_count,
                           dataset_params=conf.dataset_params)
        for graph in graphs
    ]
    X = [x for x, _ in samples]
    Y = [y for _, y in samples]

    # Widest first row across all graphs decides the padding target.
    feature_matrix_maxsize = max(
        (len(x["x_feature_matrix"][0]) for x in X), default=0)
    adjacency_matrix_maxsize = max(
        (len(x["x_adjacency_matrix"][0]) for x in X), default=0)

    for x in X:
        for key, width in (("x_feature_matrix", feature_matrix_maxsize),
                           ("x_adjacency_matrix", adjacency_matrix_maxsize)):
            x[key] = pad_np_vectors(x[key], maxsize=width)

    return MDataset(X, Y)

In [None]:
def get_scalers(ds: MDataset):
    """Fit one scaler on all feature rows and one on all graph durations.

    The scaler class comes from ``conf.dataset_normalizer_cls`` (notebook-level
    ``conf``). Feature rows of every sample are stacked into a single 2-D array
    before fitting.

    Args:
        ds: Un-normalized dataset; each item is a ``(feature, label)`` pair
            whose ``x_feature_matrix`` must be a ``list`` of rows.

    Returns:
        ``(graph_feature_scaler, y_scaler)``, both already fitted.
    """
    scaler_cls = conf.dataset_normalizer_cls

    stacked_rows, durations = [], []
    for feature, label in ds:
        rows = feature["x_feature_matrix"]
        assert isinstance(rows, list)
        stacked_rows += rows
        durations.append(label["y_graph_duration"])

    stacked_rows = np.array(stacked_rows)
    durations = np.array(durations)

    graph_feature_scaler = scaler_cls()
    graph_feature_scaler.fit(stacked_rows)

    y_scaler = scaler_cls()
    y_scaler.fit(durations)

    return graph_feature_scaler, y_scaler

In [None]:
def preprocess_dataset(ds: MDataset) -> MDataset:
    """Normalize a dataset with the notebook-level ``scalers`` and tensorize it.

    Each sample's feature matrix is transformed on its own; all durations are
    transformed in one call. Adjacency matrices are only cast to float32.

    Args:
        ds: Dataset produced by ``init_dataset``.

    Returns:
        A new ``MDataset`` whose matrices and durations are ``torch.Tensor``s.
    """
    feature_scaler, target_scaler = scalers

    scaled_matrices = []
    raw_durations = []
    for feature, label in ds:
        # Features: scale per sample, never merged across samples.
        matrix = np.array(feature["x_feature_matrix"], dtype=np.float32)
        scaled_matrices.append(feature_scaler.transform(matrix))
        # Targets: collected here, scaled together below.
        raw_durations.append(label["y_graph_duration"])

    scaled_durations = target_scaler.transform(
        np.array(raw_durations, dtype=np.float32))

    processed_features, processed_labels = [], []
    for idx, (feature, label) in enumerate(ds):
        adjacency = np.array(feature["x_adjacency_matrix"], dtype=np.float32)
        processed_features.append({
            "x_graph_id": feature["x_graph_id"],
            "x_feature_matrix": torch.Tensor(scaled_matrices[idx]),
            "x_adjacency_matrix": torch.Tensor(adjacency)
        })
        processed_labels.append({
            "y_graph_id": label["y_graph_id"],
            "y_graph_duration": torch.Tensor(scaled_durations[idx]),
        })

    return MDataset(processed_features, processed_labels)

In [None]:
def compute_evaluate_metrics(input_batches, output_batches, eval_loss) -> Dict[str, float]:
    """Turn model outputs into per-graph durations and compute evaluation metrics.

    Uses the notebook-level ``scalers`` (for the inverse target transform) and
    ``eval_graphs`` (passed to ``MetricUtil.compute_duration_metrics``).

    Args:
        input_batches: Feature batches; each provides ``"x_graph_id"``.
        output_batches: Model outputs, index-aligned with ``input_batches``.
        eval_loss: Loss value reported under ``"eval_loss"``.

    Returns:
        ``{"eval_loss": eval_loss, **duration_metrics}``.
    """
    graph_id_to_duration_pred = defaultdict(int)
    for batch_idx, inputs in enumerate(input_batches):
        logits = nested_detach(output_batches[batch_idx]).cpu().numpy()

        # Undo target normalization, then keep column 0 as the duration.
        _, y_scaler = scalers
        restored = y_scaler.inverse_transform(logits)
        durations = restored[:, 0:1].sum(axis=1)

        for pos, graph_id in enumerate(inputs["x_graph_id"]):
            graph_id_to_duration_pred[graph_id] = durations[pos].item()

    duration_metrics = MetricUtil.compute_duration_metrics(
        eval_graphs, graph_id_to_duration_pred)
    return {"eval_loss": eval_loss, **duration_metrics}

In [None]:
def compute_durations(input_batches, output_batches, scalers, eval_graphs) -> Tuple[np.ndarray, np.ndarray]:
    """Pair predicted and measured durations for every graph in ``eval_graphs``.

    Args:
        input_batches: Feature batches; each provides ``"x_graph_id"``.
        output_batches: Model outputs, index-aligned with ``input_batches``.
            Each is detached, moved to CPU and treated as a 2-D array whose
            column 0 holds the normalized duration.
        scalers: ``(feature_scaler, y_scaler)``; only ``y_scaler`` is used, for
            ``inverse_transform``.
        eval_graphs: Graphs exposing ``ID`` and ``graph_duration``.

    Returns:
        ``(y_hat, y)`` as two ``np.ndarray``s ordered like ``eval_graphs``.
        A graph whose ID never appears in ``input_batches`` gets a predicted
        duration of ``0`` (the ``defaultdict(int)`` default).
    """
    graph_id_to_duration_pred = defaultdict(int)
    for batch_idx, inputs in enumerate(input_batches):
        logits = nested_detach(output_batches[batch_idx]).cpu().numpy()

        # Undo target normalization, then keep column 0 as the duration.
        _, y_scaler = scalers
        restored = y_scaler.inverse_transform(logits)
        durations = restored[:, 0:1].sum(axis=1)

        for pos, graph_id in enumerate(inputs["x_graph_id"]):
            graph_id_to_duration_pred[graph_id] = durations[pos].item()

    y_hat, y = list(), list()
    for graph in eval_graphs:
        y_hat.append(graph_id_to_duration_pred[graph.ID])
        y.append(graph.graph_duration)
    return np.array(y_hat), np.array(y)

In [None]:

def to_device(conf: "Config | str | torch.device", features, labels):
    """Move the tensors of one batch onto the target device (in place).

    Args:
        conf: Either a config object whose ``device`` attribute names the
            target device, or the device itself (``str`` / ``torch.device``).
            Falls back to ``"cuda"`` when no device can be derived, which was
            the previous hard-coded behaviour.
        features: Batch dict; ``x_feature_matrix`` and ``x_adjacency_matrix``
            are moved, other entries are left untouched.
        labels: Batch dict; ``y_graph_duration`` is moved.

    Returns:
        The same ``(features, labels)`` dicts, now holding the moved tensors.
    """
    if isinstance(conf, (str, torch.device)):
        device = conf
    else:
        # Follow the device the model is moved to (`model.to(conf.device)`).
        device = getattr(conf, "device", None) or "cuda"

    for key in ("x_feature_matrix", "x_adjacency_matrix"):
        features[key] = features[key].to(device)
    labels["y_graph_duration"] = labels["y_graph_duration"].to(device)
    return features, labels

In [None]:

class MLPTest_GroupingModel(MModule):
    """Baseline MLP over the flattened grouped feature matrix.

    The feature matrix is flattened and passed through two linear layers
    (``linear1`` -> ``output``) with no activation in between. Training loss
    is mean squared error against ``y_graph_duration``.
    """

    def __init__(self, input_shape, output_dimension, **kwargs):
        """
        Args:
            input_shape: Indexable whose first two entries give the feature
                matrix size; their product is the flattened input width.
            output_dimension: Width of the prediction.
            **kwargs: Forwarded to ``MModule.__init__``.
        """
        super().__init__(**kwargs)
        flat_width = input_shape[0] * input_shape[1]
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(in_features=flat_width, out_features=128)
        self.output = nn.Linear(128, output_dimension)
        self.loss_fn = nn.MSELoss()

    def forward(self, X):
        """Predict from ``X["x_feature_matrix"]``."""
        hidden = self.linear1(self.flatten(X["x_feature_matrix"]))
        return self.output(hidden)

    def compute_loss(self, outputs, Y):
        """Return the MSE between ``outputs`` and ``Y["y_graph_duration"]``."""
        return self.loss_fn(outputs, Y["y_graph_duration"])


def init_MLPTestGrouping_model(preprocessed_train_ds=None) -> MModule | Any:
    """Build an ``MLPTest_GroupingModel`` sized from the first training sample.

    Args:
        preprocessed_train_ds: Preprocessed training dataset exposing
            ``features`` and ``labels`` lists. When omitted, the notebook-level
            variable of the same name is used (the original behaviour), so
            existing zero-argument calls keep working.

    Returns:
        A model whose input shape is (rows, columns) of the first sample's
        feature matrix and whose output width is the length of its
        ``y_graph_duration``.

    Raises:
        NameError: If no dataset is passed and the notebook-level
            ``preprocessed_train_ds`` has not been defined yet.
    """
    if preprocessed_train_ds is None:
        # Backward-compatible fallback to notebook state.
        try:
            preprocessed_train_ds = globals()["preprocessed_train_ds"]
        except KeyError:
            raise NameError(
                "name 'preprocessed_train_ds' is not defined") from None

    sample_x_dict = preprocessed_train_ds.features[0]
    sample_y_dict = preprocessed_train_ds.labels[0]
    feature_matrix = sample_x_dict["x_feature_matrix"]
    shape = len(feature_matrix), len(feature_matrix[0])
    return MLPTest_GroupingModel(input_shape=shape,
                                 output_dimension=len(sample_y_dict["y_graph_duration"]))

In [None]:


class GCNGroupingModel(MModule):
    """Stack of ``GCNLayer``s that predicts a graph duration from grouped features.

    The stack has ``n_layers + 1`` layers in total: one input layer
    (``dim_feats -> dim_h``, ReLU, dropout 0), ``n_layers - 1`` hidden layers
    (``dim_h -> dim_h``, ReLU) and one output layer
    (``dim_h -> y_graph_duration_len``, no activation).
    """

    def __init__(self, dim_feats, dim_h, y_graph_duration_len, n_layers, dropout):
        super().__init__()
        stack = [GCNLayer(dim_feats, dim_h, F.relu, 0)]
        stack.extend(
            GCNLayer(dim_h, dim_h, F.relu, dropout)
            for _ in range(n_layers - 1)
        )
        stack.append(GCNLayer(dim_h, y_graph_duration_len, None, dropout))
        self.layers = nn.ModuleList(stack)
        self.loss_fn = MSELoss()

    def forward(self, X):
        """Run every layer on (adjacency, hidden state) and sum over dim 1.

        NOTE(review): dim 1 is presumably the subgraph/group axis of a
        (batch, groups, out) tensor - confirm against GCNLayer's output.
        """
        adjacency = X["x_adjacency_matrix"]
        hidden = X["x_feature_matrix"]
        for gcn_layer in self.layers:
            hidden = gcn_layer(adjacency, hidden)
        return torch.sum(hidden, dim=[1])

    def compute_loss(self, outputs, Y) -> torch.Tensor:
        """Return the MSE between ``outputs`` and ``Y["y_graph_duration"]``."""
        return self.loss_fn(outputs, Y["y_graph_duration"])


def init_GCNGrouping_model(preprocessed_train_ds) -> MModule | Any:
    """Build a ``GCNGroupingModel`` sized from the first training sample.

    Hyper-parameters come from the notebook-level ``conf.model_params`` and
    fall back to these defaults when a key is absent:
    ``dim_h`` -> per-row feature width, ``n_layers`` -> 8, ``dropout`` -> 0.01.

    Args:
        preprocessed_train_ds: Preprocessed dataset exposing ``features`` and
            ``labels`` lists.

    Returns:
        The constructed model.
    """
    first_features = preprocessed_train_ds.features[0]
    first_labels = preprocessed_train_ds.labels[0]
    node_feature_width = len(first_features["x_feature_matrix"][0])
    duration_width = len(first_labels["y_graph_duration"])

    overrides = conf.model_params
    return GCNGroupingModel(
        dim_feats=node_feature_width,
        y_graph_duration_len=duration_width,
        dim_h=overrides.get("dim_h", node_feature_width),
        n_layers=overrides.get("n_layers", 8),
        dropout=overrides.get("dropout", 0.01),
    )

In [None]:
# eval_process
# Evaluates one saved checkpoint per environment and collects MRE / RMSE over all
# eval graphs, over graphs whose ID contains 'rand', and over the remaining graphs.
eval_size = 200_000
batch_size = 32
models = {
    'T4_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/grouping/T4_CPUALL/GCNGrouping/single_train2024-01-09_12-43-44/ckpt_2575.pth',
    'P4_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/grouping/P4_CPUALL/GCNGrouping/single_train2024-01-09_12-48-48/ckpt_1525.pth',
    'RTX2080Ti_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/grouping/RTX2080Ti_CPUALL/GCNGrouping/single_train2024-01-09_12-56-14/ckpt_2350.pth',
    'RTX3080Ti_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/grouping/RTX3080Ti_CPUALL/GCNGrouping/single_train2024-01-10_01-57-12/ckpt_2825.pth',
}
res = []
# The loop variable is deliberately NOT named `dataset_environment_str`, so the
# notebook-level setting of that name is no longer overwritten by this cell.
for eval_env, model_ckpt in models.items():
    # `eval_graphs` and `scalers` are notebook-level names read by
    # preprocess_dataset / compute_evaluate_metrics, so they are assigned here.
    eval_graphs = load_graphs(eval_env,
                              train_or_eval="eval",
                              use_dummy=False,
                              max_row=eval_size)

    eval_ds = init_dataset(eval_graphs)
    # NOTE(review): 'Standard' must match the normalization name used when the
    # scalers were saved by the training cell - confirm.
    scalers = load_scalers_pkl(eval_env, method_prefix, 'train',
                               'Standard')
    preprocessed_eval_ds = preprocess_dataset(eval_ds)

    # NOTE(security): torch.load unpickles the checkpoint; only load trusted files.
    model = torch.load(model_ckpt)
    model.eval()

    ds = preprocessed_eval_ds
    dl = DataLoader(ds, batch_size=batch_size, shuffle=False)
    input_batchs, output_batchs = list(), list()
    # Unpack directly instead of binding `data`, which would shadow the imported
    # `data` module for every later cell.
    for features, labels in dl:
        features, labels = to_device("cuda", features, labels)
        with torch.no_grad():
            outputs = model(features)
        input_batchs.append(features)
        output_batchs.append(outputs)

    y_hat, y = compute_durations(
        input_batchs, output_batchs, scalers, eval_graphs)

    # Split results by graph origin: IDs containing 'rand' vs. everything else.
    rand_y, rand_y_hat = [], []
    models_y, models_y_hat = [], []
    for i, graph in enumerate(eval_graphs):
        if 'rand' in graph.ID:
            rand_y.append(y[i])
            rand_y_hat.append(y_hat[i])
        else:
            models_y.append(y[i])
            models_y_hat.append(y_hat[i])

    res.append(
        {
            'dataset': eval_env,
            'mre': MetricUtil.mre(y, y_hat),
            'rmse': MetricUtil.rmse(y, y_hat),
            'rand_mre': MetricUtil.mre(rand_y, rand_y_hat),
            'rand_rmse': MetricUtil.rmse(rand_y, rand_y_hat),
            'models_mre': MetricUtil.mre(models_y, models_y_hat),
            'models_rmse': MetricUtil.rmse(models_y, models_y_hat),
        }
    )

In [None]:
import pandas as pd
import os
# One row per evaluated environment, written without the index column.
df = pd.DataFrame(res)
df.to_csv(
    os.path.join(
        '/root/guohao/DLT-perf-model/notebooks/exp/total_compare',
        'grouping.result.csv',
    ),
    index=False,
)

In [None]:
# train process
# Trains one GCNGrouping model per environment. For each environment the config is
# selected FIRST, because init_dataset, get_scalers and init_GCNGrouping_model all
# read the notebook-level `conf`.
# envs = ['T4_CPUALL', 'P4_CPUALL', 'RTX2080Ti_CPUALL', 'RTX3080Ti_CPUALL',
#         'T4_CPU100', 'P4_CPU100', 'RTX2080Ti_CPU100', 'T4_CPU80', 'P4_CPU80', 'RTX2080Ti_CPU80']
envs = ['T4_CPUALL', 'P4_CPUALL', 'RTX2080Ti_CPUALL', 'RTX3080Ti_CPUALL',]
for env in envs:
    train_configs = {
        ModelType.MLPTestGrouping.name: Config.from_dict({
            "model": "MLPTestGrouping",
            "all_seed": 42,
            "dataset_environment_str": env,
            "dataset_subgraph_grouping_count": 20,
            "dataset_params": {
                "duration_summed": False,
            },
            "dataset_dummy": False,
            "batch_size": 128,
            "eval_steps": 5000,
            "learning_rate": 1e-4,
            "epochs": 200,
            "optimizer": "Adam",
            "meta_configs": {
                "learning_rate": 0.005,
                "meta_learning_rate": 0.001,
                "meta_train_steps": 1000,
                "meta_fast_adaption_step": 5,
                # Use the environment of this iteration, not the notebook-level default.
                "meta_dataset_train_environment_strs": [env],
                "meta_dataset_eval_environment_strs": [env],
            },
        }),
        ModelType.GCNGrouping.name: Config.from_dict({
            "model": "GCNGrouping",
            "dataset_environment_str": env,
            "dataset_subgraph_grouping_count": 25,
            "all_seed": 42,
            "dataset_params": {
                "duration_summed": False,
            },
            "dataset_dummy": False,
            "batch_size": 32,
            "eval_steps": 25,
            "learning_rate": 1e-4,
            "epochs": 100,
            "optimizer": "Adam",
            "meta_configs": {
                "learning_rate": 0.005,
                "meta_learning_rate": 0.001,
                "meta_train_steps": 1000,
                "meta_fast_adaption_step": 5,
                # Use the environment of this iteration, not the notebook-level default.
                "meta_dataset_train_environment_strs": [env],
                "meta_dataset_eval_environment_strs": [env],
            },
        }),
    }

    # Select the active config before any helper reads the global `conf`;
    # previously this happened only after the datasets and scalers were built,
    # so those steps ran with a stale config.
    model_type = ModelType.GCNGrouping
    conf = train_configs[model_type.name]

    eval_graphs = load_graphs(env,
                              train_or_eval="eval",
                              use_dummy=dummy,
                              max_row=200_000)
    train_graphs = load_graphs(env,
                               train_or_eval="train",
                               use_dummy=dummy,
                               max_row=1_000_000)
    train_ds = init_dataset(train_graphs)
    eval_ds = init_dataset(eval_graphs)

    # Scalers are fitted on the training split only and persisted for evaluation.
    scalers = get_scalers(train_ds)
    save_scalers_pkl(scalers, env, method_prefix, 'train',
                     conf.dataset_normalization)

    preprocessed_train_ds = preprocess_dataset(train_ds)
    preprocessed_eval_ds = preprocess_dataset(eval_ds)

    model = init_GCNGrouping_model(preprocessed_train_ds)
    model = model.to(conf.device)
    single_train_loop(model_type, conf, preprocessed_train_ds,
                      preprocessed_eval_ds, model, compute_evaluate_metrics, to_device, suffix="grouping")