In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from abc import abstractmethod
from collections import defaultdict
from functools import lru_cache
from itertools import count
from typing import List, Dict
from typing import Tuple, Any
from sklearn import ensemble
from sklearn.metrics import mean_squared_error
from torch.nn import MSELoss, LSTM, GRU, RNN, L1Loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
from logger import init_logging
from base_module import MModule
from data import MDataset, Graph, load_graphs
from importlib import reload
from config import Config
import config
from data import MDataset, Graph, GraphNode, load_graphs, save_dataset_pkl, load_dataset_pkl, save_scalers_pkl, load_scalers_pkl
import data
from base_module import MModule, pad_np_vectors
import base_module
from executor import single_train_loop, nested_detach, grid_search_loop
import executor
from objects import ModelType
import objects
from metric import MetricUtil
import metric
from logger import init_logging, logging
import logger
import gcn
from gcn import GCNLayer
import transformer
from transformer import TransformerModel
# Re-execute the project modules so that edits made on disk are picked up
# without restarting the kernel. The order is significant and is kept as-is.
# NOTE(review): names bound earlier in this cell with `from <module> import ...`
# were resolved before these reloads, so they still reference the objects that
# existed before the reload.
for _module in (config, data, base_module, executor, objects,
                metric, logger, gcn, transformer):
    reload(_module)
init_logging()

In [None]:
# Dataset / hardware environment key. The evaluation loop further down reuses
# this name as its loop variable, so the value set here is overwritten there.
dataset_environment_str = "T4_CPUALL"
# Scaler class chosen for normalisation (alternative noted in the trailing comment).
# NOTE(review): not referenced by any other visible cell -- confirm it is still needed.
normalizer_cls = StandardScaler  # MinMaxScaler
# Batch size handed to the evaluation DataLoader.
batch_size = 128
# Method tag passed to load_scalers_pkl when the stored scalers are loaded.
method_prefix = "SubgraphBased"
# Placeholder; the evaluation loop assigns the scalers loaded from disk.
scalers = None

In [None]:
def subgraph_features(graph: Graph, subgraph_node_size: int = 10, step: int = 5, dataset_params: Dict = None) -> \
        Tuple[List[Dict], List[Dict]]:
    """Build one feature dict and one label dict per subgraph of ``graph``.

    The subgraphs are obtained from
    ``graph.subgraphs(subgraph_node_size=..., step=...)``; only the first
    element of the returned pair is used.

    Args:
        graph: Graph to slice; must expose ``ID`` and ``subgraphs``.
        subgraph_node_size: Forwarded unchanged to ``graph.subgraphs``.
        step: Forwarded unchanged to ``graph.subgraphs``.
        dataset_params: Optional settings. Only the key ``"mode"`` is read
            (default ``"complex"``) and forwarded to
            ``node.op.to_feature_array``. ``None`` means "no settings".

    Returns:
        ``(X, Y)``: two parallel lists with one entry per subgraph.
        Each ``X`` entry has the keys ``x_graph_id``, ``x_node_ids``
        (node ids joined with ``"|"``), ``x_subgraph_feature`` (padded
        per-node feature matrix) and ``x_adj_matrix``.
        Each ``Y`` entry has the keys ``y_graph_id``, ``y_nodes_durations``
        (list of ``(duration, gap)`` per node) and ``y_subgraph_durations``
        (1-tuple holding the sum of ``duration + gap`` over the nodes).
    """
    # Resolve the mode once; a None default avoids sharing one mutable dict
    # between calls.
    mode = (dataset_params or {}).get("mode", "complex")

    subgraphs, _ = graph.subgraphs(
        subgraph_node_size=subgraph_node_size, step=step)

    def subgraph_feature(nodes: List[GraphNode]):
        # One feature vector per node, padded to a common width.
        feature_matrix = np.array(pad_np_vectors(
            [np.array(node.op.to_feature_array(mode=mode)) for node in nodes]))

        # Chain adjacency: every node is linked to its successor in list order.
        node_count = len(nodes)
        adj_matrix = np.array([
            [1. if col == row + 1 else 0. for col in range(node_count)]
            for row in range(node_count)
        ])

        # x
        feature = {
            "x_graph_id": graph.ID,
            "x_node_ids": "|".join([str(node.node_id) for node in nodes]),
            "x_subgraph_feature": feature_matrix,
            "x_adj_matrix": adj_matrix
        }

        # y -- derived from the helper's own argument rather than from a
        # variable that merely happens to exist in the enclosing scope.
        subgraph_duration = sum(node.duration + node.gap for node in nodes)
        nodes_durations = [(node.duration, node.gap) for node in nodes]

        label = {
            "y_graph_id": graph.ID,
            "y_nodes_durations": nodes_durations,
            "y_subgraph_durations": (subgraph_duration,)
        }

        return feature, label

    X, Y = list(), list()
    for subgraph in subgraphs:
        x, y = subgraph_feature(subgraph)
        X.append(x)
        Y.append(y)

    return X, Y


def init_dataset(graphs: List[Graph], subgraph_node_size: int = 12, step: int = 3) -> MDataset:
    """Turn a list of graphs into an ``MDataset`` of subgraph samples.

    Every graph is expanded with ``subgraph_features``. Afterwards all feature
    matrices are re-padded with ``pad_np_vectors`` to the widest per-node
    feature vector seen across the whole dataset, so every sample has the same
    feature width.

    Args:
        graphs: Graphs to expand.
        subgraph_node_size: Forwarded to ``subgraph_features``. The default
            (12) is the value that used to be hard-coded here.
        step: Forwarded to ``subgraph_features``. The default (3) is the value
            that used to be hard-coded here.

    Returns:
        ``MDataset(X, Y)`` built from the concatenated features and labels.
    """
    X = list()
    Y = list()

    for graph in graphs:
        X_, Y_ = subgraph_features(graph=graph,
                                   subgraph_node_size=subgraph_node_size,
                                   step=step,
                                   dataset_params={
                                       "duration_summed": False
                                   })
        X.extend(X_)
        Y.extend(Y_)

    # Width of the widest feature row over all samples (0 if there are none).
    subgraph_feature_maxsize = max(
        (len(x["x_subgraph_feature"][0]) for x in X), default=0)

    for x in X:
        x["x_subgraph_feature"] = pad_np_vectors(
            x["x_subgraph_feature"], maxsize=subgraph_feature_maxsize)

    return MDataset(X, Y)

In [None]:

def preprocess_dataset(ds: MDataset, scalers) -> MDataset:
    """Scale every sample of ``ds`` and convert its arrays to torch tensors.

    Args:
        ds: Iterable of ``(feature, label)`` dict pairs.
        scalers: Triple ``(feature_scaler, nodes_durations_scaler,
            subgraph_durations_scaler)``; each is used through ``transform``.

    Returns:
        A new ``MDataset`` whose features hold the scaled feature matrix and
        the (unscaled) adjacency matrix as tensors, and whose labels hold the
        scaled per-node and per-subgraph durations as tensors. The id fields
        are copied through unchanged.
    """
    feature_scaler, nodes_durations_scaler, subgraph_durations_scaler = scalers

    features_out, labels_out = [], []

    for feature, label in ds:
        raw_matrix = feature["x_subgraph_feature"]
        assert isinstance(raw_matrix, list)
        scaled_matrix = feature_scaler.transform(
            np.array(raw_matrix).astype(np.float32))

        # The adjacency matrix is only cast, never scaled.
        adjacency = np.array(feature["x_adj_matrix"]).astype(np.float32)

        raw_nodes_durations = label["y_nodes_durations"]
        assert isinstance(raw_nodes_durations, list)
        scaled_nodes_durations = nodes_durations_scaler.transform(
            np.array(raw_nodes_durations).astype(np.float32))

        # Wrap the stored value in a 1-tuple so the scaler receives a single
        # row, then take that row back out of the result.
        scaled_subgraph_durations = subgraph_durations_scaler.transform(
            (label["y_subgraph_durations"],))[0]

        features_out.append({
            "x_graph_id": feature["x_graph_id"],
            "x_node_ids": feature["x_node_ids"],
            "x_subgraph_feature": torch.Tensor(scaled_matrix),
            "x_adj_matrix": torch.Tensor(adjacency)
        })
        labels_out.append({
            "y_graph_id": label["y_graph_id"],
            "y_nodes_durations": torch.Tensor(scaled_nodes_durations),
            "y_subgraph_durations": torch.Tensor(scaled_subgraph_durations)
        })

    return MDataset(features_out, labels_out)

In [None]:

class LSTMModel(MModule):
    """LSTM over the node sequence of a subgraph, followed by a linear head.

    The LSTM is built with ``batch_first=True`` and a hidden size equal to the
    input feature size; the linear layer maps each LSTM output step to
    ``nodes_durations_len`` values. Training loss is L1.
    """

    def __init__(self, feature_size, nodes_durations_len, num_layers, bidirectional, **kwargs):
        """Create the LSTM, the projection layer and the loss.

        Args:
            feature_size: Input feature width; also used as LSTM hidden size.
            nodes_durations_len: Output width of the projection layer.
            num_layers: Number of stacked LSTM layers.
            bidirectional: Whether the LSTM runs in both directions.
            **kwargs: Forwarded to ``MModule.__init__``.
        """
        super().__init__(**kwargs)
        self.lstm = nn.LSTM(
            input_size=feature_size,
            hidden_size=feature_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
        )
        # A bidirectional LSTM emits forward and backward states side by side.
        lstm_output_width = feature_size * (2 if bidirectional else 1)
        self.project = nn.Linear(
            in_features=lstm_output_width,
            out_features=nodes_durations_len,
        )
        self.loss_fn = nn.L1Loss()

    @staticmethod
    def grid_search_model_params() -> Dict[str, List[Any]]:
        """Return the hyper-parameter grid: parameter name -> candidate values."""
        search_space = dict()
        search_space["num_layers"] = [4, 6, 8]
        search_space["bidirectional"] = [True, False]
        search_space["learning_rate"] = [1e-4, 1e-5]
        search_space["batch_size"] = [32, 64]
        search_space["epochs"] = [20]
        search_space["optimizer"] = ["Adam", "SGD"]
        return search_space

    def forward(self, X):
        """Run the LSTM on ``X["x_subgraph_feature"]`` and project every step."""
        sequence_out, _ = self.lstm(X["x_subgraph_feature"])
        return self.project(sequence_out)

    def compute_loss(self, outputs, Y):
        """Return the L1 loss between ``outputs`` and ``Y["y_nodes_durations"]``."""
        return self.loss_fn(outputs, Y["y_nodes_durations"])

In [None]:
def to_device(device, features, labels):
    """Move the tensor entries of one batch to ``device``.

    Both dicts are updated in place (their tensor values are replaced by the
    result of ``.to(device)``) and returned as ``(features, labels)``. Other
    entries are left untouched.
    """
    for key in ('x_subgraph_feature', 'x_adj_matrix'):
        features[key] = features[key].to(device)
    for key in ('y_nodes_durations', 'y_subgraph_durations'):
        labels[key] = labels[key].to(device)
    return features, labels

In [None]:
def compute_graph_nodes_durations(outputs_, node_ids_str_, scalers):
    """Average the de-normalised predictions of each node over all subgraphs.

    Args:
        outputs_: Sequence of per-subgraph model outputs, one row per node.
        node_ids_str_: Sequence of ``"|"``-joined node ids, aligned with
            ``outputs_`` by position.
        scalers: Triple of scalers; only the second one is used, through
            ``inverse_transform``.

    Returns:
        Dict mapping node id (string) to the mean, over every subgraph that
        contains the node, of the sum of that node's inverse-transformed
        output row.
    """
    _, y_nodes_durations_scaler, _ = scalers
    predictions_per_node = defaultdict(list)
    for subgraph_idx, output_ in enumerate(outputs_):
        node_ids_ = node_ids_str_[subgraph_idx].split("|")
        assert len(output_) == len(node_ids_)
        transformed: np.ndarray = y_nodes_durations_scaler.inverse_transform(
            output_)
        for node_id, node_row in zip(node_ids_, transformed):
            predictions_per_node[node_id].append(np.sum(node_row))
    return {node_id: np.average(values)
            for node_id, values in predictions_per_node.items()}


def compute_durations(input_batches, output_batches, scalers, eval_graphs):
    """Aggregate per-node predictions into one predicted duration per graph.

    For every batch the rows are grouped by graph id, each group is reduced to
    a node -> duration mapping with ``compute_graph_nodes_durations``, and the
    per-batch values are collected per node. A graph's prediction is the sum,
    over its nodes, of the mean of the values collected for that node.

    NOTE(review): a node seen in several batches is averaged per batch first
    and then across batches, so batches contribute equally regardless of how
    many subgraphs they held for that node -- confirm this is intended.

    Args:
        input_batches: Batched feature dicts; ``x_graph_id`` and
            ``x_node_ids`` are read and must be indexable by row.
        output_batches: Model outputs aligned with ``input_batches``.
        scalers: Forwarded to ``compute_graph_nodes_durations``.
        eval_graphs: Graphs providing ``ID`` and ``graph_duration``.

    Returns:
        ``(y_hat, y)`` as numpy arrays ordered like ``eval_graphs``:
        predicted and measured graph durations.

    Raises:
        KeyError: If a graph in ``eval_graphs`` has no prediction in the
            batches.
    """
    graph_id_to_node_to_duration = defaultdict(lambda: defaultdict(list))
    for inputs, outputs in zip(input_batches, output_batches):
        outputs = nested_detach(outputs)
        outputs = outputs.cpu().numpy()
        node_ids_strs = inputs["x_node_ids"]

        # Row indices of this batch, grouped by the graph they belong to.
        graph_groups = defaultdict(list)
        for row, graph_id in enumerate(inputs["x_graph_id"]):
            graph_groups[graph_id].append(row)

        for graph_id, rows in graph_groups.items():
            # Index the rows directly; scanning the whole batch with a list
            # membership test per row was quadratic in the batch size.
            group_x_node_ids = [node_ids_strs[row] for row in rows]
            group_outputs = [outputs[row] for row in rows]
            node_to_durations = compute_graph_nodes_durations(
                group_outputs, group_x_node_ids, scalers)
            for node, duration in node_to_durations.items():
                graph_id_to_node_to_duration[graph_id][node].append(duration)

    # Graph prediction = sum over nodes of the mean collected node prediction.
    graph_id_to_duration_pred = {
        graph_id: sum(np.average(duration_preds)
                      for duration_preds in node_to_duration.values())
        for graph_id, node_to_duration in graph_id_to_node_to_duration.items()
    }

    # Pair predictions with ground truth in the order of eval_graphs.
    y_hat, y = list(), list()
    for graph in eval_graphs:
        y_hat.append(graph_id_to_duration_pred[graph.ID])
        y.append(graph.graph_duration)
    return np.array(y_hat), np.array(y)

In [None]:
# Value passed to load_graphs as max_row for every evaluation set.
eval_size = 200000
# Evaluation environment -> checkpoint file of the LSTM model trained for it.
models = {
    'T4_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/subgraph/T4_CPUALL/LSTM/single_train2024-01-08_11-12-26/ckpt_255000.pth',
    'P4_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/subgraph/P4_CPUALL/LSTM/single_train2024-01-11_09-06-17/ckpt_160000.pth',
    'RTX2080Ti_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/subgraph/RTX2080Ti_CPUALL/LSTM/single_train2023-12-21_17-13-15/ckpt_255000.pth',
    'RTX3080Ti_CPUALL': '/root/guohao/DLT-perf-model/notebooks/ckpts/subgraph/RTX3080Ti_CPUALL/LSTM/single_train2024-01-10_02-48-43/ckpt_255000.pth',
}
# Use the GPU when one is present, otherwise evaluate on the CPU; the model and
# the batches are placed on the same device either way.
device = "cuda" if torch.cuda.is_available() else "cpu"
# One metrics dict per evaluated environment; consumed by the export cell.
res = []
for dataset_environment_str, model_ckpt in models.items():
    eval_graphs = load_graphs(dataset_environment_str,
                              train_or_eval="eval",
                              use_dummy=False,
                              max_row=eval_size)

    eval_ds = init_dataset(eval_graphs)
    scalers = load_scalers_pkl(dataset_environment_str, method_prefix, 'train',
                               'Standard')
    preprocessed_eval_ds = preprocess_dataset(eval_ds, scalers)
    # SECURITY: torch.load unpickles the file and can execute arbitrary code;
    # only load checkpoints from a trusted source.
    # NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
    # fully pickled modules such as these -- pass weights_only=False there.
    model = torch.load(model_ckpt, map_location=device)
    model.eval()
    ds = preprocessed_eval_ds
    # shuffle=False keeps the batches in dataset order.
    dl = DataLoader(ds, batch_size=batch_size, shuffle=False)
    input_batchs, output_batchs = list(), list()
    # Unpack the batch directly so the imported `data` module is not rebound.
    for features, labels in dl:
        features, labels = to_device(device, features, labels)
        with torch.no_grad():
            outputs = model(features)
        input_batchs.append(features)
        output_batchs.append(outputs)
    y_hat, y = compute_durations(
        input_batchs, output_batchs, scalers, eval_graphs)

    # Split the results by graph ID: IDs containing 'rand' versus all others.
    rand_y, rand_y_hat = [], []
    models_y, models_y_hat = [], []
    for graph, measured, predicted in zip(eval_graphs, y, y_hat):
        if 'rand' in graph.ID:
            rand_y.append(measured)
            rand_y_hat.append(predicted)
        else:
            models_y.append(measured)
            models_y_hat.append(predicted)
    res.append(
        {
            'dataset': dataset_environment_str,
            'mre': MetricUtil.mre(y, y_hat),
            'rmse': MetricUtil.rmse(y, y_hat),
            'rand_mre': MetricUtil.mre(rand_y, rand_y_hat),
            'rand_rmse': MetricUtil.rmse(rand_y, rand_y_hat),
            'models_mre': MetricUtil.mre(models_y, models_y_hat),
            'models_rmse': MetricUtil.rmse(models_y, models_y_hat),
        }
    )

In [None]:
import pandas as pd
import os
# One row per evaluated environment, one column per metric.
df = pd.DataFrame(res)
# Create the output directory first so that a missing folder does not throw
# away the evaluation results computed above.
result_dir = '/root/guohao/DLT-perf-model/notebooks/exp/total_compare'
os.makedirs(result_dir, exist_ok=True)
df.to_csv(os.path.join(result_dir, 'subgraph.result.csv'), index=False)

In [None]:
def plot_predict(y, y_hat):
    """Scatter predicted against measured durations with a least-squares line.

    Both inputs are divided by 1000 before fitting and plotting. The axes are
    labelled in ms, so the inputs are presumably in microseconds -- TODO
    confirm.

    Args:
        y: Measured durations.
        y_hat: Predicted durations, aligned with ``y`` by position.

    Returns:
        ``(w, b, fig)`` in exactly this order: slope and intercept of the
        ordinary least-squares fit (each rounded to 2 decimals) and the
        matplotlib figure that holds the scatter plot and the fitted line.

    Raises:
        ZeroDivisionError: If ``y`` is empty (the fit's denominator is 0).
    """
    import matplotlib.pyplot as plt
    import numpy as np

    point_x = [y[i] / 1000 for i in range(len(y))]
    point_y = [y_hat[i] / 1000 for i in range(len(y))]

    # Sums needed by the closed-form linear least-squares fit.
    n = len(point_x)
    sum_xy = sum(px * py for px, py in zip(point_x, point_y))
    sum_x = sum(point_x)
    sum_y = sum(point_y)
    sum_x2 = sum(px * px for px in point_x)

    denominator = n * sum_x2 - sum_x * sum_x
    w = (n * sum_xy - sum_x * sum_y) / denominator
    b = (sum_x2 * sum_y - sum_xy * sum_x) / denominator
    w = round(w, 2)
    b = round(b, 2)

    # Create the figure BEFORE drawing so that the returned object is the one
    # that contains the plot (creating it afterwards yields an empty figure).
    fig, ax = plt.subplots()

    line_x = np.arange(0, 2000, 0.1)
    ax.plot(line_x, w * line_x + b, color='red')
    ax.scatter(point_x, point_y, s=1, color='blue')  # scatter plot
    ax.set_xlabel('measured time (ms)')
    ax.set_ylabel('predicted time (ms)')

    fig.show()

    return w, b, fig  # callers rely on this return order


# Keep only the graphs whose measured AND predicted durations are both below
# 2000000, then plot them. `y` and `y_hat` are the arrays left by the
# evaluation loop, i.e. those of the last environment it processed.
y_, y_hat_ = [], []
for measured, predicted in zip(y, y_hat):
    if not (measured < 2000000 and predicted < 2000000):
        continue
    y_.append(measured)
    y_hat_.append(predicted)
print(len(y_), len(y_hat_))
plot_predict(y_, y_hat_)