# 1. 可行性预测器的应用

- 加载可行性预测器模型
- 解析装箱实例
- 将实例的特征传递给预测器，得到预测结果

In [20]:
import torch as T
import pickle as pkl
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from collections import defaultdict

## 步骤一：加载可行性预测器

In [21]:
# Resolve the target device first so the TorchScript archive can be mapped
# directly onto it while loading. Without map_location, an archive that was
# saved with CUDA tensors cannot be loaded on a CPU-only machine.
device = T.device("cuda" if T.cuda.is_available() else "cpu")
predictor = T.jit.load('Feasibility_Predictor.pt', map_location=device)
predictor = predictor.to(device)

In [22]:
# Print the module hierarchy of the loaded predictor.
print(predictor)

# Alternative that iterates over the parameters instead of the state dict:
# for name, parameters in predictor.named_parameters():
#     print(name, ':', parameters.size())
# List every state-dict entry together with the size of its tensor.
state_dict = predictor.state_dict()
for name, param in state_dict.items():
    print(name, param.size())


RecursiveScriptModule(
  original_name=FFNNFeasibilityChecker
  (linear1): RecursiveScriptModule(original_name=Linear)
  (linear2): RecursiveScriptModule(original_name=Linear)
  (linear3): RecursiveScriptModule(original_name=Linear)
)
linear1.bias torch.Size([128])
linear1.weight torch.Size([128, 17])
linear2.bias torch.Size([32])
linear2.weight torch.Size([32, 128])
linear3.bias torch.Size([1])
linear3.weight torch.Size([1, 32])


## 步骤二：解析装箱实例

In [23]:
def extract_manual_feature(items, bin_width, bin_height):
    """
    Extract the 17 hand-crafted features of one packing instance.

    Parameters:
    ----------
    items (a list of np.arrays, or any sequence of indexable items):
        element 0 of each item is its width and element 1 its height;
        further elements are ignored
    bin_width (int): the width of a bin
    bin_height (int): the height of a bin

    Returns (a flat list of 17 floats, in the order given below)
    -------
    notes:
    we extract five types of features:
    1) width / height of each item, divided by MAX_W_H_RATIO:
       mean, min, max, std
    2) width / bin width of each item: mean, min, max, std
    3) height / bin height of each item: mean, min, max, std
    4) item area / bin capacity of each item: mean, min, max, std
    5) total item area / bin capacity: a single value

    Raises
    ------
    ValueError: if items is empty, if an item height is not positive, or if a
        bin dimension is not positive. The ratios are undefined in those cases
        and would otherwise surface as NaN/inf features or as an opaque
        reduction error.
    """
    MAX_W_H_RATIO = 18  # constant used to normalise the width / height ratio

    # Only the first two entries of every item are read, exactly like the
    # per-item indexing this replaces; float dtype gives true division below.
    dims = np.asarray([(item[0], item[1]) for item in items], dtype=float)
    if dims.size == 0:
        raise ValueError("items must contain at least one (width, height) pair")
    if bin_width <= 0 or bin_height <= 0:
        raise ValueError("bin_width and bin_height must be positive")

    widths, heights = dims[:, 0], dims[:, 1]
    if np.any(heights <= 0):
        raise ValueError("every item height must be positive")

    capacity = bin_width * bin_height
    areas = widths * heights

    # One ratio vector per feature family, in the documented output order.
    ratio_groups = (
        widths / heights / MAX_W_H_RATIO,
        widths / bin_width,
        heights / bin_height,
        areas / capacity,
    )

    result = []
    for ratios in ratio_groups:
        result.extend([ratios.mean(), ratios.min(), ratios.max(), ratios.std()])
    result.append(areas.sum() / capacity)
    return result

In [24]:
# Demo instance A (expected result: infeasible).
# One item per line, written as "width,height":
# 6,4
# 17,1
# 8,1
# 15,2
# 2,6
# 13,1
# 4,7
# 7,4
# 6,4
# 2,5

# Demo instance B (expected result: feasible), same "width,height" format.
# This is the instance that is defined and evaluated below:
# 2,9
# 3,4
# 2,6
# 13,1
# 4,7
# 7,4
# 6,4
# 2,5
instance = [
           # width, height
            [2,9], 
            [3,4],
            [2,6],
            [13,1],
            [4,7],
            [7,4],
            [6,4],
            [2,5]
            ]
# Build the feature list for a bin that is 20 wide and 10 high.
features = extract_manual_feature(instance, bin_width = 20, bin_height = 10)

## 步骤三：将实例的特征传递给预测器，得到预测结果

In [25]:
# Put the predictor into evaluation mode before running inference.
predictor.eval()

RecursiveScriptModule(
  original_name=FFNNFeasibilityChecker
  (linear1): RecursiveScriptModule(original_name=Linear)
  (linear2): RecursiveScriptModule(original_name=Linear)
  (linear3): RecursiveScriptModule(original_name=Linear)
)

In [26]:
# Convert the feature list into a float tensor placed on the predictor's device.
# NOTE(review): this rebinds `features`, so the cell depends on the previous
# cell having produced the plain feature list.
features = T.tensor(features, dtype=T.float).to(device)
features

tensor([0.1287, 0.0123, 0.7222, 0.2262, 0.2438, 0.1000, 0.6500, 0.1775, 0.5000,
        0.1000, 0.9000, 0.2236, 0.0906, 0.0500, 0.1400, 0.0352, 0.7250])

In [27]:
# Score the single instance; gradients are not needed for inference.
with T.no_grad():
    y = predictor(features)
# Scores at or above 0.5 are reported as "infeasible". The cut-off uses >= so
# that it agrees with the `y_pred >= 0.5` rule applied in eval() further down.
if y.item() >= 0.5:
    print("infeasible")
else:
    print("feasible")
print(y)

feasible
tensor([0.0026])


# 2. 关于如何解析.pkl文件并在测试样本上运行可行性预测器的说明

In [28]:
# Read the .pkl file that holds the training samples.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("./training_samples/TrainingSamples.pkl", "rb") as fp:
    training_dataset = pkl.load(fp)

In [29]:
print(training_dataset[0])

{'key': '2L_VRPTW-25-C1-1-PC2_Batch4.csv0_0.tr', 'label': 0, 'bin': array([20, 10]), 'items': array([[ 2,  6],
       [13,  1],
       [ 4,  7],
       [ 7,  4],
       [ 6,  4],
       [ 2,  5]]), 'packing_class': 'PC2'}


## 自定义数据集

In [30]:
class ItemDataset(Dataset):
    """
    Dataset of packing instances read from a pickled dictionary.

    The pickle is expected to map a sample key to a dict with at least the
    entries 'items', 'bin' and 'label'. Only samples whose label is 0 or 1
    are kept; they stay available as ``self.dataset``.
    """

    def __init__(self, dataset_file_path):
        """
        Load and filter the samples stored at ``dataset_file_path``.

        :param dataset_file_path: path of the .pkl file to read
        """
        super(ItemDataset, self).__init__()
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(dataset_file_path, "rb") as fp:
            dataset = pkl.load(fp)
        self.dataset = {k: v for k, v in dataset.items() if v['label'] in (0, 1)}
        # Freeze the key order once. Rebuilding the key list on every
        # __getitem__ call costs O(n) per sample and O(n^2) per pass.
        # The list reflects self.dataset as it is at construction time.
        self._sample_keys = list(self.dataset)

    def __len__(self):
        """
        Return the number of samples kept after label filtering.
        """
        return len(self._sample_keys)

    def __getitem__(self, i):
        """
        Return the manual features and the label of the i-th sample.

        :param i: position of the sample in the filtered dataset
        :return: (np.ndarray of manual features, label)
        """
        sample = self.dataset[self._sample_keys[i]]
        items, label = sample['items'], sample['label']
        # sample['bin'] holds the bin as (width, height).
        max_width, max_height = sample['bin']
        manual_features = np.asarray(extract_manual_feature(items, max_width, max_height))
        return manual_features, label

In [31]:
# dataset = ItemDataset("./testing_samples/HybridClasses.pkl")
# print(dataset.dataset.keys())
# print(list(dataset.dataset.keys())[8])

## 为模型准备data loader 

In [32]:
def create_loaders(dataset, samplers):
    """Build the evaluation DataLoader.

    :param dataset: dataset to iterate over
    :param samplers: mapping that provides the sampler under the key "test"
    :return: a DataLoader that yields one sample per batch
    """
    return DataLoader(dataset, sampler=samplers["test"], batch_size=1)

def create_samplers(dataset, val_size=3000, split=True):
    """Create the sampler used to draw test samples from the dataset.

    Every index of ``dataset`` is handed to one SubsetRandomSampler.
    ``val_size`` and ``split`` are accepted but not used by this function.

    :return: dict with the single key "test"
    """
    every_index = [*range(len(dataset))]
    samplers = {}
    samplers["test"] = SubsetRandomSampler(every_index)
    return samplers

def eval(model, dataloader, criterion, device):
    """
    Evaluate the model on every batch of the dataloader and report the test
    loss and accuracy.

    NOTE: the name shadows the built-in ``eval``; it is kept because callers
    in this file use it.

    :param model: neural network model; its output is flattened and compared
        against the labels
    :param dataloader: iterable of (inputs, labels) batches
    :param criterion: loss function called as criterion(predictions, labels)
    :param device: device the batches are moved to
    :return: average test loss, test accuracy, list of label tensors (one per
        batch) and list of 0/1 prediction tensors (one per batch)
    :raises ValueError: if the dataloader yields no samples
    """
    model.eval()
    test_loss = 0  # loss summed over samples
    total_correct = 0  # number of correctly classified samples
    n_sample = 0  # number of samples seen
    all_y = []  # true labels, one tensor per batch
    all_pred = []  # predicted labels, one tensor per batch
    with T.no_grad():
        for x, y in dataloader:
            x, y = x.float().to(device), y.float().to(device)
            y_pred = model(x).reshape(-1)  # flatten to one score per sample
            loss = criterion(y_pred, y)
            # Threshold once and reuse it for both the record and the accuracy.
            pred_label = (y_pred >= 0.5).float()
            batch_size = x.shape[0]
            all_y.append(y)
            all_pred.append(pred_label)
            # Weight by batch size; assumes criterion returns a per-batch mean
            # (the default reduction of nn.BCELoss) - TODO confirm for others.
            test_loss += batch_size * loss.item()
            total_correct += (pred_label == y).sum().item()
            n_sample += batch_size
    if n_sample == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("dataloader yielded no samples; nothing to evaluate")
    return test_loss / n_sample, total_correct / n_sample, all_y, all_pred

def inference_mode(model, test_loader, criterion, device):
    """
    Evaluate the model on the test loader and print the test loss, the test
    accuracy and the confusion matrix.

    :param model: neural network model
    :param test_loader: loader over the test samples
    :param criterion: loss function
    :param device: device the evaluation runs on
    :return: None; the results are printed
    """
    test_loss, test_acc, all_y, all_pred = eval(model, test_loader, criterion, device)
    # Print the test loss and accuracy with three decimals.
    print(f"Test loss {test_loss:.3f} Test acc {test_acc:.3f}")
    # Flatten the per-batch tensors into plain Python ints. Flattening (rather
    # than calling .item() on each tensor) also works for batch sizes above 1.
    all_y = [int(v) for batch in all_y for v in batch.reshape(-1).tolist()]
    all_pred = [int(v) for batch in all_pred for v in batch.reshape(-1).tolist()]
    # Pin the label set so the matrix is always 2x2, even when only one class
    # occurs among the labels and predictions.
    print(confusion_matrix(all_y, all_pred, labels=[0, 1]))

## 对测试样本进行推断

In [33]:
def main(dataset_file_path, model_path='Feasibility_Predictor.pt'):
    """
    Run the feasibility predictor on a pickled set of test samples and print
    the loss, accuracy and confusion matrix.

    :param dataset_file_path: path of the .pkl file with the test samples
    :param model_path: path of the TorchScript model archive; defaults to the
        file name that was previously hard-coded
    :return: None; the results are printed by inference_mode
    """
    dataset = ItemDataset(dataset_file_path)
    samplers = create_samplers(dataset, split=False)
    test_loader = create_loaders(dataset, samplers)
    # Load the model directly onto the target device. map_location lets an
    # archive saved with CUDA tensors load on a CPU-only machine.
    device = T.device("cuda" if T.cuda.is_available() else "cpu")
    print("Loading the model")
    model = T.jit.load(model_path, map_location=device)
    model = model.to(device)
    criterion = nn.BCELoss()
    # Inference only: evaluate on the test loader and print the metrics.
    inference_mode(model, test_loader, criterion, device)

## 在混合测试样本上测试模型，其中包含所有装箱类别（packing class）的实例

In [34]:
main(dataset_file_path = "./testing_samples/HybridClasses.pkl")

Loading the model
Test loss 0.121 Test acc 0.958
[[19773  1074]
 [  662 20185]]


In [35]:
main(dataset_file_path = "./testing_samples/PackingClass2.pkl")

Loading the model
Test loss 0.268 Test acc 0.882
[[5219 1275]
 [ 176 5588]]


In [36]:
main(dataset_file_path = "./testing_samples/PackingClass3.pkl")

Loading the model
Test loss 0.153 Test acc 0.945
[[4773  214]
 [ 337 4650]]


In [37]:
main(dataset_file_path = "./testing_samples/PackingClass4.pkl")

Loading the model
Test loss 0.235 Test acc 0.907
[[207   3]
 [ 36 174]]


In [38]:
main(dataset_file_path = "./testing_samples/PackingClass5.pkl")

Loading the model
Test loss 0.009 Test acc 1.000
[[62720    14]
 [    0     0]]
