# Datasets

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class MyDataset1(Dataset):
    """Map-style dataset pairing flattened input samples with label rows.

    Both arrays are read from ``.npy`` files. The inputs are reshaped to
    ``(labels.shape[0], -1)`` so each sample becomes one flat vector that
    lines up with the label row of the same index.

    Attributes set by the constructor:
        ori_in_shape: shape of the input array as stored on disk.
        in_shape: shape of the inputs after flattening.
        out_shape: shape of the label array.
        datas / labels: the NumPy arrays that items are drawn from.
    """

    def __init__(self, input_file='conductance_datasets.npy', label_file='Y_labels.npy'):
        raw_inputs = np.load(input_file)
        labels = np.load(label_file)
        # One row per label; everything else collapses into the feature axis.
        flat_inputs = raw_inputs.reshape(labels.shape[0], -1)

        self.ori_in_shape = raw_inputs.shape
        self.out_shape = labels.shape
        self.in_shape = flat_inputs.shape

        self.datas = flat_inputs
        self.labels = labels

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of float32 tensors."""
        sample = torch.from_numpy(self.datas[idx]).float()
        target = torch.from_numpy(self.labels[idx]).float()
        return {"input": sample, "label": target}

    def __len__(self):
        """Return the number of samples (rows of the flattened inputs)."""
        return self.datas.shape[0]

if __name__ == '__main__':
    # Smoke test: iterate the dataset once and report the shape of every batch.
    dataset = MyDataset1(input_file='../conductance_datasets.npy', label_file='../Y_labels.npy')
    train_dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
    for batch_index, batch in enumerate(train_dataloader):
        input_shape, label_shape = batch['input'].shape, batch['label'].shape
        print(batch_index, input_shape, label_shape)

# Model

In [None]:
import torch

class MyNet(torch.nn.Module):
    """Fully connected regressor mapping a flat input vector to ``out_dim`` values.

    The network is four ``Linear -> ReLU6 -> BatchNorm1d`` stages of width
    ``hidden_dim``, ``hidden_dim // 2``, ``hidden_dim // 4`` and
    ``hidden_dim // 8``, followed by a final ``Linear`` to ``out_dim``.

    Args:
        seq_num: number of input features per sample.
        out_dim: number of regression outputs per sample.
        hidden_dim: width of the first hidden layer.
    """

    def __init__(self, seq_num=4500, out_dim=5, hidden_dim=1024) -> None:
        super().__init__()
        widths = [seq_num, hidden_dim, hidden_dim // 2, hidden_dim // 4, hidden_dim // 8]
        layers = []
        for in_width, out_width in zip(widths, widths[1:]):
            layers += [torch.nn.Linear(in_width, out_width),
                       torch.nn.ReLU6(),
                       torch.nn.BatchNorm1d(out_width)]
        layers.append(torch.nn.Linear(widths[-1], out_dim))
        # Module order is unchanged, so the Sequential indices (mlp.0 ... mlp.12)
        # and therefore the state_dict keys match previously saved checkpoints.
        self.mlp = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Run the MLP on ``x`` of shape ``(batch, seq_num)`` or ``(batch, seq_num, 1)``.

        Everything after the batch axis is flattened. A bare ``squeeze()``
        would also drop a batch axis of size 1, handing BatchNorm1d a 1-D
        tensor and making single-sample batches fail.
        """
        return self.mlp(x.flatten(start_dim=1))
        
if __name__ == '__main__':
    net = MyNet(seq_num=4500).cuda()
    # Optional forward-pass check:
    # input = torch.rand([16, 4500]).cuda()
    # out = net(input)
    # print(out.shape)

    # Print the number of parameters held by each layer.
    for layer_name, tensor in net.named_parameters():
        print(f"Layer: {layer_name}, Parameters: {tensor.numel()}")






# train

In [15]:
from model import MyNet
from dataset import MyDataset1
from torch.utils.data import DataLoader
import torch
from torch.utils.tensorboard import SummaryWriter
import numpy as np


# Module-level TensorBoard writer that train() logs its per-epoch scalars to.
writer = SummaryWriter()
def train():
    """Train MyNet on the training split and checkpoint the best validation epoch.

    Every epoch runs one optimisation pass over the training set (L1 loss)
    and then a gradient-free pass over the validation set. The mean training
    loss, mean validation loss and current learning rate are printed and
    logged to the module-level TensorBoard ``writer``. Whenever the
    validation loss is at least as low as in every earlier epoch, the weights
    are written to ``model_weights.pth``.
    """
    train_dataset = MyDataset1(input_file='/home/cusps/桌面/ML_disorder/data0/train/train_data.npy',
                               label_file='/home/cusps/桌面/ML_disorder/data0/train/train_labels.npy')

    vali_dataset = MyDataset1(input_file='/home/cusps/桌面/ML_disorder/data0/vali/vali_data.npy',
                              label_file='/home/cusps/桌面/ML_disorder/data0/vali/vali_labels.npy')

    net = MyNet(seq_num=train_dataset.in_shape[1], out_dim=train_dataset.out_shape[1], hidden_dim=1024).cuda()

    # To resume from an earlier checkpoint, load it here:
    # net.load_state_dict(torch.load('model_weights.pth', weights_only=True))

    train_dataloader = DataLoader(train_dataset, batch_size=16384 * 2, shuffle=True)
    # Evaluation does not depend on sample order; a fixed order keeps the
    # reported validation loss reproducible from epoch to epoch.
    vali_dataloader = DataLoader(vali_dataset, batch_size=16384 * 2, shuffle=False)

    optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-4, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.99, patience=100, threshold=1e-4,
                                                           threshold_mode='rel', cooldown=100, min_lr=1e-20)
    epoch = 1000

    best_vali_loss = float('inf')
    for eid in range(epoch):
        all_loss = []
        vali_loss = []

        net.train()
        for batch in train_dataloader:
            inputs = batch['input'].unsqueeze(-1).cuda()
            labels = batch['label'].cuda()
            # torch.nn.MSELoss() is a drop-in alternative to the L1 loss.
            loss = torch.nn.functional.l1_loss(net(inputs), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            all_loss.append(loss.item())

        net.eval()
        # No gradients are needed for validation; skipping graph construction
        # saves memory at these very large batch sizes.
        with torch.no_grad():
            for batch in vali_dataloader:
                inputs = batch['input'].unsqueeze(-1).cuda()
                labels = batch['label'].cuda()
                loss = torch.nn.functional.l1_loss(net(inputs), labels)
                vali_loss.append(loss.item())

        train_mean = np.mean(all_loss)
        vali_mean = np.mean(vali_loss)
        # Learning rate that was in effect during this epoch.
        current_lr = optimizer.param_groups[0]['lr']

        print(f"Epoch {eid}: {train_mean} {vali_mean} {current_lr}")
        writer.add_scalar("loss: ", train_mean, global_step=eid)
        writer.add_scalar("learn rate: ", current_lr, global_step=eid)
        writer.add_scalar("validation loss: ", vali_mean, global_step=eid)

        # Step the plateau scheduler exactly once per epoch, on the validation
        # loss. Feeding it both the training and the validation loss mixes two
        # metrics in its "best so far" / patience bookkeeping.
        scheduler.step(vali_mean)

        # Keep the checkpoint of the best (lowest) validation loss seen so far.
        if vali_mean <= best_vali_loss:
            best_vali_loss = vali_mean
            torch.save(net.state_dict(), 'model_weights.pth')
            print("save model")

    # Make sure every logged scalar reaches disk before returning.
    writer.flush()


if __name__ == '__main__':
    train()



Epoch 0: 1.251953363418579 1.1906276941299438 0.0001
save model
Epoch 1: 0.9653822779655457 1.1876380443572998 0.0001
save model
Epoch 2: 0.8310081958770752 1.1843574047088623 0.0001
save model
Epoch 3: 0.7438316345214844 1.1806236505508423 0.0001
save model
Epoch 4: 0.6741254925727844 1.1764767169952393 0.0001
save model
Epoch 5: 0.6137790083885193 1.171654462814331 0.0001
save model
Epoch 6: 0.5664411187171936 1.1660360097885132 0.0001
save model
Epoch 7: 0.5265085697174072 1.1596918106079102 0.0001
save model
Epoch 8: 0.49061957001686096 1.152283787727356 0.0001
save model
Epoch 9: 0.45945292711257935 1.1435526609420776 0.0001
save model
Epoch 10: 0.4300873875617981 1.1335148811340332 0.0001
save model
Epoch 11: 0.4030851423740387 1.122032880783081 0.0001
save model
Epoch 12: 0.3788587152957916 1.1085395812988281 0.0001
save model
Epoch 13: 0.357928067445755 1.0928807258605957 0.0001
save model
Epoch 14: 0.33894872665405273 1.0758581161499023 0.0001
save model
Epoch 15: 0.3218072056

# test

In [14]:
from model import MyNet
from dataset import MyDataset1
import torch
import numpy as np

def test():
    """Evaluate the saved checkpoint on the test split.

    Loads ``model_weights.pth`` into a freshly built MyNet, runs it over the
    test set without gradients, prints the mean absolute error and writes the
    stacked predictions and labels to ``test_predictions.npy`` and
    ``test_labels.npy``.
    """
    # Load the test dataset.
    test_dataset = MyDataset1(input_file='/home/cusps/桌面/ML_disorder/data0/test/test_data.npy',
                              label_file='/home/cusps/桌面/ML_disorder/data0/test/test_labels.npy')
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16384, shuffle=False)

    # Build the model with dimensions taken from the data, in eval mode.
    n_features = test_dataset.in_shape[1]
    n_outputs = test_dataset.out_shape[1]
    net = MyNet(seq_num=n_features, out_dim=n_outputs, hidden_dim=1024).cuda().eval()

    # Load the trained weights.
    state = torch.load('model_weights.pth', weights_only=True)
    net.load_state_dict(state)

    # Run the model batch by batch, collecting results on the CPU.
    prediction_chunks, label_chunks = [], []
    with torch.no_grad():
        for batch in test_dataloader:
            features = batch['input'].unsqueeze(-1).cuda()
            prediction_chunks.append(net(features).cpu().numpy())
            label_chunks.append(batch['label'].numpy())

    # Stack the per-batch predictions and ground-truth labels.
    all_predictions = np.vstack(prediction_chunks)
    all_labels = np.vstack(label_chunks)

    # Alternative metric (mean squared error):
    # mse_loss = np.mean((all_predictions - all_labels) ** 2)
    # print(f"Test MSE Loss: {mse_loss}")

    l1_loss = np.abs(all_predictions - all_labels).mean()
    print(f"Test l1 Loss: {l1_loss}")

    # Optional: persist the predictions for later analysis.
    np.save('test_predictions.npy', all_predictions)
    np.save('test_labels.npy', all_labels)


if __name__ == '__main__':
    test()



Test l1 Loss: 0.18247239291667938


In [10]:
'''inner product fidelity'''

import numpy as np

# Per-sample fidelity values, appended to later in this cell.
F_list = []

# Ground-truth labels and model predictions, one row per test sample.
exact_label = np.load("test_labels.npy")
prediction_label = np.load("test_predictions.npy")
def calculate_F(A, B):
    """Return the normalised inner product ``A.B / sqrt((A.A) * (B.B))``."""
    overlap = np.dot(A, B)
    norm_product = np.sqrt(np.dot(A, A) * np.dot(B, B))
    return overlap / norm_product

# Fidelity of every prediction row against the exact label row of the same index.
for i, exact_row in enumerate(exact_label):
    F = calculate_F(exact_row, prediction_label[i])
    F_list.append(F)

# Worst, best and mean fidelity.
print(min(F_list), max(F_list), np.mean(F_list))


0.5452303 0.9997776 0.9869819


In [None]:
'''所有F的分布'''

%matplotlib tk
import matplotlib.pyplot as plt
import numpy as np
# Scatter the per-sample fidelity values against their sample index.
sample_index = np.arange(len(F_list))
plt.figure()
plt.plot(sample_index, F_list, 'o')
plt.xlim(0, 1000)  # restrict the view to indices 0..1000
plt.title('F distribution')
plt.ylabel('F value')

plt.show()


In [11]:
'''R2'''
import numpy as np

def calculate_r2(label_list, pred_list):
    """Return the coefficient of determination ``1 - SS_res / SS_tot``.

    ``SS_res`` is the sum of squared differences between labels and
    predictions. ``SS_tot`` is the sum of squared deviations of the labels
    from their mean along axis 0. Both sums run over every element.
    """
    labels = np.array(label_list)
    preds = np.array(pred_list)

    residuals = labels - preds
    deviations = labels - labels.mean(axis=0)

    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum(deviations ** 2)
    return 1 - (ss_res / ss_tot)

# Score the loaded predictions against the exact labels.
r2 = calculate_r2(label_list=exact_label, pred_list=prediction_label)
print(f"R2 = {r2}")

R2 = 0.9564235620200634


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the exported loss curves: column 1 supplies the x values and
# column 2 the y values of each curve.
train_loss = pd.read_csv('loss/trainloss.csv')
x1, y1 = train_loss.iloc[:, 1], train_loss.iloc[:, 2]

vali_loss = pd.read_csv('loss/validationloss.csv')
x2, y2 = vali_loss.iloc[:, 1], vali_loss.iloc[:, 2]

# Draw both curves on the same axes.
for xs, ys, curve_name in ((x1, y1, 'train'), (x2, y2, 'validation')):
    plt.plot(xs, ys, label=curve_name)
plt.xlabel('epoch')
plt.ylabel('MAE')
plt.title('loss')
plt.legend()
plt.grid()
plt.show()


In [None]:
'''测试无disorder的表现情况'''

from model import MyNet
from dataset import MyDataset1
import torch
import numpy as np

def test(input_file=None, label_file=None, batch_size=16384):
    """Run the saved checkpoint over a dataset and return its outputs.

    This cell is meant for evaluating data without disorder. The dataset and
    loader are built here from the given paths, because ``test_dataset`` and
    ``test_dataloader`` are not defined anywhere this function can see.

    Args:
        input_file: path of the ``.npy`` file holding the inputs.
        label_file: path of the ``.npy`` file holding the labels.
        batch_size: number of samples per evaluation batch.

    Returns:
        A ``(predictions, labels)`` tuple of NumPy arrays stacked over all
        batches, in dataset order.

    Raises:
        ValueError: if ``input_file`` or ``label_file`` is not supplied.
    """
    if input_file is None or label_file is None:
        raise ValueError("test() requires input_file and label_file: "
                         "paths to the .npy inputs and labels to evaluate")

    # Load the evaluation dataset.
    test_dataset = MyDataset1(input_file=input_file, label_file=label_file)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Build the model with dimensions taken from the data, in eval mode.
    net = MyNet(seq_num=test_dataset.in_shape[1], out_dim=test_dataset.out_shape[1], hidden_dim=1024).cuda().eval()

    # Load the trained weights.
    net.load_state_dict(torch.load('model_weights.pth', weights_only=True))

    # Run the model batch by batch, collecting results on the CPU.
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for batch in test_dataloader:
            inputs = batch['input'].unsqueeze(-1).cuda()
            predictions = net(inputs)
            all_predictions.append(predictions.cpu().numpy())
            all_labels.append(batch['label'].numpy())

    # Hand the stacked results back instead of discarding them.
    return np.vstack(all_predictions), np.vstack(all_labels)


if __name__ == '__main__':
    # NOTE(review): the disorder-free data paths are not defined in this
    # notebook; pass them as test(input_file=..., label_file=...).
    test()

