# Datasets

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class MyDataset1(Dataset):
    """Map-style dataset pairing flattened inputs with flattened labels.

    Both ``.npy`` files are loaded completely into memory. Every sample is
    flattened to one dimension, so an array of shape ``(N, d1, d2, ...)``
    is stored as ``(N, d1 * d2 * ...)``.

    Args:
        input_file: Path of the ``.npy`` file holding the inputs.
        label_file: Path of the ``.npy`` file holding the labels.

    Attributes:
        ori_in_shape: Shape of the input array as stored on disk.
        in_shape: Shape of the input array after per-sample flattening.
        ori_out_shape: Shape of the label array as stored on disk.
        out_shape: Shape of the label array after per-sample flattening.
        datas: The flattened input array.
        labels: The flattened label array.

    Raises:
        ValueError: If the two files hold a different number of samples
            (different length along axis 0).
    """

    def __init__(self, input_file='conductance_datasets.npy', label_file='Y_labels.npy'):
        datas = np.load(input_file)
        labels = np.load(label_file)

        # Fail early: a mismatch would otherwise only show up as an
        # IndexError deep inside a DataLoader (or as silently unused labels).
        if datas.shape[0] != labels.shape[0]:
            raise ValueError(
                f"sample count mismatch: {input_file} has {datas.shape[0]} samples "
                f"but {label_file} has {labels.shape[0]}"
            )

        self.ori_in_shape = datas.shape
        self.datas = self._flatten_samples(datas)
        self.in_shape = self.datas.shape

        self.ori_out_shape = labels.shape
        self.labels = self._flatten_samples(labels)
        self.out_shape = self.labels.shape

    @staticmethod
    def _flatten_samples(array):
        """Reshape ``array`` to 2-D, keeping axis 0 and merging all other axes."""
        # An explicit width (instead of -1) keeps the reshape well defined
        # even when the array contains zero samples.
        width = int(np.prod(array.shape[1:], dtype=np.int64))
        return array.reshape(array.shape[0], width)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of float32 tensors (``input``, ``label``)."""
        return {
            "input": torch.from_numpy(self.datas[idx]).float(),
            "label": torch.from_numpy(self.labels[idx]).float(),
        }

    def __len__(self):
        """Return the number of samples."""
        return len(self.datas)

if __name__ == '__main__':
    # Smoke test: load one split, print its shapes before and after
    # flattening, then print the shape of every batch the loader yields.
    path = "/home/cusps/python/ML_nanowire/disorder/delta_chem_dis/delta_chem_dis"

    dataset = MyDataset1(
        input_file=f"{path}/train/train_data.npy",
        label_file=f"{path}/train/train_labels.npy",
    )
    # Alternative splits:
    # dataset = MyDataset1(input_file = path + '/test/test_data.npy', label_file= path + '/test/test_labels.npy')
    # dataset = MyDataset1(input_file = path + '/vali/vali_data.npy', label_file= path + '/vali/vali_labels.npy')

    for shape in (dataset.ori_in_shape, dataset.ori_out_shape,
                  dataset.in_shape, dataset.out_shape):
        print(shape)

    data_dataloader = DataLoader(dataset, batch_size=10000, shuffle=True)
    for batch_idx, batch in enumerate(data_dataloader):
        print(batch_idx, batch['input'].shape, batch['label'].shape)



(8000, 5, 15, 15, 4)
(8000, 2, 5)
(8000, 4500)
(8000, 10)
0 torch.Size([8000, 4500]) torch.Size([8000, 10])


# Model

In [6]:
import torch

class MyNet(torch.nn.Module):
    """Fully connected network mapping a flat feature vector to ``out_dim`` values.

    Four hidden stages, each ``Linear -> ReLU -> BatchNorm1d``, with widths
    ``hidden_dim``, ``hidden_dim // 2``, ``hidden_dim // 4`` and
    ``hidden_dim // 8``, followed by a final ``Linear`` output layer.

    Args:
        seq_num: Number of input features per sample.
        out_dim: Number of output values per sample.
        hidden_dim: Width of the first hidden layer.
    """

    def __init__(self, seq_num=4500, out_dim=10, hidden_dim=1024) -> None:
        super().__init__()
        widths = [seq_num] + [hidden_dim // (2 ** stage) for stage in range(4)]

        # Modules are created in the same order as before, so the indices
        # inside ``self.mlp`` (and therefore the state-dict keys of saved
        # checkpoints) are unchanged.
        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(torch.nn.Linear(in_features, out_features))
            layers.append(torch.nn.ReLU())
            layers.append(torch.nn.BatchNorm1d(out_features))
        layers.append(torch.nn.Linear(widths[-1], out_dim))
        self.mlp = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Apply the network to ``x`` of shape ``(batch, seq_num)`` or ``(batch, seq_num, 1)``.

        Returns:
            Tensor of shape ``(batch, out_dim)``.
        """
        # flatten(start_dim=1) only merges the non-batch axes. A bare
        # squeeze() would also drop the batch axis when batch == 1 and hand
        # BatchNorm1d a 1-D tensor, which it rejects.
        return self.mlp(x.flatten(start_dim=1))
        

if __name__ == '__main__':
    # Smoke test: push one random batch through the network and print the
    # output shape. Falls back to the CPU when no CUDA device is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = MyNet().to(device)
    # Named sample_batch rather than `input` so the builtin input() is not
    # shadowed at module level for the rest of the session.
    sample_batch = torch.rand([16, 4500]).to(device)
    out = net(sample_batch)
    print(out.shape)

    # # Print the number of parameters in each layer
    # for name, param in net.named_parameters():
    #     print(f"Layer: {name}, Parameters: {param.numel()}")






torch.Size([16, 10])


# train

In [8]:
from torch.utils.data import DataLoader
import torch
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import torch.nn.functional as F

# Module-level TensorBoard writer; train() logs its per-epoch scalars through
# it. Constructed without arguments, so SummaryWriter chooses its default log
# directory.
writer = SummaryWriter()


def train(path="/home/cusps/python/ML_nanowire/disorder/delta_chem_dis/delta_chem_dis",
          epochs=500, batch_size=10000, save_path='model_weights.pth'):
    """Train ``MyNet`` with an MSE loss and keep the best checkpoint.

    Each epoch runs one pass over the training split and one pass over the
    validation split, prints and logs (through the module-level ``writer``)
    the mean losses and the learning rate, steps the plateau scheduler on
    the validation loss, and saves the weights whenever the validation loss
    is at least as good as the best value seen so far.

    Args:
        path: Dataset root containing ``train/train_data.npy``,
            ``train/train_labels.npy``, ``vali/vali_data.npy`` and
            ``vali/vali_labels.npy``.
        epochs: Number of training epochs.
        batch_size: Batch size for both data loaders.
        save_path: File the best model ``state_dict`` is written to.
    """
    train_dataset = MyDataset1(input_file=path + '/train/train_data.npy',
                               label_file=path + '/train/train_labels.npy')
    vali_dataset = MyDataset1(input_file=path + '/vali/vali_data.npy',
                              label_file=path + '/vali/vali_labels.npy')

    net = MyNet().cuda()

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    vali_dataloader = DataLoader(vali_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

    optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-4, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.99, patience=100, threshold=1e-4,
                                                           threshold_mode='rel', cooldown=100, min_lr=1e-20)

    # One loss module for the whole run instead of a new one per batch.
    criterion = torch.nn.MSELoss()
    # criterion = torch.nn.L1Loss()

    best_vali_loss = float('inf')
    for epoch in range(epochs):
        train_loss = []
        vali_loss = []

        net.train()
        for batch in train_dataloader:
            features = batch['input'].unsqueeze(-1).cuda()
            targets = batch['label'].cuda()
            loss = criterion(net(features), targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        net.eval()
        # No gradients are needed for validation; without no_grad() every
        # forward pass would build an autograd graph that is never used.
        with torch.no_grad():
            for batch in vali_dataloader:
                features = batch['input'].unsqueeze(-1).cuda()
                targets = batch['label'].cuda()
                vali_loss.append(criterion(net(features), targets).item())

        mean_train_loss = np.mean(train_loss)
        mean_vali_loss = np.mean(vali_loss)
        current_lr = optimizer.param_groups[0]['lr']

        print(f"Epoch {epoch}: {mean_train_loss} {mean_vali_loss} {current_lr}")

        writer.add_scalar("loss: ", mean_train_loss, global_step=epoch)
        writer.add_scalar("learn rate: ", current_lr, global_step=epoch)
        writer.add_scalar("validation loss: ", mean_vali_loss, global_step=epoch)

        scheduler.step(mean_vali_loss)

        # "<=" keeps the previous behaviour of also saving on a tie with the
        # best validation loss so far.
        if mean_vali_loss <= best_vali_loss:
            best_vali_loss = mean_vali_loss
            torch.save(net.state_dict(), save_path)
            print("save model")

    # Make sure every logged scalar reaches disk before returning.
    writer.flush()



# Start training when this cell/module is executed directly.
if __name__ == '__main__':
    train()



Epoch 0: 0.45995262265205383 0.1287875473499298 0.0001
save model
Epoch 1: 0.3186526298522949 0.12868452072143555 0.0001
save model
Epoch 2: 0.25823432207107544 0.1289598047733307 0.0001
Epoch 3: 0.21425428986549377 0.12925595045089722 0.0001
Epoch 4: 0.1820845752954483 0.12959882616996765 0.0001
Epoch 5: 0.15886089205741882 0.12997519969940186 0.0001
Epoch 6: 0.1413014680147171 0.13036158680915833 0.0001
Epoch 7: 0.12713557481765747 0.13069196045398712 0.0001
Epoch 8: 0.11492162942886353 0.13091078400611877 0.0001
Epoch 9: 0.1045125424861908 0.13094885647296906 0.0001
Epoch 10: 0.09576442837715149 0.13075608015060425 0.0001
Epoch 11: 0.08870066702365875 0.13034303486347198 0.0001
Epoch 12: 0.08275318145751953 0.12972614169120789 0.0001
Epoch 13: 0.07766813784837723 0.12891417741775513 0.0001
Epoch 14: 0.07312433421611786 0.12788966298103333 0.0001
save model
Epoch 15: 0.0690755844116211 0.12666384875774384 0.0001
save model
Epoch 16: 0.0654187947511673 0.12524613738059998 0.0001
save 

# loss

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib tk

# Directory containing the train.csv and vali.csv files read below.
loss_file = '/home/cusps/python/ML_disorder/mlp/loss/Jun19_11-30-49_cusps/'

# Read the CSV files; column 1 supplies the x values and column 2 the y values.
train_loss = pd.read_csv(f"{loss_file}train.csv")
vali_loss = pd.read_csv(f"{loss_file}vali.csv")
x1, y1 = train_loss.iloc[:, 1], train_loss.iloc[:, 2]
x2, y2 = vali_loss.iloc[:, 1], vali_loss.iloc[:, 2]

# Draw both curves on the same axes.
for xs, ys, curve_label in ((x1, y1, 'train'), (x2, y2, 'validation')):
    plt.plot(xs, ys, label=curve_label)
plt.xlabel('epoch')
plt.ylabel('MSE')
plt.title('loss')
plt.legend()
plt.grid()
plt.show()


# test

In [None]:
from model import MyNet
from dataset import MyDataset1
import torch
import numpy as np

def test():
    """Evaluate the saved weights on the test split.

    Loads the test arrays, rebuilds ``MyNet`` with dimensions taken from the
    dataset, restores ``model_weights.pth``, runs inference without
    gradients, prints the overall MSE and writes predictions and labels to
    ``test_predictions.npy`` / ``test_labels.npy``.
    """
    # Load the test dataset.
    path = "/home/cusps/python/ML_disorder/new_data"
    test_dataset = MyDataset1(input_file=f"{path}/test/test_data.npy",
                              label_file=f"{path}/test/test_labels.npy")
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=5000, shuffle=False)

    # Build the model with input/output sizes matching the dataset.
    n_features = test_dataset.in_shape[1]
    n_outputs = test_dataset.out_shape[1]
    net = MyNet(seq_num=n_features, out_dim=n_outputs, hidden_dim=1024).cuda().eval()

    # Restore the trained weights.
    state = torch.load('model_weights.pth', weights_only=True)
    net.load_state_dict(state)

    # Run the model batch by batch.
    pred_chunks, label_chunks = [], []
    with torch.no_grad():
        for batch in test_dataloader:
            features = batch['input'].unsqueeze(-1).cuda()
            pred_chunks.append(net(features).cpu().numpy())
            label_chunks.append(batch['label'].numpy())

    # Merge the per-batch predictions and ground-truth labels.
    all_predictions = np.vstack(pred_chunks)
    all_labels = np.vstack(label_chunks)

    squared_error = (all_predictions - all_labels) ** 2
    mse_loss = np.mean(squared_error)
    print(f"Test MSE Loss: {mse_loss}")

    # l1_loss = np.mean(np.abs(all_predictions - all_labels))
    # print(f"Test l1 Loss: {l1_loss}")

    # Optional: save the predictions (and labels) for later analysis.
    np.save('test_predictions.npy', all_predictions)
    np.save('test_labels.npy', all_labels)


# Run the evaluation when this cell/module is executed directly.
if __name__ == '__main__':
    test()



# 衡量预测结果的好坏 (Evaluating the quality of the predictions)

In [None]:
'''inner product fidelity'''

%matplotlib tk
import matplotlib.pyplot as plt
import numpy as np


# Load the label and prediction arrays from .npy files in the working directory.
exact_label = np.load("test_labels.npy")
prediction = np.load("test_predictions.npy")


# Collects one F value per row of exact_label.
F_list = []
def calculate_F(A, B):
    """Return the inner product of ``A`` and ``B`` divided by the product of their norms.

    For 1-D vectors this is their cosine similarity. An equivalent form is
    ``np.dot(A, B) / np.sqrt(np.dot(A, A) * np.dot(B, B))``.
    """
    overlap = np.dot(A, B)
    scale = np.linalg.norm(A) * np.linalg.norm(B)
    return overlap / scale

# One F value per sample, pairing row k of the labels with row k of the predictions.
F_list.extend(
    calculate_F(exact_label[row], prediction[row])
    for row in range(exact_label.shape[0])
)

# Smallest, largest and mean F, followed by the number of samples.
print(min(F_list),max(F_list),np.mean(F_list))
print(len(F_list),'\n')


'''所有F的分布'''
# Plot every F value against its sample index.
sample_index = np.arange(len(F_list))
plt.figure()
plt.plot(sample_index, F_list, 'o')
plt.xlim(0, len(F_list))
plt.title('F distribution')
plt.ylabel('F value')
plt.show()


'''R2'''
def calculate_r2(label, pred):
    """Return the R^2 score of ``pred`` against ``label``.

    Computed as ``1 - ss_res / ss_tot``, where both sums run over every
    element and ``ss_tot`` is measured about the mean of ``label`` taken
    along axis 0.
    """
    label = np.array(label)
    pred = np.array(pred)

    residual = label - pred
    centered = label - np.mean(label, axis=0)

    ss_res = np.square(residual).sum()
    ss_tot = np.square(centered).sum()
    return 1 - (ss_res / ss_tot)

# R^2 of the predictions against the labels over the whole loaded set.
r2 = calculate_r2(exact_label, prediction)
print(f"R2 = {r2}")

In [None]:
'''测试无disorder的表现情况'''

from model import MyNet
from dataset import MyDataset1
import torch
import numpy as np

def test(input_file=None, label_file=None, batch_size=5000):
    """Run the saved model on a dataset and return predictions and labels.

    This cell is labelled as the check on data without disorder. The dataset
    is built inside the function from the given files; previously the body
    read ``test_dataset`` and ``test_dataloader`` without defining them,
    which raised ``NameError``.

    Args:
        input_file: Path of the ``.npy`` file with the inputs to evaluate.
        label_file: Path of the ``.npy`` file with the matching labels.
        batch_size: Batch size used for inference.

    Returns:
        Tuple ``(all_predictions, all_labels)`` of NumPy arrays with the
        per-batch results stacked along axis 0.

    Raises:
        ValueError: If ``input_file`` or ``label_file`` is not given.
    """
    if input_file is None or label_file is None:
        raise ValueError(
            "test() needs input_file and label_file: pass the .npy files of "
            "the dataset to evaluate"
        )

    test_dataset = MyDataset1(input_file=input_file, label_file=label_file)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Build the model with input/output sizes matching the dataset.
    net = MyNet(seq_num=test_dataset.in_shape[1], out_dim=test_dataset.out_shape[1], hidden_dim=1024).cuda().eval()

    # Restore the trained weights.
    net.load_state_dict(torch.load('model_weights.pth', weights_only=True))

    # Run the model batch by batch.
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for batch in test_dataloader:
            features = batch['input'].unsqueeze(-1).cuda()
            predictions = net(features)
            all_predictions.append(predictions.cpu().numpy())
            all_labels.append(batch['label'].numpy())

    # Hand the stacked results back instead of discarding them.
    return np.vstack(all_predictions), np.vstack(all_labels)
    

    


# Run the evaluation when this cell/module is executed directly.
if __name__ == '__main__':
    test()

