In [1]:
import argparse
import math
import os
from sklearn.model_selection import KFold
import pandas as pd
import torch
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from sklearn.model_selection import KFold
from datasets import get_train_test_data, MMoE_Dataset
from models import MMoE_Model
import numpy as np
import random

def create_dir(file_path):
    if not os.path.exists(os.path.dirname(file_path)):
        os.makedirs(os.path.dirname(file_path))



def trainer(train_loader, model, model_save_path, device ,lr, epochs, early_stop_num,verbose=True, writer_flag=False):

    criterion = nn.MSELoss(reduction='mean')


    params = model.parameters()
    theta1 = model.theta1
    theta2 = model.theta2
    theta3 = model.theta3
    theta4 = model.theta4

    similarity_criterion = nn.CosineSimilarity()


    optimizer = torch.optim.Adam(params, lr=lr)

    create_dir(model_save_path)

    n_epochs, best_loss, step, early_stop_count = epochs, math.inf, 0, 0


    best_loss1 = best_loss2 = best_loss3 = best_loss4 = math.inf
    writer = None
    if writer_flag:
        writer = SummaryWriter()
    ran = range(n_epochs)
    if not verbose:
        ran = tqdm(range(n_epochs), position=0, leave=True)
    for epoch in ran:
        model.train()
        loss_total_record = []
        loss1_record = []
        loss2_record = []
        loss3_record = []
        loss4_record = []

        for x, y1, y2, y3, y4 in train_loader:
            optimizer.zero_grad()
            x, y1, y2, y3, y4 = x.to(device), y1.to(device), y2.to(device), y3.to(device), y4.to(device),

            pred1, pred2, pred3, pred4,s1,s2,s3,s4,_ = model(x)

            loss1 = criterion(pred1, y1)
            loss2 = criterion(pred2, y2)
            loss3 = criterion(pred3, y3)
            loss4 = criterion(pred4, y4)

            loss5 = similarity_criterion(s1, s2) + similarity_criterion(s1, s3) + similarity_criterion(s1, s4) \
                    + similarity_criterion(s2, s3) + similarity_criterion(s2, s4) + similarity_criterion(s3, s4)
            loss5 = loss5.sum()
            loss5.backward(retain_graph=True)
            loss_total = loss1 / (theta1 ** 2) +  loss2 / (theta2 ** 2) + loss3 / (theta3 ** 2) + loss4 / (theta4 ** 2) +  2 * (torch.log(theta1) +torch.log(theta2) + torch.log(theta3) + torch.log(theta4))



            loss_total.backward()
            optimizer.step()
            step += 1

            loss_total_record.append(loss_total.detach().item())
            loss1_record.append(loss1.detach().item())
            loss2_record.append(loss2.detach().item())
            loss3_record.append(loss3.detach().item())
            loss4_record.append(loss4.detach().item())

        mean_train_loss_total = sum(loss_total_record) / len(loss_total_record)

        mean_train_loss1 = sum(loss1_record) / len(loss1_record)
        mean_train_loss2 = sum(loss2_record) / len(loss2_record)
        mean_train_loss3 = sum(loss3_record) / len(loss3_record)
        mean_train_loss4 = sum(loss4_record) / len(loss4_record)
        if writer_flag:
            writer.add_scalar('Loss_total/train', mean_train_loss_total, step)
            writer.add_scalar('Loss1/train', mean_train_loss1, step)
            writer.add_scalar('Loss2/train', mean_train_loss2, step)
            writer.add_scalar('Loss3/train', mean_train_loss3, step)
            writer.add_scalar('Loss4/train', mean_train_loss4, step)

        if verbose and epoch % 100 == 99:
            print(
                f'Epoch [{epoch + 1}/{n_epochs}]: Train loss_total: {mean_train_loss_total:.6f}, loss1: {mean_train_loss1:.6f},loss2: {mean_train_loss2:.6f},loss3: {mean_train_loss3:.6f},loss4: {mean_train_loss4:.6f}')

        if mean_train_loss_total < best_loss:
            best_loss = mean_train_loss_total
            best_loss1 = mean_train_loss1
            best_loss2 = mean_train_loss2
            best_loss3 = mean_train_loss3
            best_loss4 = mean_train_loss4

            torch.save(model.state_dict(), model_save_path)  # Save your best model
            if verbose:
                print(
                    f"\nSave with loss_total: {mean_train_loss_total:.6f}, loss1: {mean_train_loss1:.6f},loss2: {mean_train_loss2:.6f},loss3: {mean_train_loss3:.6f},loss4: {mean_train_loss4:.6f}")
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count >= early_stop_num:
            print(f'\nModel is not improving, so we halt the training session at epoch: {epoch + 1}.')
            print(
                f"\n Best Model loss_total: {best_loss:.6f}, loss1: {best_loss1:.6f},loss2: {best_loss2:.6f},loss3: {best_loss3:.6f},loss4: {best_loss4:.6f}")
            print(f"\ntheta1:{theta1},theta2:{theta2},theta3:{theta3},theta4:{theta4}")
            return

    print(f'\nTrain all epochs.')
    print(
        f"\n Best Model loss_total: {best_loss:.6f}, loss1: {best_loss1:.6f},loss2: {best_loss2:.6f},loss3: {best_loss3:.6f},loss4: {best_loss4:.6f}")
    print(f"\ntheta1:{theta1},theta2:{theta2},theta3:{theta3},theta4:{theta4}")




def predict(model,device,data,y):
    data = torch.Tensor(data).to(device)
    y1 = torch.Tensor(y[:,0]).to(device)
    y2 = torch.Tensor(y[:,1]).to(device)
    y3 = torch.Tensor(y[:,2]).to(device)
    y4 = torch.Tensor(y[:,3]).to(device)

    criterion = nn.MSELoss(reduction="mean")
    model.eval()
    pred1,pred2,pred3,pred4,_,_,_,_,_,= model(data)

    pred_list = [pred1,pred2,pred3,pred4]
    rmse1  = criterion(pred1,y1).item()**0.5
    rmse2  = criterion(pred2,y2).item() ** 0.5
    rmse3  = criterion(pred3,y3).item()** 0.5
    rmse4  = criterion(pred4,y4).item()** 0.5
    rmse_list = [rmse1,rmse2,rmse3,rmse4]

    r1 = r2_score(y1.cpu().detach().numpy(),pred1.cpu().detach().numpy())
    r2 = r2_score(y2.cpu().detach().numpy(),pred2.cpu().detach().numpy())
    r3 = r2_score(y3.cpu().detach().numpy(),pred3.cpu().detach().numpy())
    r4 = r2_score(y4.cpu().detach().numpy(),pred4.cpu().detach().numpy())

    m1 = mean_absolute_error(y1.cpu().detach().numpy(),pred1.cpu().detach().numpy())
    m2 = mean_absolute_error(y2.cpu().detach().numpy(),pred2.cpu().detach().numpy())
    m3 = mean_absolute_error(y3.cpu().detach().numpy(),pred3.cpu().detach().numpy())
    m4 = mean_absolute_error(y4.cpu().detach().numpy(),pred4.cpu().detach().numpy())

    mape1 = mean_absolute_percentage_error(y1.cpu().detach().numpy(),pred1.cpu().detach().numpy())
    mape2 = mean_absolute_percentage_error(y2.cpu().detach().numpy(),pred2.cpu().detach().numpy())
    mape3 = mean_absolute_percentage_error(y3.cpu().detach().numpy(),pred3.cpu().detach().numpy())
    mape4 = mean_absolute_percentage_error(y4.cpu().detach().numpy(),pred4.cpu().detach().numpy())

    r_list = [r1,r2,r3,r4]
    m_list = [m1,m2,m3,m4]
    mape_list = [mape1,mape2,mape3,mape4]
    return rmse_list,r_list,m_list,pred_list,mape_list




if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Train')
    parser.add_argument('--file_path', type=str, default='data/new_data/process_data/air2.csv',help='')
    parser.add_argument('--model_save_dir', type=str,default='air2_model_result', help='')
    parser.add_argument('--res_dir', type=str,default='air2_res_result', help='')



    parser.add_argument('--input_dim', type=int,default=19, help='')
    parser.add_argument('--represent_dim', type=int, default=100, help='')
    parser.add_argument('--pair_embedding_dim', type=int, default=5, help='')
    parser.add_argument('--expert_num', type=int, default=5, help='')
    parser.add_argument('--epochs',  type=int, default=2000, help='')
    parser.add_argument('--early_stop_num',  type=int, default=200, help='')

    parser.add_argument('--lr', type=float, default=0.0005, help='')
    parser.add_argument("--verbose", action="store_true", help="")
    parser.add_argument("--writer_flag", action="store_true", help="")
    parser.add_argument('--batch_size',  type=int, default=128, help='')
    parser.add_argument('--test_ratio', type=float, default=0.2, help='')
    parser.add_argument('--seed',  type=int, default=42, help='')


    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)

    karg = parser.parse_args(args=[])


    file_name = os.path.basename(karg.file_path)
    print(f"\n\n\n====start to train {file_name}====")
    #接下来在这里在套一个外层大循环，这和循环直接执行10次
    for p in range(10):
        rand_num = random.randint(0, 100)
        kf = KFold(n_splits=10,shuffle=True,random_state=42)
        #在此处我将完成交叉验证10次的选取,接下来我将保存结果，将所有数据都报错起来
        or_data = pd.read_csv(karg.file_path).values
        k_num=0
        for train_index , test_index in kf.split(or_data):  # 调用split方法切分数据
            x_train, x_test, y_train, y_test = get_train_test_data(karg.file_path, train_index , test_index)
            train_dataset = MMoE_Dataset(x_train, y_train[:, 0], y_train[:, 1], y_train[:, 2], y_train[:, 3])
            train_dataloader = DataLoader(train_dataset, karg.batch_size, shuffle=True, pin_memory=True)
            model_save_path =  os.path.join(karg.model_save_dir,file_name.split(".")[0]+"di"+str(k_num)+"zhe"+str(p)+".ckpt")
            create_dir(model_save_path)
            model = MMoE_Model(input_dim=karg.input_dim, represent_dim=karg.represent_dim, pair_embedding_dim=karg.pair_embedding_dim,
                               expert_num=karg.expert_num).to(device)

            trainer(train_dataloader, model, model_save_path, device, karg.lr, karg.epochs, karg.early_stop_num, karg.verbose, karg.writer_flag)
            rmse, r2, m, _, mape = predict(model, device, x_train, y_train)
            print("=====")
            print(mape)
            rmse, r2, m, _, mape = predict(model, device, x_test, y_test)
            print(f"file:{file_name}, rmse:{rmse}, r2:{r2},mae:{m},mape:{mape}")

            result = pd.DataFrame([rmse + r2 + m + mape],
                                  columns=["eads_rmse", "delta_e_rmse", "eb_rmse", "db_rmse", "eads_r2", "delta_e_r2", "eb_r2",
                                           "db_r2", "eads_mae",
                                           "delta_e_mae", "eb_mae", "db_mae", "eads_mape", "delta_e_mape", "eb_mape",
                                           "db_mape"])

            pred1, pred2, pred3, pred4, s1, s2, s3, s4, gates = model(torch.Tensor(x_train).to(device))
            weight = pd.DataFrame(gates.cpu().mean(1).squeeze(-1).detach().numpy())

            top2_index_list = []
            for i in range(4):
                top2 = np.sort(weight.iloc[i])[-2:]
                top2_index = np.argsort(weight.iloc[i])[-2:]
                top2_index_list.append(top2_index.values)
            top2_index_df = pd.DataFrame((top2_index_list))
            top2_index_df.columns = ["top2", "top1"]


            result_path = os.path.join(karg.res_dir, file_name.split(".")[0]+"di"+str(k_num)+"zhe"+str(p)+".csv")
            top2_path = os.path.join(karg.res_dir, file_name.split(".")[0]+"di"+str(k_num)+"zhe"+str(p)+"_top2.csv")
            create_dir(result_path)
            top2_index_df.to_csv(top2_path, index=False)
            result.to_csv(result_path, index_label='num')
            k_num=k_num+1

cuda:0



====start to train air2.csv====
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [25:01<00:00,  1.33it/s]



Train all epochs.

 Best Model loss_total: -33.318186, loss1: 0.000525,loss2: 0.000012,loss3: 0.001217,loss4: 0.000006

theta1:Parameter containing:
tensor([0.0285], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0035], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0405], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0038], device='cuda:0', requires_grad=True)
=====
[0.34402692, 0.15272714, 0.0017284557, 0.00033385868]
file:air2.csv, rmse:[0.05300043241643845, 0.005297002828750186, 0.0724741755266507, 0.0032698111583253325], r2:[0.9928416301193249, 0.9794155445617663, 0.9944107985599315, 0.9965896224973966],mae:[0.024889838, 0.0032759153, 0.0348359, 0.0022314684],mape:[1.1796571, 0.3010304, 0.0044150213, 0.00053114333]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [27:32<00:00,  1.21it/s]



Train all epochs.

 Best Model loss_total: -33.789369, loss1: 0.000308,loss2: 0.000009,loss3: 0.000930,loss4: 0.000009

theta1:Parameter containing:
tensor([0.0268], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0030], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0411], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0029], device='cuda:0', requires_grad=True)
=====
[0.33723506, 0.11443641, 0.0035981128, 0.00031990788]
file:air2.csv, rmse:[0.031040323619789364, 0.0061038553620199115, 0.06333952931018172, 0.0068477258622270136], r2:[0.9968166668899593, 0.977498097535553, 0.9947661820864879, 0.9820622819571816],mae:[0.017938668, 0.0034619311, 0.040900365, 0.0026224381],mape:[0.18185414, 0.17827305, 0.0047278707, 0.000624382]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [27:39<00:00,  1.21it/s]



Train all epochs.

 Best Model loss_total: -34.415719, loss1: 0.000317,loss2: 0.000007,loss3: 0.001070,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0235], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0042], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0356], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0058], device='cuda:0', requires_grad=True)
=====
[0.3854042, 0.13294871, 0.0023829967, 0.0003714873]
file:air2.csv, rmse:[0.02294276024862702, 0.0050823160990513885, 0.0432951946207792, 0.0027612223117026565], r2:[0.9973231706305182, 0.9798626075924424, 0.9969097714588613, 0.9967299034436576],mae:[0.015915226, 0.0031640148, 0.02937195, 0.0020154591],mape:[0.43504086, 0.7287519, 0.0032758713, 0.00048105186]
the number of train samples is: 1984
the number of test samples is: 221


 76%|███████▌  | 1516/2000 [21:06<06:44,  1.20it/s]



Model is not improving, so we halt the training session at epoch: 1517.

 Best Model loss_total: -32.663517, loss1: 0.000485,loss2: 0.000018,loss3: 0.001322,loss4: 0.000009

theta1:Parameter containing:
tensor([0.0268], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0057], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0435], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0071], device='cuda:0', requires_grad=True)
=====
[0.56955636, 0.18950734, 0.004546798, 0.0012533566]
file:air2.csv, rmse:[0.037093549729560214, 0.0061488092199846415, 0.06971893843206865, 0.007751355724242496], r2:[0.9957238650434758, 0.9718240301878671, 0.9941298435315821, 0.9794140490046902],mae:[0.022929262, 0.003902205, 0.050461188, 0.0058847032],mape:[0.5936763, 0.2065953, 0.0056919316, 0.001401869]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [27:49<00:00,  1.20it/s]



Train all epochs.

 Best Model loss_total: -33.800039, loss1: 0.000402,loss2: 0.000010,loss3: 0.000933,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0216], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0032], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0361], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0025], device='cuda:0', requires_grad=True)
=====
[0.33346698, 0.17111523, 0.0020793884, 0.0004212651]
file:air2.csv, rmse:[0.033740567538304435, 0.005711229910292069, 0.04931398423545445, 0.003674415188234596], r2:[0.9948675664638259, 0.9761617903698657, 0.9965015183875878, 0.9951750704654903],mae:[0.017237913, 0.0032687904, 0.028177207, 0.002510168],mape:[0.36066788, 0.25713977, 0.003182833, 0.0005977686]
the number of train samples is: 1985
the number of test samples is: 220


 96%|█████████▌| 1914/2000 [26:31<01:11,  1.20it/s]



Model is not improving, so we halt the training session at epoch: 1915.

 Best Model loss_total: -33.684632, loss1: 0.000515,loss2: 0.000008,loss3: 0.001159,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0261], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0030], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0396], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0035], device='cuda:0', requires_grad=True)
=====
[0.28353506, 0.121303916, 0.002253422, 0.0005315503]
file:air2.csv, rmse:[0.027962068188807775, 0.005308524751561292, 0.0473010844426413, 0.0036769772505834326], r2:[0.9967193944698999, 0.9806792055817325, 0.9968280739980994, 0.9950396415485346],mae:[0.016700212, 0.0031471436, 0.0272897, 0.0028028227],mape:[1.229659, 0.25310364, 0.0032014234, 0.00066995074]
the number of train samples is: 1985
the number of test samples is: 220


100%|██████████| 2000/2000 [27:50<00:00,  1.20it/s]



Train all epochs.

 Best Model loss_total: -33.692348, loss1: 0.000347,loss2: 0.000008,loss3: 0.001160,loss4: 0.000009

theta1:Parameter containing:
tensor([0.0242], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0040], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0393], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0052], device='cuda:0', requires_grad=True)
=====
[0.30663827, 0.15432224, 0.0030511646, 0.00051450386]
file:air2.csv, rmse:[0.05014040639903369, 0.006727778587474002, 0.07587883285263616, 0.003690685843774119], r2:[0.9926492205467441, 0.9677765798334498, 0.9935736608557687, 0.995778987680598],mae:[0.02816545, 0.0039378344, 0.044786107, 0.002722116],mape:[0.28907377, 0.28882858, 0.0052362694, 0.0006511324]
the number of train samples is: 1985
the number of test samples is: 220


 90%|████████▉ | 1795/2000 [24:52<02:50,  1.20it/s]


Model is not improving, so we halt the training session at epoch: 1796.

 Best Model loss_total: -33.625880, loss1: 0.000449,loss2: 0.000010,loss3: 0.001301,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0271], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0029], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0437], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0056], device='cuda:0', requires_grad=True)
=====
[0.45706394, 0.14313476, 0.0039325785, 0.0009862384]
file:air2.csv, rmse:[0.04160368230057527, 0.007434018961634199, 0.07293205084748816, 0.005599551678550059], r2:[0.9942890089459473, 0.9570647272470141, 0.993311933137001, 0.9890822210520864],mae:[0.019525923, 0.0038875309, 0.046293702, 0.004421618],mape:[0.37956476, 0.29025212, 0.0052336394, 0.0010614118]
the number of train samples is: 1985
the number of test samples is: 220



 66%|██████▌   | 1310/2000 [17:58<09:28,  1.21it/s]



Model is not improving, so we halt the training session at epoch: 1311.

 Best Model loss_total: -31.981780, loss1: 0.000509,loss2: 0.000016,loss3: 0.001792,loss4: 0.000013

theta1:Parameter containing:
tensor([0.0323], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0042], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0556], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0046], device='cuda:0', requires_grad=True)
=====
[0.3950266, 0.27318218, 0.0021190466, 0.00070928456]
file:air2.csv, rmse:[0.07972060035007161, 0.006494889269965085, 0.04986675452818781, 0.004966782168276307], r2:[0.9727888651350086, 0.9700146841539296, 0.996019752309072, 0.9895749122011847],mae:[0.028150866, 0.003776564, 0.030072445, 0.0037972536],mape:[0.65404564, 0.22424123, 0.003390845, 0.0009075591]
the number of train samples is: 1985
the number of test samples is: 220


100%|██████████| 2000/2000 [27:47<00:00,  1.20it/s]



Train all epochs.

 Best Model loss_total: -33.877189, loss1: 0.000368,loss2: 0.000011,loss3: 0.000967,loss4: 0.000008

theta1:Parameter containing:
tensor([0.0214], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0033], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0352], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0024], device='cuda:0', requires_grad=True)
=====
[0.5849679, 0.14677636, 0.002536653, 0.00038482904]
file:air2.csv, rmse:[0.03553912502735312, 0.004534698209492815, 0.11997230833017611, 0.0027490818052316405], r2:[0.9966212484522935, 0.9846159553126244, 0.9842921376658057, 0.9975552149769532],mae:[0.021876516, 0.0032675054, 0.036819678, 0.0020141364],mape:[0.47285295, 0.18579625, 0.0042343014, 0.0004808894]
the number of train samples is: 1984
the number of test samples is: 221


 63%|██████▎   | 1263/2000 [17:40<10:19,  1.19it/s]



Model is not improving, so we halt the training session at epoch: 1264.

 Best Model loss_total: -32.337430, loss1: 0.000621,loss2: 0.000014,loss3: 0.001440,loss4: 0.000009

theta1:Parameter containing:
tensor([0.0317], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0038], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0490], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0055], device='cuda:0', requires_grad=True)
=====
[0.3944594, 0.22019857, 0.0039823223, 0.0010198982]
file:air2.csv, rmse:[0.05088002657617867, 0.007429018864619038, 0.07955331490666284, 0.006481572262791793], r2:[0.9934029479330951, 0.9595105494088181, 0.993265587212012, 0.986599598394268],mae:[0.025194513, 0.004531279, 0.044807356, 0.0050044036],mape:[0.7300959, 0.6017376, 0.0056270915, 0.0011890703]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [27:53<00:00,  1.20it/s]



Train all epochs.

 Best Model loss_total: -34.479961, loss1: 0.000308,loss2: 0.000009,loss3: 0.000861,loss4: 0.000006

theta1:Parameter containing:
tensor([0.0223], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0036], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0475], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0126], device='cuda:0', requires_grad=True)
=====
[0.30390993, 0.17211984, 0.0023280436, 0.000614702]
file:air2.csv, rmse:[0.028025418516154883, 0.005697800131173125, 0.06528247099575009, 0.007672429543196723], r2:[0.9974050215459642, 0.9803923689031675, 0.994440163020424, 0.9774814632457022],mae:[0.01750492, 0.0038564852, 0.032112867, 0.0034100264],mape:[0.59633225, 0.25678205, 0.0037304875, 0.0008122133]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [27:56<00:00,  1.19it/s]



Train all epochs.

 Best Model loss_total: -33.786724, loss1: 0.000509,loss2: 0.000008,loss3: 0.001066,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0244], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0075], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0413], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0039], device='cuda:0', requires_grad=True)
=====
[0.35407108, 0.16299783, 0.0022906747, 0.0007619889]
file:air2.csv, rmse:[0.022907330451739833, 0.0055381965531776396, 0.03441887508768859, 0.003667073560826482], r2:[0.9973314319172187, 0.9760879619770243, 0.998046989471764, 0.994232374476754],mae:[0.016191997, 0.0037398015, 0.02498367, 0.003133737],mape:[0.25769523, 0.6585973, 0.002744669, 0.0007493963]
the number of train samples is: 1984
the number of test samples is: 221


 84%|████████▍ | 1688/2000 [23:35<04:21,  1.19it/s]



Model is not improving, so we halt the training session at epoch: 1689.

 Best Model loss_total: -33.144238, loss1: 0.000476,loss2: 0.000012,loss3: 0.001571,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0256], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0067], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0405], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0052], device='cuda:0', requires_grad=True)
=====
[0.3065983, 0.18516162, 0.0023564927, 0.0009580846]
file:air2.csv, rmse:[0.026057026227678873, 0.006802949489074338, 0.06143069088453692, 0.005914057316952617], r2:[0.997889893771395, 0.9655101487005349, 0.9954425811631147, 0.9880164181736929],mae:[0.016752936, 0.004094771, 0.03827424, 0.0047083613],mape:[0.3442443, 0.21642359, 0.004400768, 0.0011273398]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [28:00<00:00,  1.19it/s]



Train all epochs.

 Best Model loss_total: -33.922558, loss1: 0.000405,loss2: 0.000010,loss3: 0.001035,loss4: 0.000006

theta1:Parameter containing:
tensor([0.0277], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0102], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0448], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0042], device='cuda:0', requires_grad=True)
=====
[0.30919135, 0.20039721, 0.0017801512, 0.00040111347]
file:air2.csv, rmse:[0.031908789532355035, 0.006978794990765596, 0.04501026207088744, 0.0034878270152943055], r2:[0.9954097186971071, 0.964406130276985, 0.9970855105680125, 0.9956526519712968],mae:[0.015361404, 0.003955854, 0.02574417, 0.0023545485],mape:[0.3383428, 0.32160398, 0.0028663378, 0.0005629949]
the number of train samples is: 1985
the number of test samples is: 220


100%|██████████| 2000/2000 [28:19<00:00,  1.18it/s]



Train all epochs.

 Best Model loss_total: -33.621000, loss1: 0.000372,loss2: 0.000009,loss3: 0.001180,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0235], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0038], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0376], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0042], device='cuda:0', requires_grad=True)
=====
[0.2811349, 0.15237345, 0.0018368715, 0.0007673461]
file:air2.csv, rmse:[0.022016731402686247, 0.006564446450038962, 0.04545699032221689, 0.004053852687502934], r2:[0.9979661398282063, 0.970455715342037, 0.9970705761448864, 0.9939706975686894],mae:[0.016007412, 0.003610132, 0.027396906, 0.003395137],mape:[1.4322135, 0.31585056, 0.0031741813, 0.0008108445]
the number of train samples is: 1985
the number of test samples is: 220


 94%|█████████▍| 1879/2000 [26:22<01:41,  1.19it/s]



Model is not improving, so we halt the training session at epoch: 1880.

 Best Model loss_total: -33.371889, loss1: 0.000401,loss2: 0.000010,loss3: 0.001217,loss4: 0.000008

theta1:Parameter containing:
tensor([0.0237], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0051], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0386], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0033], device='cuda:0', requires_grad=True)
=====
[0.1925288, 0.16375642, 0.00221261, 0.00037836912]
file:air2.csv, rmse:[0.029879669222040425, 0.006856490428237149, 0.05547623986732045, 0.003323759677445703], r2:[0.9973895866225005, 0.9665318279449695, 0.996564923758466, 0.9965765678717057],mae:[0.016442323, 0.004142815, 0.033745706, 0.0023021698],mape:[0.24274531, 0.28809044, 0.0038354243, 0.0005492944]
the number of train samples is: 1985
the number of test samples is: 220


100%|██████████| 2000/2000 [27:37<00:00,  1.21it/s]



Train all epochs.

 Best Model loss_total: -34.007611, loss1: 0.000351,loss2: 0.000010,loss3: 0.001031,loss4: 0.000006

theta1:Parameter containing:
tensor([0.0207], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0040], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0398], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0063], device='cuda:0', requires_grad=True)
=====
[0.24046806, 0.25630847, 0.0017478537, 0.0002877275]
file:air2.csv, rmse:[0.04276240584325962, 0.00772622294797528, 0.05889693736144249, 0.003088719970874168], r2:[0.9939664600832383, 0.9536231366928996, 0.9956383668207015, 0.996678113353205],mae:[0.018918881, 0.0051622, 0.029677933, 0.0020536012],mape:[0.22226867, 0.2649984, 0.003575229, 0.0004892126]
the number of train samples is: 1985
the number of test samples is: 220


100%|██████████| 2000/2000 [26:57<00:00,  1.24it/s]



Train all epochs.

 Best Model loss_total: -33.993309, loss1: 0.000387,loss2: 0.000009,loss3: 0.001140,loss4: 0.000007

theta1:Parameter containing:
tensor([0.0224], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0038], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0356], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0046], device='cuda:0', requires_grad=True)
=====
[0.3736142, 0.13299966, 0.0016381416, 0.00041831704]
file:air2.csv, rmse:[0.03633877651894959, 0.006777804092643421, 0.04125644879298649, 0.005467709866557189], r2:[0.9943461264968008, 0.967345487746653, 0.9972755953049757, 0.987366012187491],mae:[0.019119691, 0.0036760895, 0.026812363, 0.0028404344],mape:[0.3903692, 0.18714015, 0.0029855347, 0.00067648536]
the number of train samples is: 1985
the number of test samples is: 220


100%|██████████| 2000/2000 [26:35<00:00,  1.25it/s]



Train all epochs.

 Best Model loss_total: -33.472685, loss1: 0.000404,loss2: 0.000014,loss3: 0.001239,loss4: 0.000006

theta1:Parameter containing:
tensor([0.0297], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0047], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0477], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0049], device='cuda:0', requires_grad=True)
=====
[0.29567644, 0.17268515, 0.0037421759, 0.00040491045]
file:air2.csv, rmse:[0.0381907280796078, 0.005078345553888163, 0.11450741069614155, 0.0033080189419089747], r2:[0.9960982571769917, 0.9807061784474435, 0.9856905726103395, 0.9964600158700245],mae:[0.022340769, 0.0032829777, 0.049044073, 0.0021378994],mape:[0.237042, 0.1706279, 0.005595582, 0.0005103703]
the number of train samples is: 1984
the number of test samples is: 221


 86%|████████▌ | 1719/2000 [22:58<03:45,  1.25it/s]



Model is not improving, so we halt the training session at epoch: 1720.

 Best Model loss_total: -32.805520, loss1: 0.000429,loss2: 0.000011,loss3: 0.001517,loss4: 0.000010

theta1:Parameter containing:
tensor([0.0233], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0033], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0404], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0036], device='cuda:0', requires_grad=True)
=====
[0.38485947, 0.15970233, 0.0038933395, 0.00063724304]
file:air2.csv, rmse:[0.05326542972874118, 0.006944772404531939, 0.08890460871805597, 0.004397438498299344], r2:[0.9927698684702979, 0.9646169735102326, 0.9915893078440882, 0.9938318257302594],mae:[0.021597853, 0.004010428, 0.047296967, 0.003332274],mape:[1.0355428, 0.30394584, 0.0058666603, 0.0007955414]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [26:37<00:00,  1.25it/s]



Train all epochs.

 Best Model loss_total: -33.705855, loss1: 0.000375,loss2: 0.000009,loss3: 0.001066,loss4: 0.000009

theta1:Parameter containing:
tensor([0.0254], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0034], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0409], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0038], device='cuda:0', requires_grad=True)
=====
[0.73854285, 0.22052383, 0.0031006392, 0.00031283547]
file:air2.csv, rmse:[0.032608801583196, 0.005511123386230611, 0.07258566342450809, 0.005913249889344452], r2:[0.9964868290918528, 0.9816561290399493, 0.9931266152129953, 0.9866239807519342],mae:[0.026132887, 0.0034380928, 0.042300135, 0.0024920525],mape:[1.2577739, 0.20502177, 0.0048209457, 0.0005929164]
the number of train samples is: 1984
the number of test samples is: 221


100%|██████████| 2000/2000 [26:45<00:00,  1.25it/s]



Train all epochs.

 Best Model loss_total: -33.893036, loss1: 0.000449,loss2: 0.000009,loss3: 0.001144,loss4: 0.000008

theta1:Parameter containing:
tensor([0.0245], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0044], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0400], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0058], device='cuda:0', requires_grad=True)
=====
[0.23355399, 0.13987978, 0.0016999595, 0.00056362065]
file:air2.csv, rmse:[0.020048579796518064, 0.006094772514041065, 0.03743896341339397, 0.0035943842022480685], r2:[0.997955926013962, 0.971040244428488, 0.9976892189312829, 0.9944587612224705],mae:[0.013549727, 0.0033314414, 0.023965264, 0.0028432333],mape:[0.32113123, 0.49891508, 0.00268157, 0.00068019325]
the number of train samples is: 1984
the number of test samples is: 221


 93%|█████████▎| 1868/2000 [26:36<01:52,  1.17it/s]


Model is not improving, so we halt the training session at epoch: 1869.

 Best Model loss_total: -33.187032, loss1: 0.000521,loss2: 0.000011,loss3: 0.001344,loss4: 0.000008

theta1:Parameter containing:
tensor([0.0255], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0033], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0440], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0037], device='cuda:0', requires_grad=True)
=====
[0.38434336, 0.1922243, 0.003497228, 0.00036229932]
file:air2.csv, rmse:[0.02308521405569458, 0.005599385355162079, 0.06208012029848321, 0.003477348960884961], r2:[0.9983437630998144, 0.976634370217811, 0.9953457122842609, 0.995857026518246],mae:[0.014982136, 0.0034696278, 0.04429593, 0.0025044242],mape:[0.34045523, 0.16880664, 0.0050162296, 0.00059696997]
the number of train samples is: 1984
the number of test samples is: 221



 94%|█████████▍| 1887/2000 [27:19<01:30,  1.25it/s]

2