In [1]:
import argparse
import math
import os
from sklearn.model_selection import KFold
import pandas as pd
import torch
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from sklearn.model_selection import KFold
from datasets import get_train_test_data, MMoE_Dataset
from models import MMoE_Model
import numpy as np
import random

def create_dir(file_path):
    """Ensure the parent directory of ``file_path`` exists.

    Only the directory portion of ``file_path`` is created; the file itself
    is never touched.

    Args:
        file_path: Path to a file whose containing directory should exist.

    Returns:
        None.
    """
    parent = os.path.dirname(file_path)
    # A bare file name (e.g. "model.ckpt") has an empty directory component,
    # and os.makedirs("") raises FileNotFoundError, so there is nothing to do.
    if parent:
        # exist_ok avoids the check-then-create race when several runs start
        # at the same time.
        os.makedirs(parent, exist_ok=True)



def trainer(train_loader, model, model_save_path, device ,lr, epochs, early_stop_num,verbose=True, writer_flag=False):
    """Train a four-task model with learnable per-task loss weights.

    Each batch contributes two objectives:

    * a weighted regression loss
      ``sum_i MSE_i / theta_i**2 + 2 * sum_i log(theta_i)`` where the
      ``theta_i`` are read from ``model.theta1`` .. ``model.theta4``;
    * the summed pairwise cosine similarity between the four auxiliary
      outputs ``s1`` .. ``s4`` returned by the model.

    Both are minimised together, but only the weighted regression loss is
    recorded, logged and used for checkpointing / early stopping.

    Args:
        train_loader: Iterable yielding ``(x, y1, y2, y3, y4)`` batches.
        model: Module whose forward returns nine values
            ``(pred1, pred2, pred3, pred4, s1, s2, s3, s4, _)`` and which
            exposes ``theta1`` .. ``theta4``.
        model_save_path: File the best ``state_dict`` is written to.
        device: Device the batches are moved to.
        lr: Learning rate for Adam.
        epochs: Maximum number of epochs.
        early_stop_num: Number of consecutive epochs without improvement of
            the epoch-mean total loss after which training stops.
        verbose: When true, print progress every 100 epochs and on every
            checkpoint; when false, show a tqdm progress bar instead.
        writer_flag: When true, log epoch-mean losses to TensorBoard.

    Returns:
        None. The best checkpoint (by training loss) is saved to
        ``model_save_path``; ``model`` itself keeps the last-epoch weights.
    """
    criterion = nn.MSELoss(reduction='mean')
    similarity_criterion = nn.CosineSimilarity()

    theta1 = model.theta1
    theta2 = model.theta2
    theta3 = model.theta3
    theta4 = model.theta4

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    create_dir(model_save_path)

    n_epochs = epochs
    step = 0
    early_stop_count = 0
    # best[0] is the total loss, best[1:5] the four task losses of that epoch.
    best = [math.inf] * 5

    def report_best():
        # Shared summary printed both on early stop and after the last epoch.
        print(
            f"\n Best Model loss_total: {best[0]:.6f}, loss1: {best[1]:.6f},loss2: {best[2]:.6f},loss3: {best[3]:.6f},loss4: {best[4]:.6f}")
        print(f"\ntheta1:{theta1},theta2:{theta2},theta3:{theta3},theta4:{theta4}")

    epoch_iter = range(n_epochs) if verbose else tqdm(range(n_epochs), position=0, leave=True)
    writer = SummaryWriter() if writer_flag else None

    try:
        for epoch in epoch_iter:
            model.train()
            # One list per tracked quantity: total, loss1, loss2, loss3, loss4.
            records = [[], [], [], [], []]

            for x, y1, y2, y3, y4 in train_loader:
                optimizer.zero_grad()
                x, y1, y2, y3, y4 = x.to(device), y1.to(device), y2.to(device), y3.to(device), y4.to(device)

                pred1, pred2, pred3, pred4, s1, s2, s3, s4, _ = model(x)

                loss1 = criterion(pred1, y1)
                loss2 = criterion(pred2, y2)
                loss3 = criterion(pred3, y3)
                loss4 = criterion(pred4, y4)

                # Pairwise similarity between the four auxiliary outputs.
                loss5 = similarity_criterion(s1, s2) + similarity_criterion(s1, s3) + similarity_criterion(s1, s4) \
                        + similarity_criterion(s2, s3) + similarity_criterion(s2, s4) + similarity_criterion(s3, s4)
                loss5 = loss5.sum()

                loss_total = loss1 / (theta1 ** 2) + loss2 / (theta2 ** 2) + loss3 / (theta3 ** 2) + loss4 / (theta4 ** 2) \
                             + 2 * (torch.log(theta1) + torch.log(theta2) + torch.log(theta3) + torch.log(theta4))

                # Gradients are additive, so one backward pass over the sum
                # yields the same gradients as two separate passes while
                # avoiding retain_graph=True and a second graph traversal.
                (loss_total + loss5).backward()
                optimizer.step()
                step += 1

                batch_losses = (loss_total, loss1, loss2, loss3, loss4)
                for record, value in zip(records, batch_losses):
                    record.append(value.detach().item())

            (mean_train_loss_total, mean_train_loss1, mean_train_loss2,
             mean_train_loss3, mean_train_loss4) = [sum(r) / len(r) for r in records]

            if writer is not None:
                writer.add_scalar('Loss_total/train', mean_train_loss_total, step)
                writer.add_scalar('Loss1/train', mean_train_loss1, step)
                writer.add_scalar('Loss2/train', mean_train_loss2, step)
                writer.add_scalar('Loss3/train', mean_train_loss3, step)
                writer.add_scalar('Loss4/train', mean_train_loss4, step)

            if verbose and epoch % 100 == 99:
                print(
                    f'Epoch [{epoch + 1}/{n_epochs}]: Train loss_total: {mean_train_loss_total:.6f}, loss1: {mean_train_loss1:.6f},loss2: {mean_train_loss2:.6f},loss3: {mean_train_loss3:.6f},loss4: {mean_train_loss4:.6f}')

            if mean_train_loss_total < best[0]:
                best[:] = [mean_train_loss_total, mean_train_loss1, mean_train_loss2,
                           mean_train_loss3, mean_train_loss4]

                torch.save(model.state_dict(), model_save_path)  # Save the best model so far
                if verbose:
                    print(
                        f"\nSave with loss_total: {mean_train_loss_total:.6f}, loss1: {mean_train_loss1:.6f},loss2: {mean_train_loss2:.6f},loss3: {mean_train_loss3:.6f},loss4: {mean_train_loss4:.6f}")
                early_stop_count = 0
            else:
                early_stop_count += 1

            if early_stop_count >= early_stop_num:
                print(f'\nModel is not improving, so we halt the training session at epoch: {epoch + 1}.')
                report_best()
                return

        print('\nTrain all epochs.')
        report_best()
    finally:
        # Flush and release the TensorBoard event file on every exit path,
        # including the early-stop return above.
        if writer is not None:
            writer.close()




def predict(model,device,data,y):
    """Run ``model`` on ``data`` and score its four outputs against ``y``.

    The model is switched to eval mode and the forward pass runs without
    gradient tracking, so no autograd graph is built for the whole dataset.

    Args:
        model: Module whose forward returns nine values, the first four
            being the per-task predictions.
        device: Device the inputs and targets are moved to.
        data: Input features, converted with ``torch.Tensor(data)``.
        y: Targets indexed as ``y[:, k]`` for ``k`` in ``0..3`` (one column
            per task).

    Returns:
        Tuple ``(rmse_list, r_list, m_list, pred_list, mape_list)`` where
        each list has one entry per task: RMSE, R^2, mean absolute error,
        the prediction tensors, and mean absolute percentage error.
    """
    inputs = torch.Tensor(data).to(device)
    targets = [torch.Tensor(y[:, k]).to(device) for k in range(4)]

    criterion = nn.MSELoss(reduction="mean")
    model.eval()
    with torch.no_grad():
        pred1, pred2, pred3, pred4, _, _, _, _, _ = model(inputs)
        pred_list = [pred1, pred2, pred3, pred4]
        # RMSE is the square root of the mean squared error.
        rmse_list = [criterion(pred, target).item() ** 0.5
                     for pred, target in zip(pred_list, targets)]

    r_list = []
    m_list = []
    mape_list = []
    for pred, target in zip(pred_list, targets):
        # Move each tensor to host memory once and reuse it for all metrics.
        target_np = target.detach().cpu().numpy()
        pred_np = pred.detach().cpu().numpy()
        r_list.append(r2_score(target_np, pred_np))
        m_list.append(mean_absolute_error(target_np, pred_np))
        mape_list.append(mean_absolute_percentage_error(target_np, pred_np))

    return rmse_list,r_list,m_list,pred_list,mape_list




if __name__ == "__main__":
    # Repeated 10-fold cross-validation: for each of 10 repetitions, train one
    # model per fold and write its test metrics and top-2 gate indices to CSV.
    parser = argparse.ArgumentParser(description='Train')
    parser.add_argument('--file_path', type=str, default='data/new_data/process_data/water.csv',help='')
    parser.add_argument('--model_save_dir', type=str,default='water2_model_result', help='')
    parser.add_argument('--res_dir', type=str,default='water2_res_result', help='')

    parser.add_argument('--input_dim', type=int,default=19, help='')
    parser.add_argument('--represent_dim', type=int, default=100, help='')
    parser.add_argument('--pair_embedding_dim', type=int, default=5, help='')
    parser.add_argument('--expert_num', type=int, default=5, help='')
    parser.add_argument('--epochs',  type=int, default=2000, help='')
    parser.add_argument('--early_stop_num',  type=int, default=200, help='')

    parser.add_argument('--lr', type=float, default=0.0005, help='')
    parser.add_argument("--verbose", action="store_true", help="")
    parser.add_argument("--writer_flag", action="store_true", help="")
    parser.add_argument('--batch_size',  type=int, default=128, help='')
    parser.add_argument('--test_ratio', type=float, default=0.2, help='')
    parser.add_argument('--seed',  type=int, default=42, help='')

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)

    # args=[] parses the defaults only and ignores the process command line
    # (presumably because this cell runs inside a notebook kernel).
    karg = parser.parse_args(args=[])

    file_name = os.path.basename(karg.file_path)
    dataset_tag = file_name.split(".")[0]
    print(f"\n\n\n====start to train {file_name}====")

    # Seed the fold-seed generator so the sequence of splits is reproducible.
    random.seed(karg.seed)

    # Only the number of rows matters for KFold.split, so read the file once.
    or_data = pd.read_csv(karg.file_path).values

    # Outer loop: repeat the whole 10-fold cross-validation 10 times.
    for p in range(10):
        # Use a fresh split seed per repetition; a fixed random_state would
        # make every repetition reuse exactly the same folds.
        rand_num = random.randint(0, 100)
        kf = KFold(n_splits=10, shuffle=True, random_state=rand_num)

        # Inner loop: one model per fold; results of every fold are saved.
        for k_num, (train_index, test_index) in enumerate(kf.split(or_data)):
            x_train, x_test, y_train, y_test = get_train_test_data(karg.file_path, train_index , test_index)
            train_dataset = MMoE_Dataset(x_train, y_train[:, 0], y_train[:, 1], y_train[:, 2], y_train[:, 3])
            train_dataloader = DataLoader(train_dataset, karg.batch_size, shuffle=True, pin_memory=True)

            run_tag = dataset_tag + "di" + str(k_num) + "zhe" + str(p)
            model_save_path = os.path.join(karg.model_save_dir, run_tag + ".ckpt")
            create_dir(model_save_path)
            model = MMoE_Model(input_dim=karg.input_dim, represent_dim=karg.represent_dim, pair_embedding_dim=karg.pair_embedding_dim,
                               expert_num=karg.expert_num).to(device)

            trainer(train_dataloader, model, model_save_path, device, karg.lr, karg.epochs, karg.early_stop_num, karg.verbose, karg.writer_flag)

            # NOTE(review): the metrics below use the weights from the last
            # epoch, not the best checkpoint written to model_save_path —
            # confirm this is intended.
            rmse, r2, m, _, mape = predict(model, device, x_train, y_train)
            print("=====")
            print(mape)
            rmse, r2, m, _, mape = predict(model, device, x_test, y_test)
            print(f"file:{file_name}, rmse:{rmse}, r2:{r2},mae:{m},mape:{mape}")

            result = pd.DataFrame([rmse + r2 + m + mape],
                                  columns=["eads_rmse", "delta_e_rmse", "eb_rmse", "db_rmse", "eads_r2", "delta_e_r2", "eb_r2",
                                           "db_r2", "eads_mae",
                                           "delta_e_mae", "eb_mae", "db_mae", "eads_mape", "delta_e_mape", "eb_mape",
                                           "db_mape"])

            # Only the gate activations are needed here, so skip autograd.
            with torch.no_grad():
                pred1, pred2, pred3, pred4, s1, s2, s3, s4, gates = model(torch.Tensor(x_train).to(device))
            weight = pd.DataFrame(gates.cpu().mean(1).squeeze(-1).detach().numpy())

            # For each of the first four rows of the averaged gate weights,
            # keep the indices of the two largest entries (ascending order,
            # so the last column is the largest).
            top2_index_list = []
            for i in range(4):
                top2_index = np.argsort(weight.iloc[i])[-2:]
                top2_index_list.append(top2_index.values)
            top2_index_df = pd.DataFrame(top2_index_list)
            top2_index_df.columns = ["top2", "top1"]

            result_path = os.path.join(karg.res_dir, run_tag + ".csv")
            top2_path = os.path.join(karg.res_dir, run_tag + "_top2.csv")
            create_dir(result_path)
            top2_index_df.to_csv(top2_path, index=False)
            result.to_csv(result_path, index_label='num')

cuda:0



====start to train water.csv====
the number of train samples is: 1918
the number of test samples is: 214


100%|██████████| 2000/2000 [24:36<00:00,  1.35it/s]



Train all epochs.

 Best Model loss_total: -29.294102, loss1: 0.001485,loss2: 0.000076,loss3: 0.002553,loss4: 0.000012

theta1:Parameter containing:
tensor([0.0371], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0089], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0588], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0057], device='cuda:0', requires_grad=True)
=====
[0.32961002, 0.47275347, 0.0036495605, 0.0005778504]
file:water.csv, rmse:[0.03280375092950757, 0.013047081659623335, 0.05777596965938221, 0.004285358223240122], r2:[0.9965161481343433, 0.9007699746436992, 0.995728878474396, 0.9932249258287632],mae:[0.020280296, 0.007920684, 0.0396264, 0.0031060837],mape:[0.27887642, 0.90850466, 0.004536635, 0.00074301305]
the number of train samples is: 1918
the number of test samples is: 214


100%|██████████| 2000/2000 [26:51<00:00,  1.24it/s]



Train all epochs.

 Best Model loss_total: -28.014139, loss1: 0.000944,loss2: 0.000092,loss3: 0.003540,loss4: 0.000030

theta1:Parameter containing:
tensor([0.0372], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0107], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0690], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0063], device='cuda:0', requires_grad=True)
=====
[0.26668543, 0.5380871, 0.003735213, 0.00055758207]
file:water.csv, rmse:[0.03186717453608211, 0.017474146783964254, 0.06029438045876843, 0.004216208066401792], r2:[0.9969052555620014, 0.854898691472422, 0.9957685245345217, 0.994033830293547],mae:[0.02191749, 0.0077379085, 0.042654198, 0.0030905085],mape:[0.39893535, 0.6373543, 0.004804199, 0.0007370333]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [27:14<00:00,  1.22it/s]



Train all epochs.

 Best Model loss_total: -28.877173, loss1: 0.001121,loss2: 0.000078,loss3: 0.002484,loss4: 0.000017

theta1:Parameter containing:
tensor([0.0418], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0090], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0624], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0077], device='cuda:0', requires_grad=True)
=====
[0.27913174, 0.6551289, 0.0038911535, 0.0006146606]
file:water.csv, rmse:[0.028570972212238553, 0.016562659950541136, 0.05252556388130503, 0.004006635848326131], r2:[0.9957787374090395, 0.8650851086184264, 0.9956100802089927, 0.9940532244751963],mae:[0.019989505, 0.008114905, 0.0399965, 0.003056224],mape:[0.63559574, 0.97549546, 0.004464927, 0.00073054223]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [27:08<00:00,  1.23it/s]



Train all epochs.

 Best Model loss_total: -27.484084, loss1: 0.001276,loss2: 0.000112,loss3: 0.004723,loss4: 0.000025

theta1:Parameter containing:
tensor([0.0469], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0112], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0789], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0054], device='cuda:0', requires_grad=True)
=====
[0.41795158, 0.5439077, 0.0044410783, 0.0006372367]
file:water.csv, rmse:[0.03457030118521812, 0.01451088426820007, 0.07185651992522525, 0.00427992607080291], r2:[0.9971302747261256, 0.8907758064550552, 0.9944490038645547, 0.9935343271497422],mae:[0.02516109, 0.009033511, 0.04939863, 0.0032389355],mape:[0.44087785, 1.3257716, 0.0055283387, 0.00077372364]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [27:12<00:00,  1.22it/s]



Train all epochs.

 Best Model loss_total: -28.871661, loss1: 0.000814,loss2: 0.000108,loss3: 0.003444,loss4: 0.000015

theta1:Parameter containing:
tensor([0.0408], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0099], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0718], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0065], device='cuda:0', requires_grad=True)
=====
[0.33578807, 0.7565616, 0.0038534473, 0.0011522378]
file:water.csv, rmse:[0.05598416228257953, 0.008963314540977102, 0.07934234025137764, 0.006396887046300656], r2:[0.9922384753260668, 0.9554011208533201, 0.9932721139844665, 0.9859563248670881],mae:[0.030147338, 0.0064619617, 0.04422998, 0.0053490605],mape:[0.29953608, 0.56343406, 0.0055752886, 0.0012801242]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [27:20<00:00,  1.22it/s]



Train all epochs.

 Best Model loss_total: -27.814653, loss1: 0.001115,loss2: 0.000103,loss3: 0.004278,loss4: 0.000028

theta1:Parameter containing:
tensor([0.0488], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0108], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0799], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0068], device='cuda:0', requires_grad=True)
=====
[0.35143825, 0.6935287, 0.0036947923, 0.0007763498]
file:water.csv, rmse:[0.02756357704022646, 0.018132877658571196, 0.05992176783277336, 0.005060426158246904], r2:[0.9973665230676885, 0.8369263975885868, 0.9949837091979318, 0.9895999516651861],mae:[0.021064507, 0.009345297, 0.04374682, 0.0038562738],mape:[0.28950146, 0.8931777, 0.0049129953, 0.0009197608]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [27:17<00:00,  1.22it/s]



Train all epochs.

 Best Model loss_total: -28.393531, loss1: 0.001290,loss2: 0.000084,loss3: 0.003818,loss4: 0.000018

theta1:Parameter containing:
tensor([0.0378], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0101], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0710], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0044], device='cuda:0', requires_grad=True)
=====
[0.2661451, 0.52514064, 0.0040533086, 0.0005536595]
file:water.csv, rmse:[0.038916145274234704, 0.011941249160759709, 0.06798389035987873, 0.0045662744892402095], r2:[0.9950419766005235, 0.9271759486695619, 0.994530484606452, 0.9932553230115887],mae:[0.021992864, 0.0075284163, 0.048265297, 0.0032266318],mape:[0.3151707, 1.0166867, 0.005380523, 0.00077118014]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [26:32<00:00,  1.26it/s]



Train all epochs.

 Best Model loss_total: -28.156098, loss1: 0.001646,loss2: 0.000074,loss3: 0.003240,loss4: 0.000024

theta1:Parameter containing:
tensor([0.0384], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0097], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0686], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0076], device='cuda:0', requires_grad=True)
=====
[0.25624084, 0.7602085, 0.003886282, 0.0007570477]
file:water.csv, rmse:[0.029625765159967527, 0.014420393763007695, 0.060524817263523524, 0.00516332855863331], r2:[0.9972235796340042, 0.8853052428080684, 0.9957650653922899, 0.9913224591040648],mae:[0.01911261, 0.009824831, 0.047288332, 0.0036706075],mape:[0.44863153, 0.63632, 0.005300734, 0.00087772496]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [26:12<00:00,  1.27it/s]



Train all epochs.

 Best Model loss_total: -27.925621, loss1: 0.001230,loss2: 0.000086,loss3: 0.004221,loss4: 0.000025

theta1:Parameter containing:
tensor([0.0415], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0135], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0695], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0063], device='cuda:0', requires_grad=True)
=====
[0.45232725, 0.63105863, 0.004369393, 0.00084178493]
file:water.csv, rmse:[0.02544693479879943, 0.011976259531979522, 0.055801744175694966, 0.005367915645118721], r2:[0.997912854086892, 0.906645587759383, 0.996288010313911, 0.9907687555931985],mae:[0.019334996, 0.007920343, 0.0418643, 0.0039965],mape:[0.3085487, 0.49150142, 0.0045904536, 0.0009539849]
the number of train samples is: 1919
the number of test samples is: 213


100%|██████████| 2000/2000 [25:35<00:00,  1.30it/s]



Train all epochs.

 Best Model loss_total: -28.948844, loss1: 0.000826,loss2: 0.000087,loss3: 0.002934,loss4: 0.000017

theta1:Parameter containing:
tensor([0.0470], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0099], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0663], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0054], device='cuda:0', requires_grad=True)
=====
[0.30930576, 0.56862265, 0.0036748652, 0.00073328795]
file:water.csv, rmse:[0.03335435520770002, 0.017270178117643836, 0.05147190117989637, 0.004893785306817185], r2:[0.9939872930206283, 0.8521371984356017, 0.9956613580259717, 0.990659234664882],mae:[0.021186883, 0.0075232806, 0.03749236, 0.0038765764],mape:[0.25215164, 0.47816768, 0.0041269944, 0.0009296026]
the number of train samples is: 1918
the number of test samples is: 214


100%|██████████| 2000/2000 [25:20<00:00,  1.32it/s]



Train all epochs.

 Best Model loss_total: -29.755248, loss1: 0.000905,loss2: 0.000065,loss3: 0.002224,loss4: 0.000014

theta1:Parameter containing:
tensor([0.0314], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0079], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0524], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0041], device='cuda:0', requires_grad=True)
=====
[0.26088402, 0.36232772, 0.002230137, 0.0018480435]
file:water.csv, rmse:[0.03359744369083452, 0.014106726246674228, 0.05100604196961667, 0.008437545543620587], r2:[0.9963455236771215, 0.8839971097577115, 0.9966711767643444, 0.9737353226257744],mae:[0.019980127, 0.007886667, 0.03391272, 0.007814086],mape:[0.38678706, 0.66259825, 0.003749556, 0.0018761952]
the number of train samples is: 1918
the number of test samples is: 214


100%|██████████| 2000/2000 [25:28<00:00,  1.31it/s]



Train all epochs.

 Best Model loss_total: -28.296567, loss1: 0.001233,loss2: 0.000099,loss3: 0.003451,loss4: 0.000018

theta1:Parameter containing:
tensor([0.0385], device='cuda:0', requires_grad=True),theta2:Parameter containing:
tensor([0.0092], device='cuda:0', requires_grad=True),theta3:Parameter containing:
tensor([0.0684], device='cuda:0', requires_grad=True),theta4:Parameter containing:
tensor([0.0086], device='cuda:0', requires_grad=True)
=====
[0.29591474, 0.7160155, 0.0035563156, 0.00063792383]
file:water.csv, rmse:[0.02962153078919131, 0.018698853001614554, 0.05269557134523719, 0.004177574452273113], r2:[0.9973260539152277, 0.8338465919838003, 0.99676788795024, 0.9941426673140425],mae:[0.019672222, 0.008572657, 0.038760547, 0.0032111395],mape:[0.38325432, 0.5063325, 0.0043758187, 0.0007668497]
the number of train samples is: 1919
the number of test samples is: 213


 58%|█████▊    | 1161/2000 [14:53<10:55,  1.28it/s]