In [24]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim 
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import torch.utils.data as Data
import optuna
import optuna.trial
import os 

In [25]:
import pandas_profiling as ppf
# Load the training table from disk; the following cell takes columns 0 and 2
# as model inputs and column 1 as the regression target.
data = pd.read_csv('./data/train_data.csv')
# Optional exploratory profiling report (left disabled):
# ppf.ProfileReport(data)

In [26]:


# Split the data into a training set and a test set
from torch import seed  # NOTE(review): not referenced in the visible cells - looks like an editor auto-import


# Fixed seed so the train/test partition (and therefore the scaler statistics
# and every reported loss) is identical after a kernel restart.
SPLIT_SEED = 42

# Columns 0 and 2 are the model inputs, column 1 is the regression target.
x = data.iloc[:, [0,2]].values
y = data.iloc[:, 1].values
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SPLIT_SEED
)

# Standardise the features. The scaler is fitted on the training split only
# and then re-used for the test split and for the full table.
ss1 = preprocessing.StandardScaler()
x_train = ss1.fit_transform(x_train)
x_test = ss1.transform(x_test)
x_total = ss1.transform(x)

# Standardise the target with the training-split mean/std; the same two
# numbers are used later to map predictions back to the original scale.
y_mean = y_train.mean()
y_std = y_train.std()
y_train = (y_train - y_mean) / y_std
y_test = (y_test - y_mean) / y_std

# numpy -> float32 tensors; targets are reshaped to column vectors (N, 1)
x_train = torch.from_numpy(x_train).float()
y_train = torch.from_numpy(y_train).float().view(-1, 1)
x_test = torch.from_numpy(x_test).float()
y_test = torch.from_numpy(y_test).float().view(-1, 1)
x_total = torch.from_numpy(x_total).float()

# Full-batch training: one batch holds the whole training split. The batch
# size is taken from the tensor that is actually wrapped by the dataset.
torch_dataset = Data.TensorDataset(x_train, y_train)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=len(x_train),
            shuffle=True)

# print(next(iter(loader)))


In [27]:
class LR(nn.Module):
    """Fully connected regressor (2 inputs -> 1 output) with three hidden layers.

    The width of each hidden layer is requested from the Optuna ``trial`` under
    the parameter names ``fc1_out``, ``fc2_out`` and ``fc3_out`` (range 16..128
    in steps of 16), so building the model is part of the hyperparameter search.
    """

    def __init__(self, trial):
        """Build the layers, asking ``trial`` for the three hidden widths.

        Args:
            trial: object exposing ``suggest_int(name, low, high, step=...)``.
        """
        super().__init__()
        # `step` is passed by keyword: newer Optuna releases deprecate passing
        # it positionally, and the keyword form works on older releases too.
        fc1_out = trial.suggest_int("fc1_out", 16, 128, step=16)
        self.fc1 = nn.Linear(2, fc1_out)
        fc2_out = trial.suggest_int("fc2_out", 16, 128, step=16)
        self.fc2 = nn.Linear(fc1_out, fc2_out)
        fc3_out = trial.suggest_int("fc3_out", 16, 128, step=16)
        self.fc3 = nn.Linear(fc2_out, fc3_out)
        self.fc4 = nn.Linear(fc3_out, 1)

    def forward(self, x):
        """Apply three ReLU-activated linear layers followed by a linear output layer."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        # No activation on the last layer: the output is an unbounded regression value.
        return self.fc4(x)

In [28]:
# Number of passes over the training loader used by each training run.
epochs = 4000

In [29]:
def objective(trial):
    """Optuna objective: train a fresh ``LR`` network and return its test MSE.

    Hyperparameters requested from ``trial``: the hidden widths (inside ``LR``),
    the optimizer class (``Adam`` / ``Adadelta`` / ``Adagrad``), the learning
    rate and the weight decay (both sampled on a log scale).

    After every epoch the test-set MSE is reported to Optuna as a plain float
    and the study's pruner is consulted, so unpromising trials stop early.

    Args:
        trial: the Optuna trial supplying the hyperparameters.

    Returns:
        float: MSE on ``x_test`` / ``y_test`` after the last epoch (computed on
        the standardised target).

    Raises:
        ValueError: if the module-level ``epochs`` is smaller than 1.
        optuna.TrialPruned: when the pruner decides to stop this trial.
    """
    if epochs < 1:
        # Without at least one epoch there would be no score to return.
        raise ValueError("epochs must be at least 1, got {}".format(epochs))

    net = LR(trial)
    criterion = nn.MSELoss()
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "Adadelta","Adagrad"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-3, 1e-1, log=True)
    # Look the optimizer class up by name in torch.optim.
    optimizer = getattr(optim, optimizer_name)(net.parameters(), lr=lr, weight_decay=weight_decay)

    for e in range(epochs):
        net.train()
        for batch_x, batch_y in loader:
            loss = criterion(net(batch_x), batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        net.eval()
        with torch.no_grad():
            # .item() turns the 0-dim tensor into the Python float Optuna expects.
            test_error = criterion(net(x_test), y_test).item()

        trial.report(test_error, e)
        # Reported values only affect the search if the pruner is actually asked.
        if trial.should_prune():
            raise optuna.TrialPruned()

    return test_error


In [39]:
# Search for the hyperparameters that minimise the objective's return value.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)
# `trial` is only assigned if optimize() finishes; later cells that use it
# fail with NameError when the search above is interrupted.
trial = study.best_trial
# The objective returns the test-set MSE (on the standardised target), so the
# value is a loss to be minimised, not an accuracy.
print('Best test MSE: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2022-10-24 22:17:20,243][0m A new study created in memory with name: no-name-f68b4c8f-1da1-488e-aca5-a3784d3a587c[0m
[32m[I 2022-10-24 22:17:30,720][0m Trial 0 finished with value: 0.05747666954994202 and parameters: {'fc1_out': 32, 'fc2_out': 96, 'fc3_out': 64, 'optimizer': 'Adam', 'lr': 0.0007837054420502013, 'weight_decay': 0.0036147272908536687}. Best is trial 0 with value: 0.05747666954994202.[0m
[33m[W 2022-10-24 22:17:32,240][0m Trial 1 failed because of the following error: KeyboardInterrupt()[0m
Traceback (most recent call last):
  File "c:\Users\86189\anaconda3\envs\pythonProject\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\86189\AppData\Local\Temp\ipykernel_26900\3524078032.py", line 21, in objective
    loss.backward()
  File "c:\Users\86189\anaconda3\envs\pythonProject\lib\site-packages\torch\_tensor.py", line 396, in backward
    torch.autograd.backward(self, gradient, retain_graph

KeyboardInterrupt: 

NameError: name 'trial' is not defined

In [31]:
# Hold-out table, loaded as a raw array with the same column layout the
# training cell uses: column 1 is the target, columns 0 and 2 are the inputs.
valid_data = pd.read_csv('./data/test_data.csv').values
valid_data_y = valid_data[:, 1]
valid_data_target = valid_data[:, [0, 2]]

In [32]:
import matplotlib.pyplot as plt
def valid(trial, xlim=(0, 2), ylim=(-13, -7)):
    """Retrain an ``LR`` network with ``trial``'s hyperparameters and plot the results.

    A new network is built and trained from scratch for ``epochs`` epochs on
    ``loader``; train and test MSE are recorded after every epoch and printed
    every 50 epochs. Two figures are then drawn:

    1. train / test MSE against the epoch index;
    2. predicted and actual target values (mapped back to the original scale
       with ``y_std`` / ``y_mean``) against column 0, for both
       ``train_data.csv`` and ``test_data.csv``.

    Args:
        trial: Optuna trial supplying the hyperparameters through its
            ``suggest_*`` methods (typically ``study.best_trial``).
        xlim: ``(low, high)`` x-axis limits of the prediction figure, or
            ``None`` to let matplotlib choose.
        ylim: ``(low, high)`` y-axis limits of the prediction figure, or
            ``None`` to let matplotlib choose.

    Returns:
        None. The function is called for its printed log and figures.
    """
    net = LR(trial)
    criterion = nn.MSELoss()
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "Adadelta","Adagrad"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-3, 1e-1, log=True)
    # Look the optimizer class up by name in torch.optim.
    optimizer = getattr(optim, optimizer_name)(net.parameters(), lr=lr, weight_decay=weight_decay)

    epochs_list = []
    train_loss_list = []
    test_error_list = []
    for e in range(epochs):
        epochs_list.append(e)
        net.train()
        for batch_x, batch_y in loader:
            loss = criterion(net(batch_x), batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Per-epoch evaluation on the full train and test splits.
        net.eval()
        with torch.no_grad():
            loss = criterion(net(x_train), y_train)
            train_loss_list.append(loss.item())

            error = criterion(net(x_test), y_test)
            test_error_list.append(error.item())
            if (e+1)%50==0:
                print("Epoch:{}, trainLoss:{}，testLoss:{}".format(e+1,loss.item(),error.item()))

    # Figure 1: learning curves.
    plt.figure()
    plt.subplot(1,2,1)
    plt.xlabel("Training epochs")
    plt.ylabel("error")
    plt.grid()
    plt.plot(epochs_list, train_loss_list, '.-', color="r",
                    label="train_loss_")
    plt.plot(epochs_list, test_error_list, '.-', color="g",
                    label="test_error")

    plt.legend(loc="best")

    # Scale the hold-out inputs with the scaler fitted on the training split.
    net.eval()
    x_valid = torch.from_numpy(ss1.transform(valid_data_target)).float()

    # Inference only: no autograd graph is needed, and the outputs can be
    # converted to numpy directly. Predictions are mapped back to the original
    # target scale.
    with torch.no_grad():
        y_pred = net(x_total).numpy() * y_std + y_mean
        y_pred_valid = net(x_valid).numpy() * y_std + y_mean

    x_axis = data.iloc[:, 0].values
    x_axis_valid = valid_data[:, 0]

    # Figure 2: predictions vs. actual values, plotted against column 0.
    plt.figure(figsize=(15,8))
    plt.scatter(x_axis, y_pred, s=50, c="b", label="train_data.csv: predicted")
    plt.scatter(x_axis, y, s=50, c="g", label="train_data.csv: actual")
    plt.scatter(x_axis_valid, y_pred_valid, s=50, c="c", label="test_data.csv: predicted")
    plt.scatter(x_axis_valid, valid_data_y, s=50, c="r", label="test_data.csv: actual")
    plt.legend(loc="best")
    plt.grid()
    if xlim is not None:
        plt.xlim(*xlim)
    if ylim is not None:
        plt.ylim(*ylim)
    return None

In [34]:
valid(trial)

NameError: name 'trial' is not defined