<a href="https://colab.research.google.com/github/chuanxiangpei/MyProject/blob/master/2022_CCF_BDCI_%E8%BF%94%E4%B9%A1%E5%8F%91%E5%B1%95%E4%BA%BA%E7%BE%A4%E9%A2%84%E6%B5%8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only setup: mount Google Drive and make the project data folder the
# working directory, so that the relative paths used by later cells resolve.
import os
from google.colab import drive
drive.mount('/content/drive')

# Folder on the mounted Drive that is used as the working directory.
path = "/content/drive/MyDrive/data"

os.chdir(path)
# NOTE(review): the list returned here is discarded (this is not the last
# expression of the cell), so this call displays nothing.
os.listdir(path)

# Print an empty line, then list the working directory through the shell.
print()
!ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

dataA.csv  dataTrain.csv  models  pred.csv


1. 导入包

In [None]:
# -*- coding: utf-8 -*-
# Numerical Operations
import math
import numpy as np

# Reading/Writing Data
import pandas as pd
import os
import csv

# For Progress Bar
from tqdm import tqdm

# Pytorch
import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

import torch.nn as nn
import torchvision.transforms as transforms

# sklearn package
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import  SelectFromModel

2. 超参数

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Hyper-parameters read by the data-splitting, model-building and training cells.
config = dict(
    seed=5201314,                     # Seed for the RNGs and for the data splits.
    valid_ratio=0.2,                  # valid_size = train_size * valid_ratio
    test_ratio=0.2,                   # Fraction of the rows held out as the test split.
    n_epochs=100,                     # Maximum number of training epochs.
    batch_size=128,                   # Mini-batch size of every DataLoader.
    learning_rate=1e-3,               # Learning rate handed to the optimizer.
    hidden_layers=10,                 # Passed to MyModel as hidden_layers.
    hidden_dim=512,                   # Passed to MyModel as hidden_dim.
    early_stop=50,                    # Stop after this many consecutive epochs without improvement.
    save_path='./models/model.ckpt',  # Where the best checkpoint is written.
)

3. 随机性

In [None]:
def same_seed(seed):
  """Seed every random number generator used by the notebook.

  Seeds Python's ``random`` module, NumPy's global generator and PyTorch
  (CPU, plus all CUDA devices when CUDA is available), and switches cuDNN
  to deterministic mode with auto-tuning disabled.

  Args:
    seed: Integer seed applied to all generators.
  """
  # Imported locally so the helper does not depend on the notebook's import cell.
  import random

  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
  # Ask cuDNN for deterministic kernels and disable its benchmark auto-tuner.
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

4. 数据封装

In [None]:
class MyDataset(Dataset):
  """Tensor-backed dataset of feature rows with optional labels.

  Args:
    x: Feature data accepted by ``torch.FloatTensor``.
    y: Optional label data accepted by ``torch.FloatTensor``. When omitted,
      items are feature rows only.
  """

  def __init__(self, x, y=None):
    self.x = torch.FloatTensor(x)
    self.y = None if y is None else torch.FloatTensor(y)

  def __getitem__(self, index):
    """Return ``x[index]``, or the pair ``(x[index], y[index])`` when labels exist."""
    sample = self.x[index]
    if self.y is not None:
      return sample, self.y[index]
    return sample

  def __len__(self):
    """Number of feature rows."""
    return len(self.x)

5. 模型设计

In [None]:
class BasicBlock(nn.Module):
    """Linear -> ReLU -> BatchNorm1d -> Dropout(0.2) building block.

    Args:
        input_dim: Size of the incoming feature vector.
        output_dim: Size of the produced feature vector.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()

        stages = [
            nn.Linear(input_dim, output_dim),
            nn.ReLU(),
            nn.BatchNorm1d(output_dim),
            nn.Dropout(0.2),  # regularisation
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the four stages in order."""
        return self.block(x)


class MyModel(nn.Module):
    """Fully connected classifier assembled from ``BasicBlock`` layers.

    The network is one input block (``input_dim`` -> ``hidden_dim``),
    ``hidden_layers`` further blocks (``hidden_dim`` -> ``hidden_dim``) and a
    final linear layer producing ``output_dim`` raw scores.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output scores.
        hidden_layers: Number of hidden blocks after the input block.
        hidden_dim: Width of every hidden block.
    """

    def __init__(self, input_dim, output_dim=2, hidden_layers=3, hidden_dim=200):
        super().__init__()

        layers = [BasicBlock(input_dim, hidden_dim)]
        for _ in range(hidden_layers):
            layers.append(BasicBlock(hidden_dim, hidden_dim))
        layers.append(nn.Linear(hidden_dim, output_dim))

        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw output scores for the batch ``x``."""
        return self.fc(x)

In [None]:
# Print NumPy arrays in full instead of summarising long ones.
np.set_printoptions(threshold=np.inf)

# Show all columns of a DataFrame.
pd.options.display.max_columns = None
# Show all rows of a DataFrame.
pd.options.display.max_rows = None

6. 数据导入与数据预处理

In [None]:
# Numeric codes for the text values found in column f3.
f3_mapping = {'low': 0, 'mid': 1, 'high': 2}

# Read the labelled data into a DataFrame and replace the f3 strings with
# their numeric codes. Only dataTrain.csv is loaded here; dataA.csv is not read.
train_data_df = pd.read_csv('dataTrain.csv').assign(
    f3=lambda frame: frame['f3'].map(f3_mapping)
)

# NumPy representation of the DataFrame, used by the splitting cell.
train_data = train_data_df.values

7. 特征提取及数据封装

In [None]:
same_seed(config['seed'])


# Data cleansing: keep only the first 50000 rows. The original author's note
# describes the remaining rows as interfering data.
train_data = train_data[:50000]


# Split off a stratified test set first, then carve a stratified validation set
# out of what remains. Column 0 is skipped and the last column is the label.
X_train, X_test, y_train, y_test = train_test_split(train_data[:, 1:-1], train_data[:, -1], test_size=config['test_ratio'], random_state=config['seed'], stratify=train_data[:, -1])
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=config['valid_ratio'], random_state=config['seed'], stratify=y_train)

print(f"""X_train size:{X_train.shape}
      X_valid size:{X_valid.shape}
      X_test size:{X_test.shape}""")

# Feature selection, fitted on the training split only.
# Model whose feature importances drive the selection.
etc = ExtraTreesClassifier()

# Fit that model on the training data.
etc_ = etc.fit(X_train, y_train)

# Importance score of every feature.
feature_importances = etc_.feature_importances_

# Feature indices ordered from least to most important.
idx = np.argsort(feature_importances)

# Let SelectFromModel decide how many features to keep.
select_from_model = SelectFromModel(etc_, prefit=True)

# Transform X_train; only the number of surviving columns is used below.
X_SFM_ETC = select_from_model.transform(X_train)

# Take that many of the most important features.
feature_idx = idx[-X_SFM_ETC.shape[1]:]
print(f'feature_idx:{feature_idx}')


X_train, X_valid, X_test = X_train[:,feature_idx], X_valid[:,feature_idx], X_test[:,feature_idx]

print(f'number of features:{X_train.shape[1]}')
print(f"""X_train size:{X_train.shape}
      X_valid size:{X_valid.shape}
      X_test size:{X_test.shape}
      y_train size:{y_train.shape}
      y_valid size:{y_valid.shape}""")


train_dataset = MyDataset(X_train, y_train)

valid_dataset = MyDataset(X_valid, y_valid)

test_dataset = MyDataset(X_test, y_test)

# Only the training loader is shuffled. Evaluation loaders keep a fixed order so
# that validation/test metrics do not depend on the random number generator.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)


X_train size:(32000, 46)
      X_valid size:(8000, 46)
      X_test size:(10000, 46)
feature_idx:[42 23 43 22 36 18 24 25 44 45  0  1  4  5  3]
number of features:15
X_train size:(32000, 15)
      X_valid size:(8000, 15)
      X_test size:(10000, 15)
      y_train size:(32000,)
      y_valid size:(8000,)


8. 模型训练

In [None]:
# Build the network, the loss, the optimizer and the learning-rate schedule.
model = MyModel(input_dim=X_train.shape[1], hidden_layers=config['hidden_layers'], hidden_dim=config['hidden_dim']).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
# The original author found that T_mult=2 gave lower accuracy.
# NOTE(review): scheduler.step() is called once per batch below, so T_0=5 makes
# the learning rate restart every 5 batches rather than every 5 epochs — confirm
# that this is intended.
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=1)

# Make sure the directory of the checkpoint file exists before the first save.
os.makedirs(os.path.dirname(config['save_path']) or '.', exist_ok=True)

n_epochs = config['n_epochs']
best_acc = 0.0
early_stop_count = 0

for epoch in range(n_epochs):
    # ---- train ----
    model.train()
    # Running totals are weighted by batch size so that a shorter final batch
    # does not count as much as a full one.
    train_loss_sum, train_correct, train_seen = 0.0, 0, 0
    train_pbar = tqdm(train_loader)
    train_pbar.set_description(f'Epoch  [{epoch+1}/{n_epochs}]')
    for X, y in train_pbar:
        # CrossEntropyLoss expects integer class indices, hence .long().
        X, y = X.to(device), y.to(device).long()
        optimizer.zero_grad()
        pred = model(X)
        loss = criterion(pred, y)
        loss.backward()

        # Clip the gradient norms for stable training.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        optimizer.step()
        scheduler.step()

        batch_loss = loss.item()
        n_samples = y.size(0)
        train_loss_sum += batch_loss * n_samples
        # argmax over dim=1 picks the highest-scoring class of every row.
        train_correct += (pred.argmax(dim=1) == y).sum().item()
        train_seen += n_samples
        train_pbar.set_postfix({'loss': batch_loss})

    mean_train_loss = train_loss_sum / train_seen
    mean_train_acc = train_correct / train_seen

    print(f'Epoch  [{epoch+1} / {n_epochs}]:  Train Loss:{mean_train_loss:.4f}, Train Acc:{mean_train_acc}')

    # ---- validate ----
    model.eval()
    valid_loss_sum, valid_correct, valid_seen = 0.0, 0, 0
    with torch.no_grad():
        for X, y in valid_loader:
            X, y = X.to(device), y.to(device).long()
            pred = model(X)
            n_samples = y.size(0)
            valid_loss_sum += criterion(pred, y).item() * n_samples
            valid_correct += (pred.argmax(dim=1) == y).sum().item()
            valid_seen += n_samples

    mean_valid_loss = valid_loss_sum / valid_seen
    mean_valid_acc = valid_correct / valid_seen

    print(f'Epoch  [{epoch+1} / {n_epochs}]:  Valid Loss:{mean_valid_loss:.4f}, Valid Acc:{mean_valid_acc}')

    # Keep the checkpoint with the best validation accuracy; stop once there has
    # been no improvement for config['early_stop'] consecutive epochs.
    if mean_valid_acc > best_acc:
        best_acc = mean_valid_acc
        torch.save(model.state_dict(), config['save_path'])
        # Report the same 1-based epoch number as the progress lines above.
        print(f'Saving model with acc {best_acc:.3f}, Best model found at epoch {epoch+1}')
        early_stop_count = 0
    else:
        early_stop_count += 1
        if early_stop_count >= config['early_stop']:
            print('Model is not improving, so we halt the traing session')
            break



Epoch  [1/100]: 100%|██████████| 250/250 [00:21<00:00, 11.69it/s, loss=0.546]


Epoch  [1 / 100]:  Train Loss:0.5237, Train Acc:0.76128125
Epoch  [1 / 100]:  Valid Loss:0.4088, Valid Acc:0.8328373015873016
Saving model with acc 0.833, Best model found at epoch 0


Epoch  [2/100]: 100%|██████████| 250/250 [00:20<00:00, 12.27it/s, loss=0.352]


Epoch  [2 / 100]:  Train Loss:0.4118, Train Acc:0.82853125
Epoch  [2 / 100]:  Valid Loss:0.3706, Valid Acc:0.8509424603174603
Saving model with acc 0.851, Best model found at epoch 1


Epoch  [3/100]: 100%|██████████| 250/250 [00:20<00:00, 12.24it/s, loss=0.374]


Epoch  [3 / 100]:  Train Loss:0.3908, Train Acc:0.840875
Epoch  [3 / 100]:  Valid Loss:0.3653, Valid Acc:0.8559027777777778
Saving model with acc 0.856, Best model found at epoch 2


Epoch  [4/100]: 100%|██████████| 250/250 [00:20<00:00, 12.24it/s, loss=0.386]


Epoch  [4 / 100]:  Train Loss:0.3800, Train Acc:0.84840625
Epoch  [4 / 100]:  Valid Loss:0.3590, Valid Acc:0.8560267857142857
Saving model with acc 0.856, Best model found at epoch 3


Epoch  [5/100]: 100%|██████████| 250/250 [00:20<00:00, 12.24it/s, loss=0.3]


Epoch  [5 / 100]:  Train Loss:0.3738, Train Acc:0.8513125
Epoch  [5 / 100]:  Valid Loss:0.3499, Valid Acc:0.8607390873015873
Saving model with acc 0.861, Best model found at epoch 4


Epoch  [6/100]: 100%|██████████| 250/250 [00:20<00:00, 12.27it/s, loss=0.357]


Epoch  [6 / 100]:  Train Loss:0.3704, Train Acc:0.85203125
Epoch  [6 / 100]:  Valid Loss:0.3478, Valid Acc:0.8602430555555556


Epoch  [7/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.404]


Epoch  [7 / 100]:  Train Loss:0.3654, Train Acc:0.8525625
Epoch  [7 / 100]:  Valid Loss:0.3493, Valid Acc:0.861359126984127
Saving model with acc 0.861, Best model found at epoch 6


Epoch  [8/100]: 100%|██████████| 250/250 [00:20<00:00, 12.29it/s, loss=0.339]


Epoch  [8 / 100]:  Train Loss:0.3601, Train Acc:0.85421875
Epoch  [8 / 100]:  Valid Loss:0.3455, Valid Acc:0.8612351190476191


Epoch  [9/100]: 100%|██████████| 250/250 [00:20<00:00, 12.30it/s, loss=0.327]


Epoch  [9 / 100]:  Train Loss:0.3544, Train Acc:0.85509375
Epoch  [9 / 100]:  Valid Loss:0.3393, Valid Acc:0.8637152777777778
Saving model with acc 0.864, Best model found at epoch 8


Epoch  [10/100]: 100%|██████████| 250/250 [00:20<00:00, 12.33it/s, loss=0.381]


Epoch  [10 / 100]:  Train Loss:0.3510, Train Acc:0.8566875
Epoch  [10 / 100]:  Valid Loss:0.3372, Valid Acc:0.8634672619047619


Epoch  [11/100]: 100%|██████████| 250/250 [00:20<00:00, 12.35it/s, loss=0.379]


Epoch  [11 / 100]:  Train Loss:0.3477, Train Acc:0.85671875
Epoch  [11 / 100]:  Valid Loss:0.3337, Valid Acc:0.8638392857142857
Saving model with acc 0.864, Best model found at epoch 10


Epoch  [12/100]: 100%|██████████| 250/250 [00:20<00:00, 12.28it/s, loss=0.313]


Epoch  [12 / 100]:  Train Loss:0.3476, Train Acc:0.8569375
Epoch  [12 / 100]:  Valid Loss:0.3294, Valid Acc:0.8648313492063492
Saving model with acc 0.865, Best model found at epoch 11


Epoch  [13/100]: 100%|██████████| 250/250 [00:20<00:00, 12.29it/s, loss=0.379]


Epoch  [13 / 100]:  Train Loss:0.3418, Train Acc:0.8580625
Epoch  [13 / 100]:  Valid Loss:0.3314, Valid Acc:0.8649553571428571
Saving model with acc 0.865, Best model found at epoch 12


Epoch  [14/100]: 100%|██████████| 250/250 [00:20<00:00, 12.29it/s, loss=0.335]


Epoch  [14 / 100]:  Train Loss:0.3412, Train Acc:0.857125
Epoch  [14 / 100]:  Valid Loss:0.3348, Valid Acc:0.8628472222222222


Epoch  [15/100]: 100%|██████████| 250/250 [00:20<00:00, 12.29it/s, loss=0.323]


Epoch  [15 / 100]:  Train Loss:0.3414, Train Acc:0.858125
Epoch  [15 / 100]:  Valid Loss:0.3318, Valid Acc:0.865203373015873
Saving model with acc 0.865, Best model found at epoch 14


Epoch  [16/100]: 100%|██████████| 250/250 [00:20<00:00, 12.27it/s, loss=0.429]


Epoch  [16 / 100]:  Train Loss:0.3383, Train Acc:0.8591875
Epoch  [16 / 100]:  Valid Loss:0.3385, Valid Acc:0.8591269841269841


Epoch  [17/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.277]


Epoch  [17 / 100]:  Train Loss:0.3369, Train Acc:0.8600625
Epoch  [17 / 100]:  Valid Loss:0.3281, Valid Acc:0.8618551587301587


Epoch  [18/100]: 100%|██████████| 250/250 [00:20<00:00, 12.30it/s, loss=0.301]


Epoch  [18 / 100]:  Train Loss:0.3363, Train Acc:0.859
Epoch  [18 / 100]:  Valid Loss:0.3250, Valid Acc:0.8637152777777778


Epoch  [19/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.365]


Epoch  [19 / 100]:  Train Loss:0.3359, Train Acc:0.85978125
Epoch  [19 / 100]:  Valid Loss:0.3296, Valid Acc:0.861359126984127


Epoch  [20/100]: 100%|██████████| 250/250 [00:20<00:00, 12.23it/s, loss=0.444]


Epoch  [20 / 100]:  Train Loss:0.3349, Train Acc:0.85840625
Epoch  [20 / 100]:  Valid Loss:0.3297, Valid Acc:0.8649553571428571


Epoch  [21/100]: 100%|██████████| 250/250 [00:20<00:00, 12.32it/s, loss=0.315]


Epoch  [21 / 100]:  Train Loss:0.3325, Train Acc:0.8583125
Epoch  [21 / 100]:  Valid Loss:0.3296, Valid Acc:0.8638392857142857


Epoch  [22/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.311]


Epoch  [22 / 100]:  Train Loss:0.3338, Train Acc:0.8586875
Epoch  [22 / 100]:  Valid Loss:0.3272, Valid Acc:0.8638392857142857


Epoch  [23/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.335]


Epoch  [23 / 100]:  Train Loss:0.3340, Train Acc:0.860625
Epoch  [23 / 100]:  Valid Loss:0.3250, Valid Acc:0.8666914682539683
Saving model with acc 0.867, Best model found at epoch 22


Epoch  [24/100]: 100%|██████████| 250/250 [00:20<00:00, 12.30it/s, loss=0.298]


Epoch  [24 / 100]:  Train Loss:0.3303, Train Acc:0.8595
Epoch  [24 / 100]:  Valid Loss:0.3276, Valid Acc:0.8655753968253969


Epoch  [25/100]: 100%|██████████| 250/250 [00:20<00:00, 12.36it/s, loss=0.368]


Epoch  [25 / 100]:  Train Loss:0.3328, Train Acc:0.85975
Epoch  [25 / 100]:  Valid Loss:0.3301, Valid Acc:0.8648313492063492


Epoch  [26/100]: 100%|██████████| 250/250 [00:20<00:00, 12.27it/s, loss=0.302]


Epoch  [26 / 100]:  Train Loss:0.3304, Train Acc:0.8603125
Epoch  [26 / 100]:  Valid Loss:0.3424, Valid Acc:0.8647073412698413


Epoch  [27/100]: 100%|██████████| 250/250 [00:20<00:00, 12.32it/s, loss=0.354]


Epoch  [27 / 100]:  Train Loss:0.3297, Train Acc:0.86196875
Epoch  [27 / 100]:  Valid Loss:0.3350, Valid Acc:0.861359126984127


Epoch  [28/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.304]


Epoch  [28 / 100]:  Train Loss:0.3281, Train Acc:0.860875
Epoch  [28 / 100]:  Valid Loss:0.3339, Valid Acc:0.8660714285714286


Epoch  [29/100]: 100%|██████████| 250/250 [00:20<00:00, 12.32it/s, loss=0.355]


Epoch  [29 / 100]:  Train Loss:0.3294, Train Acc:0.8618125
Epoch  [29 / 100]:  Valid Loss:0.3478, Valid Acc:0.8649553571428571


Epoch  [30/100]: 100%|██████████| 250/250 [00:20<00:00, 12.30it/s, loss=0.254]


Epoch  [30 / 100]:  Train Loss:0.3260, Train Acc:0.86225
Epoch  [30 / 100]:  Valid Loss:0.3536, Valid Acc:0.8617311507936508


Epoch  [31/100]: 100%|██████████| 250/250 [00:20<00:00, 12.35it/s, loss=0.34]


Epoch  [31 / 100]:  Train Loss:0.3275, Train Acc:0.8609375
Epoch  [31 / 100]:  Valid Loss:0.3316, Valid Acc:0.8621031746031746


Epoch  [32/100]: 100%|██████████| 250/250 [00:20<00:00, 12.18it/s, loss=0.329]


Epoch  [32 / 100]:  Train Loss:0.3260, Train Acc:0.86128125
Epoch  [32 / 100]:  Valid Loss:0.3350, Valid Acc:0.8628472222222222


Epoch  [33/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.355]


Epoch  [33 / 100]:  Train Loss:0.3265, Train Acc:0.86328125
Epoch  [33 / 100]:  Valid Loss:0.3269, Valid Acc:0.8673115079365079
Saving model with acc 0.867, Best model found at epoch 32


Epoch  [34/100]: 100%|██████████| 250/250 [00:20<00:00, 12.21it/s, loss=0.354]


Epoch  [34 / 100]:  Train Loss:0.3227, Train Acc:0.86346875
Epoch  [34 / 100]:  Valid Loss:0.3306, Valid Acc:0.8634672619047619


Epoch  [35/100]: 100%|██████████| 250/250 [00:20<00:00, 12.25it/s, loss=0.384]


Epoch  [35 / 100]:  Train Loss:0.3235, Train Acc:0.86275
Epoch  [35 / 100]:  Valid Loss:0.3360, Valid Acc:0.8635912698412699


Epoch  [36/100]: 100%|██████████| 250/250 [00:20<00:00, 12.27it/s, loss=0.346]


Epoch  [36 / 100]:  Train Loss:0.3229, Train Acc:0.8645
Epoch  [36 / 100]:  Valid Loss:0.3541, Valid Acc:0.8647073412698413


Epoch  [37/100]: 100%|██████████| 250/250 [00:20<00:00, 12.22it/s, loss=0.381]


Epoch  [37 / 100]:  Train Loss:0.3241, Train Acc:0.86315625
Epoch  [37 / 100]:  Valid Loss:0.3426, Valid Acc:0.8656994047619048


Epoch  [38/100]: 100%|██████████| 250/250 [00:20<00:00, 12.26it/s, loss=0.35]


Epoch  [38 / 100]:  Train Loss:0.3200, Train Acc:0.8636875
Epoch  [38 / 100]:  Valid Loss:0.3346, Valid Acc:0.8655753968253969


Epoch  [39/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.398]


Epoch  [39 / 100]:  Train Loss:0.3223, Train Acc:0.86396875
Epoch  [39 / 100]:  Valid Loss:0.3268, Valid Acc:0.8669394841269841


Epoch  [40/100]: 100%|██████████| 250/250 [00:20<00:00, 12.31it/s, loss=0.377]


Epoch  [40 / 100]:  Train Loss:0.3217, Train Acc:0.864875
Epoch  [40 / 100]:  Valid Loss:0.3337, Valid Acc:0.8639632936507936


Epoch  [41/100]: 100%|██████████| 250/250 [00:20<00:00, 12.27it/s, loss=0.349]


Epoch  [41 / 100]:  Train Loss:0.3198, Train Acc:0.8625625
Epoch  [41 / 100]:  Valid Loss:0.3468, Valid Acc:0.8676835317460317
Saving model with acc 0.868, Best model found at epoch 40


Epoch  [42/100]: 100%|██████████| 250/250 [00:20<00:00, 12.19it/s, loss=0.334]


Epoch  [42 / 100]:  Train Loss:0.3201, Train Acc:0.86328125
Epoch  [42 / 100]:  Valid Loss:0.3320, Valid Acc:0.8629712301587301


Epoch  [43/100]: 100%|██████████| 250/250 [00:20<00:00, 12.25it/s, loss=0.276]


Epoch  [43 / 100]:  Train Loss:0.3192, Train Acc:0.86521875
Epoch  [43 / 100]:  Valid Loss:0.3491, Valid Acc:0.8671875


Epoch  [44/100]: 100%|██████████| 250/250 [00:20<00:00, 12.25it/s, loss=0.249]


Epoch  [44 / 100]:  Train Loss:0.3197, Train Acc:0.86425
Epoch  [44 / 100]:  Valid Loss:0.3320, Valid Acc:0.8687996031746031
Saving model with acc 0.869, Best model found at epoch 43


Epoch  [45/100]: 100%|██████████| 250/250 [00:20<00:00, 12.18it/s, loss=0.419]


Epoch  [45 / 100]:  Train Loss:0.3193, Train Acc:0.86290625
Epoch  [45 / 100]:  Valid Loss:0.3393, Valid Acc:0.8676835317460317


Epoch  [46/100]: 100%|██████████| 250/250 [00:20<00:00, 12.18it/s, loss=0.198]


Epoch  [46 / 100]:  Train Loss:0.3189, Train Acc:0.86428125
Epoch  [46 / 100]:  Valid Loss:0.3340, Valid Acc:0.8692956349206349
Saving model with acc 0.869, Best model found at epoch 45


Epoch  [47/100]: 100%|██████████| 250/250 [00:20<00:00, 12.21it/s, loss=0.325]


Epoch  [47 / 100]:  Train Loss:0.3140, Train Acc:0.8675
Epoch  [47 / 100]:  Valid Loss:0.3280, Valid Acc:0.8654513888888888


Epoch  [48/100]: 100%|██████████| 250/250 [00:20<00:00, 12.15it/s, loss=0.254]


Epoch  [48 / 100]:  Train Loss:0.3141, Train Acc:0.86684375
Epoch  [48 / 100]:  Valid Loss:0.3325, Valid Acc:0.865203373015873


Epoch  [49/100]: 100%|██████████| 250/250 [00:20<00:00, 12.17it/s, loss=0.389]


Epoch  [49 / 100]:  Train Loss:0.3131, Train Acc:0.865
Epoch  [49 / 100]:  Valid Loss:0.3299, Valid Acc:0.8665674603174603


Epoch  [50/100]: 100%|██████████| 250/250 [00:20<00:00, 12.13it/s, loss=0.263]


Epoch  [50 / 100]:  Train Loss:0.3147, Train Acc:0.8660625
Epoch  [50 / 100]:  Valid Loss:0.3559, Valid Acc:0.861359126984127


Epoch  [51/100]: 100%|██████████| 250/250 [00:20<00:00, 12.19it/s, loss=0.33]


Epoch  [51 / 100]:  Train Loss:0.3146, Train Acc:0.86590625
Epoch  [51 / 100]:  Valid Loss:0.3547, Valid Acc:0.8642113095238095


Epoch  [52/100]: 100%|██████████| 250/250 [00:20<00:00, 12.26it/s, loss=0.253]


Epoch  [52 / 100]:  Train Loss:0.3142, Train Acc:0.86665625
Epoch  [52 / 100]:  Valid Loss:0.3364, Valid Acc:0.8675595238095238


Epoch  [53/100]: 100%|██████████| 250/250 [00:20<00:00, 12.21it/s, loss=0.366]


Epoch  [53 / 100]:  Train Loss:0.3115, Train Acc:0.86859375
Epoch  [53 / 100]:  Valid Loss:0.3401, Valid Acc:0.8668154761904762


Epoch  [54/100]: 100%|██████████| 250/250 [00:20<00:00, 12.22it/s, loss=0.268]


Epoch  [54 / 100]:  Train Loss:0.3106, Train Acc:0.86746875
Epoch  [54 / 100]:  Valid Loss:0.3438, Valid Acc:0.8623511904761905


Epoch  [55/100]: 100%|██████████| 250/250 [00:20<00:00, 12.26it/s, loss=0.366]


Epoch  [55 / 100]:  Train Loss:0.3111, Train Acc:0.86590625
Epoch  [55 / 100]:  Valid Loss:0.3411, Valid Acc:0.8640873015873016


Epoch  [56/100]: 100%|██████████| 250/250 [00:20<00:00, 12.29it/s, loss=0.322]


Epoch  [56 / 100]:  Train Loss:0.3108, Train Acc:0.8669375
Epoch  [56 / 100]:  Valid Loss:0.3367, Valid Acc:0.8655753968253969


Epoch  [57/100]: 100%|██████████| 250/250 [00:20<00:00, 12.22it/s, loss=0.355]


Epoch  [57 / 100]:  Train Loss:0.3134, Train Acc:0.865625
Epoch  [57 / 100]:  Valid Loss:0.3317, Valid Acc:0.8670634920634921


Epoch  [58/100]: 100%|██████████| 250/250 [00:20<00:00, 12.23it/s, loss=0.316]


Epoch  [58 / 100]:  Train Loss:0.3090, Train Acc:0.86584375
Epoch  [58 / 100]:  Valid Loss:0.3468, Valid Acc:0.8674355158730159


Epoch  [59/100]: 100%|██████████| 250/250 [00:20<00:00, 12.19it/s, loss=0.357]


Epoch  [59 / 100]:  Train Loss:0.3074, Train Acc:0.86765625
Epoch  [59 / 100]:  Valid Loss:0.3688, Valid Acc:0.8656994047619048


Epoch  [60/100]: 100%|██████████| 250/250 [00:20<00:00, 12.16it/s, loss=0.251]


Epoch  [60 / 100]:  Train Loss:0.3072, Train Acc:0.86884375
Epoch  [60 / 100]:  Valid Loss:0.3337, Valid Acc:0.8664434523809523


Epoch  [61/100]: 100%|██████████| 250/250 [00:20<00:00, 12.23it/s, loss=0.273]


Epoch  [61 / 100]:  Train Loss:0.3057, Train Acc:0.8699375
Epoch  [61 / 100]:  Valid Loss:0.3384, Valid Acc:0.8653273809523809


Epoch  [62/100]: 100%|██████████| 250/250 [00:20<00:00, 12.23it/s, loss=0.358]


Epoch  [62 / 100]:  Train Loss:0.3084, Train Acc:0.8680625
Epoch  [62 / 100]:  Valid Loss:0.3341, Valid Acc:0.8669394841269841


Epoch  [63/100]: 100%|██████████| 250/250 [00:20<00:00, 12.20it/s, loss=0.284]


Epoch  [63 / 100]:  Train Loss:0.3052, Train Acc:0.87071875
Epoch  [63 / 100]:  Valid Loss:0.3520, Valid Acc:0.8658234126984127


Epoch  [64/100]: 100%|██████████| 250/250 [00:20<00:00, 12.20it/s, loss=0.316]


Epoch  [64 / 100]:  Train Loss:0.3054, Train Acc:0.86965625
Epoch  [64 / 100]:  Valid Loss:0.3446, Valid Acc:0.8649553571428571


Epoch  [65/100]: 100%|██████████| 250/250 [00:20<00:00, 12.24it/s, loss=0.347]


Epoch  [65 / 100]:  Train Loss:0.3049, Train Acc:0.86865625
Epoch  [65 / 100]:  Valid Loss:0.3387, Valid Acc:0.8647073412698413


Epoch  [66/100]: 100%|██████████| 250/250 [00:20<00:00, 12.19it/s, loss=0.355]


Epoch  [66 / 100]:  Train Loss:0.3041, Train Acc:0.86978125
Epoch  [66 / 100]:  Valid Loss:0.3402, Valid Acc:0.8686755952380952


Epoch  [67/100]: 100%|██████████| 250/250 [00:20<00:00, 12.21it/s, loss=0.282]


Epoch  [67 / 100]:  Train Loss:0.3009, Train Acc:0.8715625
Epoch  [67 / 100]:  Valid Loss:0.3578, Valid Acc:0.8664434523809523


Epoch  [68/100]: 100%|██████████| 250/250 [00:20<00:00, 12.20it/s, loss=0.323]


Epoch  [68 / 100]:  Train Loss:0.3032, Train Acc:0.86990625
Epoch  [68 / 100]:  Valid Loss:0.3365, Valid Acc:0.8634672619047619


Epoch  [69/100]: 100%|██████████| 250/250 [00:20<00:00, 12.22it/s, loss=0.275]


Epoch  [69 / 100]:  Train Loss:0.3018, Train Acc:0.87146875
Epoch  [69 / 100]:  Valid Loss:0.3484, Valid Acc:0.8655753968253969


Epoch  [70/100]: 100%|██████████| 250/250 [00:20<00:00, 12.21it/s, loss=0.218]


Epoch  [70 / 100]:  Train Loss:0.2975, Train Acc:0.8719375
Epoch  [70 / 100]:  Valid Loss:0.3716, Valid Acc:0.8647073412698413


Epoch  [71/100]: 100%|██████████| 250/250 [00:20<00:00, 12.13it/s, loss=0.287]


Epoch  [71 / 100]:  Train Loss:0.2997, Train Acc:0.8716875
Epoch  [71 / 100]:  Valid Loss:0.3433, Valid Acc:0.8603670634920635


Epoch  [72/100]: 100%|██████████| 250/250 [00:20<00:00, 12.12it/s, loss=0.324]


Epoch  [72 / 100]:  Train Loss:0.2973, Train Acc:0.87065625
Epoch  [72 / 100]:  Valid Loss:0.3641, Valid Acc:0.8668154761904762


Epoch  [73/100]: 100%|██████████| 250/250 [00:20<00:00, 12.03it/s, loss=0.29]


Epoch  [73 / 100]:  Train Loss:0.2980, Train Acc:0.873625
Epoch  [73 / 100]:  Valid Loss:0.3393, Valid Acc:0.8681795634920635


Epoch  [74/100]: 100%|██████████| 250/250 [00:20<00:00, 11.99it/s, loss=0.253]


Epoch  [74 / 100]:  Train Loss:0.2977, Train Acc:0.87190625
Epoch  [74 / 100]:  Valid Loss:0.3699, Valid Acc:0.8640873015873016


Epoch  [75/100]: 100%|██████████| 250/250 [00:20<00:00, 12.12it/s, loss=0.329]


Epoch  [75 / 100]:  Train Loss:0.2964, Train Acc:0.87190625
Epoch  [75 / 100]:  Valid Loss:0.3414, Valid Acc:0.8614831349206349


Epoch  [76/100]: 100%|██████████| 250/250 [00:20<00:00, 12.04it/s, loss=0.359]


Epoch  [76 / 100]:  Train Loss:0.2951, Train Acc:0.87303125
Epoch  [76 / 100]:  Valid Loss:0.3535, Valid Acc:0.8619791666666666


Epoch  [77/100]: 100%|██████████| 250/250 [00:20<00:00, 12.13it/s, loss=0.399]


Epoch  [77 / 100]:  Train Loss:0.2953, Train Acc:0.87246875
Epoch  [77 / 100]:  Valid Loss:0.3749, Valid Acc:0.8625992063492064


Epoch  [78/100]: 100%|██████████| 250/250 [00:20<00:00, 12.10it/s, loss=0.266]


Epoch  [78 / 100]:  Train Loss:0.2961, Train Acc:0.8751875
Epoch  [78 / 100]:  Valid Loss:0.3839, Valid Acc:0.8604910714285714


Epoch  [79/100]: 100%|██████████| 250/250 [00:20<00:00, 12.18it/s, loss=0.311]


Epoch  [79 / 100]:  Train Loss:0.2949, Train Acc:0.8749375
Epoch  [79 / 100]:  Valid Loss:0.3475, Valid Acc:0.8650793650793651


Epoch  [80/100]: 100%|██████████| 250/250 [00:20<00:00, 12.17it/s, loss=0.215]


Epoch  [80 / 100]:  Train Loss:0.2956, Train Acc:0.87484375
Epoch  [80 / 100]:  Valid Loss:0.3766, Valid Acc:0.8629712301587301


Epoch  [81/100]: 100%|██████████| 250/250 [00:20<00:00, 12.10it/s, loss=0.26]


Epoch  [81 / 100]:  Train Loss:0.2923, Train Acc:0.8743125
Epoch  [81 / 100]:  Valid Loss:0.3537, Valid Acc:0.8628472222222222


Epoch  [82/100]: 100%|██████████| 250/250 [00:20<00:00, 12.13it/s, loss=0.354]


Epoch  [82 / 100]:  Train Loss:0.2933, Train Acc:0.875375
Epoch  [82 / 100]:  Valid Loss:0.3604, Valid Acc:0.8616071428571429


Epoch  [83/100]: 100%|██████████| 250/250 [00:20<00:00, 12.15it/s, loss=0.352]


Epoch  [83 / 100]:  Train Loss:0.2904, Train Acc:0.87603125
Epoch  [83 / 100]:  Valid Loss:0.3527, Valid Acc:0.8639632936507936


Epoch  [84/100]: 100%|██████████| 250/250 [00:20<00:00, 12.11it/s, loss=0.345]


Epoch  [84 / 100]:  Train Loss:0.2898, Train Acc:0.87603125
Epoch  [84 / 100]:  Valid Loss:0.3476, Valid Acc:0.8638392857142857


Epoch  [85/100]: 100%|██████████| 250/250 [00:20<00:00, 12.10it/s, loss=0.339]


Epoch  [85 / 100]:  Train Loss:0.2890, Train Acc:0.87565625
Epoch  [85 / 100]:  Valid Loss:0.3792, Valid Acc:0.8624751984126984


Epoch  [86/100]: 100%|██████████| 250/250 [00:20<00:00, 12.20it/s, loss=0.281]


Epoch  [86 / 100]:  Train Loss:0.2894, Train Acc:0.87765625
Epoch  [86 / 100]:  Valid Loss:0.3436, Valid Acc:0.8612351190476191


Epoch  [87/100]: 100%|██████████| 250/250 [00:20<00:00, 12.03it/s, loss=0.298]


Epoch  [87 / 100]:  Train Loss:0.2883, Train Acc:0.87765625
Epoch  [87 / 100]:  Valid Loss:0.3540, Valid Acc:0.8611111111111112


Epoch  [88/100]: 100%|██████████| 250/250 [00:20<00:00, 12.07it/s, loss=0.347]


Epoch  [88 / 100]:  Train Loss:0.2876, Train Acc:0.87584375
Epoch  [88 / 100]:  Valid Loss:0.3530, Valid Acc:0.8567708333333334


Epoch  [89/100]: 100%|██████████| 250/250 [00:20<00:00, 12.14it/s, loss=0.299]


Epoch  [89 / 100]:  Train Loss:0.2863, Train Acc:0.8784375
Epoch  [89 / 100]:  Valid Loss:0.3416, Valid Acc:0.8627232142857143


Epoch  [90/100]: 100%|██████████| 250/250 [00:20<00:00, 12.10it/s, loss=0.217]


Epoch  [90 / 100]:  Train Loss:0.2840, Train Acc:0.8785625
Epoch  [90 / 100]:  Valid Loss:0.3569, Valid Acc:0.8639632936507936


Epoch  [91/100]: 100%|██████████| 250/250 [00:20<00:00, 12.11it/s, loss=0.254]


Epoch  [91 / 100]:  Train Loss:0.2835, Train Acc:0.8785625
Epoch  [91 / 100]:  Valid Loss:0.3526, Valid Acc:0.8587549603174603


Epoch  [92/100]: 100%|██████████| 250/250 [00:20<00:00, 12.08it/s, loss=0.388]


Epoch  [92 / 100]:  Train Loss:0.2843, Train Acc:0.87975
Epoch  [92 / 100]:  Valid Loss:0.3451, Valid Acc:0.863343253968254


Epoch  [93/100]: 100%|██████████| 250/250 [00:20<00:00, 12.06it/s, loss=0.241]


Epoch  [93 / 100]:  Train Loss:0.2818, Train Acc:0.87865625
Epoch  [93 / 100]:  Valid Loss:0.3596, Valid Acc:0.8647073412698413


Epoch  [94/100]: 100%|██████████| 250/250 [00:20<00:00, 12.09it/s, loss=0.293]


Epoch  [94 / 100]:  Train Loss:0.2836, Train Acc:0.877625
Epoch  [94 / 100]:  Valid Loss:0.3476, Valid Acc:0.8624751984126984


Epoch  [95/100]: 100%|██████████| 250/250 [00:20<00:00, 12.03it/s, loss=0.247]


Epoch  [95 / 100]:  Train Loss:0.2824, Train Acc:0.88015625
Epoch  [95 / 100]:  Valid Loss:0.3556, Valid Acc:0.8628472222222222


Epoch  [96/100]: 100%|██████████| 250/250 [00:20<00:00, 12.09it/s, loss=0.291]


Epoch  [96 / 100]:  Train Loss:0.2793, Train Acc:0.87965625
Epoch  [96 / 100]:  Valid Loss:0.3547, Valid Acc:0.8612351190476191
Model is not improving, so we halt the traing session


9. 对测试数据进行预测

In [None]:
def save_pred(preds, file):
  """Write predictions to a CSV file with the header ``id,label``.

  Each prediction becomes one row whose ``id`` is its zero-based position in
  ``preds``.

  Args:
    preds: Iterable of predicted labels.
    file: Path of the CSV file to create or overwrite.
  """
  # The csv module requires newline='' on the file object; without it, extra
  # blank rows appear on platforms that translate line endings.
  with open(file, 'w', newline='') as fp:
    writer = csv.writer(fp)
    writer.writerow(['id', 'label'])
    writer.writerows(enumerate(preds))

In [None]:
# Rebuild the architecture and restore the best checkpoint saved during training.
model = MyModel(input_dim=X_train.shape[1], hidden_layers=config['hidden_layers'], hidden_dim=config['hidden_dim']).to(device)
# map_location lets a checkpoint saved on one device be loaded on another.
model.load_state_dict(torch.load(config['save_path'], map_location=device))
model.eval()

prediction = []
# Count correct predictions over samples, not batches, so that the shorter
# final batch is weighted correctly.
n_correct, n_seen = 0, 0
with torch.no_grad():
    for X, y in test_loader:
        X, y = X.to(device), y.to(device)
        # argmax over dim=1 gives the index of the highest-scoring class of every
        # row. Predictions and labels stay on the same device for the comparison.
        idx_max = model(X).argmax(dim=1)
        n_correct += (idx_max == y.long()).sum().item()
        n_seen += y.size(0)
        prediction.extend(idx_max.cpu().tolist())
mean_test_acc = n_correct / n_seen
# This cell does not use the training loop's epoch counter, so it also works
# when only a saved checkpoint is loaded.
print(f'test Acc:{mean_test_acc}')

preds = np.array(prediction)
save_pred(preds, 'pred.csv')

Epoch  [96 / 100]:  test Acc:0.8581882911392406
