In [6]:
import numpy as np
import torch
import pandas as pd
import re, os, sys
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.nn as nn
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('./winequality-red.csv')

In [3]:
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [4]:
X = data.drop(['quality'], axis=1)
y = data['quality']

In [5]:
print(X.shape, y.shape)

(1599, 11) (1599,)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [27]:
print(X_train.shape, X_test.shape)

(1199, 11) (400, 11)


In [29]:
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.x = torch.from_numpy(np.array(X)).float()
        self.y = torch.from_numpy(np.array(y)).float()
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, item):
        x = self.x[item]
        y = self.y[item]
        return x, y

In [30]:
train_set = MyDataset(X_train, y_train)
valid_set = MyDataset(X_test, y_test)

train_loader = DataLoader(train_set, batch_size=128, shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=128, shuffle=False, pin_memory=True)

In [33]:
class ResidualBlock(nn.Module):
    def __init__(self, input_units, output_units, residual_path):
        super().__init__()
        
        self.residual_path = residual_path
        
        self.features_1 = nn.Sequential(
            nn.Linear(input_units, output_units), nn.ReLU(inplace=True),
            nn.Linear(output_units, output_units), nn.ReLU(inplace=True)
        )
        
        if residual_path:
            self.features_2 = nn.Sequential(
            nn.Linear(input_units, output_units), nn.GELU(),
            nn.Linear(output_units, output_units), nn.GELU()
        )
        
    def forward(self, x):
        residual = x
        y = self.features_1(x)
        if self.residual_path:
            residual = self.features_2(x)
        output = y + residual
        return output
    
# 残差MLP 特征融合 鲁棒性更好 
class ResMLP(nn.Module):
    def __init__(self, initial_units, block_list, num_classes, initial_weights=True):
        super().__init__()
        
        self.initial_units = initial_units
        self.block_list = block_list
        
        self.layer1 = nn.Sequential(nn.Linear(initial_units, 32), 
                                    nn.ReLU(inplace=True), 
                                    nn.BatchNorm1d(num_features=32)
                        )
        self.layer2 = self._make_layer(block_num=self.block_list[0], input_units=32, output_units=64)
        self.layer3 = self._make_layer(block_num=self.block_list[1], input_units=64, output_units=128)
        self.layer4 = self._make_layer(block_num=self.block_list[2], input_units=128, output_units=256)
        self.b1 = nn.BatchNorm1d(num_features=256)
        self.out = nn.Linear(256, num_classes)
        
        if initial_weights:
            self._initialize_weights()
        
        
    def _make_layer(self, block_num, input_units, output_units):
        blocks = []
        blocks.append(ResidualBlock(input_units, output_units, residual_path=True))
        for _ in range(1, block_num):
            blocks.append(ResidualBlock(output_units, output_units, residual_path=False))
        
        return nn.Sequential(*blocks)
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
    
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.b1(x)
        y = self.out(x).squeeze(1)
        return y

In [34]:
def train_step(net, optimizer, data_loader, device, epoch, scalar=None):
    net.train()
    loss_function = nn.MSELoss()
    train_loss, sampleNum = 0, 0
    optimizer.zero_grad()

    train_bar = tqdm(data_loader, file=sys.stdout, colour='red')
    for step, data in enumerate(train_bar):
        images, labels = data
        sampleNum += images.shape[0]  # batch
        
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        if scalar is not None:
            with torch.cuda.amp.autocast():
                outputs = net(images)
                loss = loss_function(outputs, labels)
        else:
            outputs = net(images)
            loss = loss_function(outputs, labels)
        train_loss += (loss.item() ** 0.5)
        # loss.backward()
        # optimizer.step()

        if scalar is not None:
            scalar.scale(loss).backward()
            scalar.step(optimizer)
            scalar.update()
        else:
            loss.backward()
            optimizer.step()
        train_bar.desc = "[train epoch {}] RMSE loss: {:.3f}".format(epoch, train_loss / (step + 1))

    return train_loss / (step + 1)


@torch.no_grad()
def val_step(net, data_loader, device, epoch):
    loss_function = nn.MSELoss()
    net.eval()
    val_loss = 0
    sample_num = 0
    val_bar = tqdm(data_loader, file=sys.stdout, colour='red')
    for step, data in enumerate(val_bar):
        images, labels = data
        sample_num += images.shape[0]
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        loss = loss_function(outputs, labels)
        val_loss += (loss.item() ** 0.5)
        val_bar.desc = "[valid epoch {}] RMSE loss: {:.3f}".format(epoch, val_loss / (step + 1))

    return val_loss / (step + 1)

In [37]:
epochs = 30
model = ResMLP(11, [2, 2, 2], 1).to(device)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
lr = 1e-3
weight_decay = 1e-5
optimizer=torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
lr_scheduler = CosineAnnealingLR(optimizer, T_max=5)
best_loss = 10000
scalar = torch.cuda.amp.GradScaler()

In [38]:
for epoch in range(epochs):
        # train
    train_loss = train_step(net=model, optimizer=optimizer, data_loader=train_loader, device=device, epoch=epoch, scalar=scalar)

        # validate
    val_loss = val_step(net=model, data_loader=valid_loader, device=device, epoch=epoch)

    lr_scheduler.step()
    
    if val_loss < best_loss:
        best_loss = val_loss

[train epoch 0] RMSE loss: 5.690: 100%|[31m████████████████████████████████████████████████[0m| 10/10 [00:03<00:00,  2.93it/s][0m
[valid epoch 0] RMSE loss: 5.651: 100%|[31m█████████████████████████████████████████████████[0m| 4/4 [00:00<00:00, 147.15it/s][0m
[train epoch 1] RMSE loss: 5.642: 100%|[31m████████████████████████████████████████████████[0m| 10/10 [00:00<00:00, 87.60it/s][0m
[valid epoch 1] RMSE loss: 5.596: 100%|[31m█████████████████████████████████████████████████[0m| 4/4 [00:00<00:00, 166.45it/s][0m
[train epoch 2] RMSE loss: 5.569: 100%|[31m███████████████████████████████████████████████[0m| 10/10 [00:00<00:00, 111.56it/s][0m
[valid epoch 2] RMSE loss: 5.517: 100%|[31m█████████████████████████████████████████████████[0m| 4/4 [00:00<00:00, 206.07it/s][0m
[train epoch 3] RMSE loss: 5.475: 100%|[31m███████████████████████████████████████████████[0m| 10/10 [00:00<00:00, 108.90it/s][0m
[valid epoch 3] RMSE loss: 5.468: 100%|[31m█████████████████████████

In [39]:
best_loss

0.7282637381691834