In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# 設定 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [5]:
red_train = pd.read_csv("data/winequality-red_train.csv")
white_train = pd.read_csv("data/winequality-white_train.csv")
data = pd.concat([red_train, white_train], ignore_index=True)

selected_features = True

print("選取特徵後的資料形狀：", data.shape)
print(data.head())

# 分割特徵與標籤
X = data.iloc[:, :-1].values  # 特徵
y = data.iloc[:, -1].values   # 標籤 (Wine Quality)

# 標準化數據
scaler = StandardScaler()
X = scaler.fit_transform(X)


選取特徵後的資料形狀： (5198, 12)
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            6.9             0.670         0.06             2.1      0.080   
1            7.5             0.530         0.06             2.6      0.086   
2           11.1             0.180         0.48             1.5      0.068   
3            8.3             0.705         0.12             2.6      0.092   
4            7.4             0.670         0.12             1.6      0.186   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                  8.0                  33.0  0.99845  3.68       0.71   
1                 20.0                  44.0  0.99650  3.38       0.59   
2                  7.0                  15.0  0.99730  3.22       0.64   
3                 12.0                  28.0  0.99940  3.51       0.72   
4                  5.0                  21.0  0.99600  3.39       0.54   

   alcohol  quality  
0      9.6        5  
1     10.7        6

In [6]:
# 切分訓練集與測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 轉換為 PyTorch Tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# 創建 DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
# 定義神經網絡模型
class WineQualityNN(nn.Module):
    def __init__(self, input_dim):
        super(WineQualityNN, self).__init__()
        #### TODO
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(32, 11)  # Wine quality 範圍為 0-10
        #### TODO

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [10]:
# 初始化模型
input_dim = X.shape[1]
model = WineQualityNN(input_dim).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [11]:
# 訓練模型
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")


In [12]:
# 測試模型
def test_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    print(f"Test Accuracy: {100 * correct / total:.2f}%")

In [13]:
# 執行訓練與測試
train_model(model, train_loader, criterion, optimizer, epochs=100)
test_model(model, test_loader)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x64 and 32x64)

In [None]:
# 預測新數據並保存到同一個 CSV
def predict_and_save_combined(model, selected_features, files, output_csv):
    results = []
    for file_path, wine_type in files:
        data = pd.read_csv(file_path, delimiter=',')
        if selected_features:
            data = data[selected_features]
        X_new = scaler.transform(data.values)
        X_new_tensor = torch.tensor(X_new, dtype=torch.float32).to(device)

        with torch.no_grad():
            outputs = model(X_new_tensor)
            _, predicted = torch.max(outputs, 1)

        results.extend([
            {'ID': f"{wine_type}_{i+1}", 'quality': int(pred.cpu().numpy())}
            for i, pred in enumerate(predicted)
        ])

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

In [None]:
# 預測紅酒與白酒品質，合併輸出至單一 CSV
predict_and_save_combined(model,
  selected_features,
 [("winequality-red_goal.csv", "red"), ("winequality-white_goal.csv", "white")],
                          "/content/baseline_winequality_predictions.csv")