In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
# CSVファイルの読み込み
data_path = './csv/koniq10k_merged.csv'
data = pd.read_csv(data_path)

# 特徴量とターゲットの分離
features = data[['arniqa', 'clipiqa', 'cnniqa', 'metaiqa']]
target = data['overall_score']

# スケーリング（MinMaxScalerを使用）
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)

# データ分割（80% トレーニング、20% 検証）
X_train, X_val, y_train, y_val = train_test_split(scaled_features, target, test_size=0.2, random_state=42)

# データをPyTorchのテンソルに変換
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_val = torch.tensor(y_val.values, dtype=torch.float32).unsqueeze(1)

In [15]:
# print(features.describe())  # 特徴量の統計を確認

print(target.describe())    # ターゲットの統計を確認
# print(data.isnull().sum())  # NaNの有無を確認


count    10073.000000
mean         0.587296
std          0.154324
min          0.039118
25%          0.492478
50%          0.623544
75%          0.707125
max          0.883889
Name: overall_score, dtype: float64


In [None]:

# MLPモデルの定義
class MLPModel(nn.Module):
    def __init__(self):
        super(ResMLPModel, self).__init__()
        # 各層の定義
        self.fc1 = nn.Linear(4, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, 1)
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        # 残差接続用のプロジェクション（次元が異なる場合の線形変換）
        self.proj_fc1 = nn.Linear(4, 64)  # fc1の入力サイズ(4)とfc2の出力サイズ(64)を一致
        self.proj_fc2 = nn.Linear(64, 32)  # fc2の出力サイズ(64)とfc3の出力サイズ(32)を一致

    def forward(self, x):
        # 第一層: fc1 + ReLU
        residual = x  # 残差用
        x = self.relu(self.fc1(x))
        residual = self.proj_fc1(residual)  # 次元変換
        x = x + residual  # 残差接続

        # 第二層: fc2 + ReLU
        residual = x  # 残差用
        x = self.relu(self.fc2(x))
        residual = self.proj_fc2(residual)  # 次元変換
        x = x + residual  # 残差接続

        # 第三層: fc3 + ReLU
        residual = x  # 残差用（次元一致している場合は直接加算）
        x = self.relu(self.fc3(x))
        x = x + residual  # 残差接続

        # 第四層: fc4 + ReLU
        x = self.relu(self.fc4(x))  # この層では残差接続は省略（次元が小さくなるため）

        # 最終層: fc5 + Sigmoid
        x = self.fc5(x)
        x = self.sigmoid(x)  # 

        return x


# モデルの初期化
model = MLPModel()

# 損失関数とオプティマイザ
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# トレーニングループ
epochs = 50  # エポック数
batch_size = 128  # バッチサイズ

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
val_dataset = torch.utils.data.TensorDataset(X_val, y_val)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

# モデルの評価
def evaluate_model(model, loader):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            total_loss += loss.item()
    return total_loss / len(loader)

final_val_loss = evaluate_model(model, val_loader)
print(f"Final Validation Loss (MSE): {final_val_loss:.4f}")

Epoch 1/50, Train Loss: nan, Validation Loss: nan
Epoch 2/50, Train Loss: nan, Validation Loss: nan
Epoch 3/50, Train Loss: nan, Validation Loss: nan
Epoch 4/50, Train Loss: nan, Validation Loss: nan
Epoch 5/50, Train Loss: nan, Validation Loss: nan
Epoch 6/50, Train Loss: nan, Validation Loss: nan
Epoch 7/50, Train Loss: nan, Validation Loss: nan
Epoch 8/50, Train Loss: nan, Validation Loss: nan
Epoch 9/50, Train Loss: nan, Validation Loss: nan
Epoch 10/50, Train Loss: nan, Validation Loss: nan
Epoch 11/50, Train Loss: nan, Validation Loss: nan
Epoch 12/50, Train Loss: nan, Validation Loss: nan
Epoch 13/50, Train Loss: nan, Validation Loss: nan
Epoch 14/50, Train Loss: nan, Validation Loss: nan
Epoch 15/50, Train Loss: nan, Validation Loss: nan
Epoch 16/50, Train Loss: nan, Validation Loss: nan
Epoch 17/50, Train Loss: nan, Validation Loss: nan
Epoch 18/50, Train Loss: nan, Validation Loss: nan
Epoch 19/50, Train Loss: nan, Validation Loss: nan
Epoch 20/50, Train Loss: nan, Validation