<a href="https://colab.research.google.com/github/Wandering-Sci/Python-Samples/blob/main/PyTorch_NN_KFold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**PyTorch example on Iris dataset with K-Fold Cross-Validation**

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.datasets import load_iris
import torch.optim as optim
from sklearn.model_selection import KFold
from torch.utils.data import TensorDataset, DataLoader

In [3]:
# Load iris dataset
iris = load_iris()
iris_x = torch.tensor(iris.data, dtype=torch.float32)
iris_y = torch.tensor(iris.target, dtype=torch.long)

dataset = TensorDataset(iris_x, iris_y)

In [4]:
# Creating NN Model class
class nnModel(nn.Module):
  def __init__(self, input_size = 4, fc1 = 8, output_size = 3):
    super().__init__()
    self.fc1 = nn.Linear(input_size, fc1)
    self.out = nn.Linear(fc1, output_size)

  def forward(self, data):
    data = F.relu(self.fc1(data))
    data = self.out(data)
    return data

In [6]:
# Initializing KFold cross-validation
folds_count = 5
kf = KFold(n_splits = folds_count, shuffle = True, random_state = 28)

kf_result = []
for fold, (train_idx, test_idx) in enumerate(kf.split(dataset)):
  # Split folds
  train_subset = torch.utils.data.Subset(dataset, train_idx)
  test_subset = torch.utils.data.Subset(dataset, test_idx)
  train_loader = DataLoader(train_subset, batch_size = 16, shuffle = True)
  test_loader = DataLoader(test_subset, batch_size = 16, shuffle = False)

  # Initialize model, loss function, optimizer
  model = nnModel()
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr = 0.001)

  # Training
  for epoch in range(10):
    model.train()
    for batch in train_loader:
      inputs, targets = batch
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, targets)
      loss.backward()
      optimizer.step()

  # Evaluation
  correct = 0
  total = 0
  model.eval()
  with torch.no_grad():
    for batch in test_loader:
      inputs, targets = batch
      outputs = model(inputs)
      _, predicted = torch.max(outputs, 1)
      correct += (predicted == targets).sum().item()
      total += targets.size(0)

  accuracy = correct / total
  kf_result.append(accuracy)
  print(f"Accuracy for fold {fold + 1}: {accuracy:.2f}")

Accuracy for fold 1: 0.33
Accuracy for fold 2: 0.30
Accuracy for fold 3: 0.63
Accuracy for fold 4: 0.70
Accuracy for fold 5: 0.77


In [9]:
# Overall performance
print(f"Mean accuracy over {folds_count} folds: {sum(kf_result) / len(kf_result):.2f}")

Mean accuracy over 5 folds: 0.55
