In [None]:
# Testing

In [2]:
!pip install --upgrade kfp opencv-python torch torchvision torchaudio numpy matplotlib pillow

Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting torch
  Downloading torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.20.1-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading torchaudio-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting numpy
  Downloading numpy-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting matplotlib
  Downloading matplotlib-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting pillow
  Downloading pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.1 kB)
Collecting filelock (from torch)
  Downloading filelock-3.16.1-py3-none-any.whl.metada

In [None]:
import kfp
import kfp.components as comp
# Client handle for the Kubeflow Pipelines API. No host is passed here, so the SDK
# resolves the endpoint on its own — TODO confirm this matches the target cluster.
kfp_client = kfp.Client()

In [None]:
# This pipeline is based on https://towardsdatascience.com/building-a-vision-inspection-cnn-for-an-industrial-application-138936d7a34a
from kfp import dsl
from kfp.dsl import Input, Output, Dataset, Model, Artifact
from kfp import compiler




# # Path of images (local to accelerate loading)
# path = "data/Coil_Vision/01_train_val_test"
@dsl.component(
    packages_to_install=["opencv-python", "torch", "torchvision", "torchaudio", "numpy", "matplotlib", "pillow", "wget"],
    base_image="nvcr.io/nvidia/pytorch:24.12-py3"
)
def train_model(model_artifact: Output[Model]):
    """Train a small CNN on the coil-vision images and save its weights.

    The component downloads ``CNN_data.zip``, extracts it into the working
    directory and builds an ``ImageFolder`` dataset from
    ``data/Coil_Vision/01_train_val_test``. The data is split roughly 50/30/20
    into train/validation/test subsets, the training subset is re-balanced
    with a weighted sampler, and the network is trained with SGD and
    cross-entropy loss. Training and validation accuracy are printed every
    second epoch.

    Args:
        model_artifact: Output artifact; the trained ``state_dict`` is written
            to ``model_artifact.path``.
    """
    # All imports stay inside the function body, next to the code that runs in the component.
    import warnings
    import zipfile

    import numpy as np
    import torch
    import torch.nn as nn
    import wget
    from PIL import Image
    from torch.utils.data import DataLoader, random_split
    from torch.utils.data.sampler import WeightedRandomSampler
    from torchvision import datasets, transforms

    warnings.filterwarnings("ignore")

    # Hyper-parameters and data locations.
    minibatch_size = 20
    learning_rate = 0.01
    epochs = 100
    data_url = "https://www.dropbox.com/scl/fi/z8kkui6ync57rudlx10dx/CNN_data.zip?rlkey=27787pjnnbaa3mss5nu4nbi7u&e=1&st=4q3p56fh&dl=1"
    data_dir = "data/Coil_Vision/01_train_val_test"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Download and unpack the data. Extraction uses the path that wget actually
    # wrote and the standard library, so it neither depends on an `unzip`
    # binary nor silently ignores a failed extraction.
    archive_path = wget.download(data_url, out="CNN_data.zip")
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall()

    def custom_loader(path):
        """Open the image at ``path`` and crop it to the 700x400 px region of interest."""
        with open(path, 'rb') as f:
            img = Image.open(f)
            img = img.crop((50, 60, 750, 460))  # Size: 700x400 px
            # Force the pixel data to be read before the file handle is closed.
            img.load()
            return img

    # Transform function for loading.
    # NOTE(review): the network below uses in_channels=1, so the source images
    # are assumed to be single-channel — TODO confirm.
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])

    # Create dataset out of folder structure (one sub-folder per class).
    full_dataset = datasets.ImageFolder(data_dir, transform=transform, loader=custom_loader)

    def val_test(dataloader, model):
        """Return ``(accuracy, wrong_preds)`` for ``model`` over ``dataloader``.

        ``wrong_preds`` holds one ``(image, predicted_label, true_label)``
        tuple per misclassified sample. The caller is expected to have put
        the model into eval mode.
        """
        dataset_size = len(dataloader.dataset)
        correct = 0
        wrong_preds = []

        # Gradients are not needed for evaluation.
        with torch.no_grad():
            for images, labels in dataloader:
                images, labels = images.to(device), labels.to(device)

                # Predicted class = index of the largest raw output.
                y_pred = model(images).argmax(1)

                # Count correct classifications over all batches.
                correct += (y_pred == labels).sum().item()

                # Save wrong predictions (image, pred_lbl, true_lbl).
                wrong_preds.extend(
                    (image, pred, label)
                    for image, pred, label in zip(images, y_pred, labels)
                    if pred != label
                )

        return correct / dataset_size, wrong_preds

    class CNN(nn.Module):
        """Two conv/pool stages followed by a fully connected classifier with 2 outputs."""

        def __init__(self):
            super().__init__()

            # Define model layers
            self.model_layers = nn.Sequential(

                nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),

                nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),

                nn.Flatten(),
                # A 400x700 input becomes 16 feature maps of 97x172 after the two conv/pool stages.
                nn.Linear(16*97*172, 120),
                nn.ReLU(),
                # Adding hidden layers
                # NOTE(review): these Linear layers have no activation between
                # them, so together they act as a single affine map. Left
                # unchanged because inserting modules would shift the
                # Sequential indices and therefore the saved state_dict keys.
                nn.Linear(120, 120),
                nn.Linear(120, 120),
                nn.Linear(120, 120),

                nn.Linear(120, 2)
            )

        def forward(self, x):
            return self.model_layers(x)

    print("Preparing training data")
    # Independent rounding of all three shares does not always add up to the
    # dataset size (e.g. 17 -> 8 + 5 + 3 = 16), which random_split rejects.
    # The test share therefore takes whatever remains.
    n_total = len(full_dataset)
    n_train = round(0.5 * n_total)
    n_val = round(0.3 * n_total)
    n_test = n_total - n_train - n_val
    # The test subset is held out but not evaluated in this component.
    train_set, val_set, _test_set = random_split(full_dataset, [n_train, n_val, n_test])

    # Read labels from ImageFolder.targets instead of indexing the dataset,
    # which would decode and transform every image just to get its class.
    train_labels = [full_dataset.targets[idx] for idx in train_set.indices]

    # Weight every sample by total/class_count so both classes are drawn about
    # equally often. minlength/maximum guard against a class that is missing
    # from the training split (its weight is then never used).
    class_counts = np.bincount(train_labels, minlength=2)
    class_weights = class_counts.sum() / np.maximum(class_counts, 1)
    sample_weights = [class_weights[label] for label in train_labels]
    train_sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(train_labels))

    # Define loader with batchsize
    train_loader = DataLoader(dataset=train_set, batch_size=minibatch_size, sampler=train_sampler)
    val_loader = DataLoader(dataset=val_set, batch_size=minibatch_size, shuffle=True)

    # Define model on cpu or gpu
    model = CNN().to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Iterate over epochs
    print("Training the model")
    for epoch in range(1, epochs + 1):
        # Evaluation below switches to eval mode, so re-enable train mode once per epoch.
        model.train()
        n_correct = 0
        n_samples = 0
        n_true_OK = 0
        for images, labels in train_loader:
            # Push data to gpu if available
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            batch_loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # Predicted labels for the running training accuracy.
            y_pred = outputs.argmax(1)

            # Count correct classifications
            n_correct += (y_pred == labels).sum().item()
            n_true_OK += (labels == 1).sum().item()
            n_samples += labels.size(0)

        # At the end of every second epoch: evaluate accuracy and print information.
        if epoch % 2 == 0:
            model.eval()
            train_acc = n_correct / n_samples
            true_OK = n_true_OK / n_samples
            val_acc = val_test(val_loader, model)[0]

            # The reported loss is the loss of the last mini-batch of the epoch.
            print (f"Epoch [{epoch}/{epochs}], Loss: {batch_loss.item():.4f}")
            print(f"      Training accuracy: {train_acc*100:.2f}%")
            print(f"      True OK: {true_OK*100:.3f}%")
            print(f"      Validation accuracy: {val_acc*100:.2f}%")

    torch.save(model.state_dict(), model_artifact.path)
@dsl.component(
     packages_to_install=["minio"],
     base_image="python:3.11"
)
def publish_artifact(model: Input[Model]):
    """Upload the trained model file to the ``industrial-cnn-models`` bucket.

    The bucket is created with versioning enabled when it does not exist yet.
    The file at ``model.path`` is stored under the object name ``model.pth``.

    The endpoint and credentials are read from the environment variables
    ``S3_ENDPOINT``, ``S3_ACCESS_KEY`` and ``S3_SECRET_KEY``. When a variable
    is unset, the value that was previously hard-coded here is used, so the
    component behaves as before in an unconfigured environment.

    Args:
        model: Input model artifact whose ``path`` points at the file to upload.
    """
    import os

    from minio import Minio
    # VersioningConfig and the ENABLED status constant live in sub-modules of
    # the SDK; the status must be the constant's value, not the string 'ENABLED'.
    from minio.commonconfig import ENABLED
    from minio.versioningconfig import VersioningConfig

    client = Minio(
        os.environ.get("S3_ENDPOINT", "192.168.2.109:9000"),
        access_key=os.environ.get("S3_ACCESS_KEY", "S3_ACCESS_KEY"),
        secret_key=os.environ.get("S3_SECRET_KEY", "S3_SECRET_KEY"),
        secure=False,
    )
    bucket_name = "industrial-cnn-models"

    # Make the bucket if it doesn't exist.
    if client.bucket_exists(bucket_name):
        print("Bucket", bucket_name, "already exists")
    else:
        client.make_bucket(bucket_name)
        # Keep earlier uploads: every run writes to the same object name.
        client.set_bucket_versioning(bucket_name, VersioningConfig(ENABLED))
        print("Created bucket", bucket_name)

    destination_file = "model.pth"
    # Upload the file, renaming it in the process
    client.fput_object(
        bucket_name, destination_file, model.path,
    )
    print(
        model.path, "successfully uploaded as object",
        destination_file, "to bucket", bucket_name,
    )
    

@dsl.pipeline()
def train_cnn():
    """Train the CNN on one GPU, then publish the resulting model artifact."""
    # set_gpu_limit is the deprecated alias of set_accelerator_limit, and the
    # limit only takes effect once an accelerator type is set as well.
    train_task = (
        train_model()
        .set_accelerator_type("nvidia.com/gpu")
        .set_accelerator_limit(1)
    )
    publish_artifact(model=train_task.output)


In [7]:
# Compile the pipeline definition into a YAML package in the working directory.
compiler.Compiler().compile(
    pipeline_func=train_cnn,
    package_path='pipeline.yaml',
)