In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
import importlib
import sys

# Determine the environment and import the matching utils module accordingly
def is_kaggle():
    """Return True when the KAGGLE_KERNEL_RUN_TYPE environment variable is set."""
    marker = os.environ.get('KAGGLE_KERNEL_RUN_TYPE')
    return marker is not None

if is_kaggle():
    print("Running on Kaggle")
    # Assuming 'preprocessing.py' and other scripts are in '/kaggle/input'
    kaggle_input_path = '/kaggle/usr/lib'
    sys.path.append(kaggle_input_path)
    
    import utils_py.utils_py as utils
   
    
    # Install missing libraries on kaggle
    ! pip install mlflow
else:
    print("Running locally")
    import scripts.utils as utils
    
    
importlib.reload(utils)

# Other imports
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torch.optim as optim
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm import tqdm
import mlflow
import mlflow.pytorch
import torch
import random
import models

Running on Kaggle
Collecting mlflow
  Downloading mlflow-2.13.2-py3-none-any.whl.metadata (29 kB)
Collecting cachetools<6,>=5.0.0 (from mlflow)
  Downloading cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.3-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting querystring-parser<2 (from mlflow)
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl.metadata (559 bytes)
Collecting gunicorn<23 (from mlflow)
  Downloading gunicorn-22.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.3-py3-none-any.whl.metadata (10 kB)
Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)
Collecting aniso8601<10,>=8 (from graphene<4->mlflow)
  Downloading aniso8601-9.0.1-py2.py3-none-any.whl.metadata (23 kB)
Downloading mlflow-2.13.2-py3-none-any.whl (25.0 MB)
[2K   [90

In [None]:
# Select the compute device here so the notebook survives Restart & Run All:
# every later cell reads the module-level name `device`.
if torch.cuda.is_available():
    print("CUDA is available!  Training on GPU ...")
else:
    print("CUDA is not available.  Training on CPU ...")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# 9 outputs: one logit per puzzle position
model = models.StormModel(num_classes=9, dropout=0.1)
# Optional warm start from a previously trained checkpoint:
#model.load_state_dict(torch.load("/kaggle/input/colorization_edition/pytorch/18epochs/1/SSL_n17_ep.pth"))
model.to(device)
#torchsummary.summary(model, (3, 224, 224))

CUDA is available!  Training on GPU ...
cuda:0


In [4]:
class JigsawPuzzleDataset(Dataset):
    """Self-supervised "jigsaw" dataset.

    Every item is an image that was cut into a ``grid_size x grid_size`` grid,
    had two randomly chosen tiles exchanged, and was glued back together.
    The target is a multi-hot vector with a 1 at the two exchanged positions
    (row-major order) and 0 elsewhere.

    Args:
        image_files: Indexable collection of image file paths.
        transform: Optional callable applied to the reassembled PIL image.
        grid_size: Number of tiles per side. Defaults to 3 (9 tiles).

    Raises:
        ValueError: If ``grid_size`` is smaller than 2 (no two tiles to swap).
    """

    # Class-level default so the helper methods work on the standard 3x3 layout
    grid_size = 3

    def __init__(self, image_files, transform=None, grid_size=3):
        if grid_size < 2:
            raise ValueError("grid_size must be at least 2 so that two distinct pieces can be swapped")
        self.image_files = image_files
        self.transform = transform
        self.grid_size = grid_size

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(jigsaw_image, label)`` for the image at position ``idx``.

        ``label`` is a float tensor of length ``grid_size ** 2``.
        """
        image_path = self.image_files[idx]
        # The context manager releases the file handle as soon as the pixels are decoded
        with Image.open(image_path) as source:
            image = source.convert('RGB')

        # Divide the image into tiles, swap two of them, and glue the result back together
        pieces = self.divide_image(image)
        pieces, label = self.swap_pieces(pieces)
        jigsaw_image = self.concat_pieces(pieces)

        if self.transform:
            jigsaw_image = self.transform(jigsaw_image)

        return jigsaw_image, torch.tensor(label, dtype=torch.float)

    def divide_image(self, image):
        """Cut ``image`` into ``grid_size ** 2`` equally sized tiles in row-major order.

        Tile dimensions use integer division, so up to ``grid_size - 1`` pixels
        on the right and bottom edges are discarded.

        Raises:
            ValueError: If the image is too small to yield non-empty tiles.
        """
        n = self.grid_size
        w, h = image.size
        piece_w, piece_h = w // n, h // n
        if piece_w == 0 or piece_h == 0:
            raise ValueError(f"Image of size {w}x{h} is too small for a {n}x{n} grid")

        return [
            image.crop((col * piece_w, row * piece_h, (col + 1) * piece_w, (row + 1) * piece_h))
            for row in range(n)
            for col in range(n)
        ]

    def swap_pieces(self, pieces):
        """Exchange two random tiles.

        Returns:
            A new list with the two tiles exchanged (the input list is left
            untouched) and a 0/1 list marking the two exchanged positions.
        """
        num_pieces = len(pieces)
        idx1, idx2 = random.sample(range(num_pieces), 2)

        # Work on a copy so the caller's list is not modified behind its back
        shuffled = list(pieces)
        shuffled[idx1], shuffled[idx2] = shuffled[idx2], shuffled[idx1]

        # Binary map indicating swapped pieces
        label = [0] * num_pieces
        label[idx1] = 1
        label[idx2] = 1

        return shuffled, label

    def concat_pieces(self, pieces):
        """Paste row-major ``pieces`` into a single RGB image.

        The tile size is taken from the first piece.
        """
        n = self.grid_size
        piece_w, piece_h = pieces[0].size
        jigsaw_image = Image.new('RGB', (piece_w * n, piece_h * n))

        for position, piece in enumerate(pieces):
            row, col = divmod(position, n)
            jigsaw_image.paste(piece, (col * piece_w, row * piece_h))

        return jigsaw_image

# Utility function to split the dataset
def create_train_test_loaders(image_folder, transform, test_size=0.2, batch_size=4, random_state=42,
                              num_workers=4, extensions=None):
    """Collect image files under ``image_folder`` and build train/test dataloaders.

    Args:
        image_folder: Root directory; it is walked recursively.
        transform: Transform handed to both ``JigsawPuzzleDataset`` instances.
        test_size: Share of files assigned to the test split.
        batch_size: Batch size of both dataloaders.
        random_state: Seed passed to ``train_test_split``.
        num_workers: Worker processes per dataloader.
        extensions: Optional file-name suffix or iterable of suffixes
            (e.g. ``('jpg', 'png')``), matched case-insensitively. ``None``
            (the default) keeps every regular file, which is what the main
            dataset needs; pass suffixes for folders that also hold non-image files.

    Returns:
        ``(train_dataloader, test_dataloader)``; only the training loader shuffles.

    Raises:
        ValueError: If no matching file is found under ``image_folder``.
    """
    if extensions is None:
        suffixes = None
    elif isinstance(extensions, str):
        suffixes = (extensions.lower(),)
    else:
        suffixes = tuple(ext.lower() for ext in extensions)

    # List all image files
    image_files = []
    for root, _, files in os.walk(image_folder):
        for file in files:
            path = os.path.join(root, file)
            if not os.path.isfile(path):
                continue
            if suffixes is not None and not file.lower().endswith(suffixes):
                continue
            image_files.append(path)

    if not image_files:
        raise ValueError(f"No image files found under {image_folder!r}")

    # os.walk yields entries in filesystem-dependent order; sorting makes the
    # seeded split below identical across machines and runs
    image_files.sort()

    # Split the image files into train and test sets
    train_files, test_files = train_test_split(image_files, test_size=test_size, random_state=random_state)

    # Create datasets
    train_dataset = JigsawPuzzleDataset(train_files, transform=transform)
    test_dataset = JigsawPuzzleDataset(test_files, transform=transform)

    # Create dataloaders
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_dataloader, test_dataloader


In [5]:
# Preprocessing applied to every reassembled puzzle image:
# resize to 224x224, then convert to a tensor (no normalization step is applied)
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Alternative dataset location: '/kaggle/input/foodsslimages/images'
image_folder = '/kaggle/input/food-dataset-sl/data/train'

# 80/20 train/test split, 32 images per batch
train_dataloader, test_dataloader = create_train_test_loaders(
    image_folder=image_folder,
    transform=transform,
    test_size=0.2,
    batch_size=32,
)

In [6]:
# Multi-label objective computed on the raw model outputs (logits)
criterion = nn.BCEWithLogitsLoss()

# Adam over all model parameters, initial learning rate 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Multiply the learning rate by 0.5 after every 5 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

In [7]:
num_epochs = 20

# Close any run left active by an earlier (possibly interrupted) execution of this cell
mlflow.end_run()

# Start a new MLflow run. The context manager closes the run when the block
# exits, including when training raises or is interrupted, so no run is left dangling.
with mlflow.start_run(run_name="jigsaw version SSL"):
    # Log parameters
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("learning_rate", optimizer.param_groups[0]['lr'])
    mlflow.log_param("batch_size", train_dataloader.batch_size)

    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()  # Set model to training mode
        running_loss = 0.0
        train_samples = 0
        train_loader_tqdm = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")

        for inputs, labels in train_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Read the scalar once; weight it by the batch size so the final,
            # possibly smaller, batch does not get a disproportionate share of the epoch mean
            batch_loss = loss.item()
            batch_count = inputs.size(0)
            train_loader_tqdm.set_postfix(loss=batch_loss)
            running_loss += batch_loss * batch_count
            train_samples += batch_count

        epoch_loss = running_loss / train_samples
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

        # Advance the learning-rate schedule once per epoch
        scheduler.step()

        # Log training loss
        mlflow.log_metric("train_loss", epoch_loss, step=epoch)

        # ---- Validation ----
        model.eval()  # Set model to evaluation mode
        val_loss = 0.0
        val_samples = 0
        exact_matches = 0  # Samples whose full 0/1 vector is predicted correctly

        with torch.no_grad():
            for inputs, labels in test_dataloader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)  # Forward pass
                loss = criterion(outputs, labels)  # Compute the loss

                batch_count = inputs.size(0)
                val_loss += loss.item() * batch_count
                val_samples += batch_count

                # Threshold the per-position probabilities at 0.5
                preds = (torch.sigmoid(outputs) > 0.5).to(labels.dtype)

                # A sample counts only if every position matches
                exact_matches += (preds == labels).all(dim=1).sum().item()

        val_loss /= val_samples
        exact_match_ratio = exact_matches / val_samples
        print(f'Validation Loss: {val_loss:.4f}, Exact Match Ratio: {exact_match_ratio:.5f}')

        # Log validation loss and exact match ratio
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("exact_match_ratio", exact_match_ratio, step=epoch)

    # Log the model while the run is still active
    mlflow.pytorch.log_model(model, "jigsaw_model_same_data")

print('Finished Training')

Epoch 1/20: 100%|██████████| 2372/2372 [03:29<00:00, 11.33batch/s, loss=0.0252]

Epoch [1/20], Loss: 0.2806





Validation Loss: 0.0392, Exact Match Ratio: 0.91758


Epoch 2/20: 100%|██████████| 2372/2372 [02:07<00:00, 18.56batch/s, loss=0.027]

Epoch [2/20], Loss: 0.0269





Validation Loss: 0.0263, Exact Match Ratio: 0.94266


Epoch 3/20: 100%|██████████| 2372/2372 [02:10<00:00, 18.17batch/s, loss=0.0191]

Epoch [3/20], Loss: 0.0181





Validation Loss: 0.0217, Exact Match Ratio: 0.95700


Epoch 4/20: 100%|██████████| 2372/2372 [02:09<00:00, 18.35batch/s, loss=0.0293]

Epoch [4/20], Loss: 0.0129





Validation Loss: 0.0149, Exact Match Ratio: 0.96796


Epoch 5/20: 100%|██████████| 2372/2372 [02:08<00:00, 18.39batch/s, loss=0.0104]

Epoch [5/20], Loss: 0.0110





Validation Loss: 0.0083, Exact Match Ratio: 0.98145


Epoch 6/20: 100%|██████████| 2372/2372 [02:09<00:00, 18.30batch/s, loss=0.00221]

Epoch [6/20], Loss: 0.0067





Validation Loss: 0.0066, Exact Match Ratio: 0.98535


Epoch 7/20: 100%|██████████| 2372/2372 [02:09<00:00, 18.30batch/s, loss=3.77e-5]

Epoch [7/20], Loss: 0.0064





Validation Loss: 0.0062, Exact Match Ratio: 0.98630


Epoch 8/20: 100%|██████████| 2372/2372 [02:11<00:00, 18.08batch/s, loss=6.21e-5]

Epoch [8/20], Loss: 0.0057





Validation Loss: 0.0047, Exact Match Ratio: 0.98946


Epoch 9/20: 100%|██████████| 2372/2372 [02:11<00:00, 18.08batch/s, loss=0.00089]

Epoch [9/20], Loss: 0.0059





Validation Loss: 0.0046, Exact Match Ratio: 0.99030


Epoch 10/20: 100%|██████████| 2372/2372 [02:10<00:00, 18.12batch/s, loss=0.000172]

Epoch [10/20], Loss: 0.0054





Validation Loss: 0.0044, Exact Match Ratio: 0.99051


Epoch 11/20: 100%|██████████| 2372/2372 [02:10<00:00, 18.12batch/s, loss=3.04e-5]

Epoch [11/20], Loss: 0.0041





Validation Loss: 0.0035, Exact Match Ratio: 0.99236


Epoch 12/20: 100%|██████████| 2372/2372 [02:13<00:00, 17.74batch/s, loss=3.37e-5]

Epoch [12/20], Loss: 0.0039





Validation Loss: 0.0029, Exact Match Ratio: 0.99352


Epoch 13/20: 100%|██████████| 2372/2372 [02:11<00:00, 18.02batch/s, loss=3.15e-5]

Epoch [13/20], Loss: 0.0037





Validation Loss: 0.0033, Exact Match Ratio: 0.99310


Epoch 14/20: 100%|██████████| 2372/2372 [02:12<00:00, 17.88batch/s, loss=0.00221]

Epoch [14/20], Loss: 0.0034





Validation Loss: 0.0031, Exact Match Ratio: 0.99352


Epoch 15/20: 100%|██████████| 2372/2372 [02:13<00:00, 17.83batch/s, loss=0.0124]

Epoch [15/20], Loss: 0.0034





Validation Loss: 0.0031, Exact Match Ratio: 0.99352


Epoch 16/20: 100%|██████████| 2372/2372 [02:12<00:00, 17.87batch/s, loss=1.85e-5]

Epoch [16/20], Loss: 0.0029





Validation Loss: 0.0025, Exact Match Ratio: 0.99426


Epoch 17/20: 100%|██████████| 2372/2372 [02:12<00:00, 17.94batch/s, loss=1.78e-5]

Epoch [17/20], Loss: 0.0027





Validation Loss: 0.0026, Exact Match Ratio: 0.99410


Epoch 18/20: 100%|██████████| 2372/2372 [02:19<00:00, 17.00batch/s, loss=0.000123]

Epoch [18/20], Loss: 0.0025





Validation Loss: 0.0025, Exact Match Ratio: 0.99431


Epoch 19/20: 100%|██████████| 2372/2372 [02:10<00:00, 18.16batch/s, loss=0.0016]

Epoch [19/20], Loss: 0.0023





Validation Loss: 0.0021, Exact Match Ratio: 0.99552


Epoch 20/20: 100%|██████████| 2372/2372 [02:12<00:00, 17.94batch/s, loss=0.000294]

Epoch [20/20], Loss: 0.0025





Validation Loss: 0.0023, Exact Match Ratio: 0.99468
Finished Training


In [8]:

# Write only the model's state_dict to a file in the current working directory
torch.save(obj=model.state_dict(), f='jigsaw_model_same_data.pth')