# Klasyfikator klastrów

## IMPORTS

In [1]:
!pip install comet-ml



In [1]:
import comet_ml

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import einops
import numpy as np

import matplotlib
from matplotlib import pyplot as plt

In [2]:
# import os

# from google.colab import userdata
# key = userdata.get('COMET_API_KEY')
# os.environ['COMET_API_KEY'] = key

In [2]:
def setup_device():
    if torch.cuda.is_available():
        device = torch.device('cuda')
        # Set default tensor type for cuda
        torch.set_default_dtype(torch.float32)
    elif torch.backends.mps.is_available():
        device = torch.device('mps')
    else:
        device = torch.device('cpu')
        # Ensure we're using float32 on CPU
        torch.set_default_dtype(torch.float64)
    return device

device = setup_device()

print(f"Using {device} device")

Using cuda device


## Dataset

In [5]:
!pip install datasets



In [3]:
from datasets import load_dataset, DatasetDict, load_from_disk


In [6]:
!ls

'ls' is not recognized as an internal or external command,
operable program or batch file.


In [4]:
ds = load_from_disk("clustered_dataset").with_format("torch")

In [5]:
ds

DatasetDict({
    train: Dataset({
        features: ['title', 'artist', 'date', 'genre', 'style', 'description', 'filename', 'image', 'cluster'],
        num_rows: 82600
    })
    test: Dataset({
        features: ['title', 'artist', 'date', 'genre', 'style', 'description', 'filename', 'image', 'cluster'],
        num_rows: 10325
    })
    valid: Dataset({
        features: ['title', 'artist', 'date', 'genre', 'style', 'description', 'filename', 'image', 'cluster'],
        num_rows: 10325
    })
})

In [7]:
percent10 = True

if percent10:
  # Extract 10% of the train set
  ten_percent_train = ds["train"].select(range(int(len(ds["train"]) * 0.1)))
  # Extract 10% of the test set
  ten_percent_test = ds["test"].select(range(int(len(ds["test"]) * 0.1)))
  # Extract 10% of the validation set
  ten_percent_valid = ds["valid"].select(range(int(len(ds["valid"]) * 0.1)))

  # Combine the subsets into a new DatasetDict
  ten_percent_dataset = DatasetDict({
      "train": ten_percent_train,
      "test": ten_percent_test,
      "valid": ten_percent_valid
  })

  ds = ten_percent_dataset

In [7]:
  # change if not colab
  num_workers = 12
  pin_memory = True
  batch_size = 64
  train_loader = DataLoader(
      ds["train"],
      batch_size=batch_size,
      num_workers=num_workers,
      shuffle=True,
      pin_memory=True,
  )

  test_loader = DataLoader(
      ds["test"],
      batch_size=batch_size,
      num_workers=num_workers,
      shuffle=False,
      pin_memory=True,
  )

  val_loader = DataLoader(
      ds["test"],
      batch_size=batch_size,
      num_workers=num_workers,
      shuffle=False,
      pin_memory=True,
  )

## ENCODER

In [8]:
PATH = "models/inpating/mse_perceptual/model1.pth"
# model = VGG16Autoencoder()
new_model = torch.load(PATH)
new_model.eval()
new_model.to(device)

  new_model = torch.load(PATH)


VGG16Autoencoder(
  (encoder): VGG16EncoderWithSkipConnections(
    (block1): Sequential(
      (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (block2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (block3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): Conv2d(256, 256

In [9]:
encoder = new_model.encoder
encoder.eval()  # Set the encoder to evaluation mode
encoder.to(device)  # Move the encoder to the appropriate device

VGG16EncoderWithSkipConnections(
  (block1): Sequential(
    (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inpl

In [10]:
# Define the MLP model
import torch
import torch.nn as nn
import torch.nn.functional as F

class AdaptedStyleClusterCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.model = nn.Sequential(
            nn.Flatten(),  # Flatten the latent output [batch_size, 128, 4, 4] -> [batch_size, 2048]
            
            # Fully Connected Layers
            nn.Linear(in_features=2048, out_features=128),  # Adjusted in_features
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=num_classes)
        )

    def forward(self, x):
        logits = self.model(x)
        return logits

In [11]:
# Set up model hyperparameters
input_size = 2048  
num_classes = 20  

# Create the model
model = AdaptedStyleClusterCNN(num_classes)
model.to(device)

AdaptedStyleClusterCNN(
  (model): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=2048, out_features=128, bias=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=20, bias=True)
  )
)

In [12]:
# pure PyTorch loop
num_epochs = 20
loss_func = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [13]:
from torchinfo import summary
from tqdm import tqdm 
from create_mask import generate_scaled_blob

# SET UP COMET ML
comet_experiment = comet_ml.Experiment(
    api_key="LP4wJZSrJYL1KJZ06ahrmPLUb",
    project_name="UczenieNienadzorowane")
comet_experiment.log_code(folder="/UN")
comet_experiment.log_parameters(
    {
        "batch_size": train_loader.batch_size,
        "train_size": ds["train"].num_rows,
        "val_size": ds["valid"].num_rows,
    }
)
input_size = (batch_size, 128, 4, 4)
summ = summary(model, input_size, device=device, depth=5)
comet_experiment.set_model_graph(f"{model.__repr__()}\n{summ}")

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/kamilciepluch/uczenienienadzorowane/c9f309b3b302484a866f2a780f11c8ab

[1;38;5;196mCOMET ERROR:[0m We failed to read directory '/UN' for uploading.
Please double-check the file path, permissions, and that it is a directory.


In [14]:
# train and validate
save_model_path = "models/cluster_clasification/model1_0"
for epoch in range(num_epochs):
        comet_experiment.set_epoch(epoch)

  


        model.train()
        with comet_experiment.train() as train:
            for idx, batch in tqdm(enumerate(train_loader), desc=f"TRAIN_{epoch}"):
                comet_experiment.set_step(idx + epoch * len(train_loader))

                optimizer.zero_grad()  # MUST be called on every batch
                
                images = batch["image"] / 255.0
                labels = batch["cluster"]

                # Generate and apply mask
                masks = generate_scaled_blob(images.shape, mask_percentage=(1 / 16) * 100).float() / 255.0
                images_with_mask = images * (1 - masks.unsqueeze(1))
                images_with_mask = torch.cat((images_with_mask, masks.unsqueeze(1)), dim=1)

                images = images_with_mask.to(device)
                
                # One-hot encode labels
                labels = F.one_hot(labels, num_classes=20).float().to(device)  # Shape: [batch_size, num_classes]

                latents, _ = encoder(images)
                outputs = model(latents)  # Shape: [batch_size, num_classes=20]

                loss = loss_func(outputs, labels)  # BCEWithLogitsLoss expects one-hot encoded labels
                loss.backward()
                optimizer.step()
                comet_experiment.log_metric("loss", loss.item())
        
        if epoch % 5 == 0:
            save_model_path = f"models/cluster_clasification/model1_{epoch}"
            
        torch.save(model.state_dict(), save_model_path)
        
        
        model.eval()
        with comet_experiment.validate() as validat, torch.no_grad() as nograd:
            for idx, batch in tqdm(enumerate(val_loader), desc=f"VAL_{epoch}"):
                comet_experiment.set_step(idx + epoch * len(val_loader))

                images = batch["image"] / 255.0
                labels = batch["cluster"]

                # Generate and apply mask
                masks = generate_scaled_blob(images.shape, mask_percentage=(1 / 16) * 100).float() / 255.0
                images_with_mask = images * (1 - masks.unsqueeze(1))
                images_with_mask = torch.cat((images_with_mask, masks.unsqueeze(1)), dim=1)

                images = images_with_mask.to(device)
                
                # One-hot encode labels
                labels = F.one_hot(labels, num_classes=20).float().to(device)  # Shape: [batch_size, num_classes]

                latents, _ = encoder(images)
                outputs = model(latents)  # Shape: [batch_size, num_classes=20]

                loss = loss_func(outputs, labels)  # BCEWithLogitsLoss expects one-hot encoded labels
                comet_experiment.log_metric("loss", loss.item())
                

spline with fp=s has been reached. Probable cause: s too small.
(abs(fp-s)/s>0.001)
  res = _impl.splprep(x, w, u, ub, ue, k, task, s, t, full_output, nest, per,
TRAIN_0: 1291it [08:20,  2.58it/s]
VAL_0: 162it [00:34,  4.72it/s]
TRAIN_1: 1291it [08:25,  2.55it/s]
VAL_1: 162it [00:38,  4.16it/s]
TRAIN_2: 1291it [08:26,  2.55it/s]
VAL_2: 162it [00:35,  4.63it/s]
TRAIN_3: 1291it [08:16,  2.60it/s]
VAL_3: 162it [00:33,  4.84it/s]
TRAIN_4: 1291it [08:16,  2.60it/s]
VAL_4: 162it [00:33,  4.86it/s]
TRAIN_5: 1291it [08:16,  2.60it/s]
VAL_5: 162it [00:33,  4.84it/s]
TRAIN_6: 1291it [08:16,  2.60it/s]
VAL_6: 162it [00:33,  4.81it/s]
TRAIN_7: 1291it [08:23,  2.56it/s]
VAL_7: 162it [00:38,  4.15it/s]
TRAIN_8: 1291it [08:26,  2.55it/s]
VAL_8: 162it [00:35,  4.60it/s]
TRAIN_9: 1291it [08:17,  2.59it/s]
VAL_9: 162it [00:33,  4.79it/s]
TRAIN_10: 1291it [08:16,  2.60it/s]
VAL_10: 162it [00:33,  4.83it/s]
TRAIN_11: 1291it [08:17,  2.60it/s]
VAL_11: 162it [00:33,  4.82it/s]
TRAIN_12: 1291it [08:17,  2.60

In [15]:
model.eval()
with comet_experiment.test() as test, torch.no_grad():
    for idx, batch in tqdm(enumerate(test_loader), desc=f"TEST_{num_epochs}"):
        comet_experiment.set_step(idx + num_epochs * len(test_loader))

        images = batch["image"] / 255.0
        labels = batch["cluster"]

        # Generate and apply mask
        masks = generate_scaled_blob(images.shape, mask_percentage=(1 / 16) * 100).float() / 255.0
        images_with_mask = images * (1 - masks.unsqueeze(1))
        images_with_mask = torch.cat((images_with_mask, masks.unsqueeze(1)), dim=1)

        images = images_with_mask.to(device)
        
        # One-hot encode labels
        labels = F.one_hot(labels, num_classes=20).float().to(device)  # Shape: [batch_size, num_classes]

        latents, _ = encoder(images)
        outputs = model(latents)  # Shape: [batch_size, num_classes=20]

        loss = loss_func(outputs, labels)  # BCEWithLogitsLoss expects one-hot encoded labels
        
        comet_experiment.log_metric("loss", loss.item())

TEST_20: 162it [00:34,  4.74it/s]


In [16]:
torch.save(model.state_dict(), f"models/cluster_clasification/model1_{num_epochs}")

In [17]:
comet_experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : residential_hotel_7799
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/kamilciepluch/uczenienienadzorowane/c9f309b3b302484a866f2a780f11c8ab
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     test_loss [162]      : (0.14885829389095306, 0.17599892616271973)
[1;38;5;39mCOMET INFO:[0m     train_loss [28403]   : (0.14596815407276154, 0.6936233639717102)
[1;38;5;39mCOMET INFO:[0m     validate_loss [3240] : (0.14941684901714325, 0.1939256489276886)
[1;38;5;39mCOMET INFO:[0m   Parameters:
