# SwAV - Unsupervised Learning of Visual Features by Contrasting Cluster Assignments

## 0. Current Colab's Setup

In [1]:
import torch

# Print the version explicitly: a bare expression in the middle of a cell is
# evaluated but never displayed, so the version was silently dropped before.
print(torch.__version__)
# Last expression of the cell, so its value is shown as the cell output.
torch.cuda.is_available()

True

In [2]:
# Print the version of the `python` executable that the shell resolves.
# NOTE(review): this may not be the interpreter the kernel runs on — confirm with `sys.version` if it matters.
!python --version

Python 3.10.14


In [3]:
# Show the NVIDIA driver version, the visible GPU(s) and their current memory usage / utilisation.
!nvidia-smi

Wed Aug 14 11:09:19 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1060 ...    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   62C    P5             12W /   70W |     418MiB /   6144MiB |     28%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
# Show the release of the installed CUDA compiler (toolkit).
# The release printed here (11.8 in the saved output) is the toolkit's own version and is reported
# separately from the "CUDA Version" column of nvidia-smi (12.4 in the saved output).
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


## 1. Initial Setup



### 1.1. Cloning the repository

In [5]:
#%cd /content
#!git clone https://github.com/facebookresearch/swav.git

# THE REPO IS ALREADY CLONED

### 1.2. Mounting Google Drive Files

In [6]:
#from google.colab import drive
#drive.mount('/content/drive')

# WE ARE NOT IN COLAB

### 1.3. Set environment variables

In [7]:
# Environment variables consumed by the Python checks and the `!python ...` commands further down.
# NOTE(review): these are absolute paths on one specific machine — adjust them before running elsewhere.
# NOTE(review): the experiment folder is named "400ep_bs256", while the pre-training command in
# section 2.1 is launched with --epochs 15 and --batch_size 16 — confirm the name is still intended.

# Folder passed to the pre-training script as --dump_path.
%env EXPERIMENT_PATH=/home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu/experiments/swav_400ep_bs256_pretrain
# Dataset root, followed by its train / val / test sub-folders.
%env DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset
%env TRAIN_DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset/train
%env VAL_DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset/val
%env TEST_DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset/test

env: EXPERIMENT_PATH=/home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu/experiments/swav_400ep_bs256_pretrain
env: DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset
env: TRAIN_DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset/train
env: VAL_DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset/val
env: TEST_DATASET_PATH=/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset/test


### 1.4. Check if Dataset and Environment paths exist

In [8]:
import os


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty, so the failure points
            at the missing configuration instead of surfacing later as a
            TypeError inside os.path.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable '{name}' is not set; run the cell in section 1.3 first."
        )
    return value


# For EXPERIMENT_PATH: create the folder when it is missing.
dir_path = _require_env('EXPERIMENT_PATH')
# isdir (not exists) so that a regular file at this path is not reported as a directory.
if os.path.isdir(dir_path):
    print(f"The directory '{dir_path}' exists.")
else:
    os.makedirs(dir_path, exist_ok=True)
    print(f"Directory '{dir_path}' created successfully.")

# For DATASET_PATH: only report, never create (the data must already be there).
dataset_path = _require_env('DATASET_PATH')
if os.path.isdir(dataset_path):
    print(f"The directory '{dataset_path}' exists.")
else:
    print(f"The directory '{dataset_path}' does not exist.")

The directory '/home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu/experiments/swav_400ep_bs256_pretrain' exists.
The directory '/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset' exists.


### 1.5. Copy our modified code to the SwAV repository

In [9]:
#!cp /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/code/main_swav_no_apex.py /content/swav/
#!cp -f /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/code/utils.py /content/swav/src/
#!cp /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/code/eval_linear_no_distribution.py /content/swav/

# NOT NECESSARY IN LOCAL PC

## 2. Training and Evaluating SwAV
The training is divided into 2 main steps:


*   Self-supervised training of the SwAV model: the model learns to cluster the features of the images, without predicting class labels such as paper, plastic, etc.
*   Supervised linear classification training on top of SwAV: once the features are clustered, a linear classifier is trained to map them to the labels we want, such as paper, plastic, etc.

This two-step process is common for this type of classification task with self-supervised models.



### 2.1. Self-supervised training of the SwAV model

In [10]:
    # Launch SwAV pre-training on $TRAIN_DATASET_PATH from the local single-GPU code folder;
    # the script receives $EXPERIMENT_PATH as its --dump_path.
    # Comments are kept above the command because it is one backslash-continued shell line.
    # NOTE(review): --epoch_queue_starts equals --epochs (both 15); presumably the feature queue is
    # never enabled during this run — confirm against main_swav_no_apex.py.
    # NOTE(review): --base_lr 0.6 is combined with --batch_size 16 and --warmup_epochs 0 — confirm
    # this learning rate was chosen for this batch size.
    %cd /home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu/
    !python main_swav_no_apex.py \
    --data_path $TRAIN_DATASET_PATH \
    --epochs 15 \
    --base_lr 0.6 \
    --final_lr 0.0006 \
    --warmup_epochs 0 \
    --batch_size 16 \
    --workers 1 \
    --size_crops 224 96 \
    --nmb_crops 2 6 \
    --min_scale_crops 0.14 0.05 \
    --max_scale_crops 1. 0.14 \
    --use_fp16 true \
    --freeze_prototypes_niters 5005 \
    --queue_length 3840 \
    --epoch_queue_starts 15 \
    --crops_for_assign 0 1 \
    --temperature 0.1 \
    --epsilon 0.05 \
    --sinkhorn_iterations 3 \
    --feat_dim 128 \
    --nmb_prototypes 3000 \
    --wd 0.000001 \
    --arch resnet50 \
    --dump_path $EXPERIMENT_PATH

/home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu
INFO - 08/14/24 11:10:05 - 0:00:00 - arch: resnet50
                                     base_lr: 0.6
                                     batch_size: 16
                                     checkpoint_freq: 25
                                     crops_for_assign: [0, 1]
                                     data_path: /home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/SimplerDataset/train
                                     dump_checkpoints: /home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu/experiments/swav_400ep_bs256_pretrain/checkpoints
                                     dump_path: /home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu/experiments/swav_400ep_bs256_pretrain
                                     epoch_queue_starts: 15
                                     epochs: 15
                                     epsilon: 0.05
                                     feat_dim: 128
                                     

In [13]:
%env LOG=log
%load_ext tensorboard
%tensorboard --logdir $EXPERIMENT_PATH/$LOG

env: LOG=log
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 9636), started 0:01:46 ago. (Use '!kill 9636' to kill it.)

Copy final modifications into our Google Drive files

In [None]:
#!cp -f /content/swav/main_swav_no_apex.py /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/code/
#!cp -f /content/swav/src/utils.py /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/code/

### 2.2. Supervised learning of Linear Classification on top of SwAV model

In [None]:
import os
checkpoint = "swav_400ep_bs256_pretrain.pth.tar"
os.environ['CHECKPOINT'] = checkpoint

%cd %cd /home/jetshu/Documents/PLAEX/code/SwAV/swav_single_gpu/
!python eval_linear_no_distribution.py \
--data_path $DATASET_PATH \
--pretrained $EXPERIMENT_PATH/$CHECKPOINT

/content/swav
INFO - 08/12/24 07:24:05 - 0:00:00 - arch: resnet50
                                     batch_size: 32
                                     data_path: /content/drive/MyDrive/PLAEX/Dataset
                                     decay_epochs: [60, 80]
                                     dump_checkpoints: ./checkpoints
                                     dump_path: .
                                     epochs: 100
                                     final_lr: 0
                                     gamma: 0.1
                                     global_pooling: True
                                     lr: 0.3
                                     nesterov: False
                                     pretrained: /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/experiments/swav_400ep_bs256_pretrain/swav_400ep_bs256_pretrain.pth.tar
                                     scheduler_type: cosine
                                     seed: 31
                                 

Copy final modifications into our Google Drive files

In [None]:
#!cp -f /content/swav/eval_linear_no_distribution.py /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/code/
#!cp -f /content/swav/src/utils.py /content/drive/MyDrive/PLAEX/Selfsupervised_models/swav/code/

# **Given the time limitation (<1h) in Colab, we are going to try to train locally**
- Graphics card: GTX 1060 Ti
- RAM: 16GB