Dockerfile for self-supervised examples (#1322)
* feat: Dockerfile for self-supervised examples

* added .dockerignore

* docs: Update README and learning_rate

* docs: update README

* docs: Update README

* Update README.md

* Update README.md

* added structure

* docs: added header in README

* fix: put get_loaders in common

* feat: get_loaders in common

* refactor: barlow twins

* refactor: byol example

* feat: added get_contrastive_model

* refactor: simCLR

* refactor: supervised contrastive

* style: 🎨

* style: 🎨

* docs: added docs to common functions

* fix: added ml dependency

* docs: update README

* fix: use unpacked args for get_contrastive_model

* fix: use unpacked params for get_loaders

* style: 🎨
Nimrais committed Oct 8, 2021
1 parent 027e007 commit d81db52
Showing 8 changed files with 136 additions and 176 deletions.
9 changes: 9 additions & 0 deletions examples/self_supervised/.dockerignore
@@ -0,0 +1,9 @@
# Exclude everything:
*
# Include useful:
!barlow_twins.py
!byol.py
!common.py
!datasets.py
!simCLR.py
!supervised_contrastive.py
6 changes: 6 additions & 0 deletions examples/self_supervised/Dockerfile
@@ -0,0 +1,6 @@
FROM python:3.8

RUN pip install catalyst[cv]==21.09
RUN pip install catalyst[ml]==21.09

COPY . .
25 changes: 25 additions & 0 deletions examples/self_supervised/README.md
@@ -0,0 +1,25 @@
# Self-Supervised Learning Examples
## Description

All training files share common command-line parameters:

--feature_dim - Feature dim for latent vector
--temperature - Temperature used in softmax
--batch_size - Number of images in each mini-batch
--epochs - Number of sweeps over the dataset to train
--num_workers - Number of workers to process a dataloader
--logdir - Logs directory (tensorboard, weights, etc)
--dataset - Dataset: CIFAR-10, CIFAR-100 or STL10
--learning_rate - Learning rate for optimizer

### Extra parameters

Barlow Twins (barlow_twins.py) has an extra parameter ``--offdig_lambda`` - the weight of the off-diagonal terms relative to the on-diagonal terms in the Barlow Twins loss.
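
For context (an editorial note, not part of the committed README): the Barlow Twins objective has the form $\mathcal{L} = \sum_i (1 - C_{ii})^2 + \lambda \sum_i \sum_{j \neq i} C_{ij}^2$, where $C$ is the cross-correlation matrix between the projections of the two augmented views and $\lambda$ is the value supplied via ``--offdig_lambda``.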

## Usage

```
docker build . -t train-self-supervised
docker run train-self-supervised python3 simCLR.py --batch_size 32
```
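
As an additional, purely illustrative example (not part of the committed README; the flag values below are arbitrary), the Barlow Twins script can be run in the same image with the common flags plus its extra parameter:

```
docker run train-self-supervised python3 barlow_twins.py \
    --batch_size 32 --epochs 10 --feature_dim 128 \
    --offdig_lambda 0.005 --logdir ./logs/barlow_twins
```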

66 changes: 11 additions & 55 deletions examples/self_supervised/barlow_twins.py
@@ -1,42 +1,15 @@
# flake8: noqa
import argparse

from common import add_arguments, datasets
from common import add_arguments, get_contrastive_model, get_loaders
from sklearn.linear_model import LogisticRegression

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms

from catalyst import dl
from catalyst.contrib.models.cv.encoders import ResnetEncoder
from catalyst.contrib.nn import BarlowTwinsLoss
from catalyst.data import SelfSupervisedDatasetWrapper


class Model(nn.Module):
def __init__(self, feature_dim=128, **resnet_kwargs):
super(Model, self).__init__()
# encoder
self.encoder = nn.Sequential(ResnetEncoder(**resnet_kwargs), nn.Flatten())
# projection head
self.g = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.BatchNorm1d(512),
nn.ReLU(inplace=True),
nn.Linear(512, feature_dim, bias=True),
)

def forward(self, x):
feature = self.encoder(x)
out = self.g(feature)
return F.normalize(feature, dim=-1), F.normalize(out, dim=-1)


parser = argparse.ArgumentParser(description="Train Barlow Twins on cifar-10")
parser = argparse.ArgumentParser(description="Train Barlow Twins")
add_arguments(parser)
parser.add_argument(
"--offdig_lambda",
@@ -50,7 +23,6 @@ def forward(self, x):
# args parse
args = parser.parse_args()

# hyperparams
feature_dim, temperature = args.feature_dim, args.temperature
offdig_lambda = args.offdig_lambda
batch_size, epochs, num_workers = (
@@ -59,31 +31,19 @@ def forward(self, x):
args.num_workers,
)
dataset = args.dataset
# data

transforms = datasets[dataset]["train_transform"]
transform_original = datasets[dataset]["valid_transform"]
# model and optimizer

train_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=True, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
test_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=False, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
model = get_contrastive_model(args.feature_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-6)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_loader = DataLoader(test_data, batch_size=batch_size, pin_memory=True)
# criterion

criterion = BarlowTwinsLoss(offdiag_lambda=offdig_lambda)

callbacks = [
dl.ControlFlowCallback(
dl.CriterionCallback(
input_key="projection_left", target_key="projection_right", metric_key="loss"
),
loaders="train",
dl.CriterionCallback(
input_key="projection_left", target_key="projection_right", metric_key="loss"
),
dl.SklearnModelCallback(
feature_key="embedding_origin",
@@ -103,18 +63,14 @@ def forward(self, x):
),
]

model = Model(feature_dim, arch="resnet50")
criterion = BarlowTwinsLoss(offdiag_lambda=offdig_lambda)
optimizer = optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-6)

runner = dl.SelfSupervisedRunner()

runner.train(
model=model,
criterion=criterion,
optimizer=optimizer,
callbacks=callbacks,
loaders={"train": train_loader, "valid": valid_loader},
loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
verbose=True,
num_epochs=epochs,
valid_loader="train",
62 changes: 12 additions & 50 deletions examples/self_supervised/byol.py
@@ -1,23 +1,18 @@
# flake8: noqa
import argparse

from common import add_arguments, ContrastiveModel, datasets
from common import add_arguments, get_contrastive_model, get_loaders

import torch
from torch.optim import Adam

from catalyst import dl
from catalyst.contrib import nn
from catalyst.contrib.models.cv.encoders import ResnetEncoder
from catalyst.contrib.nn.criterion import NTXentLoss
from catalyst.data.dataset.self_supervised import SelfSupervisedDatasetWrapper
from catalyst.dl import SelfSupervisedRunner

parser = argparse.ArgumentParser(description="Train SimCLR on cifar-10")
parser = argparse.ArgumentParser(description="Train BYOL")
add_arguments(parser)

parser.add_argument("--aug-strength", default=1.0, type=float, help="Strength of augmentations")


def set_requires_grad(model, val):
for p in model.parameters():
@@ -27,60 +22,31 @@ def set_requires_grad(model, val):
if __name__ == "__main__":
args = parser.parse_args()
batch_size = args.batch_size
aug_strength = args.aug_strength

transforms = datasets[args.dataset]["train_transform"]
transform_original = datasets[args.dataset]["valid_transform"]

train_data = SelfSupervisedDatasetWrapper(
datasets[args.dataset]["dataset"](root="data", train=True, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
train_loader = torch.utils.data.DataLoader(
train_data, batch_size=batch_size, num_workers=args.num_workers
)

encoder_online = nn.Sequential(ResnetEncoder(arch="resnet50", frozen=False), nn.Flatten())
projection_head_online = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.ReLU(inplace=True),
nn.Linear(512, args.feature_dim, bias=True),
)
encoder_target = nn.Sequential(ResnetEncoder(arch="resnet50", frozen=False), nn.Flatten())
projection_head_target = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.ReLU(inplace=True),
nn.Linear(512, args.feature_dim, bias=True),
)
# 2. model and optimizer

model = nn.ModuleDict(
{
"online": ContrastiveModel(projection_head_online, encoder_online),
"target": ContrastiveModel(projection_head_target, encoder_target),
"online": get_contrastive_model(args.feature_dim),
"target": get_contrastive_model(args.feature_dim),
}
)

set_requires_grad(model["target"], False)

# 2. model and optimizer
optimizer = Adam(model["online"].parameters(), lr=args.learning_rate)

# 3. criterion with triplets sampling
# 3. criterion
criterion = NTXentLoss(tau=args.temperature)

callbacks = [
dl.ControlFlowCallback(
dl.CriterionCallback(
input_key="online_projection_left",
target_key="target_projection_right",
metric_key="loss",
),
loaders="train",
dl.CriterionCallback(
input_key="online_projection_left",
target_key="target_projection_right",
metric_key="loss",
),
dl.ControlFlowCallback(
dl.SoftUpdateCallaback(
target_model_key="target", source_model_key="online", tau=0.1, scope="on_batch_ned"
target_model_key="target", source_model_key="online", tau=0.1, scope="on_batch_end"
),
loaders="train",
),
@@ -93,15 +59,11 @@ def set_requires_grad(model, val):
criterion=criterion,
optimizer=optimizer,
callbacks=callbacks,
loaders={
"train": train_loader,
# "valid": valid_loader
},
loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
verbose=True,
logdir=args.logdir,
valid_loader="train",
valid_metric="loss",
minimize_valid_metric=True,
num_epochs=args.epochs,
engine=dl.DeviceEngine("cpu"),
)
63 changes: 62 additions & 1 deletion examples/self_supervised/common.py
@@ -1,6 +1,13 @@
from typing import Dict, Optional

from datasets import datasets

import torch
from torch.utils.data import DataLoader

from catalyst.contrib import nn
from catalyst.contrib.models.cv.encoders import ResnetEncoder
from catalyst.data.dataset.self_supervised import SelfSupervisedDatasetWrapper


def add_arguments(parser) -> None:
@@ -11,7 +18,8 @@ def add_arguments(parser) -> None:
epochs: Number of sweeps over the dataset to train
num_workers: Number of workers to process a dataloader
logdir: Logs directory (tensorboard, weights, etc)
dataset: Dataset: CIFAR-10, CIFAR-100 or STL10
dataset: CIFAR-10, CIFAR-100 or STL10
learning-rate: Learning rate for optimizer
Args:
parser: argparser like object
@@ -74,3 +82,56 @@ def forward(self, x):
emb = self.encoder(x)
projection = self.model(emb)
return emb, projection


def get_loaders(
dataset: str, batch_size: int, num_workers: Optional[int]
) -> Dict[str, DataLoader]:
"""Init loaders based on parsed parametrs.
Args:
dataset: dataset for the experiment
batch_size: batch size for loaders
num_workers: number of workers to process loaders
Returns:
{"train":..., "valid":...}
"""
transforms = datasets[dataset]["train_transform"]
transform_original = datasets[dataset]["valid_transform"]

train_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=True, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
valid_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=False, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)

train_loader = DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)

valid_loader = DataLoader(valid_data, batch_size=batch_size, num_workers=num_workers)

return {"train": train_loader, "valid": valid_loader}


def get_contrastive_model(feature_dim: int) -> ContrastiveModel:
"""Init contrastive model based on parsed parametrs.
Args:
feature_dim: dimensinality of contrative projection
Returns:
ContrstiveModel instance
"""
encoder = nn.Sequential(ResnetEncoder(arch="resnet50", frozen=False), nn.Flatten())
projection_head = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.ReLU(inplace=True),
nn.Linear(512, feature_dim, bias=True),
)
model = ContrastiveModel(projection_head, encoder)
return model
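
To make the refactor concrete, here is a minimal usage sketch (editorial illustration, not part of the commit) of how the new `get_loaders` and `get_contrastive_model` helpers are combined in the example scripts; the NTXentLoss criterion and the Adam settings mirror the SimCLR/BYOL examples and are only illustrative defaults:

```
# flake8: noqa
# Minimal sketch of a training script built on the shared helpers (illustrative only).
import argparse

from common import add_arguments, get_contrastive_model, get_loaders

import torch.optim as optim

from catalyst import dl
from catalyst.contrib.nn.criterion import NTXentLoss

parser = argparse.ArgumentParser(description="Minimal self-supervised example")
add_arguments(parser)
args = parser.parse_args()

# model, loaders, optimizer and criterion are all driven by the parsed arguments
model = get_contrastive_model(args.feature_dim)
loaders = get_loaders(args.dataset, args.batch_size, args.num_workers)
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
criterion = NTXentLoss(tau=args.temperature)

# the criterion is applied to the projection pairs produced by the runner,
# mirroring the callback setup in the examples above
callbacks = [
    dl.CriterionCallback(
        input_key="projection_left", target_key="projection_right", metric_key="loss"
    ),
]

runner = dl.SelfSupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=loaders,
    num_epochs=args.epochs,
    logdir=args.logdir,
    valid_loader="train",
    valid_metric="loss",
    minimize_valid_metric=True,
    verbose=True,
)
```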