Dockerfile for self-supervised examples #1322

Merged
25 commits merged on Oct 8, 2021
9 changes: 9 additions & 0 deletions examples/self_supervised/.dockerignore
@@ -0,0 +1,9 @@
# Exclude everything:
*
# Include only the useful files:
!barlow_twins.py
!byol.py
!common.py
!datasets.py
!simCLR.py
!supervised_contrastive.py
6 changes: 6 additions & 0 deletions examples/self_supervised/Dockerfile
@@ -0,0 +1,6 @@
FROM python:3.8

RUN pip install catalyst[cv]==21.09
RUN pip install catalyst[ml]==21.09

COPY . .
25 changes: 25 additions & 0 deletions examples/self_supervised/README.md
@@ -0,0 +1,25 @@
# Self-Supervised Learning Examples
## Description

All training scripts share a common set of command-line parameters (a sample invocation follows the list):

--feature_dim - Feature dimension of the latent vector
--temperature - Temperature used in the softmax
--batch_size - Number of images in each mini-batch
--epochs - Number of sweeps over the dataset to train
--num_workers - Number of workers to process a dataloader
--logdir - Logs directory (tensorboard, weights, etc.)
--dataset - Dataset: CIFAR-10, CIFAR-100 or STL10
--learning_rate - Learning rate for the optimizer
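
For example, a run of the SimCLR script that overrides a few of these flags might look like the following; the values are purely illustrative, and the dataset string must be one of the choices listed above:

```
python3 simCLR.py --dataset CIFAR-10 --batch_size 64 --epochs 20 --logdir ./logs/simclr
```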

### Extra parameters

Barlow Twins (`barlow_twins.py`) has an extra parameter ``--offdig_lambda`` - the lambda that balances the on- and off-diagonal terms of the Barlow Twins loss.
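
For instance, a hypothetical run that changes the off-diagonal weight (the value 0.005 is illustrative, not a recommended setting):

```
python3 barlow_twins.py --batch_size 32 --offdig_lambda 0.005
```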

## Usage

```
docker build . -t train-self-supervised
docker run train-self-supervised python3 simCLR.py --batch_size 32
```
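
The other training scripts can be launched from the same image by swapping the script name and flags, for example (a sketch, assuming the image built above and the common flags described earlier):

```
docker run train-self-supervised python3 byol.py --dataset STL10 --epochs 10
docker run train-self-supervised python3 barlow_twins.py --batch_size 64
```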

66 changes: 11 additions & 55 deletions examples/self_supervised/barlow_twins.py
@@ -1,42 +1,15 @@
# flake8: noqa
import argparse

from common import add_arguments, datasets
from common import add_arguments, get_contrastive_model, get_loaders
from sklearn.linear_model import LogisticRegression

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms

from catalyst import dl
from catalyst.contrib.models.cv.encoders import ResnetEncoder
from catalyst.contrib.nn import BarlowTwinsLoss
from catalyst.data import SelfSupervisedDatasetWrapper


class Model(nn.Module):
def __init__(self, feature_dim=128, **resnet_kwargs):
super(Model, self).__init__()
# encoder
self.encoder = nn.Sequential(ResnetEncoder(**resnet_kwargs), nn.Flatten())
# projection head
self.g = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.BatchNorm1d(512),
nn.ReLU(inplace=True),
nn.Linear(512, feature_dim, bias=True),
)

def forward(self, x):
feature = self.encoder(x)
out = self.g(feature)
return F.normalize(feature, dim=-1), F.normalize(out, dim=-1)


parser = argparse.ArgumentParser(description="Train Barlow Twins on cifar-10")
parser = argparse.ArgumentParser(description="Train Barlow Twins")
add_arguments(parser)
parser.add_argument(
"--offdig_lambda",
@@ -50,7 +23,6 @@ def forward(self, x):
# args parse
args = parser.parse_args()

# hyperparams
feature_dim, temperature = args.feature_dim, args.temperature
offdig_lambda = args.offdig_lambda
batch_size, epochs, num_workers = (
@@ -59,31 +31,19 @@ def forward(self, x):
args.num_workers,
)
dataset = args.dataset
# data

transforms = datasets[dataset]["train_transform"]
transform_original = datasets[dataset]["valid_transform"]
# model and optimizer

train_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=True, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
test_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=False, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
model = get_contrastive_model(args.feature_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-6)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_loader = DataLoader(test_data, batch_size=batch_size, pin_memory=True)
# criterion

criterion = BarlowTwinsLoss(offdiag_lambda=offdig_lambda)

callbacks = [
dl.ControlFlowCallback(
dl.CriterionCallback(
input_key="projection_left", target_key="projection_right", metric_key="loss"
),
loaders="train",
dl.CriterionCallback(
input_key="projection_left", target_key="projection_right", metric_key="loss"
),
dl.SklearnModelCallback(
feature_key="embedding_origin",
@@ -103,18 +63,14 @@ def forward(self, x):
),
]

model = Model(feature_dim, arch="resnet50")
criterion = BarlowTwinsLoss(offdiag_lambda=offdig_lambda)
optimizer = optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-6)

runner = dl.SelfSupervisedRunner()

runner.train(
model=model,
criterion=criterion,
optimizer=optimizer,
callbacks=callbacks,
loaders={"train": train_loader, "valid": valid_loader},
loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
verbose=True,
num_epochs=epochs,
valid_loader="train",
62 changes: 12 additions & 50 deletions examples/self_supervised/byol.py
@@ -1,23 +1,18 @@
# flake8: noqa
import argparse

from common import add_arguments, ContrastiveModel, datasets
from common import add_arguments, get_contrastive_model, get_loaders

import torch
from torch.optim import Adam

from catalyst import dl
from catalyst.contrib import nn
from catalyst.contrib.models.cv.encoders import ResnetEncoder
from catalyst.contrib.nn.criterion import NTXentLoss
from catalyst.data.dataset.self_supervised import SelfSupervisedDatasetWrapper
from catalyst.dl import SelfSupervisedRunner

parser = argparse.ArgumentParser(description="Train SimCLR on cifar-10")
parser = argparse.ArgumentParser(description="Train BYOL")
add_arguments(parser)

parser.add_argument("--aug-strength", default=1.0, type=float, help="Strength of augmentations")


def set_requires_grad(model, val):
for p in model.parameters():
@@ -27,60 +22,31 @@ def set_requires_grad(model, val):
if __name__ == "__main__":
args = parser.parse_args()
batch_size = args.batch_size
aug_strength = args.aug_strength

transforms = datasets[args.dataset]["train_transform"]
transform_original = datasets[args.dataset]["valid_transform"]

train_data = SelfSupervisedDatasetWrapper(
datasets[args.dataset]["dataset"](root="data", train=True, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
train_loader = torch.utils.data.DataLoader(
train_data, batch_size=batch_size, num_workers=args.num_workers
)

encoder_online = nn.Sequential(ResnetEncoder(arch="resnet50", frozen=False), nn.Flatten())
projection_head_online = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.ReLU(inplace=True),
nn.Linear(512, args.feature_dim, bias=True),
)
encoder_target = nn.Sequential(ResnetEncoder(arch="resnet50", frozen=False), nn.Flatten())
projection_head_target = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.ReLU(inplace=True),
nn.Linear(512, args.feature_dim, bias=True),
)
# 2. model and optimizer

model = nn.ModuleDict(
{
"online": ContrastiveModel(projection_head_online, encoder_online),
"target": ContrastiveModel(projection_head_target, encoder_target),
"online": get_contrastive_model(args.feature_dim),
"target": get_contrastive_model(args.feature_dim),
}
)

set_requires_grad(model["target"], False)

# 2. model and optimizer
optimizer = Adam(model["online"].parameters(), lr=args.learning_rate)

# 3. criterion with triplets sampling
# 3. criterion
criterion = NTXentLoss(tau=args.temperature)

callbacks = [
dl.ControlFlowCallback(
dl.CriterionCallback(
input_key="online_projection_left",
target_key="target_projection_right",
metric_key="loss",
),
loaders="train",
dl.CriterionCallback(
input_key="online_projection_left",
target_key="target_projection_right",
metric_key="loss",
),
dl.ControlFlowCallback(
dl.SoftUpdateCallaback(
target_model_key="target", source_model_key="online", tau=0.1, scope="on_batch_ned"
target_model_key="target", source_model_key="online", tau=0.1, scope="on_batch_end"
),
loaders="train",
),
@@ -93,15 +59,11 @@ def set_requires_grad(model, val):
criterion=criterion,
optimizer=optimizer,
callbacks=callbacks,
loaders={
"train": train_loader,
# "valid": valid_loader
},
loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
verbose=True,
logdir=args.logdir,
valid_loader="train",
valid_metric="loss",
minimize_valid_metric=True,
num_epochs=args.epochs,
engine=dl.DeviceEngine("cpu"),
)
63 changes: 62 additions & 1 deletion examples/self_supervised/common.py
@@ -1,6 +1,13 @@
from typing import Dict, Optional

from datasets import datasets

import torch
from torch.utils.data import DataLoader

from catalyst.contrib import nn
from catalyst.contrib.models.cv.encoders import ResnetEncoder
from catalyst.data.dataset.self_supervised import SelfSupervisedDatasetWrapper


def add_arguments(parser) -> None:
@@ -11,7 +18,8 @@ def add_arguments(parser) -> None:
epochs: Number of sweeps over the dataset to train
num_workers: Number of workers to process a dataloader
logdir: Logs directory (tensorboard, weights, etc)
dataset: Dataset: CIFAR-10, CIFAR-100 or STL10
dataset: CIFAR-10, CIFAR-100 or STL10
learning-rate: Learning rate for optimizer

Args:
parser: argparser like object
@@ -74,3 +82,56 @@ def forward(self, x):
emb = self.encoder(x)
projection = self.model(emb)
return emb, projection


def get_loaders(
dataset: str, batch_size: int, num_workers: Optional[int]
) -> Dict[str, DataLoader]:
"""Init loaders based on parsed parametrs.

Args:
dataset: dataset for the experiment
batch_size: batch size for loaders
num_workers: number of workers to process loaders

Returns:
{"train":..., "valid":...}
"""
transforms = datasets[dataset]["train_transform"]
transform_original = datasets[dataset]["valid_transform"]

train_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=True, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)
valid_data = SelfSupervisedDatasetWrapper(
datasets[dataset]["dataset"](root="data", train=False, transform=None, download=True),
transforms=transforms,
transform_original=transform_original,
)

train_loader = DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)

valid_loader = DataLoader(valid_data, batch_size=batch_size, num_workers=num_workers)

return {"train": train_loader, "valid": valid_loader}


def get_contrastive_model(feature_dim: int) -> ContrastiveModel:
"""Init contrastive model based on parsed parametrs.

Args:
feature_dim: dimensinality of contrative projection

Returns:
ContrstiveModel instance
"""
encoder = nn.Sequential(ResnetEncoder(arch="resnet50", frozen=False), nn.Flatten())
projection_head = nn.Sequential(
nn.Linear(2048, 512, bias=False),
nn.ReLU(inplace=True),
nn.Linear(512, feature_dim, bias=True),
)
model = ContrastiveModel(projection_head, encoder)
return model