In [1]:
!pip install mmcv-full

Collecting mmcv-full
  Downloading mmcv-full-1.7.1.tar.gz (605 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.4/605.4 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting addict (from mmcv-full)
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting yapf (from mmcv-full)
  Downloading yapf-0.40.1-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.3/250.3 kB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting importlib-metadata>=6.6.0 (from yapf->mmcv-full)
  Downloading importlib_metadata-6.8.0-py3-none-any.whl (22 kB)
Building wheels for collected packages: mmcv-full
  Building wheel for mmcv-full (setup.py) ... [?25l[?25hdone
  Created wheel for mmcv-full: filename=mmcv_full-1.7.1-cp310-cp310-linux_x86_64.whl size=30370720 sha256=b33d22e0394877860b56d54eacce0f8386abc08b5eef0e196232425e48a8429f
  Stored in directory: /root/.cache/pip/wheel

# Involution CUDA

In [2]:
import torch

# set the random seed for reproducibility
torch.manual_seed(43)

<torch._C.Generator at 0x7d00f032c2f0>

In [3]:
import torch.nn as nn
from mmcv.cnn import ConvModule


class involution(nn.Module):

    def __init__(self,
                 channels,
                 kernel_size,
                 stride):
        super(involution, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.channels = channels
        reduction_ratio = 4
        self.group_channels = 16
        self.groups = self.channels // self.group_channels
        self.conv1 = ConvModule(
            in_channels=channels,
            out_channels=channels // reduction_ratio,
            kernel_size=1,
            conv_cfg=None,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='ReLU'))
        self.conv2 = ConvModule(
            in_channels=channels // reduction_ratio,
            out_channels=kernel_size**2 * self.groups,
            kernel_size=1,
            stride=1,
            conv_cfg=None,
            norm_cfg=None,
            act_cfg=None)
        if stride > 1:
            self.avgpool = nn.AvgPool2d(stride, stride)
        self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride)

    def forward(self, x):
        weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
        b, c, h, w = weight.shape
        weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2)
        out = self.unfold(x).view(b, self.groups, self.group_channels, self.kernel_size**2, h, w)
        out = (weight * out).sum(dim=3).view(b, self.channels, h, w)
        return out



In [4]:
from torch.autograd import Function
import torch
from torch.nn.modules.utils import _pair
import torch.nn.functional as F
import torch.nn as nn
from mmcv.cnn import ConvModule


from collections import namedtuple
import cupy
from string import Template


Stream = namedtuple('Stream', ['ptr'])


def Dtype(t):
    if isinstance(t, torch.cuda.FloatTensor):
        return 'float'
    elif isinstance(t, torch.cuda.DoubleTensor):
        return 'double'


@cupy._util.memoize(for_each_device=True)
def load_kernel(kernel_name, code, **kwargs):
    code = Template(code).substitute(**kwargs)
    kernel_code = cupy.cuda.compile_with_cache(code)
    return kernel_code.get_function(kernel_name)


CUDA_NUM_THREADS = 1024

kernel_loop = '''
#define CUDA_KERNEL_LOOP(i, n)                        \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
      i < (n);                                       \
      i += blockDim.x * gridDim.x)
'''


def GET_BLOCKS(N):
    return (N + CUDA_NUM_THREADS - 1) // CUDA_NUM_THREADS


_involution_kernel = kernel_loop + '''
extern "C"
__global__ void involution_forward_kernel(
const ${Dtype}* bottom_data, const ${Dtype}* weight_data, ${Dtype}* top_data) {
  CUDA_KERNEL_LOOP(index, ${nthreads}) {
    const int n = index / ${channels} / ${top_height} / ${top_width};
    const int c = (index / ${top_height} / ${top_width}) % ${channels};
    const int h = (index / ${top_width}) % ${top_height};
    const int w = index % ${top_width};
    const int g = c / (${channels} / ${groups});
    ${Dtype} value = 0;
    #pragma unroll
    for (int kh = 0; kh < ${kernel_h}; ++kh) {
      #pragma unroll
      for (int kw = 0; kw < ${kernel_w}; ++kw) {
        const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
        const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
        if ((h_in >= 0) && (h_in < ${bottom_height})
          && (w_in >= 0) && (w_in < ${bottom_width})) {
          const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
            * ${bottom_width} + w_in;
          const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h)
            * ${top_width} + w;
          value += weight_data[offset_weight] * bottom_data[offset];
        }
      }
    }
    top_data[index] = value;
  }
}
'''


_involution_kernel_backward_grad_input = kernel_loop + '''
extern "C"
__global__ void involution_backward_grad_input_kernel(
    const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) {
  CUDA_KERNEL_LOOP(index, ${nthreads}) {
    const int n = index / ${channels} / ${bottom_height} / ${bottom_width};
    const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels};
    const int h = (index / ${bottom_width}) % ${bottom_height};
    const int w = index % ${bottom_width};
    const int g = c / (${channels} / ${groups});
    ${Dtype} value = 0;
    #pragma unroll
    for (int kh = 0; kh < ${kernel_h}; ++kh) {
      #pragma unroll
      for (int kw = 0; kw < ${kernel_w}; ++kw) {
        const int h_out_s = h + ${pad_h} - kh * ${dilation_h};
        const int w_out_s = w + ${pad_w} - kw * ${dilation_w};
        if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) {
          const int h_out = h_out_s / ${stride_h};
          const int w_out = w_out_s / ${stride_w};
          if ((h_out >= 0) && (h_out < ${top_height})
                && (w_out >= 0) && (w_out < ${top_width})) {
            const int offset = ((n * ${channels} + c) * ${top_height} + h_out)
                  * ${top_width} + w_out;
            const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out)
                  * ${top_width} + w_out;
            value += weight_data[offset_weight] * top_diff[offset];
          }
        }
      }
    }
    bottom_diff[index] = value;
  }
}
'''


_involution_kernel_backward_grad_weight = kernel_loop + '''
extern "C"
__global__ void involution_backward_grad_weight_kernel(
    const ${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) {
  CUDA_KERNEL_LOOP(index, ${nthreads}) {
    const int h = (index / ${top_width}) % ${top_height};
    const int w = index % ${top_width};
    const int kh = (index / ${kernel_w} / ${top_height} / ${top_width})
          % ${kernel_h};
    const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w};
    const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
    const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
    if ((h_in >= 0) && (h_in < ${bottom_height})
          && (w_in >= 0) && (w_in < ${bottom_width})) {
      const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups};
      const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${num};
      ${Dtype} value = 0;
      #pragma unroll
      for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) {
        const int top_offset = ((n * ${channels} + c) * ${top_height} + h)
              * ${top_width} + w;
        const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
              * ${bottom_width} + w_in;
        value += top_diff[top_offset] * bottom_data[bottom_offset];
      }
      buffer_data[index] = value;
    } else {
      buffer_data[index] = 0;
    }
  }
}
'''


In [5]:
import torch.nn as nn
import torch.utils.checkpoint as cp
from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer,
                      constant_init, kaiming_init)




In [None]:
#!pip uninstall mmcv -y
#!pip install mmcv-full==1.3.14


In [6]:
import logging
from abc import ABCMeta, abstractmethod

import torch.nn as nn
from mmcv.runner import load_checkpoint


class BaseBackbone(nn.Module, metaclass=ABCMeta):
    """Base backbone.
    This class defines the basic functions of a backbone.
    Any backbone that inherits this class should at least
    define its own `forward` function.
    """

    def __init__(self):
        super(BaseBackbone, self).__init__()

    def init_weights(self, pretrained=None):
        """Init backbone weights
        Args:
            pretrained (str | None): If pretrained is a string, then it
                initializes backbone weights by loading the pretrained
                checkpoint. If pretrained is None, then it follows default
                initializer or customized initializer in subclasses.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            # use default initializer or customized initializer in subclasses
            pass
        else:
            raise TypeError('pretrained must be a str or None.'
                            f' But received {type(pretrained)}.')

    @abstractmethod
    def forward(self, x):
        """Forward computation
        Args:
            x (tensor | tuple[tensor]): x could be a Torch.tensor or a tuple of
                Torch.tensor, containing input data for forward computation.
        """
        pass

    def train(self, mode=True):
        """Set module status before forward computation
        Args:
            mode (bool): Whether it is train_mode or test_mode
        """
        super(BaseBackbone, self).train(mode)

In [7]:
import torch.nn as nn
import torch.utils.checkpoint as cp
from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer,
                      constant_init, kaiming_init)
from mmcv.utils.parrots_wrapper import _BatchNorm


#from ..utils.involution_cuda import involution

In [8]:
class Bottleneck(nn.Module):


    def __init__(self,
                 in_channels,
                 out_channels,
                 expansion=4,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN')):
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.expansion = expansion
        assert out_channels % expansion == 0
        self.mid_channels = out_channels // expansion
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        if self.style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = stride
        else:
            self.conv1_stride = stride
            self.conv2_stride = 1

        self.norm1_name, norm1 = build_norm_layer(
            norm_cfg, self.mid_channels, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(
            norm_cfg, self.mid_channels, postfix=2)
        self.norm3_name, norm3 = build_norm_layer(
            norm_cfg, out_channels, postfix=3)

        self.conv1 = build_conv_layer(
            conv_cfg,
            in_channels,
            self.mid_channels,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, norm1)
        self.conv2 = involution(self.mid_channels, 7, self.conv2_stride)

        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(
            conv_cfg,
            self.mid_channels,
            out_channels,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, norm3)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    @property
    def norm1(self):
        return getattr(self, self.norm1_name)

    @property
    def norm2(self):
        return getattr(self, self.norm2_name)

    @property
    def norm3(self):
        return getattr(self, self.norm3_name)

    def forward(self, x):

        def _inner_forward(x):
            identity = x

            out = self.conv1(x)
            out = self.norm1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.norm2(out)
            out = self.relu(out)

            out = self.conv3(out)
            out = self.norm3(out)

            if self.downsample is not None:
                identity = self.downsample(x)

            out += identity

            return out

        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        out = self.relu(out)

        return out


def get_expansion(block, expansion=None):
    """.
    """
    if isinstance(expansion, int):
        assert expansion > 0
    elif expansion is None:
        if hasattr(block, 'expansion'):
            expansion = block.expansion
        elif issubclass(block, Bottleneck):
            expansion = 4
        else:
            raise TypeError(f'expansion is not specified for {block.__name__}')
    else:
        raise TypeError('expansion must be an integer or None')

    return expansion


class ResLayer(nn.Sequential):


    def __init__(self,
                 block,
                 num_blocks,
                 in_channels,
                 out_channels,
                 expansion=None,
                 stride=1,
                 avg_down=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 **kwargs):
        self.block = block
        self.expansion = get_expansion(block, expansion)

        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = []
            conv_stride = stride
            if avg_down and stride != 1:
                conv_stride = 1
                downsample.append(
                    nn.AvgPool2d(
                        kernel_size=stride,
                        stride=stride,
                        ceil_mode=True,
                        count_include_pad=False))
            downsample.extend([
                build_conv_layer(
                    conv_cfg,
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=conv_stride,
                    bias=False),
                build_norm_layer(norm_cfg, out_channels)[1]
            ])
            downsample = nn.Sequential(*downsample)

        layers = []
        layers.append(
            block(
                in_channels=in_channels,
                out_channels=out_channels,
                expansion=self.expansion,
                stride=stride,
                downsample=downsample,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                **kwargs))
        in_channels = out_channels
        for i in range(1, num_blocks):
            layers.append(
                block(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    expansion=self.expansion,
                    stride=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    **kwargs))
        super(ResLayer, self).__init__(*layers)



class RedNet(BaseBackbone):
    """
    """

    arch_settings = {
        26: (Bottleneck, (1, 2, 4, 1)),
        38: (Bottleneck, (2, 3, 5, 2)),
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self,
                 depth,
                 in_channels=3,
                 stem_channels=64,
                 base_channels=64,
                 expansion=None,
                 num_stages=4,
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 out_indices=(3, ),
                 style='pytorch',
                 avg_down=False,
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=False,
                 with_cp=False,
                 zero_init_residual=True):
        super(RedNet, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for resnet')
        self.depth = depth
        self.stem_channels = stem_channels
        self.base_channels = base_channels
        self.num_stages = num_stages
        assert num_stages >= 1 and num_stages <= 4
        self.strides = strides
        self.dilations = dilations
        assert len(strides) == len(dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.style = style
        self.avg_down = avg_down
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.with_cp = with_cp
        self.norm_eval = norm_eval
        self.zero_init_residual = zero_init_residual
        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.expansion = get_expansion(self.block, expansion)

        self._make_stem_layer(in_channels, stem_channels)

        self.res_layers = []
        _in_channels = stem_channels
        _out_channels = base_channels * self.expansion
        for i, num_blocks in enumerate(self.stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
            res_layer = self.make_res_layer(
                block=self.block,
                num_blocks=num_blocks,
                in_channels=_in_channels,
                out_channels=_out_channels,
                expansion=self.expansion,
                stride=stride,
                dilation=dilation,
                style=self.style,
                avg_down=self.avg_down,
                with_cp=with_cp,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg)
            _in_channels = _out_channels
            _out_channels *= 2
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)

        self._freeze_stages()

        self.feat_dim = res_layer[-1].out_channels

    def make_res_layer(self, **kwargs):
        return ResLayer(**kwargs)

    @property
    def norm1(self):
        return getattr(self, self.norm1_name)

    def _make_stem_layer(self, in_channels, stem_channels):
        self.stem = nn.Sequential(
            ConvModule(
                in_channels,
                stem_channels // 2,
                kernel_size=3,
                stride=2,
                padding=1,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
                inplace=True),
            involution(stem_channels // 2, 3, 1),
            nn.BatchNorm2d(stem_channels // 2),
            nn.ReLU(inplace=True),
            ConvModule(
                stem_channels // 2,
                stem_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
                inplace=True))
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            self.stem.eval()
            for param in self.stem.parameters():
                param.requires_grad = False

        for i in range(1, self.frozen_stages + 1):
            m = getattr(self, f'layer{i}')
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def init_weights(self, pretrained=None):
        super(RedNet, self).init_weights(pretrained)
        if pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, 1)

            if self.zero_init_residual:
                for m in self.modules():
                    if isinstance(m, Bottleneck):
                        constant_init(m.norm3, 0)

    def forward(self, x):
        x = self.stem(x)
        x = self.maxpool(x)
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        super(RedNet, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()

In [9]:
# model settings
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='RedNet',
        depth=26,
        num_stages=4,
        out_indices=(3, ),
        style='pytorch'),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        num_classes=10,
        in_channels=2048,
        loss=dict(
            type='LabelSmoothLoss',
            loss_weight=1.0,
            label_smooth_val=0.1,
            num_classes=10),
        topk=(1, 5),
    ))

In [10]:
import torch
from torchsummary import summary

# create an instance of your model
model = RedNet(depth=26)

# print the summary of your model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
summary(model, (3,32,32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 16, 16]             864
       BatchNorm2d-2           [-1, 32, 16, 16]              64
              ReLU-3           [-1, 32, 16, 16]               0
        ConvModule-4           [-1, 32, 16, 16]               0
            Conv2d-5            [-1, 8, 16, 16]             256
       BatchNorm2d-6            [-1, 8, 16, 16]              16
              ReLU-7            [-1, 8, 16, 16]               0
        ConvModule-8            [-1, 8, 16, 16]               0
            Conv2d-9           [-1, 18, 16, 16]             162
       ConvModule-10           [-1, 18, 16, 16]               0
           Unfold-11             [-1, 288, 256]               0
       involution-12           [-1, 32, 16, 16]               0
      BatchNorm2d-13           [-1, 32, 16, 16]              64
             ReLU-14           [-1, 32,

In [11]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms

train_transforms = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transforms)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transforms)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 13099201.79it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [12]:
import torch.utils.data as data

trainloader = data.DataLoader(trainset, batch_size=128, shuffle=False, num_workers=2)
testloader = data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)


In [13]:
import torch.optim as optim

In [14]:
import torch.nn as nn

class RedNetClassifier(nn.Module):
    def __init__(self, num_classes=10):
        super(RedNetClassifier, self).__init__()
        self.backbone = RedNet(depth=26, num_stages=4, out_indices=(3,), style='pytorch')
        self.head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(2048, num_classes)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        return x

# Create an instance of the classifier
model = RedNetClassifier(num_classes=10)

# Pass the model's parameters to the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


In [None]:
# Define the optimizer and learning rate scheduler
#optimizer = optim.SGD(model.parameters(), lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True)
#lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=130, eta_min=0)

In [15]:
model.to(device)
data = data.to(device)
new_data = torch.zeros_like(data, device=device, dtype=torch.float32)
new_data.copy_(data)
output = model(new_data)



AttributeError: ignored

In [16]:
# Define the loss function
criterion = nn.CrossEntropyLoss()

In [17]:
import torch.optim as optim
import numpy as np

num_epochs = 200
lr_schedule = [80, 120, 160]

def schedule(epoch_idx):
    if (epoch_idx + 1) < lr_schedule[0]:
        return 0.1
    elif (epoch_idx + 1) > lr_schedule[0] and (epoch_idx + 1) < lr_schedule[1]:
    # Code to execute when epoch_idx is between lr_schedule[0] and lr_schedule[1]
        return 0.01 # lr_decay_ratio = 0.2
    elif (epoch_idx + 1)>=lr_schedule[1] and (epoch_idx + 1) < lr_schedule[2]:
        return 0.001
    return 0.0008
# Define your optimizer
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9,nesterov=True)

# Define your learning rate scheduler
lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: schedule(epoch))




In [18]:
weight_decay = 0.0001

In [19]:
# training loop
# create a summary writer for TensorBoard
import time
start_time = time.time()

num_epochs = 1
rednet_targets=[]
for epoch in range(num_epochs):
    model.train()
    train_correct = 0
    train_total = 0
    for batch_idx, (data, target) in enumerate(trainloader):
        data = data.to(device)  # move the data to the GPU
        target = target.to(device)  # move the data to the GPU
        rednet_targets.append(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # compute training accuracy
        _, predicted = torch.max(output.data, 1)
        train_total += target.size(0)
        train_correct += (predicted == target).sum().item()

    train_acc = 100 * train_correct / train_total
    lr_scheduler.step()
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, target in testloader:
            data = data.to(device)  # move the data to the GPU
            target = target.to(device)  # move the data to the GPU
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
        acc = 100 * correct / total
    rednet_targets = torch.cat(rednet_targets, dim=0)
    print(rednet_targets.shape)
    # Save the targets to a .pt file
    torch.save(rednet_targets, 'rednet_targets.pt')
    print('Epoch: {} - Loss: {:.4f} - Train accuracy: {:.2f}% - Test accuracy: {:.2f}%'.format(epoch+1, loss.item(), train_acc, acc))
     #lr_scheduler.step()

end_time = time.time()

total_time = end_time - start_time

print(f'Total training time: {total_time:.2f} seconds')


torch.Size([50000])
Epoch: 1 - Loss: 1.6150 - Train accuracy: 27.88% - Test accuracy: 41.67%
Total training time: 18.32 seconds


In [20]:
RednetTest = torch.save(model.state_dict(), 'Rednettest.T7')

In [21]:
rednet_targets = torch.load('/content/rednet_targets.pt')
print(rednet_targets)
print(rednet_targets.shape)

tensor([6, 9, 9,  ..., 9, 1, 1], device='cuda:0')
torch.Size([50000])


In [22]:
rednet_targets = torch.load('/content/drive/MyDrive/RednetExp29july/concatenated_subset1.pt')
print(rednet_targets)
print(rednet_targets.shape)

tensor([[  0.4628, -18.5866,  -2.4622,  ...,   8.7815,  -5.7277, -15.7938],
        [  0.4969, -11.9467,   4.1564,  ...,   4.3835,   7.6666,   7.2141],
        [  6.2863, -12.6247,   3.4577,  ...,   1.8666,   5.4522,  -0.9015],
        ...,
        [ 10.1681,   1.9288,  -0.1346,  ...,  -7.9062,   9.7158,   0.6753],
        [  1.9233, -16.4646,   2.9002,  ...,   1.5539,  -3.6887,  -7.6992],
        [  9.5798, -10.6653,  -8.2000,  ...,   4.9272,   8.4362,   8.0167]],
       device='cuda:0', requires_grad=True)
torch.Size([49998, 10])


In [45]:
import torch

# Load the tensor from the specified file
rednet_targets = torch.load('/content/drive/MyDrive/RednetExp29july/concatenated_subset11.pt')

# Print the loaded tensor and its shape
#print(rednet_targets)
#print(rednet_targets.shape)

# Apply the argmax function along a specified dimension (e.g., dim=1)
rednet_targets= torch.argmax(rednet_targets, dim=1)
print(rednet_targets.shape)
print(rednet_targets)


torch.Size([49998])
tensor([6, 9, 8,  ..., 8, 6, 9], device='cuda:0')


In [30]:
import torch

# Load the tensor from the specified file
Deit_targets = torch.load('/content/saved_targets123.pt')
print(Deit_targets)
print(Deit_targets .shape)

tensor([6, 9, 9,  ..., 9, 1, 1], device='cuda:0')
torch.Size([50000])


In [31]:
Deit_targets  = Deit_targets [:49998]

print(Deit_targets )
print(Deit_targets .shape)

tensor([6, 9, 9,  ..., 2, 6, 9], device='cuda:0')
torch.Size([49998])


In [40]:
rednet_targets11 = torch.load('/content/rednet_targets.pt')
print(rednet_targets11)
print(rednet_targets11)
rednet_targets11  = rednet_targets11  [:49998]

print(rednet_targets11 )
print(rednet_targets11 .shape)

tensor([6, 9, 9,  ..., 9, 1, 1], device='cuda:0')
tensor([6, 9, 9,  ..., 9, 1, 1], device='cuda:0')
tensor([6, 9, 9,  ..., 2, 6, 9], device='cuda:0')
torch.Size([49998])


In [41]:
print(torch.eq(rednet_targets11 , rednet_targets).sum())

tensor(18923, device='cuda:0')


In [None]:

# define the file path to save the model

file_path = '/content/drive/MyDrive/Colab/model4.pt'

# save the entire model
torch.save(model.state_dict, file_path)

# save the entire model
torch.save(model, file_path)


# **Generating Training logits from pretrained models**

In [57]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as data
from torch.utils.data import Subset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader
import torch.utils.data as data
from torch.utils.data.dataloader import default_collate

#dataset Transforms
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


train_transforms = transforms.Compose([
    #transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),

])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transforms)
trainloader = data.DataLoader(trainset, batch_size=128, shuffle=False, num_workers=2)

#dividing Dataset into five subsets

num_samples = len(trainset) // 6
print(num_samples)

subset_datasets = []
subset_loaders = []

for i in range(6):
    # Calculate the starting and ending indices for the current subset
    start_idx = i * num_samples
    end_idx = (i + 1) * num_samples

    # Create a subset of the training dataset using the current indices
    subset_indices = torch.arange(start_idx, end_idx)
    subset_dataset = Subset(trainset, subset_indices)
    subset_datasets.append(subset_dataset)

    # Create a DataLoader for the current subset
    subset_loader = DataLoader(subset_dataset, batch_size=128, shuffle=False)
    subset_loaders.append(subset_loader)

# Example usage: access the subsets and their respective loaders

subset1 = subset_datasets[0]
subset_loader1 = subset_loaders[0]
#print(subset1)


subset2 = subset_datasets[1]
subset_loader2 = subset_loaders[1]
#print(subset2)

subset3 = subset_datasets[2]
subset_loader3 = subset_loaders[2]
#print(subset3)

subset4 = subset_datasets[3]
subset_loader4 = subset_loaders[3]
#print(subset4)


subset5 = subset_datasets[4]
subset_loader5 = subset_loaders[4]

subset6 = subset_datasets[5]
subset_loader6 = subset_loaders[5]

#print(subset5)

#trainloader1 = DataLoader(subset1, batch_size=64, shuffle=False,
                    #collate_fn=lambda x: tuple(x_.to(device) for x_ in default_collate(x)))
trainloader1 = DataLoader(subset1, batch_size=128, shuffle=False,
                    collate_fn=lambda x: tuple(x_.to(device) for x_ in default_collate(x)))
#trainloader3 = DataLoader(subset3, batch_size=128, shuffle=False,
                    #collate_fn=lambda x: tuple(x_.to(device) for x_ in default_collate(x)))
#trainloader4 = DataLoader(subset4, batch_size=128, shuffle=False,
                    #collate_fn=lambda x: tuple(x_.to(device) for x_ in default_collate(x)))
#trainloader5 = DataLoader(subset5, batch_size=128, shuffle=False,
                    #collate_fn=lambda x: tuple(x_.to(device) for x_ in default_collate(x)))

Files already downloaded and verified
8333


In [60]:
import torch

# Clear GPU memory
torch.cuda.empty_cache()

In [25]:
# Load the model on CPU
model1 = RedNetClassifier(num_classes=10)
model1.load_state_dict(torch.load('/content/Rednettest.T7', map_location='cpu'))

# Move the model to GPU
model1.to('cuda')
import torch

# Check available GPU memory
print(f"Currently allocated GPU memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
print(f"Max allocated GPU memory: {torch.cuda.max_memory_allocated() / 1024**2:.2f} MB")

Currently allocated GPU memory: 129.53 MB
Max allocated GPU memory: 725.53 MB


In [68]:
 #Assuming you have defined your model and loaded the trained weights
#model1 = RedNetClassifier(num_classes=10)

#model1.load_state_dict(torch.load('/content/drive/MyDrive/RednetExp29july/Rednet0.T7'))

# Assuming you're using GPU, move the model to GPU
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#print(device)
#model1.to(device)

# Set the model to evaluation mode
model1.eval()

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Variables to track metrics
total_loss = 0.0
correct_predictions = 0
total_predictions = 0
soft_outputs=[]

rednet_target1=[]
# Iterate over the test dataset
for images, target in subset_loader6:
    images = images.to(device)
    target = target.to(device)
    #print(images.is_cuda)
    # Forward pass
    outputs = model1(images)
    soft_outputs.append(outputs)
    rednet_target1.append(target)

    # Calculate loss
    loss = criterion(outputs, target)
    total_loss += loss.item()

    # Get predicted labels
    _, predicted = torch.max(outputs.data, 1)
    total_predictions += target.size(0)
    correct_predictions += (predicted == target).sum().item()

# Calculate average loss and accuracy

print(type(soft_outputs))
rednet_target1 = torch.cat(rednet_target1, dim=0)
print(rednet_target1.shape)
    # Save the targets to a .pt file
torch.save(rednet_target1, 'rednet_targetloader5.pt')
average_loss = total_loss / len(trainloader1)
accuracy = correct_predictions / total_predictions

# Save softmax outputs to a file
if len(soft_outputs) > 0:
    soft_outputs = torch.cat(soft_outputs, dim=0)
    torch.save(soft_outputs, 'rednetsubsetloader5.pt')
else:
    print("No softmax outputs to save.")


print("Train Loss: {:.4f}".format(average_loss))
print("Train Accuracy: {:.2%}".format(accuracy))

<class 'list'>
torch.Size([8333])
Train Loss: 176.4078
Train Accuracy: 17.29%


In [80]:
import torch

# Load the tensor from the specified file
rednet_target1 = torch.load('/content/rednet_targetloader.pt')
print(rednet_target1.shape)
rednet_target2 = torch.load('/content/rednet_targetloader1.pt')
print(rednet_target2.shape)
rednet_target3 = torch.load('/content/rednet_targetloader2.pt')
print(rednet_target3.shape)
rednet_target4 = torch.load('/content/rednet_targetloader3.pt')
print(rednet_target4.shape)
rednet_target5 = torch.load('/content/rednet_targetloader4.pt')
print(rednet_target5.shape)
rednet_target6 = torch.load('/content/rednet_targetloader5.pt')
print(rednet_target6.shape)

torch.Size([8333])
torch.Size([8333])
torch.Size([8333])
torch.Size([8333])
torch.Size([8333])
torch.Size([8333])


In [85]:


# Stack the tensors along the first dimension
concatenated_targets = torch.cat((rednet_target1, rednet_target2, rednet_target3, rednet_target4, rednet_target5, rednet_target6), dim=0)

print(concatenated_targets.shape)  # This will show the shape of the concatenated tensor
print(concatenated_targets)


torch.Size([49998])
tensor([6, 9, 9,  ..., 2, 6, 9], device='cuda:0')


In [76]:
# Apply argmax along dimension 1
concatenated_targets = concatenated_targets.argmax(dim=1)
print(concatenated_targets.shape)

torch.Size([49998])


In [84]:
rednet_targets = torch.load('/content/rednet_targets.pt')
print(rednet_targets.shape)
import torch


# Retain indices up to 49998
rednet_targets = rednet_targets[:49998]

print(rednet_targets.shape)  # This will show the shape of the retained indices tensor
print(rednet_targets)


torch.Size([50000])
torch.Size([49998])
tensor([6, 9, 9,  ..., 2, 6, 9], device='cuda:0')


In [78]:
print(torch.eq(rednet_targets , concatenated_targets).sum())

tensor(8830, device='cuda:0')


In [79]:
matches = torch.eq(concatenated_targets, rednet_targets)
total_matches = matches.sum()

print(total_matches)

tensor(8830, device='cuda:0')


In [88]:
rednet_targets = torch.load('/content/saved_targets123.pt')
print(rednet_targets.shape)
print(rednet_targets)
import torch

torch.Size([50000])
tensor([6, 9, 9,  ..., 9, 1, 1], device='cuda:0')


In [89]:
# Retain indices up to 49998
rednet_targets = rednet_targets[:49998]

print(rednet_targets.shape)  # This will show the shape of the retained indices tensor
print(rednet_targets)

torch.Size([49998])
tensor([6, 9, 9,  ..., 2, 6, 9], device='cuda:0')
