In [None]:
# Mount Google Drive at /content/gdrive; the dataset folders and checkpoint
# directory used by the cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# timm supplies the pretrained backbones built with timm.create_model in the training cells.
!pip install timm

model

In [None]:
import torch.nn as nn
import torch

class BasicConv(nn.Module):
    """2-D convolution optionally followed by batch normalisation and ReLU.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels (also stored as ``out_channels``).
        kernel_size, stride, padding, dilation, groups, bias: Forwarded
            unchanged to ``nn.Conv2d``.
        relu: When true, a ``nn.ReLU`` is applied last.
        bn: When true, a ``nn.BatchNorm2d`` (eps 1e-5, momentum 0.01, affine)
            is applied right after the convolution.
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True,
                 bn=True, bias=False):
        super().__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        # A disabled stage is stored as None so forward() can simply skip it.
        if bn:
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
        else:
            self.bn = None
        if relu:
            self.relu = nn.ReLU()
        else:
            self.relu = None

    def forward(self, x):
        """Run convolution, then whichever of BN / ReLU are enabled, in that order."""
        out = self.conv(x)
        for stage in (self.bn, self.relu):
            if stage is not None:
                out = stage(out)
        return out

utils progressive

In [None]:
import numpy as np
import torchvision
from torch.autograd import Variable
from torchvision import transforms
#from model import *

def cosine_anneal_schedule(t, nb_epoch, lr):
    """Return the cosine-annealed learning rate for epoch ``t``.

    The rate starts at ``lr`` when ``t % nb_epoch == 0`` and decays along half
    a cosine period towards 0 as ``t % nb_epoch`` approaches ``nb_epoch``.

    Args:
        t: Current epoch index (0-based; ``t - 1`` would be used for 1-based indexing).
        nb_epoch: Length of one annealing cycle in epochs.
        lr: Peak learning rate.

    Returns:
        The annealed learning rate as a Python ``float``.
    """
    phase = np.pi * (t % nb_epoch) / nb_epoch
    return float(lr / 2 * (np.cos(phase) + 1))



def model_info(model):  # Plots a line-by-line description of a PyTorch model
    """Print one row per named parameter of ``model`` followed by a totals line.

    Each row shows the parameter index, its name (with any ``module_list.``
    prefix removed), whether it requires grad, its element count, its shape,
    and its mean and standard deviation.

    Args:
        model: Any ``nn.Module``; a module without parameters is reported as
            0 layers instead of failing.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
    # Counted explicitly: the loop variable is undefined when there are no
    # parameters, which used to raise NameError in the summary line.
    n_layers = 0
    for i, (name, p) in enumerate(model.named_parameters()):
        name = name.replace('module_list.', '')
        print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
        n_layers = i + 1
    print('Model Summary: %g layers, %g parameters, %g gradients\n' % (n_layers, n_p, n_g))


def test(net, criterion, batch_size):
    """Evaluate a four-output network on the held-out image folder.

    ``net`` must return ``(output_1, output_2, output_3, output_concat)``.
    Accuracy is reported twice: from ``output_concat`` alone and from the sum
    of all four outputs ("combined").

    Args:
        net: Model to evaluate; it is switched to eval mode and is expected to
            already live on the CUDA device.
        criterion: Loss applied to ``output_concat`` and the targets.
        batch_size: Evaluation batch size.

    Returns:
        ``(test_acc, test_acc_en, test_loss)``: accuracy of the concat head in
        percent, accuracy of the combined prediction in percent, and the mean
        of the per-batch losses.
    """
    net.eval()
    test_loss = 0
    correct = 0
    correct_com = 0
    total = 0
    idx = 0
    device = torch.device("cuda")

    transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    testset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/test1',
                                               transform=transform_test)
    # Evaluation does not need shuffling; a fixed order keeps the per-batch
    # mean loss reproducible between runs.
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Gradients are never used here, so autograd bookkeeping is disabled to
    # save memory and time (same as test_search).
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            idx = batch_idx
            inputs, targets = inputs.to(device), targets.to(device)

            output_1, output_2, output_3, output_concat = net(inputs)
            outputs_com = output_1 + output_2 + output_3 + output_concat

            loss = criterion(output_concat, targets)

            test_loss += loss.item()
            _, predicted = torch.max(output_concat, 1)
            _, predicted_com = torch.max(outputs_com, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            correct_com += predicted_com.eq(targets).sum().item()

            if batch_idx % 50 == 0:
                print('Step: %d | Loss: %.3f | Acc: %.3f%% (%d/%d) |Combined Acc: %.3f%% (%d/%d)' % (
                    batch_idx, test_loss / (batch_idx + 1), 100. * float(correct) / total, correct, total,
                    100. * float(correct_com) / total, correct_com, total))

    test_acc = 100. * float(correct) / total
    test_acc_en = 100. * float(correct_com) / total
    test_loss = test_loss / (idx + 1)

    return test_acc, test_acc_en, test_loss

Train Teacher


In [None]:
from __future__ import print_function
import torch.optim as optim
import torch.backends.cudnn as cudnn
#from pytorchimagemodels
import timm
import os
import random
import imgaug.augmenters as iaa
class Features(nn.Module):
    """Backbone split into eight stages that exposes the last three feature maps.

    Args:
        net_layers: Indexable collection with at least eight entries. Entries
            0-3 are single modules; entries 4-7 are iterables of modules that
            are unpacked into one ``nn.Sequential`` each.
    """

    def __init__(self, net_layers):
        super().__init__()
        for index in range(8):
            layer = net_layers[index]
            if index < 4:
                stage = nn.Sequential(layer)
            else:
                stage = nn.Sequential(*layer)
            # Registered as net_layer_0 ... net_layer_7, in this order.
            setattr(self, 'net_layer_%d' % index, stage)

    def forward(self, x):
        """Return the outputs of stages 5, 6 and 7 as ``(x1, x2, x3)``."""
        for index in range(5):
            x = getattr(self, 'net_layer_%d' % index)(x)
        x1 = self.net_layer_5(x)
        x2 = self.net_layer_6(x1)
        x3 = self.net_layer_7(x2)
        return x1, x2, x3
def img_progressive(x, limit, p=0.5):
    """Randomly apply imgaug brightness scaling to a batch of images.

    With probability ``p`` the batch is passed through
    ``iaa.MultiplyBrightness((1 - limit, 1 + limit))``; otherwise ``x`` is
    returned untouched (the very same object).

    Args:
        x: 4-D image tensor, channels in dimension 1. Values are scaled by 255
            and cast to uint8, so presumably they lie in [0, 1] (confirm with caller).
        limit: Half-width of the brightness factor range around 1.
        p: Probability of applying the augmentation.

    Returns:
        ``x`` itself, or a new float32 CPU tensor with the same layout.
    """
    if not random.random() < p:
        return x

    brighten = iaa.MultiplyBrightness((1 - limit, 1 + limit))

    # imgaug is fed channels-last uint8 arrays, so convert, augment, convert back.
    channels_last = x.permute(0, 2, 3, 1).cpu().numpy()
    as_uint8 = (channels_last * 255).astype(np.uint8)
    augmented = brighten(images=as_uint8)
    restored = torch.from_numpy(augmented.astype(np.float32)).clone() / 255
    return restored.permute(0, 3, 1, 2)
class Network_Wrapper(nn.Module):
    """Backbone plus three per-stage classification heads and one fused head.

    The three feature maps returned by ``Features`` (expected to carry 512,
    1024 and 2048 channels) are each reduced to a 1024-channel map, max-pooled
    with kernels 28 / 14 / 7 and flattened. Each flattened vector feeds its own
    10-way classifier; the three vectors concatenated feed a fourth one.

    Args:
        net_layers: Backbone stages, forwarded to ``Features``.
    """

    @staticmethod
    def _conv_block(in_channels):
        """1x1 reduction to 512 channels followed by a 3x3 expansion to 1024."""
        return nn.Sequential(
            BasicConv(in_channels, 512, kernel_size=1, stride=1, padding=0, relu=True),
            BasicConv(512, 1024, kernel_size=3, stride=1, padding=1, relu=True),
        )

    @staticmethod
    def _classifier(in_features):
        """BatchNorm -> Linear(512) -> BatchNorm -> ELU -> Linear(10) head."""
        return nn.Sequential(
            nn.BatchNorm1d(in_features),
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ELU(inplace=True),
            nn.Linear(512, 10),
        )

    def __init__(self, net_layers):
        super().__init__()
        self.Features = Features(net_layers)

        self.max_pool1 = nn.MaxPool2d(kernel_size=28, stride=1)
        self.max_pool2 = nn.MaxPool2d(kernel_size=14, stride=1)
        self.max_pool3 = nn.MaxPool2d(kernel_size=7, stride=1)

        # Creation order is kept (block1, head1, block2, head2, block3, head3,
        # fused head) so parameter registration and initialisation order match.
        self.conv_block1 = self._conv_block(512)
        self.classifier1 = self._classifier(1024)
        self.conv_block2 = self._conv_block(1024)
        self.classifier2 = self._classifier(1024)
        self.conv_block3 = self._conv_block(2048)
        self.classifier3 = self._classifier(1024)
        self.classifier_concat = self._classifier(1024 * 3)

    def forward(self, x):
        """Return ``(head1_logits, head2_logits, head3_logits, fused_logits)``."""
        branches = (
            (self.conv_block1, self.max_pool1, self.classifier1),
            (self.conv_block2, self.max_pool2, self.classifier2),
            (self.conv_block3, self.max_pool3, self.classifier3),
        )
        flat_features = []
        logits = []
        for feature_map, (block, pool, classifier) in zip(self.Features(x), branches):
            pooled = pool(block(feature_map))
            flat = pooled.view(pooled.size(0), -1)
            flat_features.append(flat)
            logits.append(classifier(flat))

        fused = self.classifier_concat(torch.cat(flat_features, -1))
        return logits[0], logits[1], logits[2], fused


def train(nb_epoch, batch_size, store_name, resume=False, start_epoch=0, model_path=None):
    """Fine-tune a pretrained ``skresnext50_32x4d`` wrapped in ``Network_Wrapper``.

    Every full batch triggers four optimizer steps: heads 1-3 are each trained
    on a randomly brightness-jittered copy of the batch (limits 0.3, 0.2, 0.1)
    and the fused head is trained on the un-jittered batch with its loss
    weighted by 2. For epochs below 10 the model is evaluated with ``test``
    and saved when the combined accuracy improves; later epochs overwrite
    ``model.pth``.

    Args:
        nb_epoch: Exclusive upper bound of the epoch loop; also the period of
            the cosine learning-rate schedule.
        batch_size: Training batch size; smaller (trailing) batches are skipped.
        store_name: Directory for checkpoints and ``results_*.txt`` logs;
            created when missing.
        resume: Accepted for interface compatibility; not used by this body.
        start_epoch: First epoch index of the loop.
        model_path: Accepted for interface compatibility; not used by this body.

    Raises:
        RuntimeError: If an epoch contains no batch of ``batch_size`` samples.
    """
    exp_dir = store_name
    # Creates the folder only when needed, without hiding unrelated errors
    # behind a bare except.
    os.makedirs(exp_dir, exist_ok=True)

    use_cuda = True
    print(use_cuda)

    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # Normalisation is applied per batch below, after the brightness
        # jitter, because img_progressive rescales its input by 255.
    ])
    trainset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/train',
                                                transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
    print("List of models")
    for i in timm.list_models(pretrained=True):
        print(i)
    model_name = "skresnext50_32x4d"
    print(model_name)
    net = timm.create_model(model_name, pretrained=True, num_classes=10)
    model_info(net)
    # Keep the first eight children as the feature extractor; the remaining
    # children of the timm model are dropped.
    net_layers = list(net.children())[0:8]
    net = Network_Wrapper(net_layers)
    model_info(net)
    print('Model %s created, param count: %d' %
          ('Created_model', sum([m.numel() for m in net.parameters()])))

    netp = torch.nn.DataParallel(net)

    device = torch.device("cuda")
    net.to(device)
    cudnn.benchmark = True

    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.SGD([
        {'params': net.classifier_concat.parameters(), 'lr': 0.002},
        {'params': net.conv_block1.parameters(), 'lr': 0.002},
        {'params': net.classifier1.parameters(), 'lr': 0.002},
        {'params': net.conv_block2.parameters(), 'lr': 0.002},
        {'params': net.classifier2.parameters(), 'lr': 0.002},
        {'params': net.conv_block3.parameters(), 'lr': 0.002},
        {'params': net.classifier3.parameters(), 'lr': 0.002},
        {'params': net.Features.parameters(), 'lr': 0.0002}
    ],
        momentum=0.9, weight_decay=5e-4)
    # Peak rate of every parameter group, taken from the optimizer itself so
    # it cannot drift from the group definitions above.
    base_lrs = [group['lr'] for group in optimizer.param_groups]

    NORM = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    # (jitter limit, output index, loss weight) for the three per-stage heads;
    # the fused head (index 3, weight 2) sees the un-jittered batch.
    jitter_limits = (0.3, 0.2, 0.1)
    loss_weights = (1, 1, 1, 2)

    max_val_acc = 0
    for epoch in range(start_epoch, nb_epoch):
        print('\nEpoch: %d' % epoch)
        net.train()

        # The schedule depends only on the epoch, so set it once per epoch.
        for group, base_lr in zip(optimizer.param_groups, base_lrs):
            group['lr'] = cosine_anneal_schedule(epoch, nb_epoch, base_lr)

        train_loss = 0
        head_loss_sums = [0.0, 0.0, 0.0, 0.0]
        correct = 0
        total = 0
        seen_batches = 0

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            # Skip an incomplete batch before doing any augmentation work.
            if inputs.shape[0] < batch_size:
                continue

            step_inputs = [NORM(img_progressive(inputs.clone(), limit, p=0.3)) for limit in jitter_limits]
            step_inputs.append(NORM(inputs))
            step_inputs = [batch.to(device) for batch in step_inputs]
            targets = targets.to(device)

            step_losses = []
            for head, (batch, weight) in enumerate(zip(step_inputs, loss_weights)):
                optimizer.zero_grad()
                outputs = netp(batch)
                loss = CELoss(outputs[head], targets) * weight
                loss.backward()
                optimizer.step()
                step_losses.append(loss.item())
            output_concat = outputs[3]

            #  training log
            _, predicted = torch.max(output_concat.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).sum().item()
            seen_batches += 1

            train_loss += sum(step_losses)
            for head, value in enumerate(step_losses):
                head_loss_sums[head] += value

            if batch_idx % 50 == 0:
                print(
                    'Step: %d | Loss1: %.3f | Loss2: %.5f | Loss3: %.5f | Loss_concat: %.5f | Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
                    batch_idx, head_loss_sums[0] / seen_batches, head_loss_sums[1] / seen_batches,
                    head_loss_sums[2] / seen_batches, head_loss_sums[3] / seen_batches, train_loss / seen_batches,
                    100. * float(correct) / total, correct, total))

        if seen_batches == 0:
            raise RuntimeError(
                'No full batch of size %d was produced; the training set is smaller than batch_size.' % batch_size)

        # Averages use the number of batches actually trained on, so a skipped
        # trailing batch no longer dilutes them.
        train_acc = 100. * float(correct) / total
        train_loss = train_loss / seen_batches
        with open(exp_dir + '/results_train.txt', 'a') as file:
            file.write(
                'Iteration %d | train_acc = %.5f | train_loss = %.5f | Loss1: %.3f | Loss2: %.5f | Loss3: %.5f | Loss_concat: %.5f |\n' % (
                epoch, train_acc, train_loss, head_loss_sums[0] / seen_batches, head_loss_sums[1] / seen_batches,
                head_loss_sums[2] / seen_batches, head_loss_sums[3] / seen_batches))

        if epoch < 10 :#or epoch >= 50:
            val_acc, val_acc_com, val_loss = test(net, CELoss, 8)
            if val_acc_com > max_val_acc:
                max_val_acc = val_acc_com
                # The whole module is pickled from the CPU, then moved back.
                net.cpu()
                torch.save(net,  store_name + '/model%d.pth'%(max_val_acc))
                net.to(device)
            with open(exp_dir + '/results_test.txt', 'a') as file:
                file.write('Iteration %d, test_acc = %.5f, test_acc_combined = %.5f, test_loss = %.6f\n' % (
                epoch, val_acc, val_acc_com, val_loss))
        else:
            net.cpu()
            torch.save(net,  store_name + '/model.pth')
            net.to(device)


if __name__ == '__main__':
    # Folder on the mounted Drive that receives checkpoints and result logs.
    save_path = '/content/gdrive/MyDrive/STF/save_teacher'
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    # Positional arguments: number of epochs, batch size, output folder.
    # resume / start_epoch / model_path describe resuming from a checkpoint.
    train(10, 32, save_path,
          resume=False,
          start_epoch=0,
          model_path='')

In [None]:
from __future__ import print_function
import torch.optim as optim
import torch.backends.cudnn as cudnn
#from pytorchimagemodels
import timm
import os
import random
import imgaug.augmenters as iaa

class Features(nn.Module):
    """Eight-stage feature extractor returning the outputs of its last three stages.

    Args:
        net_layers: Indexable collection with at least eight entries. The
            first four are wrapped one-per-``nn.Sequential``; the next four are
            iterables whose members are unpacked into an ``nn.Sequential``.
    """

    def __init__(self, net_layers):
        super().__init__()
        for position in range(8):
            entry = net_layers[position]
            members = (entry,) if position < 4 else tuple(entry)
            self.add_module('net_layer_%d' % position, nn.Sequential(*members))

    def forward(self, x):
        """Feed ``x`` through all stages; return the maps after stages 5, 6, 7."""
        stem_stages = (
            self.net_layer_0,
            self.net_layer_1,
            self.net_layer_2,
            self.net_layer_3,
            self.net_layer_4,
        )
        hidden = x
        for stage in stem_stages:
            hidden = stage(hidden)

        x1 = self.net_layer_5(hidden)
        x2 = self.net_layer_6(x1)
        x3 = self.net_layer_7(x2)
        return x1, x2, x3



def img_progressive(x, limit, p=0.5):
    """Brightness-jitter a batch of images with probability ``p``.

    When the random draw succeeds, the batch goes through imgaug's
    ``MultiplyBrightness`` with the factor range ``(1 - limit, 1 + limit)``.
    Otherwise the input object is returned as is.

    Args:
        x: 4-D image tensor with channels in dimension 1; it is multiplied by
            255 and cast to uint8, so values presumably lie in [0, 1] (confirm).
        limit: Half-width of the brightness factor range around 1.
        p: Probability that the augmentation is applied.

    Returns:
        ``x`` unchanged, or a new float32 CPU tensor in the original layout.
    """
    apply_jitter = random.random() < p
    if apply_jitter:
        low, high = 1 - limit, 1 + limit
        aug = iaa.MultiplyBrightness((low, high))

        # Tensor (N, C, H, W) float -> numpy (N, H, W, C) uint8 for imgaug.
        pixels = (x.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)
        jittered = aug(images=pixels).astype(np.float32)
        # Back to a float tensor scaled by 1/255, channels first again.
        x = (torch.from_numpy(jittered).clone() / 255).permute(0, 3, 1, 2)
    return x



class Network_Wrapper(nn.Module):
    """Backbone plus three per-stage classification heads and one fused head.

    Each of the three feature maps returned by ``Features`` is reduced to a
    1024-channel map, max-pooled (kernels 28 / 14 / 7) and flattened. Every
    flattened vector feeds its own classifier and the concatenation of the
    three feeds a fourth, fused classifier.

    The input widths of the three reduction blocks used to be fixed at
    512 / 1024 / 2048, which fits ResNeXt-50-style backbones only; building
    the wrapper from e.g. ``resnet18`` stages (128 / 256 / 512 channels)
    failed at the first forward pass. The widths are now read from the
    backbone unless given explicitly.

    Args:
        net_layers: Backbone stages, forwarded to ``Features``.
        feature_channels: Optional ``(c1, c2, c3)`` channel counts of the three
            feature maps. When ``None`` they are inferred by a single
            gradient-free probe pass through the backbone.
        num_classes: Number of output classes of every head (default 10).

    Note:
        The classifiers take 1024 features per stage, so each pooling layer
        must reduce its map to 1x1; with kernels 28 / 14 / 7 that means
        feature maps of exactly those spatial sizes.
    """

    @staticmethod
    def _probe_feature_channels(features, input_size=224):
        """Return the channel count of each feature map emitted by ``features``."""
        first_conv = next((m for m in features.modules() if isinstance(m, nn.Conv2d)), None)
        in_channels = first_conv.in_channels if first_conv is not None else 3
        probe = torch.zeros(1, in_channels, input_size, input_size)
        reference = next(features.parameters(), None)
        if reference is not None:
            probe = probe.to(device=reference.device, dtype=reference.dtype)

        # Probe in eval mode so batch-norm statistics are not touched, then
        # restore every sub-module's own training flag.
        modes = [(module, module.training) for module in features.modules()]
        features.eval()
        try:
            with torch.no_grad():
                feature_maps = features(probe)
        finally:
            for module, was_training in modes:
                module.training = was_training
        return tuple(int(feature_map.shape[1]) for feature_map in feature_maps)

    @staticmethod
    def _conv_block(in_channels):
        """1x1 reduction to 512 channels followed by a 3x3 expansion to 1024."""
        return nn.Sequential(
            BasicConv(in_channels, 512, kernel_size=1, stride=1, padding=0, relu=True),
            BasicConv(512, 1024, kernel_size=3, stride=1, padding=1, relu=True)
        )

    @staticmethod
    def _classifier(in_features, num_classes):
        """BatchNorm -> Linear(512) -> BatchNorm -> ELU -> Linear(num_classes)."""
        return nn.Sequential(
            nn.BatchNorm1d(in_features),
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ELU(inplace=True),
            nn.Linear(512, num_classes),
        )

    def __init__(self, net_layers, feature_channels=None, num_classes=10):
        super().__init__()
        self.Features = Features(net_layers)

        if feature_channels is None:
            feature_channels = self._probe_feature_channels(self.Features)
        channels_1, channels_2, channels_3 = feature_channels

        self.max_pool1 = nn.MaxPool2d(kernel_size=28, stride=1)
        self.max_pool2 = nn.MaxPool2d(kernel_size=14, stride=1)
        self.max_pool3 = nn.MaxPool2d(kernel_size=7, stride=1)

        # Same creation order as before, so parameter registration and random
        # initialisation order are unchanged.
        self.conv_block1 = self._conv_block(channels_1)
        self.classifier1 = self._classifier(1024, num_classes)

        self.conv_block2 = self._conv_block(channels_2)
        self.classifier2 = self._classifier(1024, num_classes)

        self.conv_block3 = self._conv_block(channels_3)
        self.classifier3 = self._classifier(1024, num_classes)

        self.classifier_concat = self._classifier(1024 * 3, num_classes)

    def forward(self, x):
        """Return ``(head1_logits, head2_logits, head3_logits, fused_logits)``."""
        x1, x2, x3 = self.Features(x)

        x1_ = self.conv_block1(x1)
        x1_ = self.max_pool1(x1_)
        x1_f = x1_.view(x1_.size(0), -1)
        x1_c = self.classifier1(x1_f)

        x2_ = self.conv_block2(x2)
        x2_ = self.max_pool2(x2_)
        x2_f = x2_.view(x2_.size(0), -1)
        x2_c = self.classifier2(x2_f)

        x3_ = self.conv_block3(x3)
        x3_ = self.max_pool3(x3_)
        x3_f = x3_.view(x3_.size(0), -1)
        x3_c = self.classifier3(x3_f)

        # The fused head sees the pooled features of all three stages.
        x_c_all = torch.cat((x1_f, x2_f, x3_f), -1)
        x_c_all = self.classifier_concat(x_c_all)

        return x1_c, x2_c, x3_c, x_c_all




In [None]:
def train(nb_epoch, batch_size, store_name, resume=False, start_epoch=0, model_path=None):
    """Fine-tune a pretrained ``resnet18`` wrapped in ``Network_Wrapper``.

    Every full batch triggers four optimizer steps: heads 1-3 are each trained
    on a randomly brightness-jittered copy of the batch (limits 0.3, 0.2, 0.1)
    and the fused head is trained on the un-jittered batch with its loss
    weighted by 2. For epochs below 10 the model is evaluated with ``test``
    and saved when the combined accuracy improves; later epochs overwrite
    ``model.pth``.

    Args:
        nb_epoch: Exclusive upper bound of the epoch loop; also the period of
            the cosine learning-rate schedule.
        batch_size: Training batch size; smaller (trailing) batches are skipped.
        store_name: Directory for checkpoints and ``results_*.txt`` logs;
            created when missing.
        resume: Accepted for interface compatibility; not used by this body.
        start_epoch: First epoch index of the loop.
        model_path: Accepted for interface compatibility; not used by this body.

    Raises:
        RuntimeError: If an epoch contains no batch of ``batch_size`` samples.
    """
    exp_dir = store_name
    # Creates the folder only when needed, without hiding unrelated errors
    # behind a bare except.
    os.makedirs(exp_dir, exist_ok=True)

    use_cuda = True
    print(use_cuda)

    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # Normalisation is applied per batch below, after the brightness
        # jitter, because img_progressive rescales its input by 255.
    ])
    trainset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/train',
                                                transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
    print("List of models")
    for i in timm.list_models(pretrained=True):
        print(i)
    #model_name = "skresnext50_32x4d"
    model_name = "resnet18"
    net = timm.create_model(model_name, pretrained=True, num_classes=10)

    # Keep the first eight children as the feature extractor; the remaining
    # children of the timm model are dropped.
    net_layers = list(net.children())[0:8]

    # NOTE(review): Network_Wrapper must accept this backbone's stage widths
    # (resnet18's last three stages emit 128/256/512 channels) - confirm.
    net = Network_Wrapper(net_layers)
    model_info(net)
    print('Model %s created, param count: %d' %
          ('Created_model', sum([m.numel() for m in net.parameters()])))

    netp = torch.nn.DataParallel(net)

    device = torch.device("cuda")
    net.to(device)
    cudnn.benchmark = True

    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.SGD([
        {'params': net.classifier_concat.parameters(), 'lr': 0.002},
        {'params': net.conv_block1.parameters(), 'lr': 0.002},
        {'params': net.classifier1.parameters(), 'lr': 0.002},
        {'params': net.conv_block2.parameters(), 'lr': 0.002},
        {'params': net.classifier2.parameters(), 'lr': 0.002},
        {'params': net.conv_block3.parameters(), 'lr': 0.002},
        {'params': net.classifier3.parameters(), 'lr': 0.002},
        {'params': net.Features.parameters(), 'lr': 0.0002}
    ],
        momentum=0.9, weight_decay=5e-4)
    # Peak rate of every parameter group, taken from the optimizer itself so
    # it cannot drift from the group definitions above.
    base_lrs = [group['lr'] for group in optimizer.param_groups]

    NORM = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    # One jitter limit per per-stage head; the fused head (output index 3,
    # loss weight 2) is trained on the un-jittered batch.
    jitter_limits = (0.3, 0.2, 0.1)
    loss_weights = (1, 1, 1, 2)

    max_val_acc = 0
    for epoch in range(start_epoch, nb_epoch):
        print('\nEpoch: %d' % epoch)
        net.train()

        # The schedule depends only on the epoch, so set it once per epoch.
        for group, base_lr in zip(optimizer.param_groups, base_lrs):
            group['lr'] = cosine_anneal_schedule(epoch, nb_epoch, base_lr)

        train_loss = 0
        head_loss_sums = [0.0, 0.0, 0.0, 0.0]
        correct = 0
        total = 0
        seen_batches = 0

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            # Skip an incomplete batch before doing any augmentation work.
            if inputs.shape[0] < batch_size:
                continue

            step_inputs = [NORM(img_progressive(inputs.clone(), limit, p=0.3)) for limit in jitter_limits]
            step_inputs.append(NORM(inputs))
            step_inputs = [batch.to(device) for batch in step_inputs]
            targets = targets.to(device)

            step_losses = []
            for head, (batch, weight) in enumerate(zip(step_inputs, loss_weights)):
                optimizer.zero_grad()
                outputs = netp(batch)
                loss = CELoss(outputs[head], targets) * weight
                loss.backward()
                optimizer.step()
                step_losses.append(loss.item())
            output_concat = outputs[3]

            #  training log
            _, predicted = torch.max(output_concat.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).sum().item()
            seen_batches += 1

            train_loss += sum(step_losses)
            for head, value in enumerate(step_losses):
                head_loss_sums[head] += value

            if batch_idx % 50 == 0:
                print(
                    'Step: %d | Loss1: %.3f | Loss2: %.5f | Loss3: %.5f | Loss_concat: %.5f | Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
                    batch_idx, head_loss_sums[0] / seen_batches, head_loss_sums[1] / seen_batches,
                    head_loss_sums[2] / seen_batches, head_loss_sums[3] / seen_batches, train_loss / seen_batches,
                    100. * float(correct) / total, correct, total))

        if seen_batches == 0:
            raise RuntimeError(
                'No full batch of size %d was produced; the training set is smaller than batch_size.' % batch_size)

        # Averages use the number of batches actually trained on, so a skipped
        # trailing batch no longer dilutes them.
        train_acc = 100. * float(correct) / total
        train_loss = train_loss / seen_batches
        with open(exp_dir + '/results_train.txt', 'a') as file:
            file.write(
                'Iteration %d | train_acc = %.5f | train_loss = %.5f | Loss1: %.3f | Loss2: %.5f | Loss3: %.5f | Loss_concat: %.5f |\n' % (
                epoch, train_acc, train_loss, head_loss_sums[0] / seen_batches, head_loss_sums[1] / seen_batches,
                head_loss_sums[2] / seen_batches, head_loss_sums[3] / seen_batches))

        if epoch < 10 :#or epoch >= 50:
            val_acc, val_acc_com, val_loss = test(net, CELoss, 8)
            if val_acc_com > max_val_acc:
                max_val_acc = val_acc_com
                # The whole module is pickled from the CPU, then moved back.
                net.cpu()
                torch.save(net,  store_name + '/model%d.pth'%(max_val_acc))
                net.to(device)
            with open(exp_dir + '/results_test.txt', 'a') as file:
                file.write('Iteration %d, test_acc = %.5f, test_acc_combined = %.5f, test_loss = %.6f\n' % (
                epoch, val_acc, val_acc_com, val_loss))
        else:
            net.cpu()
            torch.save(net,  store_name + '/model.pth')
            net.to(device)


if __name__ == '__main__':
    # Checkpoints and result logs are written to this folder on the mounted Drive.
    save_path = '/content/gdrive/MyDrive/STF/save_teacher'
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    # 10 epochs with batch size 32, writing into save_path; the remaining
    # arguments describe resuming from a checkpoint (disabled here).
    train(
        10,
        32,
        save_path,
        resume=False,
        start_epoch=0,
        model_path='',
    )

pyconv models

PYCONV2

In [None]:
import torch
import torch.nn as nn
import os
def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, groups=1):
    """Build a bias-free ``nn.Conv2d``; the defaults give a padded 3x3 convolution."""
    options = dict(
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **options)

class PyConv2(nn.Module):
    """Two parallel convolutions whose outputs are concatenated along channels.

    Each branch produces ``planes // 2`` channels and uses its own kernel size
    and group count; padding is ``kernel // 2`` for each branch.

    Args:
        inplans: Number of input channels.
        planes: Total number of output channels (split evenly over the branches).
        pyconv_kernels: Kernel size of the first and second branch.
        stride: Stride shared by both branches.
        pyconv_groups: Group count of the first and second branch.
    """

    def __init__(self, inplans, planes, pyconv_kernels=[3, 5], stride=1, pyconv_groups=[1, 4]):
        super().__init__()
        branch_planes = planes // 2
        first_kernel = pyconv_kernels[0]
        second_kernel = pyconv_kernels[1]

        self.conv2_1 = conv(
            inplans, branch_planes,
            kernel_size=first_kernel, padding=first_kernel // 2,
            stride=stride, groups=pyconv_groups[0],
        )
        self.conv2_2 = conv(
            inplans, branch_planes,
            kernel_size=second_kernel, padding=second_kernel // 2,
            stride=stride, groups=pyconv_groups[1],
        )

    def forward(self, x):
        """Apply both branches to ``x`` and concatenate them on dimension 1."""
        branch_outputs = (self.conv2_1(x), self.conv2_2(x))
        return torch.cat(branch_outputs, dim=1)


logits

In [None]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F


class Logits(nn.Module):
    """Loss module: mean squared error between two sets of outputs."""

    def __init__(self):
        super().__init__()

    def forward(self, out_s, out_t):
        """Return ``F.mse_loss(out_s, out_t)`` (default mean reduction)."""
        return F.mse_loss(input=out_s, target=out_t)


traindriversearch

utils

In [None]:
import numpy as np
import torchvision
from torch.autograd import Variable
from torchvision import transforms
#from model import *



def cosine_anneal_schedule(t, nb_epoch, lr):
    """Cosine learning-rate annealing over a cycle of ``nb_epoch`` epochs.

    Args:
        t: Current epoch index (0-based; ``t - 1`` is used when ``t`` has 1-based indexing).
        nb_epoch: Number of epochs in one cycle.
        lr: Learning rate at the start of a cycle.

    Returns:
        ``lr / 2 * (cos(pi * (t % nb_epoch) / nb_epoch) + 1)`` as a ``float``.
    """
    position = t % nb_epoch
    angle = np.pi * position
    angle = angle / nb_epoch
    scale = np.cos(angle) + 1
    return float(lr / 2 * scale)



def model_info(model):  # Plots a line-by-line description of a PyTorch model
    """Print a table of the model's named parameters and a summary line.

    Columns: index, name (``module_list.`` prefix stripped), requires_grad,
    element count, shape, mean and standard deviation.

    Args:
        model: Any ``nn.Module``. A module that owns no parameters yields a
            summary of 0 layers rather than a NameError.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
    # Track the row count separately; relying on the loop variable after the
    # loop breaks when named_parameters() is empty.
    layer_count = 0
    for i, (name, p) in enumerate(model.named_parameters()):
        name = name.replace('module_list.', '')
        print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
        layer_count += 1
    print('Model Summary: %g layers, %g parameters, %g gradients\n' % (layer_count, n_p, n_g))


def test_search(net, criterion, batch_size):
    """Evaluate a single-output network on the held-out image folder.

    Args:
        net: Model returning one logits tensor per batch; switched to eval mode.
        criterion: Loss applied to the logits and the targets.
        batch_size: Evaluation batch size.

    Returns:
        ``(test_acc, test_loss)``: accuracy in percent and the mean of the
        per-batch losses.
    """
    net.eval()
    use_cuda = torch.cuda.is_available()
    test_loss = 0
    correct = 0
    total = 0
    idx = 0
    # Fall back to the CPU when CUDA is unavailable instead of naming a
    # device that does not exist.
    device = torch.device("cuda" if use_cuda else "cpu")

    transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    testset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/test1',
                                               transform=transform_test)
    # Evaluation does not need shuffling; a fixed order keeps the per-batch
    # mean loss reproducible between runs.
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)
    last_batch = len(testloader) - 1

    # torch.no_grad() already disables autograd; the removed
    # Variable(..., volatile=True) wrapper only produced a deprecation warning.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            idx = batch_idx
            inputs, targets = inputs.to(device), targets.to(device)
            output = net(inputs)

            loss = criterion(output, targets)

            test_loss += loss.item()
            _, predicted = torch.max(output, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            if batch_idx % 50 == 0 or batch_idx == last_batch:
                print('Step: %d | Loss: %.3f | Acc: %.3f%% (%d/%d) |' % (
                    batch_idx, test_loss / (batch_idx + 1), 100. * float(correct) / total, correct, total))

    test_acc = 100. * float(correct) / total
    test_loss = test_loss / (idx + 1)

    return test_acc, test_loss

In [None]:
from __future__ import print_function
import os
import logging
import torch.optim as optim


class BasicLayer(nn.Module):
    """``PyConv2`` followed by batch normalisation and an in-place ReLU.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        kernel_size: Kernel size of the first ``PyConv2`` branch.
        kernel_size_: Kernel size of the second ``PyConv2`` branch.
        group: Group count of the first branch.
        group_: Group count of the second branch.
    """

    def __init__(self, in_planes, out_planes, kernel_size, kernel_size_, group=1, group_=4):
        super().__init__()
        pyramid_conv = PyConv2(
            in_planes,
            out_planes,
            pyconv_kernels=[kernel_size, kernel_size_],
            stride=1,
            pyconv_groups=[group, group_],
        )
        self.convs = nn.Sequential(pyramid_conv, nn.BatchNorm2d(out_planes), nn.ReLU(inplace=True))

    def forward(self, x):
        """Apply conv -> BN -> ReLU to ``x``."""
        return self.convs(x)


class Cells1(nn.Module):
    """Softmax-weighted mixture of four candidate ``BasicLayer`` cells (3 -> 32 channels).

    All candidates use kernel size 11 for the first branch and group counts of
    1; they differ in the second branch's kernel size (7, 5, 3, 1). The mixing
    weights are the softmax of the learnable ``params`` (initialised to ones).
    """

    def __init__(self):
        super().__init__()
        self.params = nn.Parameter(torch.ones(4, 1))

        # cell1 .. cell4, registered in this order.
        for position, second_kernel in enumerate((7, 5, 3, 1), start=1):
            candidate = BasicLayer(3, 32, kernel_size=11, kernel_size_=second_kernel, group=1, group_=1)
            setattr(self, 'cell%d' % position, candidate)

    def forward(self, x):
        """Return the weighted sum of all candidate outputs for ``x``."""
        candidates = (self.cell1, self.cell2, self.cell3, self.cell4)
        outputs = [candidate(x) for candidate in candidates]

        weights = torch.nn.functional.softmax(self.params, dim=0)

        # Accumulate left to right, matching x1*p[0] + x2*p[1] + ...
        mixed = outputs[0] * weights[0]
        for position in range(1, len(outputs)):
            mixed = mixed + outputs[position] * weights[position]
        return mixed


class Cells1_pool(nn.Module):
    """Softmax-weighted mixture of 2x2 average pooling and 2x2 max pooling.

    The two mixing weights are the softmax of the learnable ``params``
    (initialised to ones, i.e. an even blend at the start).
    """

    def __init__(self):
        super().__init__()
        self.params = nn.Parameter(torch.ones(2, 1))
        self.cell1 = nn.AvgPool2d(2, stride=2)
        self.cell2 = nn.MaxPool2d(2, stride=2)

    def forward(self, x):
        """Return ``avg_pool(x) * w0 + max_pool(x) * w1``."""
        averaged = self.cell1(x)
        maxed = self.cell2(x)
        avg_weight, max_weight = torch.nn.functional.softmax(self.params, dim=0)
        return averaged * avg_weight + maxed * max_weight


class Cells2(nn.Module):
    """Second search stage: nine candidate 32->64 ``BasicLayer`` branches, softly mixed.

    The branches enumerate second-kernel sizes (5, 3, 1) crossed with
    second-group counts (1, 2, 4); the first kernel is always 9 with group 1.
    ``self.params`` holds one logit per branch.
    """

    def __init__(self):
        super().__init__()
        self.params = nn.Parameter(torch.ones(9, 1))

        # cell1..cell9: kernel size varies slowest, group count fastest.
        specs = [(second_kernel, second_groups)
                 for second_kernel in (5, 3, 1)
                 for second_groups in (1, 2, 4)]
        for position, (second_kernel, second_groups) in enumerate(specs, start=1):
            branch = BasicLayer(32, 64, kernel_size=9, kernel_size_=second_kernel,
                                group=1, group_=second_groups)
            setattr(self, 'cell%d' % position, branch)

    def forward(self, x):
        """Return the softmax-weighted sum of the nine branch outputs for ``x``."""
        outputs = [getattr(self, 'cell%d' % position)(x) for position in range(1, 10)]

        weights = torch.nn.functional.softmax(self.params, dim=0)

        mixed = outputs[0] * weights[0]
        for output, weight in zip(outputs[1:], weights[1:]):
            mixed = mixed + output * weight
        return mixed

class Cells2_pool(nn.Module):
    """Softmax-weighted mix of average pooling and max pooling (2x2 window, stride 2)."""

    def __init__(self):
        super().__init__()
        # Logit 0 weights the average pool, logit 1 the max pool.
        self.params = nn.Parameter(torch.ones(2, 1))

        self.cell1 = nn.AvgPool2d(2, stride=2)
        self.cell2 = nn.MaxPool2d(2, stride=2)

    def forward(self, x):
        """Downsample ``x`` with both pools and blend the results."""
        pooled = (self.cell1(x), self.cell2(x))
        mix = torch.nn.functional.softmax(self.params, dim=0)
        return pooled[0] * mix[0] + pooled[1] * mix[1]


class Cells3(nn.Module):
    """Third search stage: six candidate 64->128 ``BasicLayer`` branches, softly mixed.

    Branches cover second-kernel sizes (3, 1) crossed with second-group counts
    (1, 2, 4); the first kernel is always 5 with group 1. ``self.params``
    holds one logit per branch.
    """

    def __init__(self):
        super().__init__()
        self.params = nn.Parameter(torch.ones(6, 1))

        # cell1..cell6: kernel size varies slowest, group count fastest.
        specs = [(second_kernel, second_groups)
                 for second_kernel in (3, 1)
                 for second_groups in (1, 2, 4)]
        for position, (second_kernel, second_groups) in enumerate(specs, start=1):
            branch = BasicLayer(64, 128, kernel_size=5, kernel_size_=second_kernel,
                                group=1, group_=second_groups)
            setattr(self, 'cell%d' % position, branch)

    def forward(self, x):
        """Return the softmax-weighted sum of the six branch outputs for ``x``."""
        outputs = [getattr(self, 'cell%d' % position)(x) for position in range(1, 7)]

        weights = torch.nn.functional.softmax(self.params, dim=0)

        mixed = outputs[0] * weights[0]
        for output, weight in zip(outputs[1:], weights[1:]):
            mixed = mixed + output * weight
        return mixed

class Cells3_pool(nn.Module):
    """Blend of 2x2/stride-2 average and max pooling with learned softmax weights."""

    def __init__(self):
        super().__init__()
        # One logit per pooling operator (average first, max second).
        self.params = nn.Parameter(torch.ones(2, 1))

        self.cell1 = nn.AvgPool2d(2, stride=2)
        self.cell2 = nn.MaxPool2d(2, stride=2)

    def forward(self, x):
        """Return the weighted sum of the average-pooled and max-pooled ``x``."""
        mix = torch.nn.functional.softmax(self.params, dim=0)
        avg_part = self.cell1(x) * mix[0]
        max_part = self.cell2(x) * mix[1]
        return avg_part + max_part



class Cells4(nn.Module):
    """Fourth search stage: three candidate 128->256 ``BasicLayer`` branches, softly mixed.

    All branches use kernels (3, 1) and differ only in the second group count
    (1, 2 or 4). ``self.params`` holds one logit per branch.
    """

    def __init__(self):
        super().__init__()
        self.params = nn.Parameter(torch.ones(3, 1))

        # Registered as cell1..cell3, in this order.
        for position, second_groups in enumerate((1, 2, 4), start=1):
            branch = BasicLayer(128, 256, kernel_size=3, kernel_size_=1,
                                group=1, group_=second_groups)
            setattr(self, 'cell%d' % position, branch)

    def forward(self, x):
        """Return the softmax-weighted sum of the three branch outputs for ``x``."""
        outputs = [branch(x) for branch in (self.cell1, self.cell2, self.cell3)]

        weights = torch.nn.functional.softmax(self.params, dim=0)

        mixed = outputs[0] * weights[0]
        for output, weight in zip(outputs[1:], weights[1:]):
            mixed = mixed + output * weight
        return mixed

class Cells4_pool(nn.Module):
    """Learned blend of 2x2 average pooling and 2x2 max pooling (both stride 2).

    ``self.params`` holds one logit per pooling op. It previously held three
    logits although only two branches exist, so the two weights actually used
    never summed to 1 and the spare logit was trained without selecting
    anything. It now matches the other ``Cells*_pool`` classes.
    """

    def __init__(self):
        super().__init__()
        # Two logits: index 0 -> average pool, index 1 -> max pool.
        self.params = nn.Parameter(torch.ones(2, 1))

        self.cell1 = nn.AvgPool2d(2, stride=2)

        self.cell2 = nn.MaxPool2d(2, stride=2)

    def forward(self, x):
        """Return ``w_avg * avgpool(x) + w_max * maxpool(x)``."""
        x1 = self.cell1(x)
        x2 = self.cell2(x)
        p = torch.nn.functional.softmax(self.params, dim=0)
        # Indexing (rather than unpacking) keeps whole-model pickles saved with
        # the old three-logit parameter loadable and runnable.
        x = x1*p[0] + x2*p[1]
        return x


class Network_Wrapper(nn.Module):
    """Three-scale classification wrapper around a ``Features`` backbone.

    ``Features`` yields three feature maps. Each goes through its own
    two-layer conv block, a max pool and a 10-way classifier head; the three
    pooled vectors are also concatenated and fed to a fourth head.
    ``forward`` returns the four logit tensors.
    """

    def __init__(self, net_layers):
        super().__init__()
        self.Features = Features(net_layers)

        # Pool windows of 28 / 14 / 7 with stride 1.
        # presumably sized to collapse each feature map to 1x1 — TODO confirm against the backbone
        self.max_pool1 = nn.MaxPool2d(kernel_size=28, stride=1)
        self.max_pool2 = nn.MaxPool2d(kernel_size=14, stride=1)
        self.max_pool3 = nn.MaxPool2d(kernel_size=7, stride=1)

        # Construction order is kept (block1, head1, block2, head2, ...) so
        # that parameter registration and random initialisation are unchanged.
        self.conv_block1 = self._make_conv_block(512)
        self.classifier1 = self._make_head(1024)

        self.conv_block2 = self._make_conv_block(1024)
        self.classifier2 = self._make_head(1024)

        self.conv_block3 = self._make_conv_block(2048)
        self.classifier3 = self._make_head(1024)

        self.classifier_concat = self._make_head(1024 * 3)

    @staticmethod
    def _make_conv_block(in_planes):
        """1x1 conv down to 512 channels followed by a 3x3 conv up to 1024."""
        return nn.Sequential(
            BasicConv(in_planes, 512, kernel_size=1, stride=1, padding=0, relu=True),
            BasicConv(512, 1024, kernel_size=3, stride=1, padding=1, relu=True),
        )

    @staticmethod
    def _make_head(in_features):
        """BN -> Linear(in_features, 512) -> BN -> ELU -> Linear(512, 10)."""
        return nn.Sequential(
            nn.BatchNorm1d(in_features),
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ELU(inplace=True),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return ``(logits_1, logits_2, logits_3, logits_concat)`` for the batch ``x``."""
        feature_maps = self.Features(x)
        branches = (
            (self.conv_block1, self.max_pool1, self.classifier1),
            (self.conv_block2, self.max_pool2, self.classifier2),
            (self.conv_block3, self.max_pool3, self.classifier3),
        )

        vectors = []
        logits = []
        for feature_map, (conv_block, max_pool, classifier) in zip(feature_maps, branches):
            pooled = max_pool(conv_block(feature_map))
            flat = pooled.view(pooled.size(0), -1)
            vectors.append(flat)
            logits.append(classifier(flat))

        fused = self.classifier_concat(torch.cat((vectors[0], vectors[1], vectors[2]), -1))

        return logits[0], logits[1], logits[2], fused


class Features(nn.Module):
    """Backbone split into eight sequential stages that exposes the last three outputs.

    Args:
        net_layers: indexable with at least eight entries. Entries 0-3 are
            single modules; entries 4-7 are iterables of modules that get
            unpacked into an ``nn.Sequential``.
    """

    def __init__(self, net_layers):
        super().__init__()
        # Stages 0-3 wrap one module each.
        for stage in range(4):
            setattr(self, 'net_layer_%d' % stage, nn.Sequential(net_layers[stage]))
        # Stages 4-7 are groups of modules and are unpacked.
        for stage in range(4, 8):
            setattr(self, 'net_layer_%d' % stage, nn.Sequential(*net_layers[stage]))

    def forward(self, x):
        """Return the outputs of stages 5, 6 and 7 as ``(x1, x2, x3)``."""
        hidden = x
        for stage in range(5):
            hidden = getattr(self, 'net_layer_%d' % stage)(hidden)
        x1 = self.net_layer_5(hidden)
        x2 = self.net_layer_6(x1)
        x3 = self.net_layer_7(x2)
        return x1, x2, x3


class Search_Wrapper(nn.Module):
    """Search network: four (mixed-cell, mixed-pool) stages, global pooling, 10-way classifier."""

    def __init__(self):
        super().__init__()
        # Each pair is built left to right, so module order stays L1, L1P, ..., L4, L4P.
        self.L1, self.L1P = Cells1(), Cells1_pool()
        self.L2, self.L2P = Cells2(), Cells2_pool()
        self.L3, self.L3P = Cells3(), Cells3_pool()
        self.L4, self.L4P = Cells4(), Cells4_pool()
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(nn.Linear(256, 10))

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        stages = (self.L1, self.L1P, self.L2, self.L2P,
                  self.L3, self.L3P, self.L4, self.L4P)
        for stage in stages:
            x = stage(x)
        batch = x.shape[0]
        flat = self.pool(x).view(batch, -1)
        return self.classifier(flat)




In [None]:
def train(nb_epoch, batch_size, store_name, resume=False, start_epoch=0, model_path=None):
    """Train a fresh ``Search_Wrapper`` by distilling the saved teacher model.

    The loss is ``0.7 * MSE(student_logits, teacher_logits) + 0.3 * CE``.
    The learning rate follows ``cosine_anneal_schedule`` over ``nb_epoch``.
    Requires CUDA. Per-epoch results are appended to ``results_train.txt``
    and ``results_test.txt`` inside ``store_name``.

    Args:
        nb_epoch: exclusive upper bound of the epoch loop and period of the
            cosine schedule.
        batch_size: training batch size; a smaller final batch is skipped.
        store_name: output directory for checkpoints and result files.
        resume: accepted for interface compatibility; not used.
        start_epoch: first epoch index of the loop.
        model_path: accepted for interface compatibility; not used.
    """
    criterionKD = Logits()

    exp_dir = store_name
    # Replaces os.stat() wrapped in a bare ``except:``, which hid every error.
    os.makedirs(exp_dir, exist_ok=True)

    use_cuda = True
    print(use_cuda)

    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/train/',
                                                transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
    # NOTE: torch.load unpickles a whole model object; only load trusted checkpoints.
    net_teacher = torch.load("/content/gdrive/MyDrive/STF/save_teacher/model99.510.pth")

    net = Search_Wrapper()

    model_info(net)
    print('Model %s created, param count: %d' %
          ('Created_model', sum([m.numel() for m in net.parameters()])))
    net = torch.nn.DataParallel(net).cuda()
    net_teacher = torch.nn.DataParallel(net_teacher).cuda()
    # The teacher only supplies targets: eval mode keeps any BatchNorm/Dropout
    # layers it contains in inference behaviour (no_grad alone does not).
    net_teacher.eval()
    print('Model %s created, param count: %d' %
          ('Created_model', sum([m.numel() for m in net_teacher.parameters()])))

    device = torch.device("cuda")
    net.to(device)

    CELoss = nn.CrossEntropyLoss()

    optimizer = optim.SGD(net.parameters(), lr=0.002, momentum=0.9, weight_decay=5e-4)

    max_val_acc = 0
    # Base LR per optimizer param group (only one group is created above).
    lr = [0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002]
    for epoch in range(start_epoch, nb_epoch):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        correct = 0
        total = 0
        num_batches = 0  # batches actually trained on (skipped ones excluded)

        # The schedule depends only on the epoch, so set it once per epoch.
        for nlr in range(len(optimizer.param_groups)):
            optimizer.param_groups[nlr]['lr'] = cosine_anneal_schedule(epoch, nb_epoch, lr[nlr])

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            # Skip the incomplete final batch.
            if inputs.shape[0] < batch_size:
                continue
            inputs, targets = inputs.to(device), targets.to(device)

            with torch.no_grad():
                _, _, _, output_teacher = net_teacher(inputs)

            optimizer.zero_grad()
            output = net(inputs)
            loss = 0.7*criterionKD(output, output_teacher) + 0.3 * CELoss(output, targets)
            loss.backward()
            optimizer.step()

            #  training log
            _, predicted = torch.max(output.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            train_loss += loss.item()
            num_batches += 1

            if batch_idx % 50 == 0 or batch_idx == len(trainloader) - 1:
                print(
                    'Step: %d | Loss1: %.3f | Acc: %.3f%% (%d/%d)' % (
                        batch_idx, train_loss / num_batches,
                        100. * float(correct) / total, correct, total))

        # max(..., 1) avoids ZeroDivisionError when every batch was skipped.
        train_acc = 100. * float(correct) / max(total, 1)
        # Average over trained batches only; the old (idx + 1) denominator
        # also counted the skipped incomplete batch.
        train_loss = train_loss / max(num_batches, 1)
        with open(exp_dir + '/results_train.txt', 'a') as file:
            file.write(
                'Iteration %d | train_acc = %.5f | train_loss = %.5f |\n' % (
                    epoch, train_acc, train_loss))

        if epoch < 50:
            val_acc, val_loss = test_search(net, CELoss, 16)
            if val_acc > max_val_acc:
                max_val_acc = val_acc
                net.cpu()
                torch.save(net, store_name + '/model%.5f.pth'%(max_val_acc))
                net.to(device)
            with open(exp_dir + '/results_test.txt', 'a') as file:
                file.write('Iteration %d, test_acc = %.5f, test_loss = %.6f\n' % (
                    epoch, val_acc, val_loss))
        else:
            # NOTE(review): from epoch 50 on there is no validation and the
            # file is named after max_val_acc, so each epoch overwrites the
            # best validated checkpoint — confirm this is intended.
            net.cpu()
            torch.save(net,  store_name + '/model%.5f.pth'%(max_val_acc))
            net.to(device)


if __name__ == '__main__':
    save_path = '/content/gdrive/MyDrive/STF/save_search'
    # makedirs(exist_ok=True) creates missing parents and avoids the
    # check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(save_path, exist_ok=True)
    # NOTE: the train() defined in the cell above never reads `resume` or
    # `model_path`, so this run always starts from a new Search_Wrapper.
    train(nb_epoch=300,  # number of epoch
          batch_size=32, # batch size
          store_name=save_path,  # folder for output
          resume=True,  # resume training from checkpoint
          start_epoch=0,  # the start epoch number when you resume the training
          model_path='/content/gdrive/MyDrive/STF/save_search/model71.pth')  # the saved model where you want to resume the training

Train driver transfer

Utils distill

In [None]:
import numpy as np
import torchvision
from torch.autograd import Variable
from torchvision import transforms
#from model import *


def cosine_anneal_schedule(t, nb_epoch, lr):
    """Return the cosine-annealed learning rate for epoch ``t``.

    The value starts at ``lr`` for ``t == 0``, decays along a half cosine
    towards 0 as ``t`` approaches ``nb_epoch``, and restarts every
    ``nb_epoch`` epochs because ``t`` is taken modulo ``nb_epoch``.
    """
    # t is treated as 0-based; use t - 1 for 1-based indexing.
    phase = np.pi * (t % nb_epoch) / nb_epoch
    return float(lr / 2 * (np.cos(phase) + 1))


def model_info(model):
    """Print a line-by-line description of a PyTorch model's parameters.

    One row is printed per named parameter (index, name, requires_grad,
    element count, shape, mean, std), followed by a summary line with the
    number of parameter tensors, total elements and trainable elements.
    A model with no parameters prints the header and a zero summary instead
    of raising ``NameError`` on the unbound loop variable.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
    n_layers = 0  # stays 0 when the model has no parameters
    for i, (name, p) in enumerate(model.named_parameters()):
        name = name.replace('module_list.', '')
        print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
        n_layers = i + 1
    print('Model Summary: %g layers, %g parameters, %g gradients\n' % (n_layers, n_p, n_g))


def test(net, net_teacher, criterion, batch_size):
    """Evaluate ``net`` on the ``test1`` image folder.

    Args:
        net: model under evaluation; switched to eval mode.
        net_teacher: kept for interface compatibility. It is switched to eval
            mode as before, but no longer run: its predictions were computed
            and then never read.
        criterion: loss called as ``criterion(output, targets)``.
        batch_size: evaluation batch size.

    Returns:
        ``(accuracy_percent, mean_batch_loss)``.
    """
    net.eval()
    net_teacher.eval()
    use_cuda = torch.cuda.is_available()
    # Inputs are moved only when CUDA is present, as before.
    device = torch.device("cuda" if use_cuda else "cpu")

    transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    testset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/test1',
                                               transform=transform_test)
    # Order does not affect the aggregate metrics, so shuffling is unnecessary.
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

    test_loss = 0
    correct = 0
    total = 0
    num_batches = 0

    # no_grad replaces the removed ``Variable(..., volatile=True)`` idiom.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            output = net(inputs)

            loss = criterion(output, targets)

            test_loss += loss.item()
            num_batches += 1
            _, predicted = torch.max(output, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            if batch_idx % 50 == 0 or batch_idx == len(testloader) - 1:
                print('Step: %d | Loss: %.3f | Acc: %.3f%% (%d/%d) |' % (
                    batch_idx, test_loss / (batch_idx + 1), 100. * float(correct) / total, correct, total))

    test_acc = 100. * float(correct) / total
    test_loss = test_loss / max(num_batches, 1)

    return test_acc, test_loss

In [None]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F


class Logits(nn.Module):
    """Logit-matching distillation loss: mean squared error between student and teacher outputs."""

    def __init__(self):
        super().__init__()

    def forward(self, out_s, out_t):
        """Return ``F.mse_loss(out_s, out_t)`` with its default mean reduction."""
        return F.mse_loss(out_s, out_t)


Save transfer

Dummy: changed architecture to reduce the parameter count

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
# changed architecture which reduced parameter size
class BasicLayer(nn.Module):
    """``PyConv2`` -> BatchNorm -> ReLU block followed by 2x2 max pooling (stride 2).

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels (also the BatchNorm width).
        kernel_size, kernel_size_: the two entries passed as ``pyconv_kernels``.
        group, group_: the two entries passed as ``pyconv_groups``.
    """

    def __init__(self, in_planes, out_planes, kernel_size, kernel_size_, group=1, group_=4):
        super().__init__()
        conv = PyConv2(
            in_planes,
            out_planes,
            pyconv_kernels=[kernel_size, kernel_size_],
            stride=1,
            pyconv_groups=[group, group_],
        )
        self.convs = nn.Sequential(conv, nn.BatchNorm2d(out_planes), nn.ReLU(inplace=True))
        self.pool = nn.MaxPool2d(2, stride=2)

    def forward(self, x):
        """Apply the conv stack and then the max pool to ``x``."""
        return self.pool(self.convs(x))

class Searched_Net(nn.Module):
    """Compact student network: four ``BasicLayer`` stages, global average pooling, 10-way linear head.

    Channel widths run 3 -> 16 -> 32 -> 64 -> 128 (the reduced-parameter
    variant); every stage uses kernels (3, 3) and groups (1, 1).
    """

    def __init__(self):
        super().__init__()
        # changed architecture which reduced parameter size
        widths = (3, 16, 32, 64, 128)
        stages = [
            BasicLayer(in_ch, out_ch, kernel_size=3, kernel_size_=3, group=1, group_=1)
            for in_ch, out_ch in zip(widths[:-1], widths[1:])
        ]
        self.convs = nn.Sequential(*stages)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        # The head takes the 128 channels produced by the last stage.
        self.classifier = nn.Sequential(nn.Linear(128, 10))

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        features = self.convs(x)
        flat = self.pool(features).view(features.shape[0], -1)
        return self.classifier(flat)





In [None]:
def train(nb_epoch, batch_size, store_name, resume=False, start_epoch=0, model_path=None):
    """Distil the saved teacher into a fresh ``Searched_Net`` (logit matching + cross-entropy).

    The loss is ``0.7 * MSE(student_logits, teacher_logits) + 0.3 * CE``.
    The learning rate follows ``cosine_anneal_schedule`` over ``nb_epoch``.
    Requires CUDA. Per-epoch results are appended to ``results_train.txt``
    and ``results_test.txt`` inside ``store_name``; the unwrapped student is
    saved whenever validation accuracy improves.

    Args:
        nb_epoch: exclusive upper bound of the epoch loop and period of the
            cosine schedule.
        batch_size: training batch size; a smaller final batch is skipped.
        store_name: output directory for checkpoints and result files.
        resume: accepted for interface compatibility; not used.
        start_epoch: first epoch index of the loop.
        model_path: accepted for interface compatibility; not used.
    """
    # Only logit-matching KD is used. The former 'sobolev'/'lwm' branches were
    # unreachable (kd_mode was fixed to 'logits') and would have called
    # Logits with four arguments.
    criterionKD = Logits()

    exp_dir = store_name
    # Replaces os.stat() wrapped in a bare ``except:``, which hid every error.
    os.makedirs(exp_dir, exist_ok=True)

    use_cuda = True
    print(use_cuda)

    # Data
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainset = torchvision.datasets.ImageFolder(root="/content/gdrive/MyDrive/STF/train",
                                                transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)

    # NOTE: torch.load unpickles a whole model object; only load trusted checkpoints.
    net_teacher = torch.load("/content/gdrive/MyDrive/STF/save_teacher/model99.510.pth")
    net = Searched_Net()
    model_info(net)
    print('Model %s created, param count: %d' %
          ('Created_model', sum([m.numel() for m in net.parameters()])))

    netp_teacher = torch.nn.DataParallel(net_teacher).cuda()
    # The teacher only supplies targets: eval mode keeps any BatchNorm/Dropout
    # layers it contains in inference behaviour from the very first epoch.
    netp_teacher.eval()
    netp = torch.nn.DataParallel(net).cuda()
    device = torch.device("cuda")
    # Fully qualified so the cell does not rely on a separate `cudnn` import.
    torch.backends.cudnn.benchmark = True

    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.002, momentum=0.9, weight_decay=5e-4)

    max_val_acc = 0
    # Base LR per optimizer param group (only one group is created above).
    lr = [0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002]
    for epoch in range(start_epoch, nb_epoch):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        correct = 0
        total = 0
        num_batches = 0  # batches actually trained on (skipped ones excluded)

        # The schedule depends only on the epoch, so set it once per epoch.
        for nlr in range(len(optimizer.param_groups)):
            optimizer.param_groups[nlr]['lr'] = cosine_anneal_schedule(epoch, nb_epoch, lr[nlr])

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            # Skip the incomplete final batch.
            if inputs.shape[0] < batch_size:
                continue
            inputs, targets = inputs.to(device), targets.to(device)

            with torch.no_grad():
                _, _, _, output_teacher = netp_teacher(inputs)

            optimizer.zero_grad()
            output = netp(inputs)
            loss = 0.7*criterionKD(output, output_teacher) + 0.3 * CELoss(output, targets)

            loss.backward()
            optimizer.step()

            #  training log
            _, predicted = torch.max(output.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            train_loss += loss.item()
            num_batches += 1

            if batch_idx % 50 == 0 or batch_idx == len(trainloader) - 1:
                print(
                    'Step: %d | Loss1: %.3f | Acc: %.3f%% (%d/%d)' % (
                        batch_idx, train_loss / num_batches,
                        100. * float(correct) / total, correct, total))

        # max(..., 1) avoids ZeroDivisionError when every batch was skipped.
        train_acc = 100. * float(correct) / max(total, 1)
        # Average over trained batches only; the old (idx + 1) denominator
        # also counted the skipped incomplete batch.
        train_loss = train_loss / max(num_batches, 1)
        with open(exp_dir + '/results_train.txt', 'a') as file:
            file.write(
                'Iteration %d | train_acc = %.5f | train_loss = %.5f |\n' % (
                    epoch, train_acc, train_loss))
        val_acc, val_loss = test(net, netp_teacher, CELoss, 16)
        if epoch <= 500:
            if val_acc > max_val_acc:
                max_val_acc = val_acc
                net.cpu()
                torch.save(net,  store_name + '/model %.5f.pth'%(max_val_acc))
                net.to(device)
            with open(exp_dir + '/results_test.txt', 'a') as file:
                file.write('Iteration %d, test_acc = %.5f, test_loss = %.6f\n' % (
                    epoch, val_acc, val_loss))
        else:
            # Past epoch 500 every epoch is saved, named by its own accuracy.
            net.cpu()
            torch.save(net, store_name + '/model %.5f.pth'%(val_acc))
            net.to(device)

if __name__ == '__main__':
    save_path = '/content/gdrive/MyDrive/STF/dummy_save_transfer_' + 'ST'
    # makedirs(exist_ok=True) creates missing parents and avoids the
    # check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(save_path, exist_ok=True)
    # NOTE: the train() defined in the cell above never reads `resume` or
    # `model_path`. With start_epoch=4 the loop (and the cosine schedule)
    # begins at epoch 4, but with a newly initialised Searched_Net.
    train(nb_epoch=300,  # number of epoch
          batch_size=32,  # batch size
          store_name=save_path,  # folder for output
          resume=True,  # resume training from checkpoint
          start_epoch=4,  # the start epoch number when you resume the training
          model_path='/content/gdrive/MyDrive/STF/dummy_save_transfer_ST/model 40.47151.pth')  # the saved model where you want to resume the training

finetunetest

In [None]:
def finetunetest(net,  criterion, batch_size):
    """Evaluate ``net`` on the ``test1`` image folder.

    Preprocessing is deterministic (resize + centre crop), matching ``test``.
    The previous random crop and random flip made the validation score, and
    therefore checkpoint selection, vary from run to run.

    Args:
        net: model under evaluation; switched to eval mode.
        criterion: loss called as ``criterion(output, targets)``.
        batch_size: evaluation batch size.

    Returns:
        ``(accuracy_percent, mean_batch_loss)``.
    """
    net.eval()

    use_cuda = torch.cuda.is_available()
    # Inputs are moved only when CUDA is present, as before.
    device = torch.device("cuda" if use_cuda else "cpu")

    transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    testset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/test1',
                                               transform=transform_test)
    # Order does not affect the aggregate metrics, so shuffling is unnecessary.
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

    test_loss = 0
    correct = 0
    total = 0
    num_batches = 0

    # no_grad replaces the removed ``Variable(..., volatile=True)`` idiom.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            output = net(inputs)

            loss = criterion(output, targets)

            test_loss += loss.item()
            num_batches += 1
            _, predicted = torch.max(output, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            if batch_idx % 50 == 0 or batch_idx == len(testloader) - 1:
                print('Step: %d | Loss: %.3f | Acc: %.3f%% (%d/%d) |' % (
                    batch_idx, test_loss / (batch_idx + 1), 100. * float(correct) / total, correct, total))

    test_acc = 100. * float(correct) / total
    test_loss = test_loss / max(num_batches, 1)

    return test_acc, test_loss

Fine-tune


In [None]:

def train(nb_epoch, batch_size, store_name, resume=False, start_epoch=0, model_path=None):
    """Fine-tune the distilled student checkpoint with plain cross-entropy.

    Loads the model from the hardcoded ``dummy_save_transfer_ST`` checkpoint
    and trains it with SGD under ``cosine_anneal_schedule``. After each epoch
    it is validated with ``finetunetest`` and saved (unwrapped) whenever
    validation accuracy improves. Per-epoch results are appended to
    ``results_train.txt`` and ``results_test.txt`` inside ``store_name``.

    Args:
        nb_epoch: exclusive upper bound of the epoch loop and period of the
            cosine schedule.
        batch_size: training batch size; a smaller final batch is skipped.
        store_name: output directory for checkpoints and result files.
        resume: accepted for interface compatibility; not used.
        start_epoch: first epoch index of the loop.
        model_path: accepted for interface compatibility; not used.
    """
    # setup output
    exp_dir = store_name
    # Replaces os.stat() wrapped in a bare ``except:``, which hid every error.
    os.makedirs(exp_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    print(use_cuda)

    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainset = torchvision.datasets.ImageFolder(root='/content/gdrive/MyDrive/STF/train',
                                                transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)

    # NOTE: torch.load unpickles a whole model object; only load trusted checkpoints.
    net = torch.load("/content/gdrive/MyDrive/STF/dummy_save_transfer_ST/model 99.21415.pth")

    print('Model %s created, param count: %d' %
          ('Created_model', sum([m.numel() for m in net.parameters()])))

    # Honour the use_cuda probe instead of calling .cuda() unconditionally.
    device = torch.device("cuda" if use_cuda else "cpu")
    # netp wraps (and moves) the same module object as `net`.
    netp = torch.nn.DataParallel(net).to(device)

    CELoss = nn.CrossEntropyLoss()

    optimizer = optim.SGD(net.parameters(), lr=0.002, momentum=0.9, weight_decay=5e-4)

    max_val_acc = 0
    # Base LR per optimizer param group (only one group is created above).
    lr = [0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002]
    for epoch in range(start_epoch, nb_epoch):
        print('\nEpoch: %d' % epoch)
        # finetunetest() puts netp in eval mode; switch wrapper and module back.
        netp.train()
        train_loss = 0
        correct = 0
        total = 0
        num_batches = 0  # batches actually trained on (skipped ones excluded)

        # The schedule depends only on the epoch, so set it once per epoch.
        for nlr in range(len(optimizer.param_groups)):
            optimizer.param_groups[nlr]['lr'] = cosine_anneal_schedule(epoch, nb_epoch, lr[nlr])

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            # Skip the incomplete final batch.
            if inputs.shape[0] < batch_size:
                continue
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            output = netp(inputs)
            loss = CELoss(output, targets)
            loss.backward()
            optimizer.step()

            #  training log
            _, predicted = torch.max(output.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            train_loss += loss.item()
            num_batches += 1

            if batch_idx % 50 == 0 or batch_idx == len(trainloader) - 1:
                print(
                    'Step: %d | Loss1: %.3f | Acc: %.3f%% (%d/%d)' % (
                        batch_idx, train_loss / num_batches,
                        100. * float(correct) / total, correct, total))

        # max(..., 1) avoids ZeroDivisionError when every batch was skipped.
        train_acc = 100. * float(correct) / max(total, 1)
        # Average over trained batches only; the old (idx + 1) denominator
        # also counted the skipped incomplete batch.
        train_loss = train_loss / max(num_batches, 1)
        with open(exp_dir + '/results_train.txt', 'a') as file:
            file.write(
                'Iteration %d | train_acc = %.5f | train_loss = %.5f |\n' % (
                    epoch, train_acc, train_loss))
        val_acc, val_loss = finetunetest(netp, CELoss, 16)

        if val_acc > max_val_acc:
            max_val_acc = val_acc
            # Save the unwrapped model on CPU, then move it back for training.
            net.cpu()
            torch.save(net, store_name + '/model %.5f.pth'%(max_val_acc))
            net.to(device)
        with open(exp_dir + '/results_test.txt', 'a') as file:
            file.write('Iteration %d, test_acc = %.5f, test_loss = %.6f\n' % (
                epoch, val_acc, val_loss))



if __name__ == '__main__':

    save_path = "/content/gdrive/MyDrive/STF/dummy_save_finetune"

    # makedirs(exist_ok=True) creates missing parents and avoids the
    # check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(save_path, exist_ok=True)
    # NOTE: the train() defined in the cell above never reads `resume` or
    # `model_path`; the checkpoint it fine-tunes is hardcoded inside train().
    train(nb_epoch=500,  # number of epoch
          batch_size=32,  # batch size
          store_name=save_path,  # folder for output
          resume=False,  # resume training from checkpoint
          start_epoch=0,  # the start epoch number when you resume the training
          model_path='')  # the saved model where you want to resume the training

Prediction

In [None]:
from __future__ import division
import torch
torch.manual_seed(0)
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
from PIL import Image, ImageOps, ImageEnhance
import cv2
from google.colab.patches import cv2_imshow


# Deterministic inference preprocessing. The random crop and flip used for
# training would make the prediction for one image vary between runs.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# NOTE: torch.load unpickles a whole model object; only load trusted checkpoints.
model=torch.load('/content/gdrive/MyDrive/STF/dummy_save_finetune/model 99.98.pth')  # Load pretrained parameters
model.eval()  # Set to eval mode to change behavior of Dropout, BatchNorm

# Class index -> human-readable label.
predictions=["safe driving","texting - right","talking on the phone - right","texting - left","talking on the phone - left","operating the radio","drinking",
"reaching behind","hair and makeup", "talking to passenger"]

path="/content/gdrive/MyDrive/STF/test1/c8/img_100015.jpg"

# Force 3 channels so grayscale/RGBA files still match the 3-channel Normalize.
img = Image.open(path).convert('RGB')  # Load image as PIL.Image
image=cv2.imread(path)  # Second copy, used only for display
x = transform(img)  # Preprocess image
x = x.unsqueeze(0)  # Add batch dimension

with torch.no_grad():  # Inference only: skip building the autograd graph
    output = model(x)  # Forward pass
pred = torch.argmax(output, 1)  # Get predicted class if multi-class classification
predicted_label = predictions[pred[0].item()]
print('Image predicted as ', predicted_label)
window=predicted_label
cv2_imshow(image)