In [1]:
from torchvision import datasets, transforms
import torch

import os

In [6]:
# Short smoke-test run of the training script: Moe1sc architecture (-a) on a resnet18 backbone (-b),
# 8 workers (-j), a single epoch, GPU 0. `-bs 256` is presumably the batch size -- confirm in train_moe.py.
!python train_moe.py -a Moe1sc -b resnet18 -j 8 --epochs 1 --gpu 0 -bs 256 --exp_str test

Use GPU: 0 for training
=> creating model 'Moe1sc'
  gn_softmax = nn.Softmax()(gn_output.mean(dim=0))
Epoch: [0][390/391], lr: 0.00200	Time 3.403 (0.738)	Data 0.001 (0.038)	Loss 4.1468 (4.7511)	Prec@1 9.375 (5.296)	Prec@5 30.000 (16.678))
Epoch: 0, Gated Network Weight Gate = [0]:0.63 [1]:0.37 
Test: [0][99/100]	Time 0.079 (0.129)	Loss 4.1902 (4.3159)	Prec@1 7.000 (9.410)	Prec@5 32.000 (27.550)))
val Results: Prec@1 9.410 Prec@5 27.550 Loss 4.31586
Best Prec@1: 9.410



In [7]:
# Print the training script's command-line usage and option list.
!python train_moe.py -h

usage: train_moe.py [-h] [-a ARCH] [-b Backbone] [--exp_str EXP_STR] [-j N]
                    [--epochs N] [--start-epoch N] [--gpu GPU] [-bs N]
                    [--lr LR] [--momentum M] [--wd W] [--seed SEED] [-p N]
                    [--root_log ROOT_LOG] [--root_model ROOT_MODEL]
                    [--r_ratio R_RATIO] [--resume PATH]

PyTorch Tiny-Imagenet Moe Training

optional arguments:
  -h, --help            show this help message and exit
  -a ARCH, --arch ARCH  model architecture: Lorot | Moe1 | Moe1flip | Moe1sc |
                        vanilla (default: vanilla)
  -b Backbone, --backbone Backbone
                        model backbone: resnet18 | resnet34 | resnet50 |
                        resnet101 | resnet152 | resnext50_32x4d |
                        resnext101_32x8d | resnext101_64x4d | wide_resnet50_2
                        | wide_resnet101_2 (default: resnet18)
  --exp_str EXP_STR     number to indicate which experiment it is
  -j N, --workers N     number

In [2]:
import models

In [4]:
# Tiny-ImageNet validation image folder (path is relative to the current working directory).
thedir = 'data/tiny-imagenet-200/val/images'

In [6]:
# Count the immediate sub-directories of `thedir`; plain files are ignored.
sum(1 for entry in os.listdir(thedir) if os.path.isdir(os.path.join(thedir, entry)))

200

In [3]:
# Alphabetically sorted names of every callable exposed by the `models` module,
# skipping double-underscore attributes.
model_names = sorted(
    attr
    for attr, value in vars(models).items()
    if callable(value) and not attr.startswith("__")
)

In [4]:
model_names

['Lorot', 'Moe1', 'Moe1flip', 'Moe1sc', 'vanilla']

In [2]:
# Per-channel normalisation statistics: 127.5 on the 0-255 pixel scale, i.e. 0.5 after
# dividing by 255, for each of the three channels.
mean = [channel / 255 for channel in (127.5,) * 3]
std = [channel / 255 for channel in (127.5,) * 3]

In [3]:
# Training pipeline: random 64x64 crop taken after 4-pixel padding (augmentation), tensor
# conversion, then the same normalisation as the evaluation pipeline so that training and
# evaluation inputs share one value range.
train_transform = transforms.Compose([
    transforms.RandomCrop(64, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

In [4]:
# Root folder of the Tiny-ImageNet dataset (relative to the current working directory).
data_target_dir = 'data/tiny-imagenet-200/'

In [5]:
# Resolve the training and validation image folders under the dataset root.
train_root, validation_root = (
    os.path.join(data_target_dir, split) for split in ('train', 'val/images')
)

# One ImageFolder dataset per split, each wired to its own transform pipeline.
train_data = datasets.ImageFolder(root=train_root, transform=train_transform)
test_data = datasets.ImageFolder(root=validation_root, transform=test_transform)

# Number of target classes used for this dataset.
num_classes = 200

In [6]:
# DataLoader settings: samples per batch and number of loader worker processes.
batch_size, workers = 128, 2

In [7]:
# Shuffled loader over the training set; pinned host memory is requested for its batches.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    num_workers=workers,
    shuffle=True,
    pin_memory=True,
)

In [8]:
# Draw a single batch from the training loader for inspection.
inputs, labels = next(iter(train_loader))

In [9]:
labels

tensor([ 86, 178,  18, 100,   1, 134, 108, 197,  61,   0, 130, 105,  77, 162,
         97, 153, 139,  10, 127,  47, 132,  51,  45,  44, 161,   8,  81,   7,
         88, 173,   3,  62, 140,   6,  68,   3,  22, 123,  54,  73,   7, 152,
         84, 154,  92,  48,  29, 126,  87,  28, 121,  81,  94,  78,  50,   6,
        196,  41,  72, 169,  46, 114, 184, 161, 155, 104,  51,  43,  33, 153,
         33,  66, 162,   7,  74, 169,   0,   2, 179, 106,  23, 114,  29,  95,
         64,  14,  16,  25,  35, 145, 138, 134, 187, 183, 156,  85,  70, 125,
        181, 173, 177,  28, 195, 176,  63,  22,  73,  75,  40,  77, 171,  96,
         76,  52,  16,  75,  33,  33,   7,  81, 107, 102, 156, 168,  61, 142,
         31, 117])

In [10]:
inputs.shape

torch.Size([128, 3, 64, 64])

In [11]:
from torchvision.utils import make_grid
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F

In [12]:
from torchvision import models

In [16]:
# Bind torchvision's model zoo to its own alias. The bare name `models` is also used in this
# notebook for the project-local module, so which module it refers to depends on the order in
# which cells were executed.
from torchvision import models as tv_models

# ResNet-18 with a 200-way classification head; no pretrained weights are requested.
model = tv_models.resnet18(num_classes=200)

In [17]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
# Sanity-check the forward pass on one training batch.
model.eval()  # switch layers such as BatchNorm to inference behaviour
with torch.no_grad():  # gradients are not needed for a shape check
    # Named `sample_*` rather than `input`/`label` so the built-in `input()` is not
    # shadowed in the notebook namespace.
    sample_inputs, sample_labels = next(iter(train_loader))
    y = model(sample_inputs)

In [19]:
y.shape

torch.Size([128, 200])

In [1]:
import argparse

In [None]:
# Scratch parser for experimenting with argparse.
parser = argparse.ArgumentParser(description='test')
# add_argument() needs at least one name or flag; called with no arguments it raises
# instead of registering anything. Register an architecture option modelled on the
# `-a/--arch` flag of train_moe.py.
parser.add_argument('-a', '--arch', metavar='ARCH', default='vanilla',
                    help='model architecture (default: vanilla)')

In [6]:
# Single-element unpacking: binds `acc` to the only item of the list.
(acc,) = [2]

In [7]:
acc

2