In [1]:
import torch
from torch import nn,optim
import sys
import os
import shutil
import torchvision

sys.path.append('../../code')
import pandas as pd
import time
import d2lzh_pytorch as d2l
device=('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
def read_label_file(data_dir,label_file,train_dir,valid_ratio):
    with open(os.path.join(data_dir,label_file),'r') as f:
        lines=f.readlines()[1:]
        tokens=[l.rstrip().split(',') for l in lines]
        idx_label=dict(((int(idx),label) for idx,label in tokens))
    labels=set(idx_label.values())
    n_train_valid=len(os.listdir(os.path.join(data_dir,train_dir)))
    n_train=int(n_train_valid*(1-valid_ratio))
    assert 0<n_train<n_train_valid
    return n_train//len(labels),idx_label

In [3]:
def mkdir_if_not_exist(path):
    if not os.path.exists(os.path.join(*path)):
        os.makedirs(os.path.join(*path))

In [4]:
def reorg_train_valid(data_dir,train_dir,input_dir,n_train_per_label,idx_label):
    label_count={}
    for train_file in os.listdir(os.path.join(data_dir,train_dir)):
        idx=int(train_file.split('.')[0])
        label=idx_label[idx]
        mkdir_if_not_exist([data_dir,input_dir,'train_valid',label])
        shutil.copy(os.path.join(data_dir,train_dir,train_file),
                    os.path.join(data_dir,input_dir,'train_valid',label))
        if label not in label_count or label_count[label]<n_train_per_label:
            mkdir_if_not_exist([data_dir,input_dir,'train',label])
            shutil.copy(os.path.join(data_dir,train_dir,train_file),
                        os.path.join(data_dir,input_dir,'train',label))
            label_count[label]=label_count.get(label,0)+1
        else:
            mkdir_if_not_exist([data_dir,input_dir,'valid',label])
            shutil.copy(os.path.join(data_dir,train_dir,train_file),
                        os.path.join(data_dir,input_dir,'valid',label))

In [5]:
def reorg_test(data_dir,test_dir,input_dir):
    mkdir_if_not_exist([data_dir,input_dir,'test','unknown'])
    for test_file in os.listdir(os.path.join(data_dir,test_dir)):
        shutil.copy(os.path.join(data_dir,test_dir,test_file),
               os.path.join(data_dir,input_dir,'test','unknown'))

In [6]:
def reorg_cifar10_data(data_dir,label_file,train_dir,test_dir,input_dir,valid_ratio):
    n_trin_per_label,idx_label=read_label_file(data_dir,label_file,train_dir,valid_ratio)
    reorg_train_valid(data_dir,train_dir,input_dir,n_trin_per_label,idx_label)
    reorg_test(data_dir,test_dir,input_dir)

In [7]:
train_dir,test_dir,batch_size='train','test',128
data_dir,label_file='../../data/kaggle_cifar10/','trainLabels.csv'
input_dir,valid_ratio='train_valid_test',0.1
#reorg_cifar10_data(data_dir,label_file,train_dir,test_dir,input_dir,valid_ratio)

In [8]:
transform_train=torchvision.transforms.Compose([
    torchvision.transforms.Resize(40),
    torchvision.transforms.RandomResizedCrop(32,scale=(0.64,1.0),ratio=(1.0,1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.4914,0.4822,0.4465],
                                     [0.2023,0.1994,0.2010])
    ])

In [9]:
transform_test=torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.4914,0.4822,0.4465],
                                     [0.2023,0.1994,0.2010])
    ])

In [10]:
train_ds=torchvision.datasets.ImageFolder(os.path.join(data_dir,input_dir,'train'),transform=transform_train)
valid_ds=torchvision.datasets.ImageFolder(os.path.join(data_dir,input_dir,'valid'),transform=transform_test)
train_valid_ds=torchvision.datasets.ImageFolder(os.path.join(data_dir,input_dir,'train_valid'),transform=transform_train)
test_ds=torchvision.datasets.ImageFolder(os.path.join(data_dir,input_dir,'test'),transform=transform_test)

In [11]:
def get_key (dict, value):
    return [k for k, v in dict.items() if v == value]
train_ds.class_to_idx[1]

KeyError: 1

In [12]:
train_iter=torch.utils.data.DataLoader(train_ds,batch_size,shuffle=True)
valid_iter=torch.utils.data.DataLoader(valid_ds,batch_size,shuffle=True)
train_valid_iter=torch.utils.data.DataLoader(train_valid_ds,batch_size,shuffle=True)
test_iter=torch.utils.data.DataLoader(test_ds,batch_size,shuffle=False)

In [13]:
for X,y in train_iter:
    print(X.shape)
    print(y.shape)
    break

torch.Size([128, 3, 32, 32])
torch.Size([128])


In [14]:
#net=d2l.resnet18()
net=torchvision.models.resnet18(pretrained=True)
loss=nn.CrossEntropyLoss()

In [15]:
net.fc=nn.Linear(512,10)

In [16]:
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [17]:
def train(net,train_iter,valid_iter,num_epochs,lr,wd,device,lr_period,lr_decay):
    optimizer=optim.SGD(net.parameters(),lr=lr,momentum=0.9,weight_decay=wd)
    #optimizer=optim.Adam(net.parameters(),lr=lr,weight_decay=wd)
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n,start=0.0,0.0,0,time.time()
        batch_count=0
        if epoch>0 and epoch % lr_period==0:
            for param_group in optim.param_groups:
                param_group['lr']=optim.lr*lr_decay
        for X,y in train_iter:
            y=y.to(device)
            X=X.to(device)
            net=net.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc=d2l.evaluate_accuracy(valid_iter,net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

In [18]:
num_epochs, lr, wd = 35, 0.1, 5e-4
lr_period, lr_decay,= 80, 0.1
train(net, train_iter, valid_iter, num_epochs, lr, wd, device, lr_period,
      lr_decay)

epoch 1, loss 2.9531, train acc 0.188, test acc 0.253, time 88.1 sec
epoch 2, loss 1.8087, train acc 0.334, test acc 0.408, time 80.6 sec
epoch 3, loss 1.5727, train acc 0.421, test acc 0.456, time 80.6 sec
epoch 4, loss 1.4482, train acc 0.469, test acc 0.461, time 81.0 sec
epoch 5, loss 1.3497, train acc 0.512, test acc 0.536, time 80.8 sec
epoch 6, loss 1.2522, train acc 0.553, test acc 0.552, time 80.6 sec
epoch 7, loss 1.1679, train acc 0.586, test acc 0.564, time 80.4 sec
epoch 8, loss 1.0997, train acc 0.610, test acc 0.590, time 80.2 sec
epoch 9, loss 1.0472, train acc 0.629, test acc 0.597, time 80.2 sec
epoch 10, loss 0.9961, train acc 0.649, test acc 0.620, time 80.2 sec
epoch 11, loss 0.9684, train acc 0.659, test acc 0.611, time 80.7 sec
epoch 12, loss 0.9396, train acc 0.672, test acc 0.659, time 80.5 sec
epoch 13, loss 0.9162, train acc 0.680, test acc 0.680, time 80.5 sec
epoch 14, loss 0.8875, train acc 0.687, test acc 0.649, time 80.6 sec
epoch 15, loss 0.8780, train 

KeyboardInterrupt: 

In [None]:
preds=[]
train(net,train_valid_iter,test_iter,num_epochs,lr,wd,device,lr_period,
      lr_decay)

In [None]:
preds=[]
for X,_ in test_iter:
    y_hat=net.to(device)((X.to(device)))
    preds.extend(y_hat.argmax(axis=1))
sorted_ids=list(range(1,len(test_ds)+1))
sorted_ids.sort(key=lambda x:str(x))
df=pd.DataFrame({'id':sorted_ids,'label':preds})
df['label']=df['label'].apply(lambda x: get_key(train_valid_ds.class_to_idx,x))
df.to_csv('submission.csv',index=False)