In [None]:
import os
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Work from the competition folder on Drive: the relative paths used later in
# this notebook ('train', 'training data dic.txt', 'wrong_label8030.txt',
# 'inception_best.ckpt') are resolved against this directory.
os.chdir('/content/drive/MyDrive/esun_ai_competition/')

In [None]:
import sys
import numpy as np
import pandas as pd
import cv2
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as tvm
from torch.nn import init
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

In [None]:
def readfile(path, word2idx, wrong_label):
  """Load every image in `path` into memory and derive its integer class label.

  File names are expected to look like ``<id>_<label>...``: the text before the
  first underscore is the sample id and the first character after it is the
  labelled word.

  Args:
    path: Directory containing the image files.
    word2idx: Mapping from word to class index. It must contain the key
      'isnull', which is used for every word that is not in the mapping.
    wrong_label: Mapping from sample id to a corrected word. When a sample id
      is present, the corrected word replaces the one parsed from the file name.

  Returns:
    A tuple ``(x, y)`` where ``x`` is a uint8 array of shape (N, 64, 64, 3)
    holding the images resized to 64x64 (channel order as returned by
    ``cv2.imread``) and ``y`` is an int64 array of shape (N,) with the class
    indices. Rows follow the sorted order of the file names.

  Raises:
    ValueError: If ``cv2.imread`` cannot decode one of the files.
    IndexError: If a file name contains no underscore.
  """
  def to_index(word):
    # Words missing from the dictionary collapse into the catch-all class.
    return word2idx[word] if word in word2idx else word2idx['isnull']

  image_dir = sorted(os.listdir(path))
  x = np.zeros((len(image_dir), 64, 64, 3), dtype=np.uint8)
  # `np.int` was removed in NumPy 1.24; use an explicit fixed-width integer.
  y = np.zeros(len(image_dir), dtype=np.int64)
  for i, file in enumerate(tqdm(image_dir)):
    full_path = os.path.join(path, file)
    img = cv2.imread(full_path)
    if img is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise ValueError(f"cv2.imread could not decode image: {full_path}")
    x[i] = cv2.resize(img, (64, 64))

    parts = file.split('_')
    idx = parts[0]
    label = parts[1][0]
    # A manual correction, when present, takes precedence over the file name.
    word = wrong_label[idx] if idx in wrong_label else label
    y[i] = to_index(word)
  return x, y

In [None]:
# Training-time preprocessing with light augmentation: upscale to 330x330,
# rotate by up to +/-10 degrees, jitter colour slightly, then take a random
# 299x299 crop before converting to a normalised tensor.
train_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(330, 330)),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        transforms.RandomCrop(size=(299, 299)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Evaluation-time preprocessing: deterministic resize straight to 299x299 with
# the same normalisation statistics as the training pipeline.
test_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(299, 299)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
class ImgDataset(Dataset):
    """Dataset over in-memory samples with optional labels and transform.

    Args:
        x: Indexable collection of samples.
        y: Optional labels aligned with ``x``; converted to a LongTensor.
        transform: Optional callable applied to a sample when it is fetched.

    Indexing yields ``(sample, label)`` when labels were given, otherwise just
    the sample.
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        # Unlabelled datasets (e.g. for inference) return the sample alone.
        if self.y is None:
            return sample
        return sample, self.y[index]

class SeparableConv2d(nn.Module):
    """Depthwise convolution followed by a 1x1 pointwise convolution.

    ``conv1`` applies one spatial filter per input channel
    (``groups=in_channels``); ``pointwise`` then mixes channels with a 1x1
    kernel to produce ``out_channels`` feature maps.
    """

    def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
        super().__init__()

        # Depthwise stage: channel count is unchanged, each channel filtered alone.
        self.conv1 = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            bias=bias,
        )
        # Pointwise stage: 1x1 convolution that changes the channel count.
        self.pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=bias,
        )

    def forward(self, x):
        return self.pointwise(self.conv1(x))


class Block(nn.Module):
    """Residual block made of ReLU -> SeparableConv2d -> BatchNorm units.

    Args:
        in_filters: Channels of the block input.
        out_filters: Channels of the block output.
        reps: Number of separable-convolution units in the main branch.
        strides: When not 1, a 3x3 max-pool with this stride ends the main
            branch and the shortcut convolution uses the same stride.
        start_with_relu: If False, the leading ReLU of the main branch is
            dropped; if True it is replaced by a non-inplace ReLU.
        grow_first: If True the first unit changes the channel count,
            otherwise the last unit does.
    """

    def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
        super().__init__()

        # A 1x1 projection is needed on the shortcut whenever the main branch
        # changes the channel count or the spatial resolution.
        needs_projection = out_filters != in_filters or strides != 1
        if needs_projection:
            self.skip = nn.Conv2d(in_filters, out_filters, 1, stride=strides, bias=False)
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)

        def unit(c_in, c_out):
            # One ReLU -> separable 3x3 conv -> batch-norm step; the ReLU
            # instance is shared between all units of the block.
            return [
                self.relu,
                SeparableConv2d(c_in, c_out, 3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
            ]

        layers = []
        width = in_filters
        if grow_first:
            layers += unit(in_filters, out_filters)
            width = out_filters

        for _ in range(reps - 1):
            layers += unit(width, width)

        if not grow_first:
            layers += unit(in_filters, out_filters)

        if start_with_relu:
            # The first activation must not be in-place: the block input is
            # reused by the shortcut branch in forward().
            layers[0] = nn.ReLU(inplace=False)
        else:
            layers = layers[1:]

        if strides != 1:
            layers.append(nn.MaxPool2d(3, strides, 1))
        self.rep = nn.Sequential(*layers)

    def forward(self, inp):
        out = self.rep(inp)

        if self.skip is None:
            shortcut = inp
        else:
            shortcut = self.skipbn(self.skip(inp))

        out += shortcut
        return out



class Xception(nn.Module):
    """
    Xception image classifier, following the architecture specified in
    https://arxiv.org/pdf/1610.02357.pdf

    Layout: a two-convolution stem, twelve residual ``Block`` modules built
    from depthwise-separable convolutions, two further separable convolutions,
    global average pooling and one linear classification layer.
    """

    def __init__(self, num_classes=1000):
        """Build the network.

        Args:
            num_classes: Size of the output of the final linear layer.
        """
        super().__init__()

        self.num_classes = num_classes

        # Stem: two plain 3x3 convolutions, each followed by BN and ReLU in forward().
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        # Blocks 1-3 widen the channels (64 -> 128 -> 256 -> 728) with stride 2.
        self.block1 = Block(64, 128, 2, 2, start_with_relu=False, grow_first=True)
        self.block2 = Block(128, 256, 2, 2, start_with_relu=True, grow_first=True)
        self.block3 = Block(256, 728, 2, 2, start_with_relu=True, grow_first=True)

        # Blocks 4-11 are eight identical 728-channel blocks with stride 1.
        # setattr keeps the attribute names block4 ... block11.
        for idx in range(4, 12):
            setattr(
                self,
                f"block{idx}",
                Block(728, 728, 3, 1, start_with_relu=True, grow_first=True),
            )

        # Block 12 grows to 1024 channels in its last unit and uses stride 2.
        self.block12 = Block(728, 1024, 2, 2, start_with_relu=True, grow_first=False)

        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(1536)

        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(2048)

        self.fc = nn.Linear(2048, num_classes)

        # Weight initialisation: zero-mean normal with variance
        # 2 / (kernel_h * kernel_w * out_channels) for every convolution,
        # scale 1 / shift 0 for every batch-norm layer.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def forward(self, x):
        # Stem.
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))

        # Residual blocks block1 ... block12, applied in order.
        for idx in range(1, 13):
            x = getattr(self, f"block{idx}")(x)

        # Two final separable convolutions.
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))

        # Global average pool to one vector per sample, then classify.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [None]:
# Build the word <-> class-index lookup tables from the dictionary file
# (one word per line). Index 0 is reserved for the catch-all 'isnull' class;
# the word on the n-th line (1-based) gets index n.
idx2word = {0: 'isnull'}
word2idx = {'isnull': 0}
# The encoding is stated explicitly so the words decode the same way on every
# machine instead of depending on the locale's default encoding.
with open('training data dic.txt', encoding='utf-8') as f:
  for i, line in enumerate(f, start=1):
    # Only the line terminator is removed; lines are never skipped, so the
    # index of a word always equals its line number.
    word = line.strip('\n')
    idx2word[i] = word
    word2idx[word] = i

In [None]:
# Load manual label corrections. Each non-blank line has the form
# "<sample id>_<corrected word>"; the result maps sample id -> corrected word.
wrong_label = {}
# The encoding is stated explicitly so the words decode the same way on every
# machine instead of depending on the locale's default encoding.
with open('wrong_label8030.txt', encoding='utf-8') as f:
  for line in f:
    entry = line.strip('\n')
    if not entry.strip():
      # Tolerate blank lines (e.g. an extra newline at the end of the file),
      # which would otherwise raise IndexError below.
      continue
    fields = entry.split('_')
    # A non-blank line without an underscore still raises IndexError: a
    # malformed correction should be noticed, not silently ignored.
    wrong_label[fields[0]] = fields[1]

In [None]:
#!tar xvf train.tar

In [None]:
# Read every image under ./train into memory together with its class index.
# Labels come from the file names, with entries in `wrong_label` taking
# precedence (see readfile).
x, y = readfile('train', word2idx, wrong_label)

HBox(children=(FloatProgress(value=0.0, max=68804.0), HTML(value='')))




In [None]:
# Hold out 10% of the samples for validation; the fixed random_state makes the
# split reproducible.
train_x, val_x, train_y, val_y = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=0,
)
print(f"train size = {len(train_x)}")
print(f"val size = {len(val_x)}")

train size = 61923
val size = 6881


In [None]:
# batch_size = 32

# train_set = ImgDataset(train_x, train_y, train_transform)
# val_set = ImgDataset(val_x, val_y, test_transform)
# train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

# model = tvm.inception_v3(init_weights=True, pretrained=False, num_classes=801).cuda()

# model.aux_logits = False
# loss = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-5)
# num_epoch = 40
# best_acc = 0.0

# for epoch in range(num_epoch):
#     train_acc = 0.0
#     train_loss = 0.0

#     model.train()
#     for i, data in enumerate(tqdm(train_loader)):
#         optimizer.zero_grad()
#         train_pred = model(data[0].cuda())
#         batch_loss = loss(train_pred, data[1].cuda())
#         batch_loss.backward()
#         optimizer.step()

#         train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
#         train_loss += batch_loss.item()

#     print(f"Epoch {epoch + 1} | loss = {train_loss / train_set.__len__()}, acc = {train_acc / train_set.__len__()}")
  
#     model.eval()
#     with torch.no_grad():
#       val_acc = 0.0
#       for i, data in enumerate(tqdm(val_loader)):
#           val_pred = model(data[0].cuda())
#           val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
#       if val_acc > best_acc:
#         best_acc = val_acc
#         torch.save(model, 'inception_best.pt')

#       print(f"Validation | acc = {val_acc / val_set.__len__()}")



In [None]:
batch_size = 32

# Training samples go through the augmenting pipeline, validation samples
# through the deterministic one.
train_set = ImgDataset(train_x, train_y, transform=train_transform)
val_set = ImgDataset(val_x, val_y, transform=test_transform)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)

In [None]:
num_epoch = 30
checkpoint_path = "inception_best.ckpt"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _count_correct(logits, targets):
    """Return how many rows of `logits` have their arg-max equal to `targets`."""
    return (logits.argmax(dim=1) == targets).sum().item()


def _evaluate(model, loader, device):
    """Return the number of correctly classified samples in `loader`.

    The model is switched to eval mode and gradients are disabled.
    """
    model.eval()
    correct = 0
    with torch.no_grad():
        for images, targets in tqdm(loader):
            targets = targets.to(device)
            correct += _count_correct(model(images.to(device)), targets)
    return correct


# The checkpoint is a fully pickled model object (it is written below with
# torch.save(model, ...)), so it must be loaded with weights_only=False on
# PyTorch versions where weights_only defaults to True.
# NOTE(security): unpickling can execute arbitrary code; only load checkpoints
# that you created yourself.
try:
    model = torch.load(checkpoint_path, map_location=device, weights_only=False)
except TypeError:
    # Older PyTorch releases do not know the weights_only argument.
    model = torch.load(checkpoint_path, map_location=device)
model.aux_logits = False
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-5)

# Training resumes from an existing checkpoint, so the score to beat is the
# loaded model's own validation result. Starting from 0 would let the first
# epoch overwrite the checkpoint even when it is worse.
# best_acc holds a count of correct validation samples, not a fraction.
best_acc = _evaluate(model, val_loader, device)
print(f"Loaded checkpoint | validation acc = {best_acc / len(val_set)}")

for epoch in range(num_epoch):
    train_acc = 0
    train_loss = 0.0

    model.train()
    for images, targets in tqdm(train_loader):
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        train_pred = model(images)
        batch_loss = loss(train_pred, targets)
        batch_loss.backward()
        optimizer.step()

        train_acc += _count_correct(train_pred, targets)
        # CrossEntropyLoss returns the mean over the batch. Weight it by the
        # batch size so that dividing by the dataset size below yields the
        # true per-sample mean loss.
        train_loss += batch_loss.item() * targets.size(0)

    print(f"Epoch {epoch + 1} | loss = {train_loss / len(train_set)}, acc = {train_acc / len(train_set)}")

    val_acc = _evaluate(model, val_loader, device)
    if val_acc > best_acc:
        best_acc = val_acc
        # The whole model object is saved (not just a state_dict) because the
        # load at the top of this cell expects that format.
        torch.save(model, checkpoint_path)

    print(f"Validation | acc = {val_acc / len(val_set)}")

