In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
from tensorflow.keras import layers
from glob import glob
import gc

In [None]:
!pip install timm

Collecting timm
  Downloading timm-0.5.4-py3-none-any.whl (431 kB)
[?25l[K     |▊                               | 10 kB 25.2 MB/s eta 0:00:01[K     |█▌                              | 20 kB 32.7 MB/s eta 0:00:01[K     |██▎                             | 30 kB 21.7 MB/s eta 0:00:01[K     |███                             | 40 kB 9.0 MB/s eta 0:00:01[K     |███▉                            | 51 kB 8.7 MB/s eta 0:00:01[K     |████▋                           | 61 kB 10.3 MB/s eta 0:00:01[K     |█████▎                          | 71 kB 10.4 MB/s eta 0:00:01[K     |██████                          | 81 kB 10.6 MB/s eta 0:00:01[K     |██████▉                         | 92 kB 11.8 MB/s eta 0:00:01[K     |███████▋                        | 102 kB 9.7 MB/s eta 0:00:01[K     |████████▍                       | 112 kB 9.7 MB/s eta 0:00:01[K     |█████████▏                      | 122 kB 9.7 MB/s eta 0:00:01[K     |█████████▉                      | 133 kB 9.7 MB/s eta 0:00:01[K  

In [None]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
import time
from PIL import Image 

In [None]:
import torchvision
import torchvision.transforms as transforms
# Training-time augmentation pipeline, applied in the order listed.
transform = transforms.Compose(
    [
        # Bring every image to a common 512x512 size first.
        transforms.Resize(size=(512, 512)),
        # Perspective warp with distortion scale 0.5, applied with probability 0.2.
        transforms.RandomPerspective(distortion_scale=0.5, p=0.2),
        transforms.RandomHorizontalFlip(),
        # Random crop that is resized to 400x400, so the output is 400x400.
        transforms.RandomResizedCrop(size=400),
        # Rotate by a random angle in [-90, 90] degrees without growing the canvas.
        transforms.RandomRotation(degrees=90, expand=False),
        transforms.ToTensor(),
    ]
)


In [None]:
# Mount Google Drive at /content/gdrive; the data paths used in the following
# cells all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Collect the image paths of both splits in a deterministic (sorted) order.
# NOTE(review): the two patterns spell the Drive root differently
# ("My Drive" vs "MyDrive") - confirm both resolve to the same folder.
train = sorted(glob('/content/gdrive/My Drive/Colab/train/train/*.png'))
test = sorted(glob('/content/gdrive/MyDrive/Colab/test/test/*.png'))

# glob() silently returns an empty list for a wrong or unmounted path, which
# would otherwise only surface much later as an empty DataLoader.
if not train:
    raise FileNotFoundError("No training images matched the train glob pattern")
if not test:
    raise FileNotFoundError("No test images matched the test glob pattern")

In [None]:
# Load the training metadata table; its "label" column is encoded in the next cell.
train_y = pd.read_csv("/content/gdrive/My Drive/Colab/train_df.csv")

In [None]:
# Assign each distinct label a contiguous integer id, following sorted label order.
label_unique = {
    name: index
    for index, name in enumerate(sorted(np.unique(train_y["label"])))
}

# Encode the training labels with that mapping (plain Python list of ints).
train_labels = [label_unique[name] for name in train_y["label"]]

In [None]:
# Number of rows in the training label table.
len(train_y)

4277

In [None]:
class Custom_dataset(Dataset):
    """Image dataset that lazily loads images from file paths.

    Args:
        img_paths: Indexable collection of image file paths.
        labels: Indexable collection of labels, aligned with ``img_paths``.
        mode: ``'train'`` or ``'test'``. Stored for callers that inspect it;
            which transform is applied depends only on ``transform``.
        transform: Callable applied to each loaded PIL image. When ``None``,
            a deterministic ``Resize((512, 512))`` + ``ToTensor()`` pipeline
            is used, so every item is a tensor regardless of ``mode``.

    Each item is an ``(image, label)`` pair. Images are converted to RGB on
    load, and a single-channel tensor produced by a custom transform is
    expanded to three channels.
    """

    def __init__(self, img_paths, labels, mode='train', transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode
        self.transform = transform
        # Built on first use so the fallback pipeline is not re-created per item.
        self._default_transform = None

    def __len__(self):
        return len(self.img_paths)

    def _load_image(self, img_path):
        """Open ``img_path`` and return it as an RGB PIL image."""
        # The context manager releases the file handle; convert() returns an
        # independent in-memory copy, and normalises grayscale / palette /
        # RGBA inputs to exactly three channels.
        with Image.open(img_path) as img:
            return img.convert('RGB')

    def _get_transform(self):
        """Return the transform given at construction, or the default one."""
        if self.transform is not None:
            return self.transform
        if self._default_transform is None:
            self._default_transform = transforms.Compose([
                transforms.Resize((512, 512)),
                transforms.ToTensor(),
            ])
        return self._default_transform

    def __getitem__(self, idx):
        img = self._load_image(self.img_paths[idx])
        # Use the transform stored on this instance, never a module-level global.
        img = self._get_transform()(img)

        # A custom transform may still emit one channel (e.g. a grayscale
        # step); replicate it so downstream models always see 3 channels.
        if torch.is_tensor(img) and img.dim() == 3 and img.size(0) == 1:
            img = img.repeat(3, 1, 1)

        label = self.labels[idx]
        return img, label

class Network(nn.Module):
    """Thin ``nn.Module`` wrapper around a timm classification model.

    Args:
        num_classes: Size of the classification head. Defaults to 88.
        model_name: timm architecture name passed to ``timm.create_model``.
            Defaults to ``'efficientnet_b0'``.
        pretrained: Whether timm should load pretrained weights.
            Defaults to ``True``.

    With no arguments the module is built with the defaults listed above.
    """

    def __init__(self, num_classes=88, model_name='efficientnet_b0', pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=num_classes
        )

    def forward(self, x):
        """Return the wrapped model's output for the input batch ``x``."""
        return self.model(x)

In [None]:
batch_size = 32
# NOTE(review): the training cell assigns `epochs` again before its loop.
epochs = 3

# Train: augmented samples, reshuffled on every pass over the loader.
train_dataset = Custom_dataset(
    img_paths=np.array(train),
    labels=np.array(train_labels),
    mode='train',
    transform=transform,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test: no ground truth here, so each image is paired with the placeholder
# label "tmp"; the order is kept fixed.
test_dataset = Custom_dataset(
    img_paths=np.array(test),
    labels=np.array(["tmp"] * len(test)),
    mode='test',
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
torch.cuda.empty_cache()
# Use the GPU when one is visible; otherwise fall back to the CPU so the
# notebook still runs (slowly) instead of failing on the first `.to(...)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Place the model via the shared `device` object rather than a second
# hard-coded device string.
# NOTE(review): `net` is not referenced by any other visible cell - the
# training cell builds its own `model`.
net = Network().to(device)

In [None]:
def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` measured against ``real``."""
    return f1_score(real, pred, average="macro")

model = Network().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
# Mixed precision is only enabled on CUDA; on any other device both the
# scaler and autocast become no-ops and training runs in full precision.
use_amp = device.type == 'cuda'
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

# NOTE(review): `best` is never read or updated in this cell.
best = 0
epochs = 25
for epoch in range(epochs):
    start = time.time()
    train_loss = 0
    train_pred = []
    # Not named `train_y`: that name holds the label DataFrame loaded earlier,
    # and overwriting it would break re-running the label-encoding cell.
    train_true = []
    model.train()
    for images, targets in train_loader:
        optimizer.zero_grad()
        # The loader already yields tensors; move/cast them instead of
        # copy-constructing new ones with torch.tensor(...).
        x = images.to(device=device, dtype=torch.float32)
        y = targets.to(device=device, dtype=torch.long)
        with torch.cuda.amp.autocast(enabled=use_amp):
            pred = model(x)
            # Computed inside autocast so the loss op also gets an
            # autocast-selected precision rather than raw half-precision logits.
            loss = criterion(pred, y)

        # Scale the loss before backward, then step and update the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Running mean of the per-batch loss over the epoch.
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_true += y.detach().cpu().numpy().tolist()

    train_f1 = score_function(train_true, train_pred)

    # Elapsed time for this epoch and a naive estimate for the remaining ones.
    TIME = time.time() - start
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')

epoch : 1/25    time : 279s/6698s
TRAIN    loss : 1.21894    f1 : 0.16224
epoch : 2/25    time : 258s/5944s
TRAIN    loss : 0.76358    f1 : 0.25753
epoch : 3/25    time : 258s/5678s
TRAIN    loss : 0.65116    f1 : 0.34094
epoch : 4/25    time : 259s/5439s
TRAIN    loss : 0.60641    f1 : 0.33786
epoch : 5/25    time : 258s/5164s
TRAIN    loss : 0.58085    f1 : 0.36299
epoch : 6/25    time : 260s/4936s
TRAIN    loss : 0.53569    f1 : 0.39167
epoch : 7/25    time : 261s/4704s
TRAIN    loss : 0.50039    f1 : 0.43289
epoch : 8/25    time : 264s/4487s
TRAIN    loss : 0.47557    f1 : 0.46627
epoch : 9/25    time : 255s/4084s
TRAIN    loss : 0.49175    f1 : 0.44593
epoch : 10/25    time : 255s/3828s
TRAIN    loss : 0.45456    f1 : 0.47877
epoch : 11/25    time : 255s/3569s
TRAIN    loss : 0.43245    f1 : 0.50414
epoch : 12/25    time : 256s/3322s
TRAIN    loss : 0.41802    f1 : 0.51655
epoch : 13/25    time : 255s/3060s
TRAIN    loss : 0.40393    f1 : 0.53200
epoch : 14/25    time : 254s/2799s

In [None]:
# Switch to evaluation mode and collect one predicted class index per test image.
model.eval()
f_pred = []

with torch.no_grad():
    # The second element of each batch is the placeholder label; ignore it.
    for images, _ in test_loader:
        # The loader already yields a tensor; move/cast it rather than
        # copy-constructing a new one with torch.tensor(...).
        x = images.to(device=device, dtype=torch.float32)
        # Autocast is only enabled when running on CUDA.
        with torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
            pred = model(x)
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

# Invert the label -> id mapping to turn predicted ids back into label strings.
label_decoder = {val: key for key, val in label_unique.items()}

f_result = [label_decoder[result] for result in f_pred]

In [None]:
# Load the submission template and fill its "label" column with the decoded
# predictions in a single chained expression.
submission = (
    pd.read_csv("/content/gdrive/MyDrive/anomaly/sample_submission.csv")
    .assign(label=f_result)
)

submission

In [None]:
# Write the filled-in submission table to disk without the DataFrame index column.
submission.to_csv("4th_result.csv", index = False)