In [1]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
warnings.filterwarnings("ignore")

In [2]:
class LeaveDataDataset(Dataset):
    """Leaf-image dataset driven by a CSV file.

    The CSV is read with pandas; column 0 holds the image path and, for the
    'train' and 'valid' modes, column 1 holds the class label.

    The train/valid split is positional (no shuffling): the first
    ``int(len(csv) * train_ratio)`` rows form the training set and the
    remaining rows form the validation set. In 'test' mode every row is used
    and no labels are read.

    Args:
        train_csv_path: Path of the CSV file to read.
        mode: One of 'train', 'valid' or 'test'.
        train_ratio: Fraction of rows assigned to the training split.
        resize_height: Stored on the instance but not applied; the transform
            pipeline always resizes to 224x224.
        resize_width: Stored on the instance but not applied (see above).

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """

    _VALID_MODES = ('train', 'valid', 'test')

    def __init__(self,train_csv_path,mode='train',train_ratio=0.8,
                 resize_height=256,resize_width=256):
        # Fail fast: an unknown mode would otherwise leave self.image unset and
        # only surface later as an AttributeError.
        if mode not in self._VALID_MODES:
            raise ValueError(
                "mode must be one of {}, got {!r}".format(self._VALID_MODES, mode))

        self.resize_height = resize_height
        self.resize_width = resize_width
        self.train_ratio = train_ratio
        self.data = pd.read_csv(train_csv_path)
        self.mode = mode
        self.data_len = len(self.data)
        self.train_len = int(self.data_len * self.train_ratio)

        if mode == 'train':
            rows = slice(None, self.train_len)
        elif mode == 'valid':
            rows = slice(self.train_len, None)
        else:  # 'test': use every row, labels are not available
            rows = slice(None)

        self.image = np.asarray(self.data.iloc[rows, 0])
        self.label = None if mode == 'test' else np.asarray(self.data.iloc[rows, 1])
        self.len = len(self.image)

        # Built on first access in __getitem__ and then reused for every sample.
        self._transform = None

        print('Finished reading the {} set of Leaves Dataset ({} samples found)'
              .format(mode, self.len))

    def _get_transform(self):
        """Return the (cached) preprocessing pipeline for this dataset's mode."""
        if self._transform is None:
            steps = [transforms.Resize((224, 224))]
            if self.mode == 'train':
                # Random flips are augmentation and therefore training-only.
                steps.append(transforms.RandomHorizontalFlip(p=0.5))
                steps.append(transforms.RandomVerticalFlip(p=0.5))
            steps.append(transforms.ToTensor())
            self._transform = transforms.Compose(steps)
        return self._transform

    def __getitem__(self, index):
        """Load and preprocess one sample.

        Returns:
            The image tensor in 'test' mode, otherwise ``(image_tensor, label_num)``
            where ``label_num`` is looked up in the module-level ``class_to_num``
            mapping (it must be defined before samples are requested).
        """
        image_path = self.image[index]
        # The context manager closes the file handle; convert('RGB') guarantees
        # three channels even for grayscale, palette or RGBA files.
        with Image.open(image_path) as image:
            img = self._get_transform()(image.convert('RGB'))

        if self.mode == 'test':
            return img

        label = self.label[index]
        label_num = class_to_num[label]
        return img,label_num

    def __len__(self):
        """Number of samples in the selected split."""
        return self.len

In [3]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when feature extracting.

    Args:
        model: Module whose ``parameters()`` are updated in place.
        feature_extracting: When truthy, every parameter gets
            ``requires_grad = False``; when falsy the model is left untouched.

    Returns:
        None. The model is modified in place.
    """
    if not feature_extracting:
        return
    for param in model.parameters():
        param.requires_grad = False

def res_model(num_classes, feature_extract = False, use_pretrained=True):
    """Create a ResNet-34 classifier with a fresh ``num_classes``-way head.

    Args:
        num_classes: Output size of the replacement fully connected layer.
        feature_extract: Forwarded to ``set_parameter_requires_grad``; when
            truthy the backbone parameters are frozen before the head is
            replaced, so only the new head remains trainable.
        use_pretrained: Forwarded to ``models.resnet34`` as ``pretrained``.

    Returns:
        The ResNet-34 module with ``fc`` replaced by a one-layer
        ``nn.Sequential`` wrapping the new ``nn.Linear``.
    """
    backbone = models.resnet34(pretrained=use_pretrained)

    # Freeze first: the head created below is new and keeps requires_grad=True.
    set_parameter_requires_grad(backbone, feature_extract)

    # Keep the Sequential wrapper so state-dict keys stay "fc.0.*".
    head = nn.Linear(backbone.fc.in_features, num_classes)
    backbone.fc = nn.Sequential(head)
    return backbone

In [4]:
def train_leaf(net, num_epochs, lr, device,best_acc):
    """Train ``net`` on the leaf dataset and checkpoint the best weights.

    Each epoch runs one pass over the training split followed by one pass over
    the validation split. Whenever validation accuracy exceeds the best value
    seen so far, the state dict of ``net`` is written to ``model_path``.

    Relies on these module-level names being defined before the call:
    ``train_path``, ``batch_size``, ``model_path`` and ``LeaveDataDataset``.

    Args:
        net: Model to train; moved to ``device``.
        num_epochs: Number of epochs to run.
        lr: Learning rate for the SGD optimizer.
        device: Device the model and batches are moved to.
        best_acc: Validation accuracy a new checkpoint has to beat.

    Returns:
        The best validation accuracy after training (``best_acc`` unchanged if
        no epoch improved on it).
    """
    print(device)
    net = net.to(device)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)

    train_dataset = LeaveDataDataset(train_path,mode='train')
    val_dataset = LeaveDataDataset(train_path,mode='valid')

    train_iter = DataLoader(dataset=train_dataset,batch_size=batch_size, shuffle=True)
    # Order does not affect the aggregated validation metrics, so no shuffling.
    val_iter = DataLoader(dataset=val_dataset,batch_size=batch_size, shuffle=False)

    # Create the checkpoint directory up front so a missing folder cannot
    # abort the run at the first save, after a full epoch of work.
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)

    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        print('Train Epoch: {}/{}'.format(epoch, num_epochs),'train len: {}'.format(len(train_iter)))
        train_loss, train_acc, n = 0.0, 0.0, 0
        for X, y in tqdm(train_iter):
            X = X.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            # The loss is a batch mean; weight it by batch size so the epoch
            # average stays correct when the last batch is smaller.
            train_loss += l.item() * y.shape[0]
            train_acc += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        train_loss /= n
        train_acc /= n
        print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}")

        # ---- validation pass ----
        net.eval()
        print('Val Epoch: {}/{}'.format(epoch, num_epochs),'val len: {}'.format(len(val_iter)))
        val_loss, val_acc, m = 0.0, 0.0, 0
        with torch.no_grad():
            for X, y in tqdm(val_iter):
                X = X.to(device)
                y = y.to(device)
                y_hat = net(X)
                l = loss(y_hat, y)
                val_loss += l.item() * y.shape[0]
                val_acc += (y_hat.argmax(dim=1) == y).sum().item()
                m += y.shape[0]
        val_loss /= m
        val_acc /= m
        print(f"Epoch {epoch}: Val Loss={val_loss:.4f}, Val Acc={val_acc:.4f}")

        if val_acc > best_acc:
            best_acc = val_acc
            # Save the model that was actually trained (the `net` argument),
            # not whatever the global `model` happens to point to.
            torch.save(net.state_dict(), model_path)
            print('saving model with acc {:.3f}'.format(best_acc))

    return best_acc

In [5]:
# --- Reproducibility -------------------------------------------------------
# Seed before the model is built: the new classification head, DataLoader
# shuffling and the random flips all draw from these generators.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Training hyper-parameters ---------------------------------------------
num_epochs = 20
batch_size = 16
lr = 0.05
best_acc = 0.1  # a checkpoint is only written once validation accuracy exceeds this

# --- Paths and device ------------------------------------------------------
model_path = './working/leaf_resnet34.pth'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_path = 'train.csv'
test_path = 'test.csv'

# --- Label mappings --------------------------------------------------------
# class_to_num: class label -> integer index; num_to_class: the reverse lookup.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load .npy
# files produced by a trusted source.
class_to_num = np.load('class_to_num.npy', allow_pickle=True).item()
num_to_class = np.load('num_to_class.npy', allow_pickle=True).item()
n_classes = len(class_to_num)

# --- Model -----------------------------------------------------------------
model = res_model(n_classes)

In [6]:
# Run the full training loop with the settings from the configuration cell.
train_leaf(
    net=model,
    num_epochs=num_epochs,
    lr=lr,
    device=device,
    best_acc=best_acc,
)

cuda
Finished reading the train set of Leaves Dataset (14682 samples found)
Finished reading the valid set of Leaves Dataset (3671 samples found)
Train Epoch: 0/20 train len: 918


100%|██████████| 918/918 [07:00<00:00,  2.18it/s]


Epoch 0: Train Loss=2.3664, Train Acc=0.3903
Val Epoch: 0/20 val len: 230


100%|██████████| 230/230 [00:27<00:00,  8.33it/s]


Epoch 0: Val Loss=1.5379, Val Acc=0.5530
saving model with acc 0.553
Train Epoch: 1/20 train len: 918


100%|██████████| 918/918 [03:32<00:00,  4.33it/s]


Epoch 1: Train Loss=1.0713, Train Acc=0.6776
Val Epoch: 1/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.69it/s]


Epoch 1: Val Loss=1.2680, Val Acc=0.6227
saving model with acc 0.623
Train Epoch: 2/20 train len: 918


100%|██████████| 918/918 [03:32<00:00,  4.33it/s]


Epoch 2: Train Loss=0.7347, Train Acc=0.7668
Val Epoch: 2/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.83it/s]


Epoch 2: Val Loss=1.0419, Val Acc=0.6873
saving model with acc 0.687
Train Epoch: 3/20 train len: 918


100%|██████████| 918/918 [03:31<00:00,  4.33it/s]


Epoch 3: Train Loss=0.5287, Train Acc=0.8353
Val Epoch: 3/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 10.97it/s]


Epoch 3: Val Loss=0.9374, Val Acc=0.7284
saving model with acc 0.728
Train Epoch: 4/20 train len: 918


100%|██████████| 918/918 [03:31<00:00,  4.34it/s]


Epoch 4: Train Loss=0.4170, Train Acc=0.8685
Val Epoch: 4/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 10.97it/s]


Epoch 4: Val Loss=0.7145, Val Acc=0.7873
saving model with acc 0.787
Train Epoch: 5/20 train len: 918


100%|██████████| 918/918 [03:33<00:00,  4.30it/s]


Epoch 5: Train Loss=0.3388, Train Acc=0.8928
Val Epoch: 5/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 11.07it/s]


Epoch 5: Val Loss=0.4256, Val Acc=0.8613
saving model with acc 0.861
Train Epoch: 6/20 train len: 918


100%|██████████| 918/918 [03:41<00:00,  4.14it/s]


Epoch 6: Train Loss=0.2974, Train Acc=0.9015
Val Epoch: 6/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.46it/s]


Epoch 6: Val Loss=0.7856, Val Acc=0.7815
Train Epoch: 7/20 train len: 918


100%|██████████| 918/918 [03:36<00:00,  4.24it/s]


Epoch 7: Train Loss=0.2477, Train Acc=0.9185
Val Epoch: 7/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.92it/s]


Epoch 7: Val Loss=0.4965, Val Acc=0.8447
Train Epoch: 8/20 train len: 918


100%|██████████| 918/918 [03:31<00:00,  4.35it/s]


Epoch 8: Train Loss=0.2150, Train Acc=0.9285
Val Epoch: 8/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 11.06it/s]


Epoch 8: Val Loss=0.5972, Val Acc=0.8300
Train Epoch: 9/20 train len: 918


100%|██████████| 918/918 [03:34<00:00,  4.28it/s]


Epoch 9: Train Loss=0.1752, Train Acc=0.9416
Val Epoch: 9/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 11.01it/s]


Epoch 9: Val Loss=0.2748, Val Acc=0.9207
saving model with acc 0.921
Train Epoch: 10/20 train len: 918


100%|██████████| 918/918 [03:30<00:00,  4.35it/s]


Epoch 10: Train Loss=0.1700, Train Acc=0.9410
Val Epoch: 10/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 11.10it/s]


Epoch 10: Val Loss=0.3167, Val Acc=0.8978
Train Epoch: 11/20 train len: 918


100%|██████████| 918/918 [03:30<00:00,  4.37it/s]


Epoch 11: Train Loss=0.1573, Train Acc=0.9471
Val Epoch: 11/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 11.17it/s]


Epoch 11: Val Loss=0.6868, Val Acc=0.8071
Train Epoch: 12/20 train len: 918


100%|██████████| 918/918 [03:30<00:00,  4.36it/s]


Epoch 12: Train Loss=0.1298, Train Acc=0.9549
Val Epoch: 12/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 11.18it/s]


Epoch 12: Val Loss=0.4315, Val Acc=0.8701
Train Epoch: 13/20 train len: 918


100%|██████████| 918/918 [03:31<00:00,  4.33it/s]


Epoch 13: Train Loss=0.1115, Train Acc=0.9619
Val Epoch: 13/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.95it/s]


Epoch 13: Val Loss=0.2185, Val Acc=0.9316
saving model with acc 0.932
Train Epoch: 14/20 train len: 918


100%|██████████| 918/918 [03:33<00:00,  4.29it/s]


Epoch 14: Train Loss=0.0899, Train Acc=0.9689
Val Epoch: 14/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.77it/s]


Epoch 14: Val Loss=0.2060, Val Acc=0.9406
saving model with acc 0.941
Train Epoch: 15/20 train len: 918


100%|██████████| 918/918 [03:34<00:00,  4.28it/s]


Epoch 15: Train Loss=0.0843, Train Acc=0.9711
Val Epoch: 15/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.95it/s]


Epoch 15: Val Loss=0.2568, Val Acc=0.9251
Train Epoch: 16/20 train len: 918


100%|██████████| 918/918 [03:36<00:00,  4.25it/s]


Epoch 16: Train Loss=0.0782, Train Acc=0.9719
Val Epoch: 16/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.67it/s]


Epoch 16: Val Loss=0.2135, Val Acc=0.9341
Train Epoch: 17/20 train len: 918


100%|██████████| 918/918 [03:32<00:00,  4.31it/s]


Epoch 17: Train Loss=0.0673, Train Acc=0.9777
Val Epoch: 17/20 val len: 230


100%|██████████| 230/230 [00:21<00:00, 10.93it/s]


Epoch 17: Val Loss=0.1827, Val Acc=0.9442
saving model with acc 0.944
Train Epoch: 18/20 train len: 918


100%|██████████| 918/918 [03:31<00:00,  4.33it/s]


Epoch 18: Train Loss=0.0758, Train Acc=0.9725
Val Epoch: 18/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 10.98it/s]


Epoch 18: Val Loss=0.2173, Val Acc=0.9346
Train Epoch: 19/20 train len: 918


100%|██████████| 918/918 [03:31<00:00,  4.34it/s]


Epoch 19: Train Loss=0.0607, Train Acc=0.9779
Val Epoch: 19/20 val len: 230


100%|██████████| 230/230 [00:20<00:00, 11.04it/s]


Epoch 19: Val Loss=0.1727, Val Acc=0.9472
saving model with acc 0.947


In [8]:
# Predict a label for every test image and write the submission CSV.
saveFileName = './resnet34/submission.csv'
# Create the output folder first so a missing directory cannot fail the cell
# after inference has already run.
Path(saveFileName).parent.mkdir(parents=True, exist_ok=True)

test_dataset = LeaveDataDataset(test_path, mode='test')
# shuffle=False keeps predictions in the same order as the rows of the test CSV.
test_iter = DataLoader(dataset=test_dataset,batch_size=batch_size, shuffle=False)

model = model.to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

predictions = []
with torch.no_grad():
    for X in tqdm(test_iter):
        X = X.to(device)
        logits = model(X)
        predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

# Translate predicted class indices back to class names.
preds = [num_to_class[i] for i in predictions]

test_data = pd.read_csv(test_path)
# A length mismatch would otherwise be silently padded with NaN labels.
assert len(preds) == len(test_data), (
    "got {} predictions for {} test rows".format(len(preds), len(test_data)))
test_data['label'] = preds
submission = test_data[['image', 'label']]
submission.to_csv(saveFileName, index=False)
print("Done!!!!!!!!!!!!!!!!!!!!!!!!!!!")

Finished reading the test set of Leaves Dataset (8800 samples found)


100%|██████████| 550/550 [00:49<00:00, 11.03it/s]

Done!!!!!!!!!!!!!!!!!!!!!!!!!!!



