In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install albumentations
import albumentations as albu



In [0]:
import os
import numpy as np
import pandas as pd
import random

import torch 
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt


from torch.autograd import Variable
import torchvision.models as models
from torch import optim

from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [0]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(43)

In [9]:
IMG_DIR = "/content/drive/My Drive/Age Detection/Train/"
train_csv = pd.read_csv(r"/content/drive/My Drive/Age Detection/train.csv")
test_csv = pd.read_csv(r"/content/drive/My Drive/Age Detection/test.csv")
train_csv.head(10)

Unnamed: 0,ID,Class
0,377.jpg,MIDDLE
1,17814.jpg,YOUNG
2,21283.jpg,MIDDLE
3,16496.jpg,YOUNG
4,4487.jpg,MIDDLE
5,6283.jpg,MIDDLE
6,23495.jpg,YOUNG
7,7100.jpg,YOUNG
8,6028.jpg,YOUNG
9,22617.jpg,OLD


In [0]:
labels = {
    'OLD': 0,
    'MIDDLE': 1,
    'YOUNG': 2
}

In [0]:
class AgeDataset(Dataset):
    def __init__(self, csv_file, image_dir, transforms = None):
        super(AgeDataset, self).__init__()
        self.csv_file = csv_file
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self):
        return self.csv_file.shape[0]

    def __getitem__(self, index):
        image = plt.imread(self.image_dir + self.csv_file.iloc[index,0])
        label = labels[self.csv_file.iloc[index, 1]]
        if self.transforms:
            image = self.transforms(image = image)
        img = image['image']
        
        return img, label     

In [0]:
from albumentations.pytorch.transforms import ToTensor
def train_transform():
    return albu.Compose([
        albu.Resize(224,224),
        albu.HorizontalFlip(p=0.6),
        albu.RGBShift(),
        albu.Rotate(60),
        albu.RandomBrightness(),
        #albu.RandomCrop(200),
        albu.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensor()
    ])

def valid_transform():
    return albu.Compose([
        albu.Resize(224,224),
        albu.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensor()
        
    ])


In [0]:
train_data, valid_data = train_test_split(train_csv, test_size = 0.33, shuffle = True)

In [0]:
train_dataset = AgeDataset(train_data, IMG_DIR, train_transform())
valid_dataset = AgeDataset(valid_data, IMG_DIR, valid_transform())

In [0]:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [0]:
if device == 'cuda':
    workers = 4
else:
    workers = 0
train_loader = DataLoader(
    train_dataset,
    batch_size = 16,
    shuffle = True,
    num_workers = workers
)

valid_loader = DataLoader(
    valid_dataset, 
    batch_size = 16,
    shuffle = False,
    num_workers = workers
)

In [13]:
!pip install timm
import timm
model = timm.create_model('efficientnet_b1', pretrained = True)
model.train()
model.classifier = nn.Linear(in_features=1280, out_features=3, bias=True)

Collecting timm
[?25l  Downloading https://files.pythonhosted.org/packages/44/c7/b1ce15cfca60d10d069dafbc2138761940f73b691496fd998a680f04dad1/timm-0.1.26-py3-none-any.whl (179kB)
[K     |█▉                              | 10kB 25.9MB/s eta 0:00:01[K     |███▋                            | 20kB 1.7MB/s eta 0:00:01[K     |█████▌                          | 30kB 2.3MB/s eta 0:00:01[K     |███████▎                        | 40kB 2.6MB/s eta 0:00:01[K     |█████████▏                      | 51kB 2.0MB/s eta 0:00:01[K     |███████████                     | 61kB 2.3MB/s eta 0:00:01[K     |████████████▉                   | 71kB 2.5MB/s eta 0:00:01[K     |██████████████▋                 | 81kB 2.7MB/s eta 0:00:01[K     |████████████████▌               | 92kB 2.9MB/s eta 0:00:01[K     |██████████████████▎             | 102kB 2.8MB/s eta 0:00:01[K     |████████████████████            | 112kB 2.8MB/s eta 0:00:01[K     |██████████████████████          | 122kB 2.8MB/s eta 0:00:01

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b1-533bc792.pth" to /root/.cache/torch/checkpoints/efficientnet_b1-533bc792.pth


In [0]:
model.to(device)
optimizer1 = optim.Adam(model.parameters(), lr = 0.001)
optimizer2 = optim.SGD(model.parameters(), lr = 0.0001)
criterion = nn.CrossEntropyLoss()

from torch.optim import lr_scheduler
scheduler1 = lr_scheduler.ReduceLROnPlateau(optimizer1,factor=0.33, mode="min", patience=4)
scheduler2 = lr_scheduler.ReduceLROnPlateau(optimizer2,factor=0.33, mode="min", patience=4)


In [0]:
def train(model, trainloader, validloader, optimizer, scheduler, criterion, num_epochs = 20):
    print("Training .....")
    i=0
    train_loss_history = []
    valid_loss_history = []
    best_acc = 0
    best_model = None
    for epoch in tqdm(range(num_epochs)):
        i+=1
        running_loss =0
        running_corrects =0 
        print("Epoch: ", i)
        for image, label in tqdm(trainloader):
            image = image.to(device)
            label = label.to(device)
            target = model(image)
            _,pred = target.max(1)
            optimizer.zero_grad()
            loss = criterion(target, label)    
            running_loss += loss.item()*image.size(0)
            running_corrects += (torch.sum(pred == label)) 
            loss.backward()
            optimizer.step()
            train_loss_history.append(loss)
        
        train_loss = running_loss / train_data.shape[0]
        train_acc = (running_corrects / float(train_data.shape[0]))
        print("train_loss: ", train_loss)
        print("train_accuracy: ", train_acc)
        train_loss_history.append(train_loss)
        
        running_loss =0
        running_corrects =0 
        for image,label in tqdm(validloader):
            image = image.to(device)
            label = label.to(device)
            target = model(image)
            _,pred = target.max(1)
            loss = criterion(target, label)    
            running_loss += loss.item()*image.size(0)
            running_corrects += (torch.sum(pred == label))

        valid_loss = running_loss / valid_data.shape[0]
        valid_acc =(running_corrects / float(valid_data.shape[0]))
        print("validation_ loss: " , valid_loss)
        print("validation_accuracy: " , valid_acc)
        valid_loss_history.append(valid_loss)
        
        scheduler.step(valid_loss)

        if valid_acc > best_acc:
            best_acc = valid_acc
            best_model = model

    print("Best_accuracy: ", best_acc)

    print("Training Ends")

    return best_model

In [18]:
model = train(model, train_loader, valid_loader, optimizer1, scheduler1, criterion, 10)

Training .....


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Epoch:  1


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.27380689496716754
train_accuracy:  tensor(0.8953, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.35272878875950553
validation_accuracy:  tensor(0.8659, device='cuda:0')
Epoch:  2


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.2496885127541078
train_accuracy:  tensor(0.9049, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.34382148383505395
validation_accuracy:  tensor(0.8683, device='cuda:0')
Epoch:  3


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.23663851219897883
train_accuracy:  tensor(0.9076, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.38107616513681697
validation_accuracy:  tensor(0.8610, device='cuda:0')
Epoch:  4


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.22342792766298178
train_accuracy:  tensor(0.9156, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.3457713008543947
validation_accuracy:  tensor(0.8735, device='cuda:0')
Epoch:  5


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.20973727563121664
train_accuracy:  tensor(0.9216, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.35232651415145605
validation_accuracy:  tensor(0.8753, device='cuda:0')
Epoch:  6


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.19022608131042903
train_accuracy:  tensor(0.9278, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.37747838660850064
validation_accuracy:  tensor(0.8670, device='cuda:0')
Epoch:  7


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.1895326533591333
train_accuracy:  tensor(0.9269, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.34493452948437997
validation_accuracy:  tensor(0.8819, device='cuda:0')
Epoch:  8


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.10731383680832514
train_accuracy:  tensor(0.9608, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.31562629596594355
validation_accuracy:  tensor(0.9004, device='cuda:0')
Epoch:  9


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.07675740624509617
train_accuracy:  tensor(0.9732, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.3549865431519908
validation_accuracy:  tensor(0.8971, device='cuda:0')
Epoch:  10


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.07002406361308727
train_accuracy:  tensor(0.9745, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.3577727733418393
validation_accuracy:  tensor(0.8954, device='cuda:0')

Best_accuracy:  tensor(0.9004, device='cuda:0')
Training Ends


In [0]:
torch.save(model, '/content/drive/My Drive/Age Detection/effb1_1a.pth')

In [32]:
model = train(model, train_loader, valid_loader, optimizer2, scheduler2, criterion, 5)

Training .....


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

Epoch:  1


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.045442047601894894
train_accuracy:  tensor(0.9845, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.33611012165690424
validation_accuracy:  tensor(0.9070, device='cuda:0')
Epoch:  2


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.04725652652446066
train_accuracy:  tensor(0.9840, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.3378986478814381
validation_accuracy:  tensor(0.9062, device='cuda:0')
Epoch:  3


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.04430809830006553
train_accuracy:  tensor(0.9857, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.33942495005412693
validation_accuracy:  tensor(0.9074, device='cuda:0')
Epoch:  4


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.04259369770522895
train_accuracy:  tensor(0.9850, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.33876625611223504
validation_accuracy:  tensor(0.9061, device='cuda:0')
Epoch:  5


HBox(children=(FloatProgress(value=0.0, max=834.0), HTML(value='')))


train_loss:  0.045493023733303196
train_accuracy:  tensor(0.9839, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=411.0), HTML(value='')))


validation_ loss:  0.3384783065994536
validation_accuracy:  tensor(0.9076, device='cuda:0')

Best_accuracy:  tensor(0.9076, device='cuda:0')
Training Ends


In [0]:
torch.save(model, '/content/drive/My Drive/Age Detection/effb1_1b.pth')

In [65]:
model = torch.load('/content/drive/My Drive/Age Detection/effb1_1b.pth')
model.eval()

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): Swish()
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): Swish()
        (se): SqueezeExcite(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): Swish()
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
      )
 

In [66]:
sub = pd.read_csv(r"/content/drive/My Drive/Age Detection/sample_submission_sDO3m7O.csv")
sub.head()

Unnamed: 0,Class,ID
0,MIDDLE,25321.jpg
1,MIDDLE,989.jpg
2,MIDDLE,19277.jpg
3,MIDDLE,13093.jpg
4,MIDDLE,5367.jpg


In [67]:
output = pd.DataFrame(index=sub.index,columns = sub.keys())
output['ID'] = sub['ID']
output.shape

(6636, 2)

In [68]:
sub[sub.loc[:,'ID'] == '25321.jpg'].iloc[:,0]

0    MIDDLE
Name: Class, dtype: object

In [0]:
class AgeTestDataset(Dataset):
    def __init__(self, csv_file, image_dir, transforms = None):
        self.csv_file = csv_file
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self):
        return self.csv_file.shape[0]

    def __getitem__(self, index):
        image = plt.imread(self.image_dir + self.csv_file.iloc[index,1])
        label = labels[self.csv_file.iloc[index, 0]]
        if self.transforms:
            image = self.transforms(image = image)
        img = image['image']
        
        return img, self.csv_file.iloc[index,1]       

In [0]:
TEST_DIR = "/content/drive/My Drive/Age Detection/Test/"
testdata = AgeTestDataset(sub, TEST_DIR, valid_transform())
testloader =  DataLoader(testdata, batch_size = 32)

In [0]:
key_to_label = {
    0:'OLD',
    1: 'MIDDLE',
    2:'YOUNG'
}

In [0]:
import time
def test_submission(model):
    since = time.time()
    model.eval()
    for image, label in tqdm(testloader):
        image = image.to(device)
        target = model(image)
        #print(label)
        _,pred = target.max(1)
        #print(pred)
        out = [key_to_label[f] for f in pred.cpu().numpy()]
        for i in range(len(label)):
            output.loc[output['ID'] == label[i], 'Class'] = out[i]
    time_elapsed = time.time() - since
    print('Run complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

In [73]:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model.eval()
model = model.to(device)
test_submission(model)

HBox(children=(FloatProgress(value=0.0, max=208.0), HTML(value='')))


Run complete in 0m 44s


In [74]:
print(sub)

       Class         ID
0     MIDDLE  25321.jpg
1     MIDDLE    989.jpg
2     MIDDLE  19277.jpg
3     MIDDLE  13093.jpg
4     MIDDLE   5367.jpg
...      ...        ...
6631  MIDDLE   1876.jpg
6632  MIDDLE  14940.jpg
6633  MIDDLE   3638.jpg
6634  MIDDLE    376.jpg
6635  MIDDLE   9357.jpg

[6636 rows x 2 columns]


In [0]:
output.to_csv("submission_effb1_1d.csv", index=False)