In [53]:
!pip install albumentations
import albumentations as albu



In [0]:
import os
import numpy as np
import pandas as pd
import random

import torch 
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt


from torch.autograd import Variable
import torchvision.models as models
from torch import optim

from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [0]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(43)

In [56]:
IMG_DIR = "/content/drive/My Drive/Age Detection/Train/"
train_csv = pd.read_csv(r"/content/drive/My Drive/Age Detection/train.csv")
test_csv = pd.read_csv(r"/content/drive/My Drive/Age Detection/test.csv")
train_csv.head(10)

Unnamed: 0,ID,Class
0,377.jpg,MIDDLE
1,17814.jpg,YOUNG
2,21283.jpg,MIDDLE
3,16496.jpg,YOUNG
4,4487.jpg,MIDDLE
5,6283.jpg,MIDDLE
6,23495.jpg,YOUNG
7,7100.jpg,YOUNG
8,6028.jpg,YOUNG
9,22617.jpg,OLD


In [0]:
labels = {
    'OLD': 0,
    'MIDDLE': 1,
    'YOUNG': 2
}

In [0]:
class AgeDataset(Dataset):
    def __init__(self, csv_file, image_dir, transforms = None):
        super(AgeDataset, self).__init__()
        self.csv_file = csv_file
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self):
        return self.csv_file.shape[0]

    def __getitem__(self, index):
        image = plt.imread(self.image_dir + self.csv_file.iloc[index,0])
        label = labels[self.csv_file.iloc[index, 1]]
        if self.transforms:
            image = self.transforms(image = image)
        img = image['image']
        
        return img, label     

In [0]:
from albumentations.pytorch.transforms import ToTensor
def train_transform():
    return albu.Compose([
        albu.Resize(224,224),
        albu.VerticalFlip(p=0.4),
        albu.HorizontalFlip(p=0.6),
        albu.RGBShift(),
        albu.Rotate(60),
        ToTensor()
    ])

def valid_transform():
    return albu.Compose([
        albu.Resize(224,224),
        ToTensor()
    ])


In [0]:
train_data, valid_data = train_test_split(train_csv, test_size = 0.33, shuffle = True)

In [0]:
train_dataset = AgeDataset(train_data, IMG_DIR, train_transform())
valid_dataset = AgeDataset(valid_data, IMG_DIR, valid_transform())

In [0]:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [0]:
if device == 'cuda':
    workers = 4
else:
    workers = 0
train_loader = DataLoader(
    train_dataset,
    batch_size = 32,
    shuffle = True,
    num_workers = workers
)

valid_loader = DataLoader(
    valid_dataset, 
    batch_size = 32,
    shuffle = False,
    num_workers = workers
)

In [0]:
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, 3)

In [72]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [0]:
model.to(device)
optimizer = optim.SGD(model.parameters(), lr = 0.001)
criterion = nn.CrossEntropyLoss()

In [0]:
def train(model, trainloader, validloader, optimizer, criterion, num_epochs = 20):
    print("Training .....")
    i=0
    train_loss_history = []
    valid_loss_history = []
    best_acc = 0
    best_model = None
    for epoch in tqdm(range(num_epochs)):
        i+=1
        running_loss =0
        running_corrects =0 
        print("Epoch: ", i)
        for image, label in tqdm(trainloader):
            image = image.to(device)
            label = label.to(device)
            target = model(image)
            _,pred = target.max(1)
            optimizer.zero_grad()
            loss = criterion(target, label)    
            running_loss += loss.item()*image.size(0)
            running_corrects += (torch.sum(pred == label)) 
            loss.backward()
            optimizer.step()
            train_loss_history.append(loss)
        
        train_loss = running_loss / train_data.shape[0]
        train_acc = (running_corrects / float(train_data.shape[0]))
        print("train_loss: ", train_loss)
        print("train_accuracy: ", train_acc)
        train_loss_history.append(train_loss)
        
        running_loss =0
        running_corrects =0 
        for image,label in tqdm(validloader):
            image = image.to(device)
            label = label.to(device)
            target = model(image)
            _,pred = target.max(1)
            loss = criterion(target, label)    
            running_loss += loss.item()*image.size(0)
            running_corrects += (torch.sum(pred == label))

        valid_loss = running_loss / valid_data.shape[0]
        valid_acc =(running_corrects / float(valid_data.shape[0]))
        print("validation_ loss: " , valid_loss)
        print("validation_accuracy: " , valid_acc)
        valid_loss_history.append(valid_loss)

        if valid_acc > best_acc:
            best_acc = valid_acc
            best_model = model

    print("Best_accuracy: ", best_acc)

    print("Training Ends")

    return best_model

In [77]:
model = train(model, train_loader, valid_loader, optimizer, criterion, 20)

Training .....


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))

Epoch:  1


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7947586336316059
train_accuracy:  tensor(0.6468, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7693586200679849
validation_accuracy:  tensor(0.6634, device='cuda:0')
Epoch:  2


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7914819059910387
train_accuracy:  tensor(0.6471, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7704971792636569
validation_accuracy:  tensor(0.6610, device='cuda:0')
Epoch:  3


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7928086079748136
train_accuracy:  tensor(0.6490, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7692292337548988
validation_accuracy:  tensor(0.6625, device='cuda:0')
Epoch:  4


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7875210539533288
train_accuracy:  tensor(0.6519, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7676889572010717
validation_accuracy:  tensor(0.6672, device='cuda:0')
Epoch:  5


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7846305999493433
train_accuracy:  tensor(0.6501, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.765427756416578
validation_accuracy:  tensor(0.6648, device='cuda:0')
Epoch:  6


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7867770585384899
train_accuracy:  tensor(0.6479, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7664295531495156
validation_accuracy:  tensor(0.6578, device='cuda:0')
Epoch:  7


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7924103448887391
train_accuracy:  tensor(0.6456, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7643498863998224
validation_accuracy:  tensor(0.6646, device='cuda:0')
Epoch:  8


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7891399751737789
train_accuracy:  tensor(0.6477, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7643008451789168
validation_accuracy:  tensor(0.6649, device='cuda:0')
Epoch:  9


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7894017728424201
train_accuracy:  tensor(0.6504, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7668412925083173
validation_accuracy:  tensor(0.6640, device='cuda:0')
Epoch:  10


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7849009890440853
train_accuracy:  tensor(0.6510, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7619942232868882
validation_accuracy:  tensor(0.6677, device='cuda:0')
Epoch:  11


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7849551458055579
train_accuracy:  tensor(0.6517, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7622403427425413
validation_accuracy:  tensor(0.6687, device='cuda:0')
Epoch:  12


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7896453535046419
train_accuracy:  tensor(0.6464, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.76207903012563
validation_accuracy:  tensor(0.6654, device='cuda:0')
Epoch:  13


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.785141292215064
train_accuracy:  tensor(0.6473, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7608120045735967
validation_accuracy:  tensor(0.6640, device='cuda:0')
Epoch:  14


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.783780603110455
train_accuracy:  tensor(0.6543, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7639899296309781
validation_accuracy:  tensor(0.6616, device='cuda:0')
Epoch:  15


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7827310279864211
train_accuracy:  tensor(0.6495, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7665541100164476
validation_accuracy:  tensor(0.6564, device='cuda:0')
Epoch:  16


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7829897724242281
train_accuracy:  tensor(0.6478, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.763198692571048
validation_accuracy:  tensor(0.6639, device='cuda:0')
Epoch:  17


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7810229674677732
train_accuracy:  tensor(0.6500, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7614944621314921
validation_accuracy:  tensor(0.6630, device='cuda:0')
Epoch:  18


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7816898789489485
train_accuracy:  tensor(0.6499, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7603748192306631
validation_accuracy:  tensor(0.6631, device='cuda:0')
Epoch:  19


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7803402430893036
train_accuracy:  tensor(0.6531, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.7599608064931828
validation_accuracy:  tensor(0.6657, device='cuda:0')
Epoch:  20


HBox(children=(FloatProgress(value=0.0, max=417.0), HTML(value='')))


train_loss:  0.7835466385718951
train_accuracy:  tensor(0.6471, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))


validation_ loss:  0.758116593887534
validation_accuracy:  tensor(0.6694, device='cuda:0')

Best_accuracy:  tensor(0.6694, device='cuda:0')
Training Ends


In [0]:
torch.save(model, "/content/drive/My Drive/Age Detection/res50_2.pth")

In [79]:
sub = pd.read_csv(r"/content/drive/My Drive/Age Detection/sample_submission_sDO3m7O.csv")
sub.head()

Unnamed: 0,Class,ID
0,MIDDLE,25321.jpg
1,MIDDLE,989.jpg
2,MIDDLE,19277.jpg
3,MIDDLE,13093.jpg
4,MIDDLE,5367.jpg


In [80]:
output = pd.DataFrame(index=sub.index,columns = sub.keys())
output['ID'] = sub['ID']
output.shape

(6636, 2)

In [81]:
sub[sub.loc[:,'ID'] == '25321.jpg'].iloc[:,0]

0    MIDDLE
Name: Class, dtype: object

In [0]:
class AgeTestDataset(Dataset):
    def __init__(self, csv_file, image_dir, transforms = None):
        self.csv_file = csv_file
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self):
        return self.csv_file.shape[0]

    def __getitem__(self, index):
        image = plt.imread(self.image_dir + self.csv_file.iloc[index,1])
        label = labels[self.csv_file.iloc[index, 0]]
        if self.transforms:
            image = self.transforms(image = image)
        img = image['image']
        
        return img, self.csv_file.iloc[index,1]       

In [0]:
TEST_DIR = "/content/drive/My Drive/Age Detection/Test/"
testdata = AgeTestDataset(sub, TEST_DIR, valid_transform())
testloader =  DataLoader(testdata, batch_size = 32)

In [0]:
key_to_label = {
    0:'OLD',
    1: 'MIDDLE',
    2:'YOUNG'
}

In [0]:
import time
def test_submission(model):
    since = time.time()
    model.eval()
    for image, label in tqdm(testloader):
        image = image.to(device)
        target = model(image)
        #print(label)
        _,pred = target.max(1)
        #print(pred)
        out = [key_to_label[f] for f in pred.cpu().numpy()]
        for i in range(len(label)):
            output.loc[output['ID'] == label[i], 'Class'] = out[i]
    time_elapsed = time.time() - since
    print('Run complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

In [115]:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model = model.to(device)
test_submission(model)

HBox(children=(FloatProgress(value=0.0, max=208.0), HTML(value='')))


Run complete in 1m 25s


In [116]:
print(sub)

       Class         ID
0     MIDDLE  25321.jpg
1     MIDDLE    989.jpg
2     MIDDLE  19277.jpg
3     MIDDLE  13093.jpg
4     MIDDLE   5367.jpg
...      ...        ...
6631  MIDDLE   1876.jpg
6632  MIDDLE  14940.jpg
6633  MIDDLE   3638.jpg
6634  MIDDLE    376.jpg
6635  MIDDLE   9357.jpg

[6636 rows x 2 columns]


In [0]:
output.to_csv("submission_res50_2.csv", index=False)