In [0]:
!pip install albumentations
import albumentations as albu



In [0]:
import os
import numpy as np
import pandas as pd
import random

import torch 
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt


from torch.autograd import Variable
import torchvision.models as models
from torch import optim

from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [0]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's `random`, NumPy and PyTorch
            (CPU and all CUDA devices).
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # hash randomisation of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; silently ignored
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick different (non-deterministic) kernels between runs.
    torch.backends.cudnn.benchmark = False
seed_everything(43)

In [0]:
# Locations on the mounted Google Drive: the folder of training images and
# the train / test CSV files.
IMG_DIR = "/content/drive/My Drive/Age Detection/Train/"
train_csv = pd.read_csv(r"/content/drive/My Drive/Age Detection/train.csv")
test_csv = pd.read_csv(r"/content/drive/My Drive/Age Detection/test.csv")
# Preview the first ten rows of the training table.
train_csv.head(10)

Unnamed: 0,ID,Class
0,377.jpg,MIDDLE
1,17814.jpg,YOUNG
2,21283.jpg,MIDDLE
3,16496.jpg,YOUNG
4,4487.jpg,MIDDLE
5,6283.jpg,MIDDLE
6,23495.jpg,YOUNG
7,7100.jpg,YOUNG
8,6028.jpg,YOUNG
9,22617.jpg,OLD


In [0]:
# Integer class index for each age-group name used in the `Class` column.
labels = {name: index for index, name in enumerate(('OLD', 'MIDDLE', 'YOUNG'))}

In [0]:
class AgeDataset(Dataset):
    """Labelled image dataset backed by a DataFrame of file names and classes.

    Args:
        csv_file: DataFrame whose first column holds the image file name and
            whose second column holds the class name (a key of `labels`).
        image_dir: Directory that contains the image files.
        transforms: Optional callable invoked as `transforms(image=array)`
            that returns a mapping with the result under the key 'image'.
            When omitted, the image array is returned as read from disk.
    """

    def __init__(self, csv_file, image_dir, transforms = None):
        super(AgeDataset, self).__init__()
        self.csv_file = csv_file
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return self.csv_file.shape[0]

    def __getitem__(self, index):
        """Return `(image, label_index)` for the row at position `index`."""
        file_name = self.csv_file.iloc[index, 0]
        # os.path.join works whether or not image_dir ends with a separator.
        image = plt.imread(os.path.join(self.image_dir, file_name))
        label = labels[self.csv_file.iloc[index, 1]]
        # Compare against None explicitly: without a pipeline the raw array is
        # returned instead of being indexed with 'image' (which would raise).
        if self.transforms is not None:
            image = self.transforms(image = image)['image']

        return image, label

In [0]:
from albumentations.pytorch.transforms import ToTensor
def train_transform():
    """Build the albumentations pipeline used for training images.

    Steps, in order: resize to 224x224, vertical flip (p=0.4), horizontal
    flip (p=0.6), RGB shift, rotation with limit 60, then `ToTensor`.
    """
    augmentation_steps = [
        albu.Resize(224, 224),
        albu.VerticalFlip(p=0.4),
        albu.HorizontalFlip(p=0.6),
        albu.RGBShift(),
        albu.Rotate(60),
        ToTensor(),
    ]
    return albu.Compose(augmentation_steps)

def valid_transform():
    """Build the albumentations pipeline used for validation/test images.

    No augmentation: resize to 224x224 followed by `ToTensor`.
    """
    preprocessing_steps = [albu.Resize(224, 224), ToTensor()]
    return albu.Compose(preprocessing_steps)


In [0]:
# Hold out 33% of the labelled rows for validation.
# - random_state pins the split so it does not depend on how much of the
#   global NumPy RNG earlier cells consumed.
# - stratify keeps the OLD / MIDDLE / YOUNG proportions equal in both parts.
train_data, valid_data = train_test_split(
    train_csv,
    test_size = 0.33,
    shuffle = True,
    random_state = 43,
    stratify = train_csv['Class'],
)

In [0]:
# Wrap each split in a dataset: the training split gets the augmenting
# pipeline, the validation split only the resize + tensor conversion.
train_dataset = AgeDataset(train_data, IMG_DIR, train_transform())
valid_dataset = AgeDataset(valid_data, IMG_DIR, valid_transform())

In [0]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [0]:
# Four loader worker processes on a GPU runtime, none (load in the main
# process) on CPU.
workers = 4 if device == 'cuda' else 0

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True, num_workers = workers)

# Validation batches keep the dataset order.
valid_loader = DataLoader(valid_dataset, batch_size = 32, shuffle = False, num_workers = workers)

In [0]:
# Start from torchvision's ResNet-50 with pretrained weights.
model = models.resnet50(pretrained=True)
# Freeze every parameter of the loaded network.
for param in model.parameters():
    param.requires_grad = False
# Swap the final fully connected layer for a new 3-output layer (one output
# per entry in `labels`). It is created after the freeze loop, so its
# parameters keep the default requires_grad=True.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, 3)

In [0]:
# Print the module tree of the model to inspect its layers.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [0]:
# Move the model's parameters and buffers to the selected device.
model.to(device)
# Plain SGD (learning rate 0.001) over everything returned by
# model.parameters().
optimizer = optim.SGD(model.parameters(), lr = 0.001)
# Cross-entropy loss between the model's class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

In [0]:
def train(model, trainloader, validloader, optimizer, criterion, num_epochs = 20):
    """Train `model` and return it holding the weights of its best epoch.

    Each epoch runs one optimisation pass over `trainloader` followed by one
    gradient-free evaluation pass over `validloader`. The weights of the
    epoch with the highest validation accuracy are snapshotted and loaded
    back into `model` before it is returned.

    Args:
        model: Network to optimise; it is updated in place.
        trainloader: Iterable of `(image, label)` batches used for training.
        validloader: Iterable of `(image, label)` batches used for validation.
        optimizer: Optimiser already bound to the model's parameters.
        criterion: Loss taking `(model_output, label)`.
        num_epochs: Number of epochs to run.

    Returns:
        The same `model` object, carrying the best-epoch weights.
    """
    import copy

    print("Training .....")
    # Run on whatever device the model already lives on; fall back to the
    # notebook-level `device` for a model without parameters.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = device

    best_acc = 0
    best_state = None
    for epoch in tqdm(range(num_epochs)):
        print("Epoch: ", epoch + 1)

        train_loss, train_acc = _run_epoch(model, trainloader, criterion, run_device, optimizer)
        print("train_loss: ", train_loss)
        print("train_accuracy: ", train_acc)

        valid_loss, valid_acc = _run_epoch(model, validloader, criterion, run_device)
        print("validation_ loss: " , valid_loss)
        print("validation_accuracy: " , valid_acc)

        if best_state is None or valid_acc > best_acc:
            best_acc = valid_acc
            # Deep-copy the weights: keeping a reference to `model` would
            # just track the live model and end up as the last epoch.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is not None:
        model.load_state_dict(best_state)

    print("Best_accuracy: ", best_acc)

    print("Training Ends")

    return model


def _run_epoch(model, loader, criterion, run_device, optimizer = None):
    """Run one pass over `loader` and return `(mean_loss, accuracy)`.

    With an `optimizer` the pass trains the model; without one it evaluates
    in eval mode with autograd disabled.
    """
    training = optimizer is not None
    # Put dropout / batch-norm layers in the mode that matches the phase.
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for image, label in tqdm(loader):
            image = image.to(run_device)
            label = label.to(run_device)
            if training:
                optimizer.zero_grad()
            target = model(image)
            loss = criterion(target, label)
            if training:
                loss.backward()
                optimizer.step()
            _, pred = target.max(1)
            batch_size = image.size(0)
            # .item() converts to Python numbers so no graph is kept alive.
            loss_sum += loss.item() * batch_size
            correct += (pred == label).sum().item()
            seen += batch_size

    # Normalise by the samples actually seen, not by a global frame size.
    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, correct / seen

In [0]:
# For Resnet50:
# torch.load unpickles the file, which can execute arbitrary code: only load
# checkpoints you trust. map_location lets a checkpoint saved on a GPU be
# opened on a CPU-only runtime.
model = torch.load(r"/content/drive/My Drive/Age Detection/res50_2", map_location=device)
model.eval()

In [0]:
#for Efficient_net:
!pip install efficientnet_pytorch
from efficientnet_pytorch import EfficientNet
# torch.load unpickles the file, which can execute arbitrary code: only load
# checkpoints you trust. map_location lets a checkpoint saved on a GPU be
# opened on a CPU-only runtime.
model = torch.load(r"/content/drive/My Drive/Age Detection/efficient_2.pth", map_location=device)
model.eval()

In [0]:
#for Densenet121:
# torch.load unpickles the file, which can execute arbitrary code: only load
# checkpoints you trust. map_location lets a checkpoint saved on a GPU be
# opened on a CPU-only runtime.
model = torch.load(r"/content/drive/My Drive/Age Detection/dense121_3", map_location=device)
model.eval()

In [0]:
# Sample submission file: supplies the image IDs to predict for, together
# with a `Class` column.
sub = pd.read_csv(r"/content/drive/My Drive/Age Detection/sample_submission_sDO3m7O.csv")
sub.head()

Unnamed: 0,Class,ID
0,MIDDLE,25321.jpg
1,MIDDLE,989.jpg
2,MIDDLE,19277.jpg
3,MIDDLE,13093.jpg
4,MIDDLE,5367.jpg


In [0]:
# Empty frame with the same index and column names as the sample submission;
# only the ID column is copied over, `Class` is left to be filled in later.
output = pd.DataFrame(index=sub.index,columns = sub.keys())
output['ID'] = sub['ID']
output.shape

(6636, 2)

In [0]:
# Spot check: first-column (`Class`) value stored in `sub` for one image ID.
sub[sub.loc[:,'ID'] == '25321.jpg'].iloc[:,0]

0    MIDDLE
Name: Class, dtype: object

In [0]:
class AgeTestDataset(Dataset):
    """Unlabelled image dataset that yields `(image, file_name)` pairs.

    Args:
        csv_file: DataFrame whose second column holds the image file name.
            The first column is not read, so placeholder classes are fine.
        image_dir: Directory that contains the image files.
        transforms: Optional callable invoked as `transforms(image=array)`
            that returns a mapping with the result under the key 'image'.
            When omitted, the image array is returned as read from disk.
    """

    def __init__(self, csv_file, image_dir, transforms = None):
        super(AgeTestDataset, self).__init__()
        self.csv_file = csv_file
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return self.csv_file.shape[0]

    def __getitem__(self, index):
        """Return `(image, file_name)` for the row at position `index`."""
        file_name = self.csv_file.iloc[index, 1]
        # os.path.join works whether or not image_dir ends with a separator.
        image = plt.imread(os.path.join(self.image_dir, file_name))
        # No label lookup here: test rows have no ground truth, and mapping a
        # placeholder class through `labels` could raise for unknown values.
        if self.transforms is not None:
            image = self.transforms(image = image)['image']

        return image, file_name

In [0]:
# Test images live in their own folder; they are listed via the sample
# submission frame and get the non-augmenting validation pipeline.
TEST_DIR = "/content/drive/My Drive/Age Detection/Test/"
testdata = AgeTestDataset(sub, TEST_DIR, valid_transform())
# DataLoader defaults apply for everything except the batch size.
testloader =  DataLoader(testdata, batch_size = 32)

In [0]:
# Class name for each predicted class index (0, 1, 2).
key_to_label = dict(enumerate(('OLD', 'MIDDLE', 'YOUNG')))

In [0]:
import time
def test_submission(model):
    since = time.time()
    model.eval()
    for image, label in tqdm(testloader):
        image = image.to(device)
        target = model(image)
        #print(label)
        _,pred = target.max(1)
        #print(pred)
        out = [key_to_label[f] for f in pred.cpu().numpy()]
        for i in range(len(label)):
            output.loc[output['ID'] == label[i], 'Class'] = out[i]
    time_elapsed = time.time() - since
    print('Run complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

In [0]:
# Re-select the device, move the loaded model onto it and fill in the
# predictions.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
test_submission(model)

HBox(children=(FloatProgress(value=0.0, max=208.0), HTML(value='')))


Run complete in 1m 25s


In [0]:
# NOTE(review): this prints `sub`, the sample submission read from CSV, not
# `output`, the frame that test_submission() writes predictions into —
# confirm which one was meant to be inspected here.
print(sub)

       Class         ID
0     MIDDLE  25321.jpg
1     MIDDLE    989.jpg
2     MIDDLE  19277.jpg
3     MIDDLE  13093.jpg
4     MIDDLE   5367.jpg
...      ...        ...
6631  MIDDLE   1876.jpg
6632  MIDDLE  14940.jpg
6633  MIDDLE   3638.jpg
6634  MIDDLE    376.jpg
6635  MIDDLE   9357.jpg

[6636 rows x 2 columns]


In [0]:
#output.to_csv("submission_res50_2.csv", index=False)