In [1]:
#Import PyTorch
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torch import optim

#Import TorchVision
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
import torchvision.models as models
from torchvision.models import resnet
from torchvision.models.resnet import ResNet, BasicBlock
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

#torchinfo for printing a model summary
from torchinfo import summary

#import other libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from PIL import Image
import seaborn as sns
import os
import random 
import shutil
from tqdm.auto import tqdm
import zipfile
import pickle
import joblib

In [2]:
# Pick the compute backend: use the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# # Path to the folder containing the images
# images_path = "/kaggle/input/utkface-new/UTKFace"

# # Path to the folder where the training and testing sets will be saved
# train_path = "/kaggle/working/utk_train"
# test_path = "/kaggle/working/utk_test"

# # Load image file names into memory
# image_files = []
# for filename in os.listdir(images_path):
#     if filename.endswith(".jpg"):
#         filepath = os.path.join(images_path, filename)
#         image_files.append(filepath)

# # Split image files into training and testing sets
# train_files, test_files = train_test_split(image_files, test_size=0.2, random_state=42)

# # Copy training images to the train folder
# for filepath in train_files:
#     filename = os.path.basename(filepath)
#     dest = os.path.join(train_path, filename)
#     shutil.copyfile(filepath, dest)

# # Copy testing images to the test folder
# for filepath in test_files:
#     filename = os.path.basename(filepath)
#     dest = os.path.join(test_path, filename)
#     shutil.copyfile(filepath, dest)

# # Print number of images in each set
# print("Number of training images:", len(train_files))
# print("Number of testing images:", len(test_files))


In [4]:
# # replace 'input_folder' with the name of the folder containing your data in '/kaggle/input/'
# input_folder = 'age-periocular/images'

# # replace 'working_folder' with the name of the folder where you want to store the copied data in '/kaggle/working/'
# working_folder = 'data'

# # copy the contents of the input folder to the working folder
# shutil.copytree(f'/kaggle/input/{input_folder}', f'/kaggle/working/{working_folder}')


In [5]:
def class_labels_reassign(age):
    """Map an age in years to one of ten decade-wide class indices.

    Buckets:
        0 -> ages up to 10, 1 -> 11-20, 2 -> 21-30, ..., 8 -> 81-90,
        9 -> 91 and above.

    Args:
        age: Age in years. Converted with ``int()``, so a fractional value is
            truncated toward zero before bucketing.

    Returns:
        int: Class index in the range 0..9.

    Note:
        Ages below 1 (for example an infant recorded as age 0) are placed in
        the youngest bucket (0). The previous if/elif chain let such values
        fall through to the final ``else`` and labelled them as class 9, the
        "above 90" bucket.
    """
    # Ages 1-10 give (age - 1) // 10 == 0, ages 11-20 give 1, and so on.
    bucket = (int(age) - 1) // 10
    # Clamp: everything below 1 joins bucket 0, everything above 90 joins bucket 9.
    return min(max(bucket, 0), 9)

class CustomDataset1(Dataset):
    """Image dataset whose age is encoded after an 'A' in the file name.

    The two characters following the first 'A' in the file name are parsed as
    the age (e.g. ``001A22.JPG`` -> 22), which is then mapped to a class index
    with ``class_labels_reassign``.

    Args:
        data_dir: Directory whose entries are the image files.
        transform: Optional callable applied to each PIL image. When ``None``
            the default pipeline is used: resize to 224x224, random horizontal
            flip, tensor conversion and normalisation with mean/std 0.5.
            (Previously this argument was accepted but silently ignored.)
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        # Sorted so that an index always refers to the same file.
        self.img_paths = sorted(os.listdir(data_dir))
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
        # Attribute name kept as `transforms` for backward compatibility.
        self.transforms = transform

    def __len__(self):
        """Return the number of directory entries found in ``data_dir``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, age_class_index)`` for entry ``idx``."""
        file_name = self.img_paths[idx]
        img_path = os.path.join(self.data_dir, file_name)
        img = Image.open(img_path).convert('RGB')
        # Parse the age from the bare file name instead of splitting the full
        # path on '/', which breaks where the path separator is not '/'.
        age = int(os.path.basename(file_name).split('A')[1][:2])
        age_label = class_labels_reassign(age)
        img = self.transforms(img)
        return img, age_label
    
class CustomDataset2(Dataset):
    """Image dataset whose age is the first '_'-separated field of the file name.

    The text before the first underscore is parsed as the age
    (e.g. ``115_1_0_20170120134725990.jpg.chip.jpg`` -> 115), which is then
    mapped to a class index with ``class_labels_reassign``.

    Args:
        data_dir: Directory whose entries are the image files.
        transform: Optional callable applied to each PIL image. When ``None``
            the default pipeline is used: resize to 224x224, random horizontal
            flip, tensor conversion and normalisation with mean/std 0.5.
            (Previously this argument was accepted but silently ignored.)
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        # Sorted so that an index always refers to the same file.
        self.img_paths = sorted(os.listdir(data_dir))
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
        # Attribute name kept as `transforms` for backward compatibility.
        self.transforms = transform

    def __len__(self):
        """Return the number of directory entries found in ``data_dir``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, age_class_index)`` for entry ``idx``."""
        file_name = self.img_paths[idx]
        img_path = os.path.join(self.data_dir, file_name)
        img = Image.open(img_path).convert('RGB')
        # Parse the age from the bare file name instead of splitting the full
        # path on '/', which breaks where the path separator is not '/'.
        age = int(os.path.basename(file_name).split('_')[0])
        age_label = class_labels_reassign(age)
        img = self.transforms(img)
        return img, age_label
    
# Training split: batches of 64, reshuffled on every pass, loaded by 2 workers.
train_data = CustomDataset2('/kaggle/input/utk-split/utk_train')
train_loader = DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)

# Evaluation split: same batch size, served in a fixed order.
# NOTE(review): CustomDataset2's default pipeline includes RandomHorizontalFlip,
# so the evaluation images are randomly flipped as well.
test_data = CustomDataset2('/kaggle/input/utk-split/utk_test')
test_loader = DataLoader(
    dataset=test_data,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)



In [6]:
# img_path = '/kaggle/input/utk-split/utk_test/115_1_0_20170120134725990.jpg.chip.jpg'
# img = Image.open(img_path).convert('RGB')
# age_label = int(img_path.split('/')[-1].split('_')[0])
# age_interval = class_labels_reassign(age_label)
# print(age_interval)


In [7]:
# img_path = '/kaggle/input/fgnet/FGNET/images/001A22.JPG'
# img = Image.open(img_path).convert('RGB')
# age = int(img_path.split('/')[-1].split('A')[1][:2])
# age_interval = class_labels_reassign(age)
# print(age_interval)


In [8]:
# Build a ResNet-50 backbone and initialise it from the locally stored checkpoint.
model = resnet.resnet50()
# map_location='cpu' lets the checkpoint load on machines without a GPU,
# whatever device it was saved from; the model is moved to `device` later.
model.load_state_dict(
    torch.load('/kaggle/input/resnet-50/resnet50-0676ba61.pth', map_location='cpu')
)
# Swap the final fully connected layer for a 10-way head, one output per
# age bucket produced by class_labels_reassign.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

In [9]:
# # Define the folder path
# folder_path = "/kaggle/working/"

# # Define the filenames to be deleted
# file_names = ["utk_train.zip","utk_test.zip"]

# # Loop through the filenames and delete them
# for file_name in file_names:
#     file_path = os.path.join(folder_path, file_name)
#     os.remove(file_path)

In [10]:
# Resolve the target device as a torch.device object (GPU if present, else CPU).
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The 10-class head was already attached when the model was built, so the
# network only needs to be moved onto the selected device here.
model = model.to(device)

# Cross-entropy over the class logits, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.001)

def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor that supports ``backward()``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple: ``(mean_loss, accuracy_percent)`` over the samples actually
        processed. ``(0.0, 0.0)`` is returned when the loader yields nothing.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_count = labels.size(0)
        # Weight the batch loss by its size so the epoch figure is a
        # per-sample average even when the last batch is smaller.
        running_loss += loss.item() * batch_count
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += batch_count

    if total == 0:
        # An empty loader would otherwise raise ZeroDivisionError.
        return 0.0, 0.0

    # Normalise by the samples seen, not len(loader.dataset): the two differ
    # when the loader drops the last batch or samples a subset.
    train_loss = running_loss / total
    train_acc = 100. * correct / total
    return train_loss, train_acc

def test(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            #print(labels)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    test_loss = running_loss / len(test_loader.dataset)
    test_acc = 100. * correct / total
    return test_loss, test_acc

# Number of full passes over the training data.
n_epochs = 50

# Each epoch: one optimisation pass over the training loader, then one
# evaluation pass over the test loader, followed by a one-line summary.
for epoch in tqdm(range(1, n_epochs + 1)):
    train_loss, train_acc = train(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    test_loss, test_acc = test(
        model=model,
        test_loader=test_loader,
        criterion=criterion,
        device=device,
    )
    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')


  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 1: Train Loss: 1.3748, Train Acc: 48.46%, Test Loss: 1.1473, Test Acc: 54.96%
Epoch 2: Train Loss: 1.0526, Train Acc: 57.57%, Test Loss: 1.0484, Test Acc: 57.78%
Epoch 3: Train Loss: 0.9252, Train Acc: 62.41%, Test Loss: 0.9810, Test Acc: 60.57%
Epoch 4: Train Loss: 0.8274, Train Acc: 66.14%, Test Loss: 0.9793, Test Acc: 59.62%
Epoch 5: Train Loss: 0.7278, Train Acc: 70.42%, Test Loss: 1.0209, Test Acc: 58.54%
Epoch 6: Train Loss: 0.6197, Train Acc: 75.36%, Test Loss: 1.0439, Test Acc: 59.30%
Epoch 7: Train Loss: 0.5228, Train Acc: 80.19%, Test Loss: 1.2205, Test Acc: 55.69%
Epoch 8: Train Loss: 0.4372, Train Acc: 83.33%, Test Loss: 1.2022, Test Acc: 58.81%
Epoch 9: Train Loss: 0.3702, Train Acc: 86.29%, Test Loss: 1.3104, Test Acc: 56.50%
Epoch 10: Train Loss: 0.3083, Train Acc: 89.01%, Test Loss: 1.3820, Test Acc: 58.33%
Epoch 11: Train Loss: 0.2634, Train Acc: 90.89%, Test Loss: 1.4485, Test Acc: 60.06%
Epoch 12: Train Loss: 0.2334, Train Acc: 92.07%, Test Loss: 1.5370, Test A

In [11]:
# Persist only the learned parameters (the state_dict), not the module object.
torch.save(
    obj=model.state_dict(),
    f='/kaggle/working/PeriAge_resnet_v3.pth',
)

In [12]:
# true_labels = []
# predicted_labels = []

# model.eval()
# with torch.no_grad():
#     for inputs, labels in train_loader:
#         inputs = inputs.to(device)
#         labels = labels.to(device)
#         outputs = model(inputs)
#         _, predicted = torch.max(outputs.data, 1)
#         true_labels.extend(labels.cpu().numpy())
#         predicted_labels.extend(predicted.cpu().numpy())

# cm = confusion_matrix(true_labels, predicted_labels)

# # Finally, you can print or visualize the confusion matrix
# print(cm)


In [13]:
# true_labels = []
# predicted_labels = []
# model.eval()
# with torch.no_grad():
#     for inputs, labels in test_loader:
#         inputs = inputs.to(device)
#         labels = labels.to(device)
#         outputs = model(inputs)
#         _, predicted = torch.max(outputs.data, 1)
#         true_labels.extend(labels.cpu().numpy())
#         predicted_labels.extend(predicted.cpu().numpy())

# cm = confusion_matrix(true_labels, predicted_labels)
# print(cm)

In [14]:
# #Create Confusion Matrix for testing data for Random data
# confusion_matrix = [[71, 19, 3, 5, 3, 1, 8],
#                     [16, 22, 8, 7, 1, 0, 1],
#                     [2, 3, 4, 3, 1, 0, 0],
#                     [0, 3, 0, 0, 1, 0, 0],
#                     [1, 1, 1, 1, 2, 0, 0],
#                     [0, 0, 0, 0, 0, 0, 0],
#                     [11, 1, 0, 0, 0, 0, 2]]

# # Define class labels
# class_labels = ["1-10", "11-20", "21-30", "31-40", "41-50", "51-60", "61-70"]

# # Create heatmap using seaborn
# sns.set(font_scale=1.4)
# sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="g", xticklabels=class_labels, yticklabels=class_labels)

# # Set plot labels
# plt.xlabel("Predicted Labels")
# plt.ylabel("True Labels")
# plt.title("Confusion Matrix")

# # Show plot
# plt.show()

In [15]:
# confusion_matrix = [[256, 0, 0, 0, 0, 0, 0, 0],
#                    [0, 264, 0, 0, 0, 0, 0, 0],
#                    [0, 0, 130, 0, 0, 0, 0, 0],
#                    [0, 0, 0, 65, 0, 0, 0, 0],
#                    [0, 0, 0, 0, 33, 0, 0, 0],
#                    [0, 0, 0, 0, 0, 14, 0, 0],
#                    [0, 0, 0, 0, 4, 0, 2, 1],
#                    [0, 0, 0, 0, 0, 0, 0, 29]]

# # Define class labels
# class_labels = ["1-10", "11-20", "21-30", "31-40", "41-50", "51-60", "61-70"]

# # Create heatmap using seaborn
# sns.set(font_scale=1.4)
# sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="g", xticklabels=class_labels, yticklabels=class_labels)

# # Set plot labels
# plt.xlabel("Predicted Labels")
# plt.ylabel("True Labels")
# plt.title("Confusion Matrix")

# # Show plot
# plt.show()

In [16]:
# train_loss = [3.6078204283347497, 1.0828300301845257, 0.7821456973369305, 0.569744382913296, 0.42909533931658816, 0.35058144995799434, 0.375598736680471, 0.35029213015849775, 0.26209860696242404, 0.24780528705853683, 0.21145989631231016, 0.1919390490421882, 0.1478984814423781, 0.11406992934644222, 0.117084243764671, 0.1305352496699645, 0.1111809926537367, 0.1508219835276787, 0.14137629304940885, 0.08402698873900451]
# test_loss = [11.021007776260376, 1.0369595140218735, 0.7897928059101105, 0.9411616325378418, 0.8159842193126678, 1.4034732282161713, 1.765362709760666, 1.1426731050014496, 1.275940015912056, 1.4342282861471176, 2.3172550797462463, 1.3849702179431915, 1.6262561529874802, 1.6498392224311829, 1.3512279391288757, 1.2401925027370453, 1.7677911818027496, 1.6769012659788132, 1.6533903181552887, 1.2347149848937988]
# epochs = range(1, len(train_loss) + 1)

# sns.set_style('whitegrid')
# fig, ax = plt.subplots(figsize=(12, 6))

# sns.lineplot(x=epochs, y=train_loss, label='Train Loss')
# sns.lineplot(x=epochs, y=test_loss, label='Test Loss')

# plt.title('Training and Test Loss')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.legend()

# plt.show()

# #plt.savefig('mygraph.png', dpi=300)


In [17]:
# filename = 'model_v2.sav'
# model = joblib.load(filename)

In [18]:
# model = PeriAge()
# batch_size = 64
# summary(model, input_size=(batch_size, 3, 224, 224))

In [19]:
# folder_path = "/kaggle/working/utk_test"
# os.makedirs(folder_path, exist_ok=True)


In [20]:
# # replace 'input_folder' with the name of the folder containing your data in '/kaggle/input/'
# input_folder = 'utkface-new'

# # replace 'working_folder' with the name of the folder where you want to store the copied data in '/kaggle/working/'
# working_folder = 'utk'

# # copy the contents of the input folder to the working folder
# shutil.copytree(f'/kaggle/input/{input_folder}', f'/kaggle/working/{working_folder}')


In [21]:
# # Path to the folder containing the images
# image_folder = "/kaggle/input/utkface-new/UTKFace"

# # Count the number of image files in the folder
# num_images = len([filename for filename in os.listdir(image_folder) if filename.endswith(('.jpg', '.jpeg', '.png', '.bmp'))])

# # Print the number of image files
# print("Number of images:", num_images)


In [22]:
# shutil.rmtree('/kaggle/working/utk_train')


In [23]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# def get_age_range(age):
#     if age == 0:
#         return "Age is in range 1-10"
#     elif age == 1: 
#         return "Age is in range 11-20"
#     elif age == 2:
#         return "Age is in range 21-30"
#     elif age == 3:
#         return "Age is in range 31-40"
#     elif age == 4:
#         return "Age is in range 41-50"
#     elif age == 5:
#         return "Age is in range 51-60"
#     elif age == 6:
#         return "Age is in range 61-70"
#     elif age == 7:
#         return "Age is in range 71-80"
#     elif age == 8:
#         return "Age is in range 81-90"
#     else:
#         return "Age is above 90"
    

# def preprocess_image(image_path):
#     transform = transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ])    
#     image = Image.open(image_path).convert('RGB')
#     image = transform(image)
#     image = image.unsqueeze(0)    
#     return image

# def predict(model, image_path):
#     image = preprocess_image(image_path)   
#     image = image.to(device)   
#     with torch.no_grad():
#         output = model(image)
#         #print(output.shape)
#         #print(output)
#         max_value, max_index = torch.max(output, dim=0)
             
#     #print(probs)
#     #print(predicted.item())
#     #print(max_index)
#     return max_index.item()


# image_path = '/kaggle/input/split-data/cleaned_data/train/002A16.JPG'
# predicted_age = predict(model, image_path)
# age_range = get_age_range(predicted_age)
# print(age_range)

# #print(f"Predicted age: {predicted_age}")
