### About This Data
This dataset, also known as Animal Faces-HQ (AFHQ), consists of 16,130 high-quality images at 512×512 resolution.
There are three domains (classes), each providing about 5,000 images. With three domains and diverse images of various breeds within each domain, AFHQ poses a challenging image-to-image translation problem. The classes are:

- Cat;
- Dog;
- Wildlife.

## Library imports

In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image ##image reading

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from torchvision import transforms ## preprocessing images

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Suppress warnings
warnings.filterwarnings('ignore')


In [None]:
## choose the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

## Data Loading
- Read the entire dataset so that it can later be split into train, validation, and test sets.

In [None]:
# Scan the dataset tree two directory levels deep below base_dir and record
# every file path together with its label (the name of the second-level
# folder that contains the file).
# NOTE(review): the first level is presumably the dataset's own train/val
# folders -- confirm against the on-disk layout.
image_path = []
labels = []
base_dir = "E:/AI 2025/Projects_Pytorch/Datasets/AnimalFaces"

# os.listdir returns entries in arbitrary order. Sorting keeps the row order
# of the dataframe stable, so the seeded df.sample(...) splits made later are
# reproducible across runs and machines.
for subset in sorted(os.listdir(base_dir)):
    subset_dir = f"{base_dir}/{subset}"
    if not os.path.isdir(subset_dir):
        # Stray files next to the folders would make the nested listdir fail.
        continue
    for label in sorted(os.listdir(subset_dir)):
        label_dir = f"{subset_dir}/{label}"
        if not os.path.isdir(label_dir):
            continue
        for image in sorted(os.listdir(label_dir)):
            image_path.append(f"{label_dir}/{image}")
            labels.append(label)

# Column order matters: the image path must stay the first column.
df = pd.DataFrame({"image_path": image_path, "labels": labels})
print(df['labels'].value_counts())
df.head()

## Split data into train, validation, and test sets

In [None]:
# 70% of the rows are sampled for training; the remaining rows are halved
# into a validation set and a test set. Both draws use a fixed seed.
train = df.sample(frac=0.7, random_state=42)
holdout = df.drop(index=train.index)
val = holdout.sample(frac=0.5, random_state=42)
test = holdout.drop(index=val.index)
print(f"Train: {len(train)}, Test: {len(test)}, Val: {len(val)}")
print(train.shape, test.shape, val.shape)

In [None]:
## Use label encoder to convert labels to numbers
labelEncoder = LabelEncoder()
labelEncoder.fit(df['labels'])

## make all images have same properties
# The pipeline is stored under the name `transforms`, which is also the name
# of the torchvision module imported at the top. Referring to the module
# through a private alias keeps this cell re-runnable: without it, a second
# execution would look up `Compose` on the pipeline object and fail.
from torchvision import transforms as _tv_transforms

transforms = _tv_transforms.Compose([
    _tv_transforms.Resize((128, 128)),                 # fixed spatial size for every image
    _tv_transforms.ToTensor(),                         # PIL image -> tensor
    _tv_transforms.ConvertImageDtype(torch.float32),   # ensure float32 dtype
])

## Create Custom Image dataset

In [None]:
class AnimalFacesDataset(Dataset):
    """Dataset backed by a dataframe of image paths and class labels.

    All labels in ``df['labels']`` are encoded once at construction time with
    the notebook-level ``labelEncoder`` and stored as a tensor on ``device``.
    Images are opened from disk on demand, one per ``__getitem__`` call, and
    converted to RGB.

    Args:
        df: Dataframe whose first column holds the image path and which has a
            ``labels`` column.
        transform: Optional callable applied to the PIL image; its result is
            moved to ``device``.
    """

    def __init__(self, df, transform=None):
        encoded = labelEncoder.transform(df['labels'])
        self.labels = torch.tensor(encoded).to(device)
        self.transform = transform
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        path = self.df.iloc[idx, 0]
        target = self.labels[idx]
        picture = Image.open(path).convert("RGB")
        if not self.transform:
            # No transform configured: hand back the PIL image unchanged.
            return picture, target
        return self.transform(picture).to(device), target

In [None]:
# One dataset per split, all sharing the same preprocessing pipeline.
train_dataset, val_dataset, test_dataset = (
    AnimalFacesDataset(df=split_frame, transform=transforms)
    for split_frame in (train, val, test)
)


In [None]:
### Visualize some images
# Fill a 3x3 grid; every cell shows one image drawn at random from the
# full dataframe (each draw is independent).
n_rows=3
n_col=3
f, ax = plt.subplots(n_rows, n_col, figsize=(10, 10))
for cell in ax.flat:
    random_path = df.sample(n=1)['image_path'].values[0]
    cell.imshow(Image.open(random_path).convert("RGB"))
    cell.axis("off")
plt.show()

In [None]:
# Training hyperparameters.
LR = 1e-3        # learning rate
BATCH_SIZE = 32  # samples per batch
EPOCHS = 10      # number of passes over the training data

In [None]:
# Only the training loader is shuffled. Validation and test metrics are sums
# over all samples, so shuffling them adds nothing; a fixed order keeps
# evaluation batches reproducible and leaves the global RNG untouched.
train_Dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_Dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_Dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Custom Model

In [None]:
# Same function as before, imported through NumPy's public namespace: the
# private `numpy._core` path is not available in older NumPy releases.
from numpy import searchsorted


class Net(nn.Module):
    """Small convolutional classifier for 3-channel 128x128 images.

    Three ``Conv2d -> MaxPool2d -> ReLU`` stages (32, 64 and 128 channels)
    each halve the spatial size (128 -> 64 -> 32 -> 16). The flattened
    feature map feeds a 128-unit hidden layer followed by a linear output
    layer that produces one raw logit per class.

    Args:
        num_classes: Number of output logits. When ``None`` (the default) it
            is the number of distinct values in the notebook-level
            ``df['labels']`` column.
    """

    def __init__(self, num_classes=None):
        super().__init__()

        if num_classes is None:
            num_classes = len(df['labels'].unique())

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        self.pooling = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        # 128 channels x 16 x 16 spatial positions after three 2x poolings
        # of a 128x128 input.
        self.linear = nn.Linear((128 * 16 * 16), 128)
        self.output = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.relu(self.pooling(conv(x)))

        x = self.flatten(x)
        # Without a non-linearity here the two Linear layers would collapse
        # into a single affine map.
        x = self.relu(self.linear(x))
        return self.output(x)

In [None]:
# Instantiate the network, then place its parameters on the selected device.
model = Net()
model = model.to(device)


In [None]:
from torchsummary import summary

# Print a per-layer overview of the model for a single 3x128x128 input.
input_shape = (3, 128, 128)
summary(model, input_size=input_shape)

In [None]:
# Adam updates every model parameter with learning rate LR; the loss is
# cross-entropy computed on the model outputs.
optimizer = Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

In [None]:
# Per-epoch history, used for the loss plot at the end of this cell.
total_loss_train_plot = []
total_loss_val_plot = []
total_acc_train_plot = []
total_acc_val_plot = []

for epoch in range(EPOCHS):
    # Running sums for the current epoch: batch losses and correct predictions.
    total_loss_train = 0
    total_acc_train = 0
    total_loss_val = 0
    total_acc_val = 0

    # ---- training phase ----
    model.train()
    # The loop variable is named `targets` so it does not overwrite the
    # notebook-level `labels` list.
    for images, targets in train_Dataloader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        train_loss = criterion(outputs, targets)
        train_loss.backward()
        optimizer.step()

        total_loss_train += train_loss.item()
        total_acc_train += (torch.argmax(outputs, dim=1) == targets).sum().item()

    # ---- validation phase (no gradients, evaluation mode) ----
    model.eval()
    with torch.no_grad():
        for images, targets in val_Dataloader:
            # Same device handling as the training loop.
            images, targets = images.to(device), targets.to(device)
            output = model(images)
            val_loss = criterion(output, targets)
            total_loss_val += val_loss.item()
            total_acc_val += (torch.argmax(output, dim=1) == targets).sum().item()

    # Average the summed batch losses over the number of batches so training
    # and validation losses are on the same scale. max(..., 1) guards
    # against an empty loader or dataset.
    total_loss_train_plot.append(round(total_loss_train / max(len(train_Dataloader), 1), 4))
    total_loss_val_plot.append(round(total_loss_val / max(len(val_Dataloader), 1), 4))
    # Accuracy in percent: correct predictions over the number of samples.
    total_acc_train_plot.append(round((total_acc_train / max(len(train_dataset), 1)) * 100, 4))
    total_acc_val_plot.append(round((total_acc_val / max(len(val_dataset), 1)) * 100, 4))

    print(f"Epoch {epoch+1}/{EPOCHS}, Training Loss: {total_loss_train_plot[-1]:.4f}, Training Accuracy: {total_acc_train_plot[-1]:.4f}, Validation Loss: {total_loss_val_plot[-1]:.4f}, Validation Accuracy: {total_acc_val_plot[-1]:.4f}")

plt.figure(figsize=(10, 5))
plt.plot(total_loss_train_plot, label="Train Loss")
plt.plot(total_loss_val_plot, label="Validation Loss")
plt.legend()
plt.show()

In [None]:
# Evaluate the trained model on the held-out test set.
model.eval()
with torch.no_grad():
    total_loss_test = 0
    total_accuracy_test = 0

    for inputs, label in test_Dataloader:
        inputs, label = inputs.to(device), label.to(device)
        # One logit per class for every sample in the batch.
        predictions = model(inputs)
        total_loss_test += criterion(predictions, label).item()
        # The predicted class is the index of the largest logit. Rounding
        # the logits and comparing them with the labels only makes sense
        # for a single-output binary model.
        total_accuracy_test += (torch.argmax(predictions, dim=1) == label).sum().item()
print("Accuracy of the model is: ", round(total_accuracy_test / len(test_dataset) * 100, 4))

In [None]:
# Loss (left) and accuracy (right) per epoch. All axis formatting is applied
# before plt.show(); changes made after the figure has been shown are not
# rendered.
fig, axis = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

axis[0].plot(total_loss_train_plot, label="Training Loss")
axis[0].plot(total_loss_val_plot, label="Validation Loss")
axis[0].set_title("Loss")
axis[0].set_xlabel("Epochs")
axis[0].legend()

axis[1].plot(total_acc_train_plot, label="Training Accuracy")
axis[1].plot(total_acc_val_plot, label="Validation Accuracy")
axis[1].set_title("Accuracy")
axis[1].set_xlabel("Epochs")
axis[1].set_ylabel("Accuracy")
axis[1].set_ylim(0, 100)  # accuracies are stored as percentages
axis[1].set_xlim(0, EPOCHS)
axis[1].set_xticks(np.arange(0, EPOCHS + 1, 1))
axis[1].set_yticks(np.arange(0, 101, 10))
axis[1].grid(True)
axis[1].legend()

plt.show()

### Perform inference
- Read the image
- Transform it using the transform object
- Predict with the model
- Convert the predicted index back to a class name with the label encoder's inverse transform


In [None]:
def predict(image_path, *, as_label=False):
    """Classify a single image file with the trained model.

    The image is opened, converted to RGB, passed through the notebook-level
    ``transforms`` pipeline, moved to ``device`` and fed to ``model`` as a
    batch of one.

    Args:
        image_path: Path of the image file to classify.
        as_label: When ``True``, return the class name obtained from
            ``labelEncoder.inverse_transform`` instead of the class index.

    Returns:
        By default a tensor holding the predicted class index (one element,
        because the batch has a single image). With ``as_label=True``, the
        decoded class label.
    """
    # The context manager closes the underlying file as soon as the RGB copy
    # has been made.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    image = transforms(image).to(device)
    print(image.shape)

    # Run in evaluation mode without gradient tracking, then restore whatever
    # mode the model was in so a later training run is unaffected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(image.unsqueeze(0))
    finally:
        model.train(was_training)

    predicted = torch.argmax(output, dim=1)
    if as_label:
        return labelEncoder.inverse_transform(predicted.cpu().numpy())[0]
    return predicted


In [None]:
# Run inference on one sample image; the result is the cell's output.
sample_image = "E:/AI 2025/Projects_Pytorch/Project2/cat.jpg"
predict(sample_image)