In [97]:
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import cv2
import seaborn as sns
sns.set_style('darkgrid')
import shutil
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import torch as t
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.transforms import v2
import torchvision as tv
from torchvision.utils import save_image


In [98]:
from pprint import pprint as pp

In [None]:
##  Download Data
# https://www.kaggle.com/datasets/quadeer15sh/amur-tiger-reidentification
# !kaggle datasets download -d quadeer15sh/amur-tiger-reidentification

In [99]:
## Device Selection
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if t.cuda.is_available():
    device=t.device('cuda')
else:
    device=t.device('cpu')
device

device(type='cuda')

### Data Loading

In [126]:
## Training Dataset
# Both locations are resolved against the current working directory:
# the image folder and the CSV that lists the labelled training images.
data_root=os.path.abspath(r'./Amur Tigers')
train_dir=os.path.join(data_root,'train')
train_csvpath=os.path.join(data_root,'reid_list_train.csv')

In [141]:
## Original Labelled Data
# The CSV is read without a header row; its two columns are named
# 'labels' and 'filepaths', in that order.
column_names=['labels','filepaths']
odf=pd.read_csv(train_csvpath,names=column_names,header=None)

### Splitting Data to Train, Val and Test

In [142]:
## Dataset Split into Train (80%) and Test (20%), stratified by label
X=odf['filepaths']
y=odf['labels']
# A fixed random_state makes the split repeatable across kernel restarts, so the
# held-out test images are the same on every run instead of being re-drawn.
n_x_train, x_test, n_y_train, y_test = train_test_split(X,y,train_size=0.8,stratify=y,random_state=42)

In [143]:
## Training Dataset Split into Train (80%) and Validation (20%) Dataset, stratified by label
# A fixed random_state keeps the train/validation membership identical across
# kernel restarts, so validation images are not re-drawn from the training pool.
x_train, x_val, y_train, y_val=train_test_split(n_x_train,n_y_train,train_size=0.8,stratify=n_y_train,random_state=42)

### Getting Normalization Mean and Standard Deviation from Training Dataset

In [145]:
# Per-image channel statistics, collected over the training split only.
# Note: despite the *_var names, these lists hold standard deviations (.std()).
r_mean,r_var=[],[]
g_mean,g_var=[],[]
b_mean,b_var=[],[]
# (means, stds) accumulator pairs, ordered by channel index 0, 1, 2.
channel_stats=((r_mean,r_var),(g_mean,g_var),(b_mean,b_var))

for rel_path in x_train:
    img_file=os.path.join(train_dir,rel_path)
    # Only files with a .jpg suffix contribute to the statistics.
    if img_file.endswith('.jpg'):
        img=plt.imread(img_file)
        # Flatten the two leading axes so that every row is one pixel.
        pixel_count=img.shape[0]*img.shape[1]
        rgb=img.reshape(pixel_count,img.shape[2])
        for channel,(means,stds) in enumerate(channel_stats):
            values=rgb[:,channel]
            means.append(values.mean())
            stds.append(values.std())

In [149]:
# Dataset-level channel means: the plain average of the per-image means.
r_mean_val,g_mean_val,b_mean_val=(np.mean(per_image) for per_image in (r_mean,g_mean,b_mean))
r_mean_val,g_mean_val,b_mean_val

(106.47606392124953, 98.50176745101983, 92.41432659748523)

In [150]:
# Dataset-level channel spread: the plain average of the per-image values.
# The *_var inputs were filled with .std() results, so these are averaged
# standard deviations rather than variances.
r_var_val,g_var_val,b_var_val=(np.mean(per_image) for per_image in (r_var,g_var,b_var))
r_var_val,g_var_val,b_var_val

(47.97671295859219, 46.67652436681641, 45.35840370185046)

### Data Transformation Configuration

In [151]:
# The channel statistics were measured on the raw plt.imread arrays, i.e. on the
# 0-255 pixel scale, but Normalize runs after v2.ToTensor(), which rescales PIL
# images to [0, 1]. The statistics are therefore brought onto the same [0, 1]
# scale; otherwise every normalized value collapses to roughly -mean/std.
PIXEL_MAX=255.0
normalize=v2.Normalize(mean=[float(m)/PIXEL_MAX for m in (r_mean_val,g_mean_val,b_mean_val)],
                         std=[float(s)/PIXEL_MAX for s in (r_var_val,g_var_val,b_var_val)])

# Augmenting pipeline: upscale, take a random 300x300 crop, apply random
# geometric distortions, then convert to a normalized tensor.
# NOTE(review): v2.ToTensor is deprecated in recent torchvision releases in favour
# of v2.ToImage() + v2.ToDtype(torch.float32, scale=True) - consider migrating.
transform=v2.Compose([
    v2.ToPILImage(),
    v2.Resize((500,500)),
    v2.RandomCrop((300,300)),
    # Colour augmentations are currently disabled:
    # v2.ColorJitter(brightness=0.5,contrast=0.5,hue=0.5,saturation=0.5),
    # v2.RandomGrayscale(p=0.2),
    v2.RandomPerspective(distortion_scale=0.6, p=0.08),
    v2.RandomRotation(degrees=(0,180)),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomVerticalFlip(p=0.02),
    v2.ToTensor(),
    normalize,
])

# Deterministic pipeline: resize straight to the 300x300 output size and
# normalize, with no random augmentation.
base_transform=v2.Compose([
    v2.ToPILImage(),
    v2.Resize((300,300)),
    v2.ToTensor(),
    normalize,
])



### Transforming and Storing Data

In [163]:
# One (filepaths, labels) frame per split, keyed by split name.
splits=(('Train',x_train,y_train),
        ('Val',x_val,y_val),
        ('Test',x_test,y_test))
datasets={split_name:pd.DataFrame(zip(paths,labels),columns=['filepaths','labels'])
          for split_name,paths,labels in splits}

In [167]:
# Root folder for the exported tensors. exist_ok=True makes the cell safe to
# re-run without a separate existence check, and raises if the path exists but
# is not a directory instead of silently continuing.
aug_folder='./augs'
os.makedirs(aug_folder,exist_ok=True)

In [168]:
TOTAL_IMG=150 ## Each Train class is topped up to 150 tensors with augmentation; Val/Test keep only their originals (at most 150)
AUG_SEED=42   ## Fixed seed so that a re-run regenerates the same augmented samples

# torchvision's random transforms draw from torch's global RNG; the source image
# of every augmented sample is drawn from a dedicated NumPy generator.
t.manual_seed(AUG_SEED)
aug_rng=np.random.default_rng(AUG_SEED)

for dataset,set_df in datasets.items():
    sets_folder=os.path.join(aug_folder,dataset)
    print(f'Dataset: {str(dataset)}:')
    for label in sorted(set_df['labels'].unique()):
        label_folder=os.path.join(sets_folder,str(label))
        img_files=[os.path.join(train_dir,filepath) for filepath in set_df.loc[set_df['labels']==label,'filepaths'].values]
        img_count=len(img_files)
        print(f'\tLabel: {str(label)}:')
        print('\t\t',end='')
        os.makedirs(label_folder,exist_ok=True)

        # Only the training split is padded with augmented copies; the other
        # splits stop after their original images, so no index is printed
        # for a file that is never written.
        n_outputs=TOTAL_IMG if dataset=='Train' else min(TOTAL_IMG,img_count)
        for i in range(n_outputs):
            print(f'{i}',end='|')
            # Indices past the originals are augmented copies of a randomly
            # chosen original image of the same label.
            is_augmented=i >= img_count
            idx=int(aug_rng.integers(0,img_count)) if is_augmented else i
            new_img_files=os.path.join(label_folder,f'{label}_{str(i).zfill(3)}.pt')
            # Pass the array exactly as read: ToPILImage expects floating-point
            # arrays to be in [0, 1], so casting 0-255 pixels to float32 first
            # corrupts the image (or is rejected by older torchvision versions).
            img=plt.imread(img_files[idx])
            if is_augmented:
                t_img=transform(img)
            else:
                t_img=base_transform(img)
            t.save(obj=t_img,f=new_img_files)
        print()

Dataset: Train:
	Label: 0:
		0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61|62|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126|127|128|129|130|131|132|133|134|135|136|137|138|139|140|141|142|143|144|145|146|147|148|149|
	Label: 1:
		0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61|62|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126|127|128|129|130|131|132|133|134|135|136|137|138|139|140|141|142|143|1