In [None]:
!pip install pretrainedmodels
!pip install imutils
!pip install opencv-python

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
import joblib
import cv2
import os
import time
import random
import pretrainedmodels
import numpy as np

from imutils import paths
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Load torch...!!!
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Load torchvision ...!!!
from torchvision import transforms

'''SEED Everything'''
def seed_everything(SEED=42):
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.benchmark = True # keep True if all the input have same size.
SEED=42
seed_everything(SEED=SEED)
'''SEED Everything'''

'SEED Everything'

In [30]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # GPU
epochs = 5 # Number of epochs
BS = 16 # Batch size

image_paths = list(paths.list_images('./101_ObjectCategories'))


data = []
labels = []
unique_labels = {}
# omit_labels = ['BACKGROUND_Google', 'brontosaurus','ferry','nautilus','trilobite','buddha','flamingo','octopus','umbrella','butterfly','flamingo_head','okapi','watch','camera','garfield','pagoda','water_lilly','cannon','gerenuk','panda','wheelchair','car_side','gramophone','pigeon','wild_cat','ceiling_fan','grand_piano','pizza','windsor_chair','cellphone','hawksbill','platypus','wrench','chair','headphone','pyramid','yin_yang','chandelier','hedgehog','revolver','cougar_body','helicopter','rhino','binocular','emu','menorah','strawberry','bonsai','euphonium','metronome','sunflower','brain','ewer','minaret','tick']
for img_path in tqdm(image_paths):
    label = img_path.split(os.path.sep)[-2]
#     if label in omit_labels:
#         continue
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(img, (64, 64), interpolation = cv2.INTER_AREA)
    

    
    if label in labels and (unique_labels[label] < 30):
        unique_labels[label] += 1
        data.append(resized)
    elif label not in unique_labels:
        unique_labels[label] = 1
        data.append(resized)
    else:
        continue
        
    labels.append(label)

labels_to_int_dict = {}
labels_to_int = []
next_int = 1

for word in labels:
    if word not in labels_to_int_dict:
        labels_to_int_dict[word] = next_int
        next_int += 1
    labels_to_int.append(labels_to_int_dict[word])

data = np.array(data) / 255.0
labels_to_int = np.array(labels_to_int)
print(data.shape)
print(labels_to_int.shape)

100%|██████████████████████████████████████| 9144/9144 [00:22<00:00, 414.20it/s]


(3060, 64, 64)
(3060,)


In [32]:
lb = LabelEncoder()
labels_to_int = lb.fit_transform(labels_to_int)
print(f"Total Number of Classes: {len(lb.classes_)}")


Total Number of Classes: 102
[0.67058824 0.6745098  0.68235294 0.68235294 0.68235294 0.68627451
 0.69019608 0.69019608 0.68627451 0.68235294 0.69019608 0.69803922
 0.70196078 0.69803922 0.69803922 0.70588235 0.70588235 0.69803922
 0.70196078 0.70980392 0.70588235 0.70588235 0.71372549 0.72156863
 0.72156863 0.71372549 0.73333333 0.76862745 0.79607843 0.81176471
 0.75686275 0.26666667 0.24705882 0.50196078 0.55294118 0.2745098
 0.09019608 0.08235294 0.61176471 0.73333333 0.71372549 0.71764706
 0.71764706 0.71372549 0.71764706 0.71372549 0.70588235 0.70980392
 0.70980392 0.70196078 0.69803922 0.69411765 0.69411765 0.69411765
 0.69411765 0.69019608 0.69411765 0.69411765 0.69411765 0.68235294
 0.67843137 0.67843137 0.67843137 0.6745098 ]


In [None]:
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])   

In [33]:
# divide the data into train, validation, and test set
(X, x_val , Y, y_val) = train_test_split(data, labels_to_int, test_size=0.3,  stratify=labels_to_int,random_state=42)

(x_train, x_test, y_train, y_test) = train_test_split(X, Y, test_size=0.5, random_state=42)
print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}")
print(f"y_train examples: {y_train.shape}\ny_test examples: {y_test.shape}\ny_val examples: {y_val.shape}")

x_train examples: (1071, 64, 64)
x_test examples: (1071, 64, 64)
x_val examples: (918, 64, 64)
y_train examples: (1071,)
y_test examples: (1071,)
y_val examples: (918,)


In [35]:
x_train = x_train.reshape(x_train.shape[0], -1)
x_val = x_val.reshape(x_val.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}")

x_train examples: (1071, 4096)
x_test examples: (1071, 4096)
x_val examples: (918, 4096)


In [36]:
from sklearn.decomposition import PCA

data = data.reshape(data.shape[0], -1)
print(data.shape)

pca = PCA()
pca.fit(x_train)

# The number of dimensions the training image features should be reduced to, which perserves 95 % of the variances
variance_ratio = np.cumsum(pca.explained_variance_ratio_)
num_dimensions = np.argmax(variance_ratio >= 0.95) + 1

print("Number of dimensions after reduction:", num_dimensions)

pca1 = PCA(n_components=num_dimensions)
pca1.fit(x_train)
reduced_data_train = pca1.transform(x_train)
print(reduced_data_train.shape)

reduced_data_test = pca1.transform(x_test)
print(reduced_data_test.shape)

reduced_data_val = pca1.transform(x_val)
print(reduced_data_val.shape)



(3060, 4096)
Number of dimensions after reduction: 237
(1071, 237)
(1071, 237)
(918, 237)


In [37]:
x_train = np.column_stack((reduced_data_train, y_train))

x_val = np.column_stack((reduced_data_val, y_val))

x_test = np.column_stack((reduced_data_test, y_test))

print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}")

x_train examples: (1071, 238)
x_test examples: (1071, 238)
x_val examples: (918, 238)


In [39]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

model=RandomForestClassifier()
model.fit(x_train,y_train)
labels_predict = model.predict(x_test)
print(accuracy_score(y_test, labels_predict))


0.24556489262371614


In [9]:
from scipy import io

matdata = {
    'labelset': x_train,
    'unlabelset': x_val,
    'testset': x_test
}
io.savemat('./caltech_101.mat', matdata)