In [30]:
# Imported here as well because this cell comes before the notebook's import
# cell; without it a fresh "Restart & Run All" stops with NameError on `torch`.
import torch

# Dataset locations: per-class training folders and the flat test image folder.
train_dir = '../input/blg453e-competition-2-2021/image-classification/imagenet_50/train'
test_dir = '../input/blg453e-competition-2-2021/image-classification/imagenet_50/test/imgs'

# NOTE(review): torch.load unpickles the checkpoint, which can execute arbitrary
# code -- only load .pth files that come from a trusted source.
# Each checkpoint is loaded as a whole module object and moved to the GPU.
model1 = torch.load('../input/blg453e-competition-2-2021/image-classification/model_1.pth').cuda()
model2 = torch.load('../input/blg453e-competition-2-2021/image-classification/model_2.pth').cuda()
model3 = torch.load('../input/blg453e-competition-2-2021/image-classification/model_3.pth').cuda()
model4 = torch.load('../input/blg453e-competition-2-2021/image-classification/model_4.pth').cuda()

# Put every network into evaluation mode before it is used for feature extraction.
model1.eval()
model2.eval()
model3.eval()
model4.eval()

In [11]:
from PIL import Image
import PIL
from torchvision import transforms
import torch
import torchvision.models as models
import os, glob
import numpy as np
from imgaug import augmenters as iaa
from imgaug import parameters as iap
import imgaug as ia

In [33]:

# Augmentation pipeline used by prepare_train_from_folder to create one randomly
# perturbed copy of every training image. The top-level steps run in random order.
seq_complex = iaa.Sequential(
    [
        iaa.Fliplr(0.5),  # horizontal flip for 50% of images
        iaa.Flipud(0.5),  # vertical flip for 50% of images
        # Affine transform applied to 50% of images: per-axis scaling to 80-120%,
        # rotation between -45 and +45 degrees, interpolation order drawn from
        # {0, 1}, fill value drawn from 0-255, and any of imgaug's fill modes.
        iaa.Sometimes(0.5, iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            rotate=(-45, 45),
            order=[0, 1],
            cval=(0, 255),
            mode=ia.ALL
        )),
        # Apply between 0 and 3 of the following pixel-level augmenters, in random order.
        iaa.SomeOf((0, 3),
            [
                # Exactly one of the three blur variants.
                iaa.OneOf([
                    iaa.GaussianBlur((0, 3.0)),
                    iaa.AverageBlur(k=(2, 7)),
                    iaa.MedianBlur(k=(3, 11)),
                ]),
                # Gaussian noise with a scale of up to 5% of the 0-255 range;
                # per_channel=0.5 samples the noise per channel for half of the images.
                iaa.AdditiveGaussianNoise(
                    loc=0, scale=(0.0, 0.05*255), per_channel=0.5
                ),
                # Contrast change with a factor drawn from 0.5-2.0.
                iaa.LinearContrast((0.5, 2.0), per_channel=0.5),
            ],
            random_order=True
        )
    ],
    random_order=True
)


def obtain_features(model, img):
	"""Run a single image through ``model`` and return the raw output.

	The image goes through Resize(256), CenterCrop(224), ToTensor and a
	per-channel Normalize, is wrapped into a batch of one, and is evaluated
	with gradient tracking disabled.

	Args:
		model: the network to evaluate. The input is moved to the device of
			the model's first parameter; if the model exposes no parameters
			the input is moved to CUDA, as before.
		img: PIL image (the callers in this notebook pass RGB images).

	Returns:
		The model output for the one-image batch.
	"""
	preprocess = transforms.Compose([
		transforms.Resize(256),
		transforms.CenterCrop(224),
		transforms.ToTensor(),
		transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
	])

	# Follow the model's own device instead of assuming CUDA, so the function
	# also works for a model that was left on the CPU or on another GPU.
	params = getattr(model, 'parameters', None)
	first_param = next(iter(params()), None) if callable(params) else None
	device = first_param.device if first_param is not None else torch.device('cuda')

	input_batch = preprocess(img).unsqueeze(0).to(device)

	with torch.no_grad():
		output = model(input_batch)

	return output


def prepare_train_from_folder(dir, model1, model2, model3, model4):
	"""Build the training feature matrix from a folder of per-class sub-folders.

	Every ``*.JPEG`` file in ``dir/<class>/`` contributes two rows: the features
	of the image itself and the features of one randomly augmented copy made
	with ``seq_complex``. A row is the flattened outputs of the four models,
	concatenated in the order model1..model4.

	Args:
		dir: root folder containing one sub-folder per class. The label of a
			class is its index in ``sorted(os.listdir(dir))``.
		model1, model2, model3, model4: networks handed to ``obtain_features``.

	Returns:
		``(train, train_labels)``: float64 arrays of shape
		``(2 * n_images, 7936)`` and ``(2 * n_images,)``.
	"""
	feature_dim = 7936  # length of the concatenated four-model feature vector
	models = (model1, model2, model3, model4)

	def extract(image):
		# Flatten each model's output and join them into one 1-D vector.
		return np.concatenate(
			[obtain_features(m, image).cpu().numpy() for m in models], axis=None
		)

	# Collect the files first so that the array size and the loop always agree.
	# Previously the rows were counted with a *.JPEG glob while the loop walked
	# every directory entry, so a stray file overflowed the array.
	samples = []
	for label, class_name in enumerate(sorted(os.listdir(dir))):
		class_dir = os.path.join(dir, class_name)
		pattern = os.path.join(glob.escape(class_dir), '*.JPEG')
		samples.extend((label, path) for path in sorted(glob.glob(pattern)))

	train = np.zeros((2 * len(samples), feature_dim))
	train_labels = np.zeros((2 * len(samples),))

	for k, (label, path) in enumerate(samples):
		# convert() returns an independent in-memory image, so the file handle
		# can be released immediately.
		with Image.open(path) as handle:
			img = handle.convert('RGB')

		train[2 * k, :] = extract(img)

		aug = seq_complex(images=[np.asarray(img)])
		aug_img = PIL.Image.fromarray(np.uint8(aug[0]))
		train[2 * k + 1, :] = extract(aug_img)

		# The original and its augmented copy share the same class label.
		train_labels[2 * k:2 * k + 2] = label

	return train, train_labels

In [16]:
# Extract features for every training image (plus its augmented copy) with the four loaded models.
train1, train_labels1 = prepare_train_from_folder(train_dir, model1, model2, model3, model4)

In [28]:
# Persist the arrays produced by the feature-extraction cell. That cell binds
# its results to `train1` / `train_labels1`; saving `train` / `train_labels`
# only worked when those names were left over from earlier kernel state.
# The two arrays are written back-to-back and must be read in the same order.
with open('train.npy', 'wb') as f:
    np.save(f, train1)
    np.save(f, train_labels1)

In [31]:
def prepare_test_from_folder(dir, model1, model2, model3, model4):
	"""Build the test feature matrix from a flat folder of images.

	Each ``*.JPEG`` file directly inside ``dir`` yields one row holding the
	flattened outputs of the four models, concatenated in the order
	model1..model4. Files are processed in sorted order.

	Args:
		dir: folder containing the test images.
		model1, model2, model3, model4: networks handed to ``obtain_features``.

	Returns:
		``(test, test_names)``: a float64 array of shape ``(n_images, 7936)``
		and the list of file names (without directory), row-aligned with it.
	"""
	feature_dim = 7936  # length of the concatenated four-model feature vector
	models = (model1, model2, model3, model4)

	# Iterate over exactly the files that size the array. Previously the rows
	# were counted with a *.JPEG glob while the loop walked every directory
	# entry, so a stray file overflowed the array.
	image_paths = sorted(glob.glob(os.path.join(glob.escape(dir), '*.JPEG')))

	test = np.zeros((len(image_paths), feature_dim))
	test_names = []

	for row, path in enumerate(image_paths):
		# convert() returns an independent in-memory image, so the file handle
		# can be released immediately.
		with Image.open(path) as handle:
			img = handle.convert('RGB')

		test[row, :] = np.concatenate(
			[obtain_features(m, img).cpu().numpy() for m in models], axis=None
		)
		test_names.append(os.path.basename(path))

	return test, test_names

In [34]:
# Extract features for every test image with the four loaded models; test_names keeps the matching file names.
test, test_names = prepare_test_from_folder(test_dir, model1, model2, model3, model4)

In [35]:
# Store the feature matrix followed by the file names in a single file;
# they have to be read back in this same order.
with open('test.npy', 'wb') as out_file:
    for payload in (test, test_names):
        np.save(out_file, payload)

In [36]:
# Sanity check: show the extracted test feature matrix.
print(test)

In [2]:
from PIL import Image
import PIL
from torchvision import transforms
import torch
import torchvision.models as models
import os, glob
import numpy as np
from imgaug import augmenters as iaa
from imgaug import parameters as iap
import imgaug as ia

In [4]:
# Reload the cached training data. The file holds two arrays written one after
# the other, so they are read back with two consecutive np.load calls:
# first the feature matrix, then the labels.
with open('train.npy', 'rb') as f:
    train = np.load(f)
    train_labels = np.load(f)

In [6]:
from sklearn.feature_selection import VarianceThreshold

# Remove low-variance feature columns (threshold 0.3), printing the matrix
# shape before and after. `selector` is kept so the same columns can be
# selected from the test features later.
print(train.shape)
selector = VarianceThreshold(threshold=0.3)
selector.fit(train)
train = selector.transform(train)

print(train.shape)

In [43]:
# Shape of the training matrix after variance-based feature selection.
train.shape

In [7]:
from sklearn.decomposition import PCA
# Project the selected features onto 2000 principal components.
# copy=False lets PCA work on `train` in place, so the fit and the projection
# are done in one fit_transform call rather than fit() followed by transform().
# `pca` is kept so the test features can be projected with the same components.
pca = PCA(n_components=2000, copy=False)
train = pca.fit_transform(train)
print(train.shape)

In [None]:
# Reload the cached test data: first the feature matrix, then the second array
# in the file. NOTE: despite its name, `test_labels` holds the test image file
# names (the array saved as `test_names`), not class labels.
with open('test.npy', 'rb') as f:
    test = np.load(f)
    test_labels = np.load(f)

In [37]:
# Apply the transformations fitted on the training set to the test features:
# variance-based column selection first, then the PCA projection.
test = pca.transform(selector.transform(test))
print(test.shape)

In [10]:
import cudf, cuml
from cuml.neighbors import KNeighborsClassifier as cuKNeighbors

# Fit a cuML k-nearest-neighbours classifier (k=120) on the reduced training
# features; the labels are the class-folder indices stored in train_labels.
knn = cuKNeighbors(n_neighbors=120)
knn.fit(train,np.asarray(train_labels))

In [20]:
# Sanity check: show the reduced training features.
print(train)

In [21]:
# Sanity check: show the reduced test features.
print(test)

In [39]:
# Predict a class index for every test row with the fitted kNN classifier.
y_hat = knn.predict(test)

In [40]:
# Sanity check: show the predicted class indices.
print(y_hat)

In [41]:
import numpy as np

train_dir = '../input/blg453e-competition-2-2021/image-classification/imagenet_50/train'

# Class indices were assigned from the sorted list of training folders, so the
# same sorted list turns each predicted index back into a folder name.
train_folders = sorted(os.listdir(train_dir))
score_of_img = []
for y_hat_i in y_hat:
    score_of_img.append(train_folders[int(y_hat_i)])

# One "<file name>,<predicted class folder>" row per test image.
rows = list(zip(test_labels, score_of_img))
np.savetxt('output1.csv', rows, delimiter=',', fmt='%s')