In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
from torchvision import transforms
from PIL import Image
from sklearn.neighbors import NearestNeighbors
import joblib

!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.0.tar.gz (20 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.0-py3-none-any.whl size=16035 sha256=7ffc52ee7de297c8a9ec6baa5ab2ec180fa0e3e6791afdd2371ad7d6b6294ba5
  Stored in directory: /root/.cache/pip/wheels/b7/cc/0d/41d384b0071c6f46e542aded5f8571700ace4f1eb3f1591c29
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.0


In [2]:
from pathlib import Path

In [3]:
LIB_DIR=Path('/kaggle/input/digix-gallery/gallery/')
MODEL_WEIGHTS= Path('/kaggle/input/digix-ai-1st-attempt/eff_net_w_2.pt')
ball_tree_dump_file = 'library_ball_tree.sav'
lib_files_dump_file = 'library_files_list.sav'
RESCALE_SIZE=224

In [4]:
eval_on_gpu = torch.cuda.is_available()

if eval_on_gpu:
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

In [5]:
lib_files = sorted(list(LIB_DIR.rglob('*.jpg')))

In [6]:
from torch.utils.data import Dataset, DataLoader

In [7]:
class GoodsDataset(Dataset):
    def __init__(self, files):
        super().__init__()
        # список файлов для загрузки
        self.files = sorted(files)

        self.len_ = len(self.files)
            
    def __len__(self):
        return self.len_
      
    def load_sample(self, file):
        image = Image.open(file)
        image.load()
        return image
    
    def __getitem__(self, index):
        # для преобразования изображений в тензоры PyTorch и нормализации входа
        data_transforms = transforms.Compose([
            transforms.Resize(RESCALE_SIZE),
            transforms.CenterCrop(RESCALE_SIZE),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        x = self.load_sample(self.files[index])
        if (len(x.mode) < 2):
            x = transforms.Grayscale(3)(x)
            
        transforms.functional.adjust_saturation(img=x,saturation_factor=1.25)
        transforms.functional.adjust_gamma(img=x, gamma=0.25)
        
        x = data_transforms(x)
        return x

In [8]:
lib_dataset = GoodsDataset(lib_files)

In [9]:
import torch.nn as nn

In [10]:
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_name('efficientnet-b1')

additional_ftrs = 4096
n_classes = 3094
#Изменяем выходные слои модели
num_ftrs_resnext = model._fc.in_features
new_fc_seq = nn.Sequential(
    nn.Linear(num_ftrs_resnext,additional_ftrs),
    nn.LeakyReLU(),
    nn.Linear(additional_ftrs, n_classes)
)
model._fc = new_fc_seq

model.load_state_dict(torch.load(MODEL_WEIGHTS))

<All keys matched successfully>

In [11]:
data_loader = DataLoader(lib_dataset, batch_size=64, shuffle=False)
outputs = 0
prepare_model(model)
model.to(DEVICE)
model.eval()
with torch.no_grad():
    for inputs in data_loader:
        inputs = inputs.to(DEVICE)
        outputs = model(inputs).cpu()
        print(type(outputs))
        break

NameError: name 'prepare_model' is not defined

In [12]:
np.array(outputs).shape

()

In [13]:
def library_processing(model, dataset, batch_size):
    feature_vector = np.empty((0, 4096), float)
    
    model.eval()
    
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    with torch.no_grad():
        for inputs in data_loader:
            inputs = inputs.to(DEVICE)
            outputs = model(inputs).cpu()
            feature_vector = np.append(feature_vector, np.array(outputs), axis=0)
            
    return feature_vector
            

In [14]:
def prepare_model(model):
    fc_without_last_layer = list(model._fc.children())[:-2]
    model._fc = torch.nn.Sequential(*fc_without_last_layer)

In [15]:
prepare_model(model)

model.to(DEVICE)
lib_feature_vector = library_processing(model, lib_dataset, batch_size=64)

In [16]:
lib_tree = NearestNeighbors(n_neighbors=10, metric="cosine")
lib_tree.fit(lib_feature_vector)

NearestNeighbors(metric='cosine', n_neighbors=10)

In [17]:
joblib.dump(lib_tree, ball_tree_dump_file)

joblib.dump(lib_files, lib_files_dump_file)

['library_files_list.sav']