<a href="https://www.kaggle.com/code/chiragtagadiya/arcface-inference-submission?scriptVersionId=93212122" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import sys
sys.path.append('../input/timmmaster')
import timm

### Load Libraries

In [None]:
# Preliminaries
from tqdm import tqdm
import math
import random
import os
import pandas as pd
import numpy as np

# Visuals and CV2
import cv2

# albumentations for augs
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

#torch
import torch
import timm

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader


import gc
import matplotlib.pyplot as plt
import cudf
import cuml
import cupy
from cuml.feature_extraction.text import TfidfVectorizer
from cuml import PCA
from cuml.neighbors import NearestNeighbors

## Configurations 

In [None]:
class CFG:
    """Notebook-wide settings, read as plain class attributes (never instantiated)."""

    # --- run mode ---
    isTraining = False
    # isTesting: set to True for submission (reads test.csv / test images);
    # keep False to run on the training data and compute F1 scores.
    isTesting = False
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    seed = 123

    # --- data loading ---
    image_size = 512   # images are resized to image_size x image_size
    batch_size = 12
    num_workers = 2

    # --- model ---
    model_name = 'tf_efficientnet_b4'
    classes = 11014    # default size of the classification head
    fc_dim = 512       # default width of the optional FC embedding layer
    loss_module = 'arcface'  # 'cosface' #'adacos'
    # NOTE(review): the ArcFace heads built in this notebook hard-code
    # scale=30 / margin=0.5 instead of reading these two values.
    scale = 30
    margin = 0.5

    # --- checkpoints ---
    # NOTE(review): the folder says "b3" while the file name ends in
    # "tf_efficientnet_b0" - confirm which backbone this checkpoint holds.
    model_path_arcface = '../input/pretrained-b3/Train_F1_score_0.9061769859813084valid_f1_score0.4245035046728972_Epoch_0_lr_start_2.23e-05_lr_max_0.00016_softmax_512x512_tf_efficientnet_b0.pt'
    model_path_softmax = '../input/label-classfier-model/2022-04-15_softmax_512x512_tf_efficientnet_b4.pt'

## Loading Data

In [None]:
def read_dataset():
    """Load the competition CSV selected by ``CFG.isTesting``.

    When ``CFG.isTesting`` is false, ``train.csv`` is read and a ``matches``
    column is added: for every row, the space-joined ``posting_id`` values of
    all rows sharing its ``label_group`` (used later as ground truth for F1).
    When it is true, ``test.csv`` is read unchanged.

    Returns:
        (df, df_cu): the pandas frame and a ``cudf.DataFrame`` built from it.
    """
    if CFG.isTesting:
        df = pd.read_csv('../input/shopee-product-matching/test.csv')
    else:
        df = pd.read_csv('../input/shopee-product-matching/train.csv')
        # label_group -> array of the posting ids belonging to that group
        group_members = df.groupby(['label_group'])['posting_id'].unique().to_dict()
        df['matches'] = df['label_group'].map(group_members)
        df['matches'] = df['matches'].apply(' '.join)

    # GPU copy of the frame for the cuML text pipeline
    df_cu = cudf.DataFrame(df)
    return df, df_cu

In [None]:
def f1_score(y_true, y_pred):
    """Row-wise F1 between two Series of whitespace-separated id strings.

    Each string is split on whitespace and treated as a set; the rows of the
    two Series are paired by position. The score for a pair is
    ``2 * |true & pred| / (|true| + |pred|)``.

    Returns:
        numpy array with one F1 value per row.
    """
    true_sets = y_true.apply(lambda ids: set(ids.split()))
    pred_sets = y_pred.apply(lambda ids: set(ids.split()))

    overlap = np.array([len(t & p) for t, p in zip(true_sets, pred_sets)])
    n_pred = pred_sets.apply(len).values
    n_true = true_sets.apply(len).values

    return 2 * overlap / (n_pred + n_true)

## Create Dataset

In [None]:
class ShopeeDataset(Dataset):
    """Torch dataset yielding ``(image, label)`` pairs from a posting frame.

    Args:
        df: frame with an ``image`` column (file name relative to
            ``root_dir``) and, when ``CFG.isTesting`` is false, a
            ``label_group`` column.
        root_dir: directory containing the image files.
        isTraining: accepted for call-site compatibility; not used.
        transform: optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``['image']``.
    """

    def __init__(self, df, root_dir, isTraining=False, transform=None):
        self.df = df
        self.transform = transform
        self.root_dir = root_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` as RGB and return it with its label.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        row = self.df.iloc[idx]
        image_path = os.path.join(self.root_dir, row.image)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of an opaque error inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # OpenCV loads BGR; convert to RGB before augmenting.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        # Test data has no label, so a dummy value of 1 is returned.
        label = 1 if CFG.isTesting else row.label_group
        return image, torch.tensor(label).long()
            




In [None]:


def get_test_transforms():
    """Build the inference-time albumentations pipeline.

    Steps, in order: resize to ``CFG.image_size`` squared, ``Normalize`` with
    its default arguments, then ``ToTensorV2``.
    """
    side = CFG.image_size
    steps = [
        albumentations.Resize(side, side, always_apply=True),
        albumentations.Normalize(),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(steps)

## Image Model: Product Label Classifier

### Model 1: Product Classifier with Softmax Loss

In [None]:
class ShopeeLabelGroupClassfier1(nn.Module):
    """Feature extractor: a timm backbone followed by global average pooling.

    No FC or classification layers are created; ``forward`` always returns
    the pooled backbone features.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        loss_fn, use_fc, isTraining: stored on the instance but not used by
            this class.
        classes, fc_dim: accepted for signature parity; not used.
        pretrained: forwarded to ``timm.create_model``.
    """

    def __init__(self,
                     model_name='tf_efficientnet_b0',
                     loss_fn='softmax',
                     classes = CFG.classes,
                     fc_dim = CFG.fc_dim,
                     pretrained=False,
                     use_fc=True,
                     isTraining=False
                ):
        super().__init__()

        # Backbone with its classifier and pooling replaced by identities, so
        # it emits the raw feature map; pooling is done by self.pooling.
        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        self.backbone.classifier = nn.Identity()
        self.backbone.global_pool = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.use_fc = use_fc
        self.loss_fn = loss_fn
        self.isTraining = isTraining

    def forward(self, image, label):
        """Return pooled features for ``image``; ``label`` is ignored."""
        return self.get_features(image)

    def get_features(self, inp):
        """Run the backbone, average-pool, and flatten to one row per sample."""
        batch_dim = inp.shape[0]
        inp = self.backbone(inp)
        inp = self.pooling(inp).view(batch_dim, -1)
        return inp
    
    

In [None]:
class ShopeeLabelGroupClassfier(nn.Module):
    """timm backbone + optional FC embedding + softmax classification head.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        loss_fn: when ``'softmax'`` a linear classification head is created.
        classes: number of output classes of the head.
        fc_dim: width of the FC embedding layer (when ``use_fc``).
        pretrained: forwarded to ``timm.create_model``.
        use_fc: create dropout -> linear -> batch-norm on top of the pooled
            features.
        isTraining: when true (and ``use_fc``), ``get_features`` applies the
            FC stack; otherwise it returns the pooled backbone features.
    """

    def __init__(self,
                     model_name='tf_efficientnet_b0',
                     loss_fn='softmax',
                     classes = CFG.classes,
                     fc_dim = CFG.fc_dim,
                     pretrained=False,
                     use_fc=True,
                     isTraining=False
                ):
        super().__init__()

        # Backbone with classifier and pooling replaced by identities, so it
        # emits the raw feature map; pooling is done by self.pooling.
        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        self.backbone.global_pool = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.use_fc = use_fc
        self.loss_fn = loss_fn
        self.isTraining = isTraining

        # Optional FC stack on top of the pooled features.
        if self.use_fc:
            self.dropout = nn.Dropout(0.2)
            self.fc = nn.Linear(in_features, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            in_features = fc_dim

        if self.loss_fn == 'softmax':
            # Honour the `classes` argument (it defaults to CFG.classes).
            self.final = nn.Linear(in_features, classes)

    def forward(self, image, label):
        """Return logits when training with softmax, else the features.

        ``label`` is not used.
        """
        features = self.get_features(image)
        # NOTE(review): this reads the global CFG.isTraining while
        # get_features reads self.isTraining. If CFG.isTraining is true but
        # the instance flag is false (with use_fc), the head receives
        # features of the wrong width - confirm which flag is intended.
        if self.loss_fn == 'softmax' and CFG.isTraining:
            return self.final(features)
        return features

    def get_features(self, inp):
        """Pooled backbone features, passed through the FC stack if enabled."""
        batch_dim = inp.shape[0]
        inp = self.backbone(inp)
        inp = self.pooling(inp).view(batch_dim, -1)
        if self.use_fc and self.isTraining:
            inp = self.dropout(inp)
            inp = self.fc(inp)
            inp = self.bn(inp)
        return inp
    
    

### Model 2: Product Classifier with ArcFace Loss

In [None]:
class ArcFaceModule(nn.Module):
    """Additive angular margin head that also computes its own loss.

    Args:
        in_features: embedding width.
        out_features: number of classes.
        scale: factor applied to the margin-adjusted cosines.
        margin: angular margin (radians) added to the target-class angle.
        easy_margin: if true, the margin is only applied where cosine > 0.
        ls_eps: label-smoothing amount applied to the one-hot targets.
    """

    def __init__(self, in_features, out_features, scale, margin, easy_margin=False, ls_eps=0.0 ):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.easy_margin = easy_margin
        self.ls_eps = ls_eps
        # Constants for cos(theta + margin) and for the fallback used when
        # theta + margin would exceed pi.
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        """Return ``(scaled_logits, cross_entropy_loss)``.

        Args:
            input: ``(batch, in_features)`` embeddings.
            label: class indices, one per sample.
        """
        # With both operands L2-normalised the dot product is cos(theta).
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| slightly above 1, which would make the
        # square root NaN; clamp the radicand into [0, 1].
        sine = torch.sqrt(torch.clamp(1.0 - torch.pow(cosine, 2), 0.0, 1.0))
        # phi = cos(theta + margin) = cos(theta)cos(m) - sin(theta)sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        # Build the one-hot targets on the same device/dtype as the logits
        # rather than on a global device setting.
        one_hot = torch.zeros(cosine.size(), device=cosine.device, dtype=cosine.dtype)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features

        # Target class gets phi, every other class keeps its plain cosine.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output = output * self.scale
        return output, nn.CrossEntropyLoss()(output, label)

In [None]:
   
class ShopeeEncoderBackBone1(nn.Module):
    """Inference-only encoder: returns pooled backbone features.

    The FC stack and the classification head are still created, so the
    module has parameters under ``fc``, ``bn`` and ``final``, but neither
    ``forward`` nor ``get_features`` applies them.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        loss_fn: ``'softmax'`` builds a linear head, ``'ArcFace'`` an
            ``ArcFaceModule`` (scale 30, margin 0.5).
        classes: number of classes of the head.
        fc_dim: width of the FC embedding layer (when ``use_fc``).
        pretrained: forwarded to ``timm.create_model``.
        use_fc: create dropout -> linear -> batch-norm layers.
        isTraining: stored on the instance; not used by this class.
    """

    def __init__(self,
                     model_name='tf_efficientnet_b3',
                     loss_fn='ArcFace',
                     classes = CFG.classes,
                     fc_dim = CFG.fc_dim,
                     pretrained=False,
                     use_fc=True,
                     isTraining=False
                ):
        super().__init__()

        # Backbone with classifier and pooling replaced by identities, so it
        # emits the raw feature map; pooling is done by self.pooling.
        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        self.backbone.global_pool = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.use_fc = use_fc
        self.loss_fn = loss_fn
        self.isTraining = isTraining

        if self.use_fc:
            self.dropout = nn.Dropout(0.2)
            self.fc = nn.Linear(in_features, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self.init_params()
            in_features = fc_dim

        # Honour the `classes` argument (it defaults to CFG.classes).
        if self.loss_fn == 'softmax':
            self.final = nn.Linear(in_features, classes)
        elif self.loss_fn == 'ArcFace':
            self.final = ArcFaceModule( in_features,
                                        classes,
                                        scale = 30,
                                        margin = 0.5,
                                        easy_margin = False,
                                        ls_eps = 0.0)

    def forward(self, image, label):
        """Return pooled backbone features; ``label`` is ignored."""
        return self.get_features(image)

    def init_params(self):
        """Xavier-normal FC weights, zero biases, unit batch-norm scale."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def get_features(self, inp):
        """Run the backbone, average-pool, and flatten to one row per sample."""
        batch_dim = inp.shape[0]
        inp = self.backbone(inp)
        inp = self.pooling(inp).view(batch_dim, -1)
        return inp
    
    
# shoppe_label_classfier = ShopeeLabelGroupClassfier()


In [None]:
   
class ShopeeEncoderBackBone(nn.Module):
    """timm backbone + optional FC embedding + softmax or ArcFace head.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        loss_fn: ``'softmax'`` builds a linear head, ``'ArcFace'`` an
            ``ArcFaceModule`` (scale 30, margin 0.5).
        classes: number of classes of the head.
        fc_dim: width of the FC embedding layer (when ``use_fc``).
        pretrained: forwarded to ``timm.create_model``.
        use_fc: create dropout -> linear -> batch-norm layers.
        isTraining: when true, ``forward`` runs the head and ``get_features``
            applies the FC stack; when false, pooled backbone features are
            returned.
    """

    def __init__(self,
                     model_name='tf_efficientnet_b3',
                     loss_fn='ArcFace',
                     classes = CFG.classes,
                     fc_dim = CFG.fc_dim,
                     pretrained=False,
                     use_fc=True,
                     isTraining=False
                ):
        # The original named a different class in super(...), which raises
        # TypeError on construction; the zero-argument form avoids that.
        super().__init__()

        # Backbone with classifier and pooling replaced by identities, so it
        # emits the raw feature map; pooling is done by self.pooling.
        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        self.backbone.global_pool = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.use_fc = use_fc
        self.loss_fn = loss_fn
        self.isTraining = isTraining

        if self.use_fc:
            self.dropout = nn.Dropout(0.2)
            self.fc = nn.Linear(in_features, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self.init_params()
            in_features = fc_dim

        # Honour the `classes` argument (it defaults to CFG.classes).
        if self.loss_fn == 'softmax':
            self.final = nn.Linear(in_features, classes)
        elif self.loss_fn == 'ArcFace':
            self.final = ArcFaceModule( in_features,
                                        classes,
                                        scale = 30,
                                        margin = 0.5,
                                        easy_margin = False,
                                        ls_eps = 0.0)

    def forward(self, image, label):
        """Return the head output when training, otherwise the features.

        With the ArcFace head the training output is the
        ``(logits, loss)`` pair produced by ``ArcFaceModule``; with the
        softmax head it is the logits tensor.
        """
        features = self.get_features(image)
        if not self.isTraining:
            return features
        if self.loss_fn == 'softmax':
            # nn.Linear takes the features only; passing the label as a
            # second argument would raise TypeError.
            return self.final(features)
        return self.final(features, label)

    def init_params(self):
        """Xavier-normal FC weights, zero biases, unit batch-norm scale."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def get_features(self, inp):
        """Pooled backbone features, passed through the FC stack if enabled."""
        batch_dim = inp.shape[0]
        inp = self.backbone(inp)
        inp = self.pooling(inp).view(batch_dim, -1)
        if self.use_fc and self.isTraining:
            inp = self.dropout(inp)
            inp = self.fc(inp)
            inp = self.bn(inp)
        return inp
    
    
# shoppe_label_classfier = ShopeeLabelGroupClassfier()


In [None]:
# Image folders of the competition data set (training and test images).
TRAIN_DIR = '../input/shopee-product-matching/train_images'
TEST_DIR = '../input/shopee-product-matching/test_images'

### Get Text Embeddings

In [None]:
def get_text_embeddings(df_cu, max_features = 15000, n_components = 5000):
    """Embed the ``title`` column with binary TF-IDF followed by PCA.

    Args:
        df_cu: frame with a ``title`` column, passed to the cuML vectorizer.
        max_features: vocabulary cap for the TF-IDF vectorizer.
        n_components: requested PCA dimensionality. It is capped at the
            number of rows and at the number of TF-IDF columns, because PCA
            cannot yield more components than either.

    Returns:
        The PCA-reduced embeddings after ``.get()`` (host-side array).
    """
    vectorizer = TfidfVectorizer(stop_words = 'english', binary = True, max_features = max_features)
    text_embeddings = vectorizer.fit_transform(df_cu['title']).toarray()

    # Small inputs (e.g. a handful of test rows) or a small vocabulary would
    # otherwise request more components than the data can provide.
    n_components = min(n_components, *text_embeddings.shape)
    pca = PCA(n_components = n_components)
    text_embeddings = pca.fit_transform(text_embeddings).get()
    print(f'Our title text embedding shape is {text_embeddings.shape}')

    # Drop the fitted models before returning to release their memory.
    del vectorizer, pca
    gc.collect()
    return text_embeddings

### Get Image Embeddings

In [None]:
def get_image_embeddings(data, root_dir, model_path = CFG.model_path_arcface):
    """Compute one embedding per row of ``data`` with ``ShopeeEncoderBackBone1``.

    Args:
        data: frame handed to ``ShopeeDataset`` (one image per row).
        root_dir: directory containing the image files.
        model_path: checkpoint loaded with ``load_state_dict``.

    Returns:
        numpy array of all batch outputs concatenated in row order.
    """
    model = ShopeeEncoderBackBone1()
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=CFG.device))
    model = model.to(CFG.device)
    model.eval()

    image_dataset = ShopeeDataset(data, root_dir, isTraining=False, transform=get_test_transforms())
    image_loader = torch.utils.data.DataLoader(
        image_dataset,
        batch_size=CFG.batch_size,
        pin_memory=True,
        drop_last=False
    )

    embeds = []
    with torch.no_grad():
        for img, label in tqdm(image_loader):
            # Move the batch to the device the model lives on; a hard-coded
            # .cuda() would fail whenever CFG.device is 'cpu'.
            img = img.to(CFG.device)
            label = label.to(CFG.device)
            feat = model(img, label)
            embeds.append(feat.detach().cpu().numpy())

    del model
    image_embeddings = np.concatenate(embeds)
    print(f'Our image embeddings shape is {image_embeddings.shape}')
    del embeds
    gc.collect()
    return image_embeddings

### Get Top k Neighbors

In [None]:
def _matches_within(df, distances, indices, threshold, show_progress=False):
    """For each row, return the posting_ids of neighbours closer than ``threshold``."""
    posting_ids = df['posting_id'].values
    rows = range(distances.shape[0])
    if show_progress:
        rows = tqdm(rows)
    return [posting_ids[indices[k, np.where(distances[k,] < threshold)[0]]] for k in rows]


def get_neighbors(df, embeddings, KNN = 50, isImage=False, metric_param = 'cosine',
                  image_threshold = 0.3, text_threshold = 0.17):
    """Find, for every row, the neighbouring postings within a distance threshold.

    When ``CFG.isTesting`` is false, a range of thresholds is swept, the mean
    F1 against ``df['matches']`` is printed for each, and the best-scoring
    threshold is used for the returned predictions. As a side effect ``df``
    gains ``pred_matches`` and ``f1`` columns holding the values of the LAST
    threshold tried (not the best one).

    When ``CFG.isTesting`` is true, a fixed threshold is used:
    ``image_threshold`` if ``isImage`` else ``text_threshold``.

    Args:
        df: frame with a ``posting_id`` column (and ``matches`` when not
            testing), row-aligned with ``embeddings``.
        embeddings: 2-D array, one row per posting.
        KNN: number of neighbours to retrieve; capped at the number of rows.
        isImage: selects the image or text threshold settings.
        metric_param: distance metric passed to ``NearestNeighbors``.
        image_threshold: fixed test-time threshold for image embeddings.
        text_threshold: fixed test-time threshold for text embeddings.

    Returns:
        (df, predictions): the same frame and a list with one array of
        posting_ids per row.
    """
    print(embeddings.shape)
    # A neighbour count larger than the data set cannot be satisfied (e.g. a
    # test file with only a few rows).
    KNN = min(KNN, embeddings.shape[0])
    model = NearestNeighbors(n_neighbors = KNN, metric=metric_param)
    model.fit(embeddings)
    distances, indices = model.kneighbors(embeddings)
    print(distances.shape)

    if not CFG.isTesting:
        if isImage:
            thresholds = list(np.arange(0.01, 4, 0.5))
        else:
            thresholds = list(np.arange(0.1, 1, 0.1))

        # Score every candidate threshold against the ground-truth matches.
        scores = []
        for threshold in thresholds:
            df['pred_matches'] = [
                ' '.join(ids) for ids in _matches_within(df, distances, indices, threshold)
            ]
            df['f1'] = f1_score(df['matches'], df['pred_matches'])
            score = df['f1'].mean()
            print(f'Our f1 score for threshold {threshold} is {score}')
            scores.append(score)

        thresholds_scores = pd.DataFrame({'thresholds': thresholds, 'scores': scores})
        max_score = thresholds_scores[thresholds_scores['scores'] == thresholds_scores['scores'].max()]
        best_threshold = max_score['thresholds'].values[0]
        best_score = max_score['scores'].values[0]
        print(f'Our best score is {best_score} and has a threshold {best_threshold}')

        print("for training time")
        predictions = _matches_within(df, distances, indices, best_threshold)
    else:
        # No labels at test time, so a fixed, pre-chosen threshold is used.
        threshold = image_threshold if isImage else text_threshold
        predictions = _matches_within(df, distances, indices, threshold, show_progress=True)

    del model, distances, indices
    gc.collect()
    return df, predictions

## Preparing for submission

In [None]:
# Load the data set as a pandas frame plus a cudf copy, then preview the first rows.
df,df_cu = read_dataset()
df.head()

In [None]:
# Image folder matching the data set chosen by CFG.isTesting.
_dir = TEST_DIR if CFG.isTesting else TRAIN_DIR

In [None]:
# Compute the image embeddings for every row of df and save a copy to disk.
image_embeddings = get_image_embeddings(df,_dir )
np.save("image_embeddings_0.9_training",image_embeddings)

In [None]:
# Compute the title (text) embeddings from the cudf frame and save a copy to disk.
text_embeddings = get_text_embeddings(df_cu, max_features = 15000, n_components = 5000)
np.save("text_embeddings_0.9_training",text_embeddings)

In [None]:
# Get image model predictions: per row, the posting_ids of its close neighbours
# among the image embeddings.
df,image_predictions = get_neighbors(df, image_embeddings, KNN = 50, isImage = True, metric_param='cosine')

In [None]:
# Get text model predictions: per row, the posting_ids of its close neighbours
# among the text embeddings.
df, text_predictions = get_neighbors(df, text_embeddings, KNN = 50, isImage=False,metric_param='cosine')


In [None]:
def combine_predictions(row):
    """Merge a row's image and text predictions into one id string.

    Returns the sorted, de-duplicated union of ``row['image_predictions']``
    and ``row['text_predictions']``, joined with single spaces.
    """
    merged = np.concatenate((row['image_predictions'], row['text_predictions']))
    distinct = np.unique(merged)
    return ' '.join(distinct)

In [None]:
class Test_config:
    """Switches selecting which predictions ``combine_predictions_conditional`` merges."""

    # Both flags are plain booleans. The original `isText=True,` carried a
    # trailing comma that turned the value into the tuple (True,).
    isText = True
    isImage = True
    
    
def combine_predictions_conditional(row):
    """Merge a row's predictions according to the ``Test_config`` flags.

    Both flags truthy -> union of image and text predictions; only
    ``isImage`` truthy -> image predictions; otherwise text predictions.
    The result is the sorted, de-duplicated ids joined with spaces.
    """
    use_image = Test_config.isImage
    if use_image and Test_config.isText:
        candidates = np.concatenate([row['image_predictions'], row['text_predictions']])
    else:
        # A single source needs no concatenation; np.unique de-duplicates it.
        source = 'image_predictions' if use_image else 'text_predictions'
        candidates = row[source]
    return ' '.join(np.unique(candidates))

In [None]:
# For cv and testing
df['image_predictions'] = image_predictions
df['text_predictions'] = text_predictions

if not CFG.isTesting:
    # (use image, use text, label used in the printed message, output file)
    cv_runs = [
        (True, True, 'Text and Image Model',
         'submission_0.9_cosine_textTh_0.17_imageTh_0.3_both_text_image.csv'),
        (True, False, 'Image Model',
         'submission_0.9_cosine_imageTh_0.3_only_image.csv'),
        (False, True, 'Text Model',
         'submission_0.9_cosine_textTh_0.17_only_text.csv'),
    ]
    for use_image, use_text, model_label, csv_name in cv_runs:
        # Set both flags explicitly so a re-run of this cell does not inherit
        # the flags left behind by the previous run.
        Test_config.isImage = use_image
        Test_config.isText = use_text
        df['pred_matches'] = df.apply(combine_predictions_conditional, axis = 1)
        df['f1'] = f1_score(df['matches'], df['pred_matches'])
        score = df['f1'].mean()
        print(f'Our final f1 cv score is for {model_label} {score}')
        # Keep df['matches'] as the ground truth: overwriting it with the
        # predictions would make every later F1 compare predictions against
        # predictions. Only the written file uses the 'matches' column name.
        submission = df[['posting_id', 'pred_matches']].rename(columns={'pred_matches': 'matches'})
        submission.to_csv(csv_name, index = False)
else:
    df['matches'] = df.apply(combine_predictions_conditional, axis = 1)
    df[['posting_id', 'matches']].to_csv('submission.csv', index = False)

In [None]:
# Preview the first rows of the frame after the prediction columns were added.
df.head()