## Build semantic tree
We need to build a semantic hash of the different leaf categories so that when we get the embedding from the network, we can quickly determine which to which class our picture belongs.
After we determine the leaf class, we need to check with all the items in that set.

In [1]:
import os
import PIL
import torch
import loader
import pickle

from tqdm import tqdm
from lshash.lshash import LSHash
from torch.utils.data import DataLoader
from model.resnet50 import ResNet50

IMG_DIR = '../images'
TRAIN_IMG = 'train'
INDEX_IMG = 'eval/index'

## Get index image embeddings
If we loop through all the training images and get their model embeddings, we could make a kind of prototype for each class.
The `index` contains our 1.1m images we have to search through. What I'm thinking:
- Run each of the images through our model to get its embeddings.
- Save it as a dictionary {uuid: embedding(or hash)}
- For each query image, run it through LSH and use that to get its approx knn

### Load model

In [2]:
model = ResNet50()
checkpoint = torch.load('./.checkpoints/model.pt')
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

embDataset = loader.eProductTestDataset('../images/meta', 'index.csv')
embLoader = DataLoader(embDataset, batch_size=8)

### Loop through each index image and get its embedding
This will allow us to create the dictionary of image embeddings and uuids for similarity comparison later on.

In [3]:
embs = {}
hashtable = LSHash(hash_size=20, 
                   input_dim=3)
with torch.no_grad():
        for i, (uuids, imgs) in tqdm(enumerate(embLoader)):
            preds = model(imgs)
            #print(preds[][0].sum())
            # Concat level
            preds_concat = torch.concat([torch.argmax(preds[0], dim=1).reshape((8,1)),
                                        torch.argmax(preds[1], dim=1).reshape((8,1)),
                                        torch.argmax(preds[2], dim=1).reshape(8, 1)], dim=1)
                                         #preds[2], dim=1)
            #if not hashtable:
            #     hashtable = lshashing.LSHRandom(preds_concat[0], hash_len=500)
            
            for uuid in range(len(uuids)):
                hashtable.index(preds_concat[uuid], extra_data=uuid)
            
            if (i+1) % 1000 == 0:
                fname = f'.lsh/lsh_{i}.p'
                with open(fname, 'wb')  as f:
                    pickle.dump(hashtable, f)
                    print(f'Saved {fname}')
                del hashtable
                hashtable = LSHash(20, 3)
            
            

999it [21:06,  1.26s/it]

Saved .lsh/lsh_999.p


2000it [42:22,  1.50s/it]

Saved .lsh/lsh_1999.p


3000it [1:03:41,  1.50s/it]

Saved .lsh/lsh_2999.p


4000it [1:25:01,  1.52s/it]

Saved .lsh/lsh_3999.p


5000it [1:46:23,  1.50s/it]

Saved .lsh/lsh_4999.p


6000it [2:10:05,  2.72s/it]

Saved .lsh/lsh_5999.p


7000it [2:49:30,  2.77s/it]

Saved .lsh/lsh_6999.p


8000it [3:29:11,  2.81s/it]

Saved .lsh/lsh_7999.p


9000it [4:08:39,  2.76s/it]

Saved .lsh/lsh_8999.p


10000it [4:48:19,  2.74s/it]

Saved .lsh/lsh_9999.p


11000it [5:27:55,  2.76s/it]

Saved .lsh/lsh_10999.p


12000it [6:07:47,  2.79s/it]

Saved .lsh/lsh_11999.p


13000it [6:47:25,  2.77s/it]

Saved .lsh/lsh_12999.p


14000it [7:27:25,  2.80s/it]

Saved .lsh/lsh_13999.p


15000it [8:07:20,  2.78s/it]

Saved .lsh/lsh_14999.p


16000it [8:47:19,  2.89s/it]

Saved .lsh/lsh_15999.p


17000it [9:27:08,  2.73s/it]

Saved .lsh/lsh_16999.p


18000it [10:07:07,  2.79s/it]

Saved .lsh/lsh_17999.p


19000it [10:47:09,  2.70s/it]

Saved .lsh/lsh_18999.p


20000it [11:26:57,  2.77s/it]

Saved .lsh/lsh_19999.p


21000it [12:06:35,  2.98s/it]

Saved .lsh/lsh_20999.p


22000it [12:46:27,  2.83s/it]

Saved .lsh/lsh_21999.p


23000it [13:26:20,  2.76s/it]

Saved .lsh/lsh_22999.p


24000it [14:06:04,  2.74s/it]

Saved .lsh/lsh_23999.p


25000it [14:46:22,  2.97s/it]

Saved .lsh/lsh_24999.p


26000it [15:26:32,  2.96s/it]

Saved .lsh/lsh_25999.p


27000it [16:06:36,  2.77s/it]

Saved .lsh/lsh_26999.p


28000it [16:46:33,  2.74s/it]

Saved .lsh/lsh_27999.p


29000it [17:26:23,  2.82s/it]

Saved .lsh/lsh_28999.p


30000it [18:06:28,  2.77s/it]

Saved .lsh/lsh_29999.p


31000it [18:46:23,  2.80s/it]

Saved .lsh/lsh_30999.p


32000it [19:25:59,  2.80s/it]

Saved .lsh/lsh_31999.p


33000it [20:05:57,  2.74s/it]

Saved .lsh/lsh_32999.p


34000it [20:46:06,  2.97s/it]

Saved .lsh/lsh_33999.p


35000it [21:25:42,  2.74s/it]

Saved .lsh/lsh_34999.p


36000it [22:05:49,  2.78s/it]

Saved .lsh/lsh_35999.p


37000it [22:46:32,  3.02s/it]

Saved .lsh/lsh_36999.p


38000it [23:27:35,  2.99s/it]

Saved .lsh/lsh_37999.p


39000it [24:07:31,  2.75s/it]

Saved .lsh/lsh_38999.p


40000it [24:47:55,  2.81s/it]

Saved .lsh/lsh_39999.p


41000it [25:28:00,  2.97s/it]

Saved .lsh/lsh_40999.p


42000it [26:08:10,  2.78s/it]

Saved .lsh/lsh_41999.p


43000it [26:48:33,  2.80s/it]

Saved .lsh/lsh_42999.p


44000it [27:28:40,  2.75s/it]

Saved .lsh/lsh_43999.p


45000it [28:08:56,  2.80s/it]

Saved .lsh/lsh_44999.p


46000it [28:49:02,  2.75s/it]

Saved .lsh/lsh_45999.p


47000it [29:29:27,  2.75s/it]

Saved .lsh/lsh_46999.p


48000it [30:09:19,  2.97s/it]

Saved .lsh/lsh_47999.p


49000it [30:49:41,  2.77s/it]

Saved .lsh/lsh_48999.p


50000it [31:29:58,  2.78s/it]

Saved .lsh/lsh_49999.p


51000it [32:10:10,  2.79s/it]

Saved .lsh/lsh_50999.p


52000it [32:50:30,  2.75s/it]

Saved .lsh/lsh_51999.p


53000it [33:30:47,  2.91s/it]

Saved .lsh/lsh_52999.p


54000it [34:11:16,  2.82s/it]

Saved .lsh/lsh_53999.p


55000it [34:51:12,  2.75s/it]

Saved .lsh/lsh_54999.p


56000it [35:31:41,  2.97s/it]

Saved .lsh/lsh_55999.p


57000it [36:11:12,  2.75s/it]

Saved .lsh/lsh_56999.p


58000it [36:51:28,  2.77s/it]

Saved .lsh/lsh_57999.p


59000it [37:31:48,  2.75s/it]

Saved .lsh/lsh_58999.p


60000it [38:11:53,  3.05s/it]

Saved .lsh/lsh_59999.p


61000it [38:52:07,  3.01s/it]

Saved .lsh/lsh_60999.p


62000it [39:32:23,  2.76s/it]

Saved .lsh/lsh_61999.p


63000it [40:12:37,  2.79s/it]

Saved .lsh/lsh_62999.p


64000it [40:53:00,  2.75s/it]

Saved .lsh/lsh_63999.p


65000it [41:33:03,  3.06s/it]

Saved .lsh/lsh_64999.p


66000it [42:13:29,  2.72s/it]

Saved .lsh/lsh_65999.p


67000it [42:54:02,  2.81s/it]

Saved .lsh/lsh_66999.p


68000it [43:34:28,  2.81s/it]

Saved .lsh/lsh_67999.p


69000it [44:14:21,  2.77s/it]

Saved .lsh/lsh_68999.p


70000it [44:53:57,  2.79s/it]

Saved .lsh/lsh_69999.p


71000it [45:34:21,  2.85s/it]

Saved .lsh/lsh_70999.p


72000it [46:14:44,  2.79s/it]

Saved .lsh/lsh_71999.p


73000it [46:54:34,  2.75s/it]

Saved .lsh/lsh_72999.p


74000it [47:34:58,  2.80s/it]

Saved .lsh/lsh_73999.p


75000it [48:15:30,  2.79s/it]

Saved .lsh/lsh_74999.p


76000it [48:55:40,  2.76s/it]

Saved .lsh/lsh_75999.p


77000it [49:33:46,  1.88s/it]

Saved .lsh/lsh_76999.p


78000it [50:00:26,  1.92s/it]

Saved .lsh/lsh_77999.p


79000it [50:26:42,  1.90s/it]

Saved .lsh/lsh_78999.p


80000it [50:53:20,  1.84s/it]

Saved .lsh/lsh_79999.p


81000it [51:20:04,  1.87s/it]

Saved .lsh/lsh_80999.p


82000it [51:47:04,  1.93s/it]

Saved .lsh/lsh_81999.p


83000it [52:14:02,  1.93s/it]

Saved .lsh/lsh_82999.p


84000it [52:41:04,  1.92s/it]

Saved .lsh/lsh_83999.p


85000it [53:08:03,  1.95s/it]

Saved .lsh/lsh_84999.p


86000it [53:33:10,  1.56s/it]

Saved .lsh/lsh_85999.p


87000it [53:55:12,  1.54s/it]

Saved .lsh/lsh_86999.p


88000it [54:17:05,  1.54s/it]

Saved .lsh/lsh_87999.p


89000it [54:38:33,  1.52s/it]

Saved .lsh/lsh_88999.p


90000it [54:59:56,  1.52s/it]

Saved .lsh/lsh_89999.p


91000it [55:21:19,  1.50s/it]

Saved .lsh/lsh_90999.p


92000it [55:43:04,  1.54s/it]

Saved .lsh/lsh_91999.p


93000it [56:04:54,  1.57s/it]

Saved .lsh/lsh_92999.p


94000it [56:26:44,  1.53s/it]

Saved .lsh/lsh_93999.p


95000it [56:48:35,  1.54s/it]

Saved .lsh/lsh_94999.p


96000it [57:10:32,  1.54s/it]

Saved .lsh/lsh_95999.p


97000it [57:32:28,  1.56s/it]

Saved .lsh/lsh_96999.p


98000it [57:54:27,  1.54s/it]

Saved .lsh/lsh_97999.p


99000it [58:16:23,  1.55s/it]

Saved .lsh/lsh_98999.p


100000it [58:38:15,  1.57s/it]

Saved .lsh/lsh_99999.p


101000it [59:00:13,  1.54s/it]

Saved .lsh/lsh_100999.p


102000it [59:22:11,  1.55s/it]

Saved .lsh/lsh_101999.p


103000it [59:44:04,  1.55s/it]

Saved .lsh/lsh_102999.p


104000it [60:05:57,  1.56s/it]

Saved .lsh/lsh_103999.p


105000it [60:27:45,  1.54s/it]

Saved .lsh/lsh_104999.p


106000it [60:49:40,  1.54s/it]

Saved .lsh/lsh_105999.p


107000it [61:11:27,  1.54s/it]

Saved .lsh/lsh_106999.p


108000it [61:33:13,  1.53s/it]

Saved .lsh/lsh_107999.p


109000it [61:55:04,  1.54s/it]

Saved .lsh/lsh_108999.p


110000it [62:16:49,  1.54s/it]

Saved .lsh/lsh_109999.p


111000it [62:38:37,  1.56s/it]

Saved .lsh/lsh_110999.p


112000it [63:00:32,  1.55s/it]

Saved .lsh/lsh_111999.p


113000it [63:22:24,  1.54s/it]

Saved .lsh/lsh_112999.p


114000it [63:44:21,  1.55s/it]

Saved .lsh/lsh_113999.p


115000it [64:06:19,  1.56s/it]

Saved .lsh/lsh_114999.p


116000it [64:28:15,  1.55s/it]

Saved .lsh/lsh_115999.p


117000it [64:50:12,  1.56s/it]

Saved .lsh/lsh_116999.p


118000it [65:12:10,  1.55s/it]

Saved .lsh/lsh_117999.p


119000it [65:34:05,  1.54s/it]

Saved .lsh/lsh_118999.p


120000it [65:56:06,  1.55s/it]

Saved .lsh/lsh_119999.p


121000it [66:18:04,  1.55s/it]

Saved .lsh/lsh_120999.p


122000it [66:39:59,  1.55s/it]

Saved .lsh/lsh_121999.p


123000it [67:01:52,  1.54s/it]

Saved .lsh/lsh_122999.p


124000it [67:23:44,  1.54s/it]

Saved .lsh/lsh_123999.p


125000it [67:45:36,  1.55s/it]

Saved .lsh/lsh_124999.p


126000it [68:07:28,  1.54s/it]

Saved .lsh/lsh_125999.p


127000it [68:29:20,  1.58s/it]

Saved .lsh/lsh_126999.p


128000it [68:51:05,  1.54s/it]

Saved .lsh/lsh_127999.p


129000it [69:12:48,  1.54s/it]

Saved .lsh/lsh_128999.p


130000it [69:34:57,  1.56s/it]

Saved .lsh/lsh_129999.p


131000it [69:57:04,  1.55s/it]

Saved .lsh/lsh_130999.p


132000it [70:18:47,  1.55s/it]

Saved .lsh/lsh_131999.p


133000it [70:40:12,  1.50s/it]

Saved .lsh/lsh_132999.p


134000it [71:01:39,  1.50s/it]

Saved .lsh/lsh_133999.p


135000it [71:23:13,  1.55s/it]

Saved .lsh/lsh_134999.p


136000it [71:44:57,  1.53s/it]

Saved .lsh/lsh_135999.p


137000it [72:06:41,  1.53s/it]

Saved .lsh/lsh_136999.p


137674it [72:21:20,  1.89s/it]


RuntimeError: shape '[8, 1]' is invalid for input of size 4

## Pickle Hashtable and save it