In [1]:
# Enable IPython's autoreload extension in mode 2: modules are re-imported
# before each cell runs, so edits to project modules take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import gc
import pickle
from dime.engine import SearchEngine
import torch
import torchvision
from train_model import IntermodalTripletNet
from train_model import main

Loading faiss with AVX2 support.


In [3]:
# Batch size handed to every engine.build_index(...) call in this notebook.
BATCH_SIZE = 32

In [4]:
# Run the training entry point from train_model. `main` and `gc` come from the
# notebook's import cell, so every dependency is declared in one place.
NUM_EPOCHS = 15  # the training log reports progress as "Epoch: k/15"
# NOTE(review): presumably selects the image feature backbone used for
# training — confirm against train_model.main.
FEATURE_EXTRACTOR = "resnet18"

main(NUM_EPOCHS, FEATURE_EXTRACTOR)

# Force a garbage-collection pass after training. Kept as the cell's last
# expression so the cell still displays the value returned by gc.collect()
# (the number of unreachable objects found).
gc.collect()

CUDA: True
Loading in word vectors...
Done

Loading NUS_WIDE dataset...
Done

Making training and validation indices...
Done.




Epoch: 1/15. Train set: Average loss: 7.2886
Epoch: 1/15. Validation set: Average loss: 6.4048
Epoch: 2/15. Train set: Average loss: 6.0639
Epoch: 2/15. Validation set: Average loss: 5.8496
Epoch: 3/15. Train set: Average loss: 5.7553
Epoch: 3/15. Validation set: Average loss: 5.7267
Epoch: 4/15. Train set: Average loss: 5.6030
Epoch: 4/15. Validation set: Average loss: 5.5480
Epoch: 5/15. Train set: Average loss: 5.4535
Epoch: 5/15. Validation set: Average loss: 5.4339
Epoch: 6/15. Train set: Average loss: 5.3450
Epoch: 6/15. Validation set: Average loss: 5.3090
Epoch: 7/15. Train set: Average loss: 5.2729
Epoch: 7/15. Validation set: Average loss: 5.2726
Epoch: 8/15. Train set: Average loss: 5.0518
Epoch: 8/15. Validation set: Average loss: 5.0860


Epoch: 9/15. Train set: Average loss: 4.9578
Epoch: 9/15. Validation set: Average loss: 5.0835
Epoch: 10/15. Train set: Average loss: 4.9811
Epoch: 10/15. Validation set: Average loss: 5.0143
Epoch: 11/15. Train set: Average loss: 4.9074
Epoch: 11/15. Validation set: Average loss: 5.0279
Epoch: 12/15. Train set: Average loss: 4.9236
Epoch: 12/15. Validation set: Average loss: 5.0055
Epoch: 13/15. Train set: Average loss: 4.8955
Epoch: 13/15. Validation set: Average loss: 5.0009
Epoch: 14/15. Train set: Average loss: 4.9024
Epoch: 14/15. Validation set: Average loss: 4.9739
Epoch: 15/15. Train set: Average loss: 4.8800
Epoch: 15/15. Validation set: Average loss: 4.9918


0

In [5]:
# Configuration handed to SearchEngine: an engine name, CUDA and verbosity
# flags, four directory settings, and the list of modalities.
engine_params = dict(
    name="demo_engine",
    cuda=True,
    verbose=True,
    dataset_dir="data/",
    index_dir="indexes/",
    model_dir="models/",
    embedding_dir="embeddings/",
    modalities=["text", "image", "audio", "video"],
)

engine = SearchEngine(engine_params)

In [6]:
# Build a ResNet-152 feature extractor: take the pretrained network, drop its
# last child module, and wrap what remains in a Sequential.
# NOTE(review): newer torchvision releases prefer `weights=` over
# `pretrained=True` — confirm against the installed version before changing.
r152_full = torchvision.models.resnet152(pretrained=True)
r152_layers = list(r152_full.children())[:-1]
r152_extractor = torch.nn.Sequential(*r152_layers)

r152_features_params = dict(
    name="resnet152",
    output_dim=(2048,),
    modalities=["image"],
    embedding_nets=[r152_extractor],
    input_dim=[(3, 224, 224)],
    desc="Resnet152 with the last layer removed for feature extraction",
)

engine.add_model(r152_features_params)

Model 'resnet152' added


In [7]:
# Same recipe as a plain feature extractor, for ResNet-18: every child module
# of the pretrained network except the last one, chained in a Sequential.
# NOTE(review): newer torchvision releases prefer `weights=` over
# `pretrained=True` — confirm against the installed version before changing.
r18_children = list(torchvision.models.resnet18(pretrained=True).children())
r18_extractor = torch.nn.Sequential(*r18_children[:-1])

r18_features_params = dict(
    name="resnet18",
    output_dim=(512,),
    modalities=["image"],
    embedding_nets=[r18_extractor],
    input_dim=[(3, 224, 224)],
    desc="Resnet18 with the last layer removed for feature extraction",
)

engine.add_model(r18_features_params)

Model 'resnet18' added


In [8]:
# Load the pickled model and register its two sub-networks with the engine.
# Unpickling can execute arbitrary code, so only load files from a trusted
# source.
with open("pickles/models/resnet152_5epochs.p", "rb") as model_file:
    model = pickle.load(model_file)

# The "embedding_nets" and "input_dim" entries appear to be listed in the same
# order as "modalities" (image first, then text) — confirm against
# SearchEngine.add_model.
demo_model1_params = dict(
    name="cm-r152-5epochs",
    output_dim=(200,),
    modalities=["image", "text"],
    embedding_nets=[model.modalityOneNet, model.modalityTwoNet],
    input_dim=[(2048,), (300,)],
    desc="5 epoch adversarial cross-modal triplet-loss retrieval trained with resnet152 features and wiki word2vec",
)

engine.add_model(demo_model1_params)

Model 'cm-r152-5epochs' added


In [9]:
# Load the pickled model and register its two sub-networks with the engine.
# Unpickling can execute arbitrary code, so only load files from a trusted
# source.
with open("pickles/models/resnet152_15epochs.p", "rb") as model_file:
    model = pickle.load(model_file)

# The "embedding_nets" and "input_dim" entries appear to be listed in the same
# order as "modalities" (image first, then text) — confirm against
# SearchEngine.add_model.
demo_model1_params = dict(
    name="cm-r152-15epochs",
    output_dim=(200,),
    modalities=["image", "text"],
    embedding_nets=[model.modalityOneNet, model.modalityTwoNet],
    input_dim=[(2048,), (300,)],
    desc="15 epoch adversarial cross-modal triplet-loss retrieval trained with resnet152 features and wiki word2vec",
)

engine.add_model(demo_model1_params)

Model 'cm-r152-15epochs' added


In [10]:
# Load the pickled model and register its two sub-networks with the engine.
# Unpickling can execute arbitrary code, so only load files from a trusted
# source.
with open("pickles/models/resnet18_15epochs.p", "rb") as model_file:
    model = pickle.load(model_file)

# The "embedding_nets" and "input_dim" entries appear to be listed in the same
# order as "modalities" (image first, then text) — confirm against
# SearchEngine.add_model.
demo_model1_params = dict(
    name="cm-r18-15epochs",
    output_dim=(200,),
    modalities=["image", "text"],
    embedding_nets=[model.modalityOneNet, model.modalityTwoNet],
    input_dim=[(512,), (300,)],
    desc="15 epoch adversarial cross-modal triplet-loss retrieval trained with resnet18 features and wiki word2vec",
)

engine.add_model(demo_model1_params)

Model 'cm-r18-15epochs' added


In [11]:
# (model name, preprocessor name) pairs registered for the "image" modality.
# Presumably the named ResNet turns raw images into the feature vectors the
# cross-modal model expects — confirm against SearchEngine.add_preprocessor.
image_preprocessors = (
    ("cm-r152-5epochs", "resnet152"),
    ("cm-r152-15epochs", "resnet152"),
    ("cm-r18-15epochs", "resnet18"),
)

for model_name, preprocessor_name in image_preprocessors:
    engine.add_preprocessor(model_name, "image", preprocessor_name)

In [12]:
# Image pipeline: resize to 224x224, convert to a tensor, then normalise each
# channel with the given mean/std (the values commonly used for
# ImageNet-pretrained torchvision models).
nuswide_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

nuswide_params = dict(
    name="nuswide",
    data_dir="Flickr/",
    transform=nuswide_transform,
    dim=(3, 224, 224),
    modality="image",
    desc="The nuswide dataset",
)

# force_add=True is passed explicitly; presumably it re-registers the dataset
# even if one with this name already exists — confirm against
# SearchEngine.add_dataset.
engine.add_dataset(nuswide_params, force_add=True)

Dataset 'nuswide' added


In [13]:
# Load the pickled word-embedding tensors and register them as a text dataset.
# Unpickling can execute arbitrary code, so only load files from a trusted
# source.
with open("pickles/word_embeddings/word_embeddings_tensors.p", "rb") as embeddings_file:
    wiki = pickle.load(embeddings_file)

wiki_word2vec_params = dict(
    name="wiki_word2vec",
    data=wiki,
    modality="text",
    dim=(300,),
    desc="one million word2vec entries trained on English Wikipedia",
)

engine.add_dataset(wiki_word2vec_params)

Dataset 'wiki_word2vec' added


In [14]:
# Index descriptions, one per (model, dataset) pair. Each dict carries a
# display name, the model and dataset names it refers to, and a description.

# Cross-modal model trained for 5 epochs on resnet152 features.
ind1 = dict(
    name="NUSWIDE (cm-r152-5epochs)",
    model_name="cm-r152-5epochs",
    dataset_name="nuswide",
    desc="The index corresponding to cm-r152-5epochs model and nuswide",
)
ind2 = dict(
    name="WIKI WORD2VEC (cm-r152-5epochs)",
    model_name="cm-r152-5epochs",
    dataset_name="wiki_word2vec",
    desc="The index corresponding to cm-r152-5epochs model and wiki_word2vec",
)

# Cross-modal model trained for 15 epochs on resnet152 features.
ind3 = dict(
    name="NUSWIDE (cm-r152-15epochs)",
    model_name="cm-r152-15epochs",
    dataset_name="nuswide",
    desc="The index corresponding to cm-r152-15epochs model and nuswide",
)
ind4 = dict(
    name="WIKI WORD2VEC (cm-r152-15epochs)",
    model_name="cm-r152-15epochs",
    dataset_name="wiki_word2vec",
    desc="The index corresponding to cm-r152-15epochs model and wiki_word2vec",
)

# Cross-modal model trained for 15 epochs on resnet18 features.
ind5 = dict(
    name="NUSWIDE (cm-r18-15epochs)",
    model_name="cm-r18-15epochs",
    dataset_name="nuswide",
    desc="The index corresponding to cm-r18-15epochs model and nuswide",
)
ind6 = dict(
    name="WIKI WORD2VEC (cm-r18-15epochs)",
    model_name="cm-r18-15epochs",
    dataset_name="wiki_word2vec",
    desc="The index corresponding to cm-r18-15epochs model and wiki_word2vec",
)

# Plain ResNet feature extractors over the image dataset.
ind7 = dict(
    name="NUSWIDE (resnet152)",
    model_name="resnet152",
    dataset_name="nuswide",
    desc="The index corresponding to resnet152 and nuswide",
)
ind8 = dict(
    name="NUSWIDE (resnet18)",
    model_name="resnet18",
    dataset_name="nuswide",
    desc="The index corresponding to resnet18 and nuswide",
)

In [15]:
# Build every index in the original order, all with the shared batch size.
build_order = (ind1, ind2, ind3, ind4, ind5, ind6, ind7, ind8)

for index_params in build_order[:-1]:
    engine.build_index(index_params, batch_size=BATCH_SIZE)

# The final build stays a bare expression so the cell still displays the value
# returned by the last build_index call.
engine.build_index(build_order[-1], batch_size=BATCH_SIZE)

Building cm-r152-5epochs, nuswide index
Loading batch 0 of 8427
Processing batch 1000 of 8427
Processing batch 2000 of 8427
Processing batch 3000 of 8427
Processing batch 4000 of 8427
Processing batch 5000 of 8427
Processing batch 6000 of 8427
Processing batch 7000 of 8427
Processing batch 8000 of 8427
Finished building index NUSWIDE (cm-r152-5epochs) in 1528.1353 seconds.
Building cm-r152-5epochs, wiki_word2vec index
Processing batch 0 of 31250
Processing batch 1000 of 31250
Processing batch 2000 of 31250
Processing batch 3000 of 31250
Processing batch 4000 of 31250
Processing batch 5000 of 31250
Processing batch 6000 of 31250
Processing batch 7000 of 31250
Processing batch 8000 of 31250
Processing batch 9000 of 31250
Processing batch 10000 of 31250
Processing batch 11000 of 31250
Processing batch 12000 of 31250
Processing batch 13000 of 31250
Processing batch 14000 of 31250
Processing batch 15000 of 31250
Processing batch 16000 of 31250
Processing batch 17000 of 31250
Processing batc

'NUSWIDE (resnet18)'

In [16]:
# Save the engine once everything has been registered and built.
# NOTE(review): save_data=True presumably also writes the registered dataset
# contents — confirm against SearchEngine.save.
engine.save(save_data=True)