In [1]:
import faiss
import os
import numpy as np
import pandas as pd
from typing import Dict

import torch
from torch import Tensor
from torchvision import models

from torchvision.transforms import Compose, transforms
from PIL import Image
import cv2
import sqlite3

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [2]:
DATASET = "/nethome/kravicha3/aryan/project/dataset/Reddit_Provenance_Datasets/data/"
FEATURES_PATH = ""
INDEX_PATH = ""

# Parameters to change

1) Features extracted from ResNet-50  
    - different layers  
    - different models  
    - bigger features  
    - SIFT or SURF features  
2) Different distance metrics  
    - L1 or L2 norm  
3) Different Indexing  
    - HNSW  
    - OPQ  
    - IVF  
4) Non linearity using score = 1-tanh(distance)  
5) Using decompistions using PCA  


## Model Exploration

In [3]:
from torchvision.models import MobileNet_V3_Large_Weights
# model = models.mobilenet_v3_large(weights=MobileNet_V3_Large_Weights)
model = models.resnet50(pretrained=True, progress=False)
for param in model.parameters():
    param.requires_grad = False
model.fc = torch.nn.Identity()
model.to(device)
model.eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

### FX Feature Extractor

In [4]:
from torchsummary import summary
summary(model, input_size=(3,244,244))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 122, 122]           9,408
       BatchNorm2d-2         [-1, 64, 122, 122]             128
              ReLU-3         [-1, 64, 122, 122]               0
         MaxPool2d-4           [-1, 64, 61, 61]               0
            Conv2d-5           [-1, 64, 61, 61]           4,096
       BatchNorm2d-6           [-1, 64, 61, 61]             128
              ReLU-7           [-1, 64, 61, 61]               0
            Conv2d-8           [-1, 64, 61, 61]          36,864
       BatchNorm2d-9           [-1, 64, 61, 61]             128
             ReLU-10           [-1, 64, 61, 61]               0
           Conv2d-11          [-1, 256, 61, 61]          16,384
      BatchNorm2d-12          [-1, 256, 61, 61]             512
           Conv2d-13          [-1, 256, 61, 61]          16,384
      BatchNorm2d-14          [-1, 256,

In [5]:
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor

In [6]:
nodes, _ = get_graph_node_names(model)

nodes

['x',
 'conv1',
 'bn1',
 'relu',
 'maxpool',
 'layer1.0.conv1',
 'layer1.0.bn1',
 'layer1.0.relu',
 'layer1.0.conv2',
 'layer1.0.bn2',
 'layer1.0.relu_1',
 'layer1.0.conv3',
 'layer1.0.bn3',
 'layer1.0.downsample.0',
 'layer1.0.downsample.1',
 'layer1.0.add',
 'layer1.0.relu_2',
 'layer1.1.conv1',
 'layer1.1.bn1',
 'layer1.1.relu',
 'layer1.1.conv2',
 'layer1.1.bn2',
 'layer1.1.relu_1',
 'layer1.1.conv3',
 'layer1.1.bn3',
 'layer1.1.add',
 'layer1.1.relu_2',
 'layer1.2.conv1',
 'layer1.2.bn1',
 'layer1.2.relu',
 'layer1.2.conv2',
 'layer1.2.bn2',
 'layer1.2.relu_1',
 'layer1.2.conv3',
 'layer1.2.bn3',
 'layer1.2.add',
 'layer1.2.relu_2',
 'layer2.0.conv1',
 'layer2.0.bn1',
 'layer2.0.relu',
 'layer2.0.conv2',
 'layer2.0.bn2',
 'layer2.0.relu_1',
 'layer2.0.conv3',
 'layer2.0.bn3',
 'layer2.0.downsample.0',
 'layer2.0.downsample.1',
 'layer2.0.add',
 'layer2.0.relu_2',
 'layer2.1.conv1',
 'layer2.1.bn1',
 'layer2.1.relu',
 'layer2.1.conv2',
 'layer2.1.bn2',
 'layer2.1.relu_1',
 'layer2.

In [7]:
return_nodes = ['layer4.2']
feature_extractor = create_feature_extractor(model, return_nodes=return_nodes)
out_layer = feature_extractor(torch.zeros(1,3,224,224).to(device))

return_nodes = ['layer4.2.relu_2']
feature_extractor = create_feature_extractor(model, return_nodes=return_nodes)
out_relu = feature_extractor(torch.zeros(1,3,224,224).to(device))

In [8]:
x = out_layer['layer4.2'] == out_relu['layer4.2.relu_2']
torch.all(x)

## Therefore just using "layer4.2" is sufficient to get final layer result

tensor(True, device='cuda:0')

In [9]:
return_nodes = ['layer1.2', 'layer2.3', 'layer3.5', 'layer4.2']
feature_extractor = create_feature_extractor(model, return_nodes=return_nodes)
out = feature_extractor(torch.zeros(1,3,224,224).to(device))

In [10]:
out.keys()

dict_keys(['layer1.2', 'layer2.3', 'layer3.5', 'layer4.2'])

In [11]:
from functools import reduce
for i in out:
    x = out[i]
    print(f"layer={i} shape = {x.shape}, total={reduce(lambda x,y: x*y, list(x.shape))}")

layer=layer1.2 shape = torch.Size([1, 256, 56, 56]), total=802816
layer=layer2.3 shape = torch.Size([1, 512, 28, 28]), total=401408
layer=layer3.5 shape = torch.Size([1, 1024, 14, 14]), total=200704
layer=layer4.2 shape = torch.Size([1, 2048, 7, 7]), total=100352


In [13]:
dict_as_numpy(out)

{'layer1.2': array([[[[0.03877565, 0.02151572, 0.02193974, ..., 0.02199436,
           0.02192129, 0.02056322],
          [0.02730467, 0.00475721, 0.00514605, ..., 0.00532617,
           0.00466646, 0.01883157],
          [0.02690694, 0.00505806, 0.00551328, ..., 0.00556198,
           0.00501689, 0.01906472],
          ...,
          [0.02731758, 0.00491392, 0.00538119, ..., 0.0054427 ,
           0.00485085, 0.01882893],
          [0.02772745, 0.00463535, 0.00475742, ..., 0.00483652,
           0.00438937, 0.01932257],
          [0.03610303, 0.01821626, 0.01827663, ..., 0.01832831,
           0.01968963, 0.02866302]],
 
         [[0.1469104 , 0.14926973, 0.14921325, ..., 0.14922172,
           0.14888483, 0.12554364],
          [0.14420277, 0.14524393, 0.14615378, ..., 0.14569126,
           0.14570385, 0.11187707],
          [0.14668356, 0.1461264 , 0.1464101 , ..., 0.14586279,
           0.14593606, 0.11223075],
          ...,
          [0.14556444, 0.14584585, 0.14612283, ..., 0.1

### Model result files

In [12]:
return_nodes = ['layer1.2', 'layer2.3', 'layer3.5', 'layer4.2']
feature_extractor = create_feature_extractor(model, return_nodes=return_nodes)

def transform(images: np.ndarray):
    transformed = [transforms.ToTensor()]
    composed = Compose(transformed)
    return composed(Image.fromarray(images[:, :, ::-1])).unsqueeze(0)

def as_numpy(val: Tensor) -> np.ndarray:
    return val.detach().cpu().numpy()

def dict_as_numpy(inference: Dict) -> Dict:
    for layer_output in inference:
        output = inference[layer_output]
        inference[layer_output] = as_numpy(output)
    return inference

def read_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        cap = cv2.VideoCapture(image_path)
        ret, img = cap.read()
        cap.release()
    return img

def model_output(image_path):
    img = read_image(image_path)
    imgt = transform(img)
    imgt = imgt.to(device)
    with torch.no_grad():
        inference = as_numpy(model(torch.unsqueeze(imgt[0], 0)))
    return inference

def feature_extractor_output(image_path):
    img = read_image(image_path)
    imgt = transform(img)
    imgt = imgt.to(device)
    with torch.no_grad():
        inferences = dict_as_numpy(feature_extractor(torch.unsqueeze(imgt[0], 0)))
    return inferences

### Getting Images Features
1) Features extracted from ResNet-50  
    - different layers  
    - different models  
    - bigger features  
    - SIFT or SURF features 
2) Using decompistions using PCA

In [90]:
def get_image_features(start_path = '.'):
    df = pd.DataFrame(columns=['img_name', 'dir', 'features1', 'features2', 'features3', 'features4'])
    print("function running")
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            print(f"fp = {fp}")
            if (fp.endswith('.jpg') or fp.endswith('.png') or fp.endswith('.mp4')):
                
#                 features = feature_extractor_output(fp)
#                 print(features)
                img = read_image(fp)
                imgt = transform(img)
                imgt = imgt.to(device)
                print(imgt.shape)
                with torch.no_grad():
                    inferences = dict_as_numpy(feature_extractor(torch.unsqueeze(imgt[0], 0)))
                
                print(feature_extractor(torch.unsqueeze(imgt[0], 0)).keys())

                df.loc[len(df.index)] = [f, dirpath, features['layer1.2'], features['layer2.3'], features['layer3.5'], features['layer4.2']]
            
            torch.cuda.empty_cache()
            break
    return df

In [91]:
reddit_resnet_df = get_image_features(DATASET)

function running
fp = /nethome/kravicha3/aryan/project/dataset/Reddit_Provenance_Datasets/data/_This_cat_plotting_to_kill_someone/g1327_czcqbl6.jpg
torch.Size([1, 3, 695, 1200])
dict_keys(['layer1.2', 'layer2.3', 'layer3.5', 'layer4.2'])


NameError: name 'features' is not defined

In [71]:
reddit_resnet_df

Unnamed: 0,img_name,dir,features1,features2,features3,features4


In [14]:
df = pd.read_pickle('./tuning/tune_result/multi_reddit_resnet_df.pkl')

In [16]:
df.head(1)

Unnamed: 0,img_name,dir,features1,features2,features3,features4
0,g1327_czcqbl6.jpg,/nethome/kravicha3/aryan/project/dataset/Reddi...,[[[[0.0944433 0.06187888 0.058054 0.0574760...,[[[[0.00759521 0.04283475 0.01936433 0.0193564...,[[[[0.25154373 0.19911413 0.14900559 0.1829795...,[[[[0.4043764 0.42666304 0.6235993 0.7488219...


In [17]:
del df

### Creating new Faiss Index
1) Different Faiss indexes:  
    - IndexFlatL2  
    - IndexIVFFlat  

In [16]:
df = pd.read_pickle('./tuning/tune_result/reddit_resnet_df.pkl')
features = df['features']

In [17]:
features = features.values.tolist()

In [18]:
features = np.array(features, dtype=np.float32)

In [21]:
features.reshape(features.shape[0], features.shape[2])

array([[0.7295329 , 0.30392087, 0.7336758 , ..., 0.10279972, 0.1377843 ,
        0.65775776],
       [0.12240556, 0.14182492, 0.24656458, ..., 0.22167432, 0.12675598,
        0.28419152],
       [0.10773073, 0.10070281, 0.01043681, ..., 0.04425399, 0.03005464,
        0.35249814],
       ...,
       [0.22710375, 0.29315758, 0.43143603, ..., 0.04265103, 0.14289662,
        0.18905915],
       [0.21410389, 0.2253808 , 0.5778347 , ..., 0.04721229, 0.15858082,
        0.19459999],
       [0.21681714, 0.20525728, 0.5063112 , ..., 0.04636855, 0.15533772,
        0.23273687]], dtype=float32)

In [9]:
dim = 2048
index = faiss.IndexFlatL2(dim)
index.train(features)

NameError: name 'features' is not defined