<a href="https://colab.research.google.com/github/jontooy/vl_demos/blob/master/feature_extraction_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# VinVL_feature_extraction


In [21]:
import sys
import os
import os.path as op
import yaml
import pandas as pd
import ast
import json
import base64

import numpy as np
# Print floats in fixed-point notation (no scientific notation), 4 decimals
np.set_printoptions(suppress=True, precision=4)

# Mount Google Drive at /content/drive; the cells below read and write under
# /content/drive/MyDrive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Install **sgg**:
https://github.com/microsoft/scene_graph_benchmark

In [None]:
# Git clone repos
% cd /content/drive/MyDrive

! git clone https://github.com/microsoft/scene_graph_benchmark.git

# get model file to your mount disk
! wget https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/vinvl_model_zoo/vinvl_vg_x152c4.pth -P /content/drive/MyDrive/scene_graph_benchmark/pretrained_model

# get object detection mapping
! wget https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/vinvl_model_zoo/VG-SGG-dicts-vgoi6-clipped.json -P /content/drive/MyDrive/scene_graph_benchmark/visualgenome/

% cd /content/drive/MyDrive/scene_graph_benchmark/

# maskrcnn_benchmark and coco api dependencies
! pip install ninja yacs>=0.1.8 cython matplotlib tqdm opencv-python numpy>=1.19.5

! pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html
! pip install timm
! pip install einops
! pip install -U PyYAML

# install pycocotools
! pip install pycocotools

# install cityscapesScripts
! python -m pip install cityscapesscripts

# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# re-build it
! python setup.py build develop

## Image extraction pipeline

1. Drop the images to process into `/content/drive/MyDrive/scene_graph_benchmark/tools/mini_tsv/images`, or change `data_path` in `tools/mini_tsv/tsv_demo.py` to point to your image folder.

In [17]:
TSV_DIR = './tools/mini_tsv/data'
# Create .tsv files for our images
! python /content/drive/MyDrive/scene_graph_benchmark/tools/mini_tsv/tsv_demo.py

# Create .yaml file for connecting .tsv files
yaml_dict = {"img" : "train.tsv",
            "label" :  "train.label.tsv",
            "hw" : "train.hw.tsv",
            "linelist" : "train.linelist.tsv"}
with open(op.join(TSV_DIR, 'train.yaml'), 'w') as file:
        yaml.dump(yaml_dict, file)

0it [00:00, ?it/s]3it [00:00, 3744.91it/s]


2. Configure `sgg_configs/vgattr/vinvl_x152c4.yaml` and make sure `os.path.join(DATA_DIR, DATASETS.TEST)` points to your dataset yaml file. Current settings:

  + DATASETS.TEST: ("train.yaml",)
  + OUTPUT_DIR: "output/"
  + DATA_DIR: "tools/mini_tsv/data/"

If you have problems loading the label map, edit line 37 in maskrcnn_benchmark/data/datasets/relation_tsv.py to an absolute path:

```
open('/content/drive/MyDrive/scene_graph_benchmark/visualgenome/VG-SGG-dicts-vgoi6-clipped.json', 'r')
```

In [None]:
# Extract vision features with the VinVL object-attribute detection model by
# running tools/test_sg_net.py on the vinvl_x152c4 config.
# Pretrained model: https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/vinvl_model_zoo/vinvl_vg_x152c4.pth
# Associated label map: https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/vinvl_model_zoo/VG-SGG-dicts-vgoi6-clipped.json
# The KEY VALUE pairs after --config-file are passed on the command line in
# addition to the YAML config (presumably as config overrides - confirm
# against test_sg_net.py).
# The following cells read the result from
# ./output/inference/vinvl_vg_x152c4/predictions.tsv.
! python tools/test_sg_net.py \
  --config-file sgg_configs/vgattr/vinvl_x152c4.yaml \
  TEST.IMS_PER_BATCH 2 \
  MODEL.WEIGHT pretrained_model/vinvl_vg_x152c4.pth \
  MODEL.ROI_HEADS.NMS_FILTER 1 \
  MODEL.ROI_HEADS.SCORE_THRESH 0.2 \
  TEST.OUTPUT_FEATURE True \
  OUTPUT_DIR output \
  DATA_DIR tools/mini_tsv/data \
  TEST.IGNORE_BOX_REGRESSION True \
  MODEL.ATTRIBUTE_ON True

In [24]:
# Height/width records of every image, indexed by the first column
hw_df = pd.read_csv(
    op.join(TSV_DIR, 'train.hw.tsv'),
    sep='\t',
    header=None,
    index_col=0,
    converters={1: ast.literal_eval},
)

# Path of our predictions.tsv (bbox_id, class, conf, feature, rect)
sg_tsv = './output/inference/vinvl_vg_x152c4/predictions.tsv'
df = pd.read_csv(
    sg_tsv,
    sep='\t',
    header=None,
    converters={1: json.loads},
)
# Keep only the list of detected objects of each prediction
df[1] = df[1].apply(lambda prediction: prediction['objects'])

# Help functions
def generate_additional_features(rect,h,w):
    """Build the 6 positional features of one bounding box.

    Args:
        rect: the 4 box coordinates; they are divided element-wise by
            (w, h, w, h).
        h: image height.
        w: image width.

    Returns:
        float32 array of length 6: the 4 normalized coordinates clipped to
        [0, 1], followed by rect[3]-rect[1] and rect[2]-rect[0] of the
        normalized box.
    """
    scale = np.array([w, h, w, h], dtype=np.float32)
    normalized = np.clip(np.true_divide(rect, scale), 0, 1)
    extents = np.array([normalized[3] - normalized[1],
                        normalized[2] - normalized[0]])
    return np.concatenate((normalized, extents)).astype(np.float32)

def generate_features(x):
    """Encode all region features of one image as a base64 string.

    Args:
        x: one row of the predictions frame. x[0] is the image id used to
            look up the image size in hw_df, x[1] is the list of detected
            objects; each object is a dict with a base64-encoded float32
            'feature' vector and a 'rect' box.

    Returns:
        dict with
          "features":  base64 string of the float32 matrix that has one row
                       per object (region feature followed by the 6
                       positional features), and
          "num_boxes": number of detected objects.
        An image without detections yields an empty "features" string and
        num_boxes 0 instead of raising.
    """
    # image_id, object data list of dictionary, number of detected objects
    idx, data = x[0], x[1]
    num_boxes = len(data)
    # read image height and width (single lookup)
    hw = hw_df.loc[idx, 1][0]
    h, w = hw['height'], hw['width']

    features_arr = []
    # for every detected object in img
    for obj in data:
        # read image region feature vector
        region_feat = np.frombuffer(base64.b64decode(obj['feature']), np.float32)
        # 6 additional (positional) dimensions
        pos_feat = generate_additional_features(obj['rect'], h, w)
        # stack feature vector with 6 additional dimensions
        features_arr.append(np.hstack((region_feat, pos_feat)).astype(np.float32))

    if features_arr:
        features = np.vstack(features_arr)
    else:
        # np.vstack raises ValueError on an empty sequence, which would abort
        # the whole df.apply for a single image without detections
        features = np.zeros((0, 0), dtype=np.float32)
    print(features.shape)
    features = base64.b64encode(features.tobytes()).decode("utf-8")
    return {"features": features, "num_boxes": num_boxes}

def generate_labels(x):
    """Return the label records of one image.

    x[1] is the list of detected objects; for each of them a dict with the
    capitalized 'class', the 'conf' value and the 'rect' box is produced.
    """
    labels = []
    for detection in x[1]:
        labels.append({
            "class": detection['class'].capitalize(),
            "conf": detection['conf'],
            "rect": detection['rect'],
        })
    return labels

# Build the JSON-encoded 'feature' column from predictions.tsv
feature_records = df.apply(generate_features, axis=1)
df['feature'] = feature_records.apply(json.dumps)

# Build the JSON-encoded 'label' column
label_records = df.apply(generate_labels, axis=1)
df['label'] = label_records.apply(json.dumps)

(63, 2054)
(44, 2054)
(29, 2054)


3. Before running the cell below, set the `OUTPUT_DIR` and `TYPE` variables to your dataset folder and dataset split (train/test/val).

In [25]:
# Height/width records of every image, indexed by the first column
hw_df = pd.read_csv(
    op.join(TSV_DIR, 'train.hw.tsv'),
    sep='\t',
    header=None,
    index_col=0,
    converters={1: ast.literal_eval},
)

# Path of our predictions.tsv (bbox_id, class, conf, feature, rect)
sg_tsv = './output/inference/vinvl_vg_x152c4/predictions.tsv'
df = pd.read_csv(
    sg_tsv,
    sep='\t',
    header=None,
    converters={1: json.loads},
)
# Keep only the list of detected objects of each prediction
df[1] = df[1].apply(lambda prediction: prediction['objects'])

# Help functions
def generate_additional_features(rect,h,w):
    """Build the 6 positional features of one bounding box.

    Args:
        rect: the 4 box coordinates; they are divided element-wise by
            (w, h, w, h).
        h: image height.
        w: image width.

    Returns:
        float32 array of length 6: the 4 normalized coordinates clipped to
        [0, 1], followed by rect[3]-rect[1] and rect[2]-rect[0] of the
        normalized box.
    """
    scale = np.array([w, h, w, h], dtype=np.float32)
    normalized = np.clip(np.true_divide(rect, scale), 0, 1)
    extents = np.array([normalized[3] - normalized[1],
                        normalized[2] - normalized[0]])
    return np.concatenate((normalized, extents)).astype(np.float32)

def generate_features(x):
    """Encode all region features of one image as a base64 string.

    Args:
        x: one row of the predictions frame. x[0] is the image id used to
            look up the image size in hw_df, x[1] is the list of detected
            objects; each object is a dict with a base64-encoded float32
            'feature' vector and a 'rect' box.

    Returns:
        dict with
          "features":  base64 string of the float32 matrix that has one row
                       per object (region feature followed by the 6
                       positional features), and
          "num_boxes": number of detected objects.
        An image without detections yields an empty "features" string and
        num_boxes 0 instead of raising.
    """
    # image_id, object data list of dictionary, number of detected objects
    idx, data = x[0], x[1]
    num_boxes = len(data)
    # read image height and width (single lookup)
    hw = hw_df.loc[idx, 1][0]
    h, w = hw['height'], hw['width']

    features_arr = []
    # for every detected object in img
    for obj in data:
        # read image region feature vector
        region_feat = np.frombuffer(base64.b64decode(obj['feature']), np.float32)
        # 6 additional (positional) dimensions
        pos_feat = generate_additional_features(obj['rect'], h, w)
        # stack feature vector with 6 additional dimensions
        features_arr.append(np.hstack((region_feat, pos_feat)).astype(np.float32))

    if features_arr:
        features = np.vstack(features_arr)
    else:
        # np.vstack raises ValueError on an empty sequence, which would abort
        # the whole df.apply for a single image without detections
        features = np.zeros((0, 0), dtype=np.float32)
    features = base64.b64encode(features.tobytes()).decode("utf-8")
    return {"features": features, "num_boxes": num_boxes}

def generate_labels(x):
    """Return the label records of one image.

    x[1] is the list of detected objects; for each of them a dict with the
    capitalized 'class', the 'conf' value and the 'rect' box is produced.
    """
    labels = []
    for detection in x[1]:
        labels.append({
            "class": detection['class'].capitalize(),
            "conf": detection['conf'],
            "rect": detection['rect'],
        })
    return labels

# Build the JSON-encoded 'feature' column from predictions.tsv
feature_records = df.apply(generate_features, axis=1)
df['feature'] = feature_records.apply(json.dumps)

# Build the JSON-encoded 'label' column
label_records = df.apply(generate_labels, axis=1)
df['label'] = label_records.apply(json.dumps)

# Write train/test/val.label.tsv and train/test/val.feature.tsv
TYPE = 'train'
include_caption = True
OUTPUT_DIR = '/content/drive/MyDrive/dataset'
LABEL_FILE = os.path.join(OUTPUT_DIR, TYPE+'.label.tsv')
FEATURE_FILE = os.path.join(OUTPUT_DIR, TYPE+'.feature.tsv')

# Create the output folder on first use and report it
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
    print(f"path to {OUTPUT_DIR} created")

from maskrcnn_benchmark.structures.tsv_file_ops import tsv_reader, tsv_writer

# One (first column, JSON payload) row per image in each file
label_rows = df[[0,'label']].values.tolist()
tsv_writer(label_rows, LABEL_FILE)
feature_rows = df[[0,'feature']].values.tolist()
tsv_writer(feature_rows, FEATURE_FILE)

# Write TYPE.yaml for vinvl run_captioning
yaml_dict = dict(
    img=TYPE+".img.tsv",
    hw=TYPE+".hw.tsv",
    label=TYPE+".label.tsv",
    feature=TYPE+".feature.tsv",
)
if include_caption:
    yaml_dict["caption"] = TYPE+"_caption.json"

yaml_path = op.join(OUTPUT_DIR, TYPE+'.yaml')
with open(yaml_path, 'w') as yaml_file:
    yaml.dump(yaml_dict, yaml_file)

path to /content/drive/MyDrive/dataset created
