In [None]:
#Download the Charades dataset archive (Charades_v1_480.zip) into the current working directory
!wget http://ai2-website.s3.amazonaws.com/data/Charades_v1_480.zip

#Unzip the archive; the absolute path assumes the download above ran with /content as the working directory
!unzip /content/Charades_v1_480.zip

In [None]:
#Video (named F4BJJ) to image; starting at 7.70s and lasting 5.00s; 1 frame/second frame rate
!ffmpeg -ss 7.70 -t 5.00 -accurate_seek -i F4BJJ.mp4 -r 1 {'/content/F4BJJ/'}%d.jpg

#Video to keyframe
#!ffmpeg -i F4BJJ.mp4 -vf "select=eq(pict_type\,I)" -vsync vfr {'/content/F4BJJ/'}%d.jpg -hide_banner -loglevel quiet

In [None]:
#Check which GPU was assigned; Colab randomly allocates an Nvidia K80, T4, P4 or P100
#If you get the error 'RuntimeError: CUDNN_STATUS_EXECUTION_FAILED' when you run the KERN model,
#create a new Colab notebook to obtain a different GPU and avoid the error; the T4 may fail to run the model
#This may be because CUDA 9.0 is incompatible with the T4
!nvidia-smi

Tue Jun 16 16:12:32 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8    28W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
#Remember to check the size of the downloaded files; Colab sometimes fails to download files completely
#Download the pretrained detector checkpoint from Google Drive (saved as vg-faster-rcnn.tar)
#The inner wget fetches the download confirmation token and cookies; the outer wget uses them to get the file
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=11zKRr2OF5oclFL47kjFYBOxScotQzArX' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=11zKRr2OF5oclFL47kjFYBOxScotQzArX" -O vg-faster-rcnn.tar && rm -rf /tmp/cookies.txt

#Download the pretrained KERN model checkpoint the same way (saved as kern_sgdet.tar)
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1hAx4MpMiwofABQi9H6_Jb0Qjp016JX7T' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1hAx4MpMiwofABQi9H6_Jb0Qjp016JX7T" -O kern_sgdet.tar && rm -rf /tmp/cookies.txt

In [None]:
#Clone our GitHub repository (creates the KERN folder in the current working directory)
!git clone https://github.com/Vinceeenttttt/KERN.git

#Create the checkpoint folder and move the KERN model checkpoint into it
!mkdir /content/KERN/Pipeline/checkpoint
!mv /content/kern_sgdet.tar /content/KERN/Pipeline/checkpoint

#Create the vgdet sub-folder and move the detector checkpoint into it
!mkdir /content/KERN/Pipeline/checkpoint/vgdet
!mv /content/vg-faster-rcnn.tar /content/KERN/Pipeline/checkpoint/vgdet

In [None]:
#Download and install Anaconda which can help to install pytorch
!wget -c https://repo.continuum.io/archive/Anaconda3-5.1.0-Linux-x86_64.sh
!chmod +x Anaconda3-5.1.0-Linux-x86_64.sh
!bash ./Anaconda3-5.1.0-Linux-x86_64.sh -b -f -p /usr/local

#Download and install CUDA 9.0; Version used by KERN model
!wget https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb
!dpkg -i cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb
!apt-key add /var/cuda-repo-9-0-local/7fa2af80.pub
!apt-get update
!apt-get install cuda=9.0.176-1

In [None]:
#Check the CUDA compiler version; it should report release 9.0 rather than the newer version preinstalled on Colab
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2017 NVIDIA Corporation
Built on Fri_Sep__1_21:08:03_CDT_2017
Cuda compilation tools, release 9.0, V9.0.176


In [None]:
#Install pytorch 3, version used by KERN model
!conda install pytorch=0.3.0 torchvision=0.2.0 cuda90 -c pytorch

#Install libraries required by KERN model
!pip install pycocotools
!conda install dill
!conda install tqdm

In [None]:
#Install gcc-6/g++-6 and register them as alternatives for gcc/g++ (priority 10)
#NOTE(review): presumably needed because CUDA 9.0 does not accept newer compilers - confirm
!sudo apt-get install gcc-6 g++-6 -y

!sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-6 10
!sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 10

In [None]:
CUDA_VISIBLE_DEVICES=0

#Run the make file
%cd /content/KERN/Pipeline
!make

In [None]:
#Create the data folder and its stanford_filtered sub-folder
!mkdir /content/KERN/Pipeline/data
!mkdir /content/KERN/Pipeline/data/stanford_filtered

#Download metadata files of the Visual Genome dataset
#These files only contain information about Visual Genome; including them lets the KERN model run smoothly
!wget https://cvgl.stanford.edu/scene-graph/dataset/VG-SGG-dicts.json -P /content
!mv /content/VG-SGG-dicts.json /content/KERN/Pipeline/data/stanford_filtered

!wget https://cvgl.stanford.edu/scene-graph/VG/image_data.json -P /content
!mv /content/image_data.json /content/KERN/Pipeline/data/stanford_filtered

#Create the folder for the image files we will use
!mkdir /content/KERN/Pipeline/data/VG_100K

In [None]:
#If you get a numpy-related error when running the KERN model,
#uncomment and run the following line to update the numpy package
#!conda install numpy

In [None]:
#Run the KERN model in 'sgdet' mode via the repository's evaluation script
#A later cell reads the result from /content/KERN/Pipeline/caches/kern_sgdet.pkl
!/content/KERN/scripts/eval_kern_sgdet.sh

In [None]:
#The KERN model stores its result as a pickle file in the caches folder
#Read that file back and keep the unpickled object in `data`
#NOTE: unpickling can execute arbitrary code; only load files produced by your own run
import pickle

with open('/content/KERN/Pipeline/caches/kern_sgdet.pkl', mode='rb') as result_file:
    data = pickle.loads(result_file.read())

#Display the loaded result
data

[{'obj_scores': array([8.8765651e-01, 8.8820976e-01, 6.4461130e-01, 5.4517323e-01,
         3.6605370e-01, 6.5438676e-01, 4.5007339e-01, 4.8734048e-01,
         3.1819612e-01, 3.0267492e-01, 5.0546199e-01, 2.6032728e-01,
         2.2865607e-01, 1.9720757e-01, 1.7216684e-01, 1.0767888e-01,
         2.4717368e-01, 9.6307620e-02, 2.1394186e-03, 7.9349667e-02,
         4.6841189e-02, 1.0554241e-01, 1.4544404e-01, 5.6758475e-02,
         1.0946512e-01, 6.2522247e-02, 1.2722552e-01, 2.7419558e-02,
         7.5420268e-02, 2.9548031e-01, 5.3908765e-02, 3.1585071e-02,
         1.2097321e-02, 6.6080103e-03, 1.2918929e-02, 2.6912779e-02,
         2.3021160e-02, 1.0911269e-02, 4.3552506e-04, 2.0764299e-02,
         1.3004647e-03, 3.1412967e-02, 2.2053596e-02, 3.9625883e-02,
         2.1191113e-02, 1.3177868e-02, 1.8990537e-02, 2.4225531e-02,
         1.0849449e-02, 1.5366188e-02, 1.8613078e-03, 4.5619361e-02,
         8.3798366e-03, 2.1536911e-02, 1.9120047e-02, 1.6453885e-03,
         9.3567325e-

In [None]:
#Pull every field of the first frame's entry into its own variable
first_entry = data[0]
obj_scores, pred_boxes, pred_classes, pred_rel_inds, rel_scores = (
    first_entry[field]
    for field in ('obj_scores', 'pred_boxes', 'pred_classes', 'pred_rel_inds', 'rel_scores')
)

In [None]:
#Node 5 is the first object node with the 'man' class, so the first printed value should be 78
#The second value is the shape of the relationship index array, i.e. the number of candidate relationships
for value in (pred_classes[5], pred_rel_inds.shape):
  print(value)

78
(1506, 2)


In [None]:
#Find all relationships whose first node is the object node 'man'
#Keep only the meaningful ones: the best predicate score must exceed a threshold (0.25 is used here)
import numpy as np

MAN_NODE = 5          #index of the first 'man' node in the first frame
REL_THRESHOLD = 0.25  #minimum predicate score for a relation to be kept

man_rel = []
#Iterate over however many candidate pairs exist instead of a hard-coded count
for pair, scores in zip(pred_rel_inds, rel_scores):
  if pair[0] == MAN_NODE:
    #Work on a copy so the scores stored in `data` stay untouched and the cell can be re-run
    #Index 0 has no entry in idx_to_predicate (presumably the 'no relation' class), so it is zeroed out
    other_scores = np.array(scores)
    other_scores[0] = 0
    if other_scores.max() > REL_THRESHOLD:
      man_rel.append(pair)

print(len(man_rel))
print(man_rel)

8
[array([ 5, 10]), array([5, 4]), array([ 5, 24]), array([ 5, 30]), array([ 5, 60]), array([ 5, 56]), array([ 5, 50]), array([ 5, 40])]


In [None]:
#Describe every frame with short sentences about the first 'man' node and its related objects
#For each frame: find the first 'man' node, keep its relations whose best predicate score
#exceeds 0.25, and translate the (subject, predicate, object) ids into words
#Remember to run the next block first: `jso` (the id-to-name tables) is defined there
dict1 = {}

for p in range(len(data)):
  pred_entry = data[p]
  relations = []

  classes = pred_entry['pred_classes']
  rel_inds = pred_entry['pred_rel_inds']

  #Index of the first node with class id 78 ('man'); None when this frame contains no such node,
  #so an index from a previous frame is never reused by accident
  first_man = next((i for i in range(len(classes)) if classes[i] == 78), None)

  if first_man is not None:
    for j in range(len(pred_entry['rel_scores'])):
      if rel_inds[j][0] == first_man:
        #Copy before zeroing index 0 so the scores stored in `data` are not modified
        other_entry = np.array(pred_entry['rel_scores'][j])
        other_entry[0] = 0
        if other_entry.max() > 0.25:
          node1 = classes[rel_inds[j][0]]
          node2 = classes[rel_inds[j][1]]
          #Best predicate among the remaining (non-zeroed) entries
          rel_index = np.argmax(other_entry)
          #Skip triples that were already collected for this frame
          if (node1, rel_index, node2) not in relations:
            relations.append((node1, rel_index, node2))

  #A frame without a 'man' node (or without strong relations) yields an empty sentence
  sentence = ""
  for i, j, k in relations:
    sentence += jso['idx_to_label'][str(i)] + " " + jso['idx_to_predicate'][str(j)] + " " + jso['idx_to_label'][str(k)] + ". "
  dict1[p] = sentence

print(dict1)

{0: 'man wearing shirt. man has hand. man wearing jacket. man wearing pant. man has hair. man has nose. man wearing hat. '}


In [None]:
#Object and relation (predicate) class names, keyed by the class id written as a string
#"idx_to_label" maps object class ids "1".."150" to names; "idx_to_predicate" maps predicate ids "1".."50" to names
#Other cells look names up through `jso`, so this cell must be run before them
jso={
"idx_to_label": {
"1": "airplane",
"2": "animal",
"3": "arm",
"4": "bag",
"5": "banana",
"6": "basket",
"7": "beach",
"8": "bear",
"9": "bed",
"10": "bench",
"11": "bike",
"12": "bird",
"13": "board",
"14": "boat",
"15": "book",
"16": "boot",
"17": "bottle",
"18": "bowl",
"19": "box",
"20": "boy",
"21": "branch",
"22": "building",
"23": "bus",
"24": "cabinet",
"25": "cap",
"26": "car",
"27": "cat",
"28": "chair",
"29": "child",
"30": "clock",
"31": "coat",
"32": "counter",
"33": "cow",
"34": "cup",
"35": "curtain",
"36": "desk",
"37": "dog",
"38": "door",
"39": "drawer",
"40": "ear",
"41": "elephant",
"42": "engine",
"43": "eye",
"44": "face",
"45": "fence",
"46": "finger",
"47": "flag",
"48": "flower",
"49": "food",
"50": "fork",
"51": "fruit",
"52": "giraffe",
"53": "girl",
"54": "glass",
"55": "glove",
"56": "guy",
"57": "hair",
"58": "hand",
"59": "handle",
"60": "hat",
"61": "head",
"62": "helmet",
"63": "hill",
"64": "horse",
"65": "house",
"66": "jacket",
"67": "jean",
"68": "kid",
"69": "kite",
"70": "lady",
"71": "lamp",
"72": "laptop",
"73": "leaf",
"74": "leg",
"75": "letter",
"76": "light",
"77": "logo",
"78": "man",
"79": "men",
"80": "motorcycle",
"81": "mountain",
"82": "mouth",
"83": "neck",
"84": "nose",
"85": "number",
"86": "orange",
"87": "pant",
"88": "paper",
"89": "paw",
"90": "people",
"91": "person",
"92": "phone",
"93": "pillow",
"94": "pizza",
"95": "plane",
"96": "plant",
"97": "plate",
"98": "player",
"99": "pole",
"100": "post",
"101": "pot",
"102": "racket",
"103": "railing",
"104": "rock",
"105": "roof",
"106": "room",
"107": "screen",
"108": "seat",
"109": "sheep",
"110": "shelf",
"111": "shirt",
"112": "shoe",
"113": "short",
"114": "sidewalk",
"115": "sign",
"116": "sink",
"117": "skateboard",
"118": "ski",
"119": "skier",
"120": "sneaker",
"121": "snow",
"122": "sock",
"123": "stand",
"124": "street",
"125": "surfboard",
"126": "table",
"127": "tail",
"128": "tie",
"129": "tile",
"130": "tire",
"131": "toilet",
"132": "towel",
"133": "tower",
"134": "track",
"135": "train",
"136": "tree",
"137": "truck",
"138": "trunk",
"139": "umbrella",
"140": "vase",
"141": "vegetable",
"142": "vehicle",
"143": "wave",
"144": "wheel",
"145": "window",
"146": "windshield",
"147": "wing",
"148": "wire",
"149": "woman",
"150": "zebra"
},
"idx_to_predicate": {
"1": "above",
"2": "across",
"3": "against",
"4": "along",
"5": "and",
"6": "at",
"7": "attached to",
"8": "behind",
"9": "belonging to",
"10": "between",
"11": "carrying",
"12": "covered in",
"13": "covering",
"14": "eating",
"15": "flying in",
"16": "for",
"17": "from",
"18": "growing on",
"19": "hanging from",
"20": "has",
"21": "holding",
"22": "in",
"23": "in front of",
"24": "laying on",
"25": "looking at",
"26": "lying on",
"27": "made of",
"28": "mounted on",
"29": "near",
"30": "of",
"31": "on",
"32": "on back of",
"33": "over",
"34": "painted on",
"35": "parked on",
"36": "part of",
"37": "playing",
"38": "riding",
"39": "says",
"40": "sitting on",
"41": "standing on",
"42": "to",
"43": "under",
"44": "using",
"45": "walking in",
"46": "walking on",
"47": "watching",
"48": "wearing",
"49": "wears",
"50": "with"
}
}

In [None]:
#Return the strongest man-object relations of one frame together with bounding boxes
MAN_CLASS = 78   #class id of 'man' in jso['idx_to_label']
TOP_K = 10       #maximum number of relations to report

last_graph = data[6]
last_man_rel = []
max_man_rel = []

#Locate the first node classified as 'man'; fail loudly rather than silently reusing
#a `first_man` value left behind by an earlier cell
first_man = next(
    (i for i in range(len(last_graph['pred_classes']))
     if last_graph['pred_classes'][i] == MAN_CLASS),
    None)
if first_man is None:
  raise ValueError("no 'man' node found in the selected frame")

for i in range(len(last_graph['rel_scores'])):
  if last_graph['pred_rel_inds'][i][0] == first_man:
    #Copy before zeroing index 0 so the scores stored in `data` are not modified
    other_scores = np.array(last_graph['rel_scores'][i])
    other_scores[0] = 0
    node_index = last_graph['pred_rel_inds'][i][1]
    node = last_graph['pred_classes'][node_index]
    rel_index = np.argmax(other_scores)
    max_val = other_scores.max()
    box = last_graph['pred_boxes'][node_index]
    max_man_rel.append((max_val, node, rel_index, box))

#Order by (score, class id, predicate id), highest first
#The box is left out of the sort key: comparing numpy arrays inside tuples raises
#"truth value of an array is ambiguous" whenever the leading fields tie
max_man_rel.sort(key=lambda rel: rel[:3], reverse=True)

#Slicing keeps at most TOP_K entries and does not fail when fewer relations exist
for max_val, class_index, rel_index, box in max_man_rel[:TOP_K]:
  jso_1 = jso['idx_to_label'][str(class_index)]
  jso_2 = jso['idx_to_predicate'][str(rel_index)]
  last_man_rel.append((class_index, jso_1, rel_index, jso_2, box))

print(last_man_rel)

In [None]:
#Organize the relations into one row per node: [class id, box coordinates...]
#Row 0 is the 'man' node itself; the following rows are its related objects
#The man's box is looked up with `first_man` (the man's node index); `node_index` only holds
#the last object visited by the relation loop and would select the wrong box
man_box = last_graph['pred_boxes'][first_man]
rows = [np.append(78, man_box)]
rows.extend(np.append(rel[0], rel[4]) for rel in last_man_rel)

#Show the flat vector first, then the same values arranged as one row per node
flat_inputs = np.concatenate(rows)
print(flat_inputs)

#reshape derives the row count from the data (11 rows when 10 relations were found), so it works
#for any number of relations and avoids the reference checks of in-place ndarray.resize
inputs = flat_inputs.reshape((len(rows), -1))
print(inputs)