In [1]:
!pip install torch torchvision

Collecting torchvision
  Downloading torchvision-0.19.0-1-cp312-cp312-win_amd64.whl.metadata (6.1 kB)
Collecting numpy<2 (from torchvision)
  Downloading numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
     ---------------------------------------- 0.0/61.0 kB ? eta -:--:--
     ---------------------------------------- 0.0/61.0 kB ? eta -:--:--
     ------ --------------------------------- 10.2/61.0 kB ? eta -:--:--
     ------ --------------------------------- 10.2/61.0 kB ? eta -:--:--
     ------------------- ------------------ 30.7/61.0 kB 262.6 kB/s eta 0:00:01
     ------------------------- ------------ 41.0/61.0 kB 245.8 kB/s eta 0:00:01
     -------------------------------------- 61.0/61.0 kB 270.2 kB/s eta 0:00:00
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Using cached pillow-10.4.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Downloading torchvision-0.19.0-1-cp312-cp312-win_amd64.whl (1.3 MB)
   ---------------------------------------- 0.0/1.3 MB ? eta -:--:-


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


## Importing Faster RCNN for object detection

In [2]:
import torch 
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn

model=fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()



Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\USER/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:15<00:00, 11.0MB/s] 


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

## Creating the MotifNet model

In [3]:
import torch.nn as nn

class MotifNet(nn.Module):
    
    def __init__(self, num_classes,hidden_dim=256):
        super(MotifNet,self).__init__() ## initialising the base class

        #LSTM for object context

        self.obj_lstm=nn.LSTM(input_size=hidden_dim,
                              hidden_size=hidden_dim,
                              batch_first=True,
                              bidirectional=True)
        #LSTM for relationship context

        self.rel_lstm=nn.LSTM(input_size=hidden_dim,
                        hidden_size=hidden_dim,
                        batch_first=True,
                        bidirectional=True)
        
        self.fc_obj=nn.Linear(hidden_dim*2,num_classes)
        self.fc_rel=nn.Linear(hidden_dim*2,num_classes)
    
    def forward(self, obj_features,rel_features):

        obj_lstm_out,_=self.obj_lstm(obj_features)
        rel_lstm_out,_=self.rel_lstm(rel_features)

        obj_scores=self.fc_obj(obj_lstm_out)
        rel_scores=self.fc_rel(rel_lstm_out)

        return obj_scores,rel_scores



## Integrating Object Detaction with MotifNet

In [None]:
from PIL import Image
from torchvision.transforms import functional as F

image_path="2.Knowledge_Graphs\2.Images\assets\SceneGraph.png"
image= Image.open(image_path)

#preprocess the image

image_tensor=F.to_tensor(image).unsqueeze(0)

#detecting objects

with torch.no_grad():
    detection=model(image_tensor)

#extracting features from Faster R-CNN

features=detection[0]['boxes']
labels=detection[0]['labels']

motif_net=MotifNet(num_classes=len(labels))

obj_scores,rel_scores=motif_net()
