# Part 3: Computer Vision

Set up the GPU environment using this guide: https://saturncloud.io/blog/how-to-run-jupyter-notebook-on-gpus/

# Imports

In [1]:
import torch

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from visual_genome import local as vg
from PIL import Image as PIL_Image
import requests
from io import StringIO
from io import BytesIO
%matplotlib inline

In [2]:
import fiftyone as fo
import fiftyone.zoo as foz

***Python 3.8 Deprecation Notice***
Python 3.8 will no longer be supported in new releases after October 1, 2024.
Please upgrade to Python 3.9 or later.
For additional details please see https://deprecation.voxel51.com


Check that the GPU is being used

In [3]:
# Expect True here; False means PyTorch cannot see a CUDA-capable GPU.
torch.cuda.is_available()

True

In [4]:
# Model (YOLO v5)
# Load the "yolov5s" entry point from the ultralytics/yolov5 repository through torch.hub.
model = torch.hub.load("ultralytics/yolov5", "yolov5s")  # or yolov5n - yolov5x6, custom



Using cache found in C:\Users\adam/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-20 Python-3.8.20 torch-2.4.1 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


# Testing Visual Genome Dataset

In [5]:
# Load the metadata of every image from the local Visual Genome annotations folder.
ids = vg.get_all_image_data("VisualGenome-Test/visual_genome/data/annotations")
image = ids[0]  # metadata of the first image
print(image)  # print 1st image info

id: 1, coco_id: -1, flickr_id: -1, width: 800, url: https://cs.stanford.edu/people/rak248/VG_100K_2/1.jpg


In [6]:
# Region descriptions are indexed as regions[image_index][region_index].
regions = vg.get_all_region_descriptions("VisualGenome-Test/visual_genome/data/annotations")
first_region = regions[0][0]  # first image, first region description
print("The first region description is: ", first_region.phrase)
print(
    "It is located in a bounding box specified by x:%d, y:%d, width:%d, height:%d"
    % (first_region.x, first_region.y, first_region.width, first_region.height)
)

The first region description is:  the clock is green in colour
It is located in a bounding box specified by x:421, y:57, width:82, height:139


In [7]:
def visualize_regions(image, regions, title="Image 1 (limited to first 8 regions)",
                      figsize=(18.5, 10.5), timeout=30):
    """Download an image and draw its region boxes and phrases on top of it.

    Args:
        image: Object exposing a ``url`` attribute that points at the image file.
        regions: Iterable of objects exposing ``x``, ``y``, ``width``, ``height``
            and ``phrase``. One red rectangle and one text label is drawn per region.
        title: Title shown above the plot. Defaults to the caption this
            notebook originally hard-coded.
        figsize: ``(width, height)`` of the figure in inches.
        timeout: Seconds to wait for the image server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(image.url, timeout=timeout)
    # Fail with a clear HTTP error instead of handing an error page to PIL.
    response.raise_for_status()
    img = PIL_Image.open(BytesIO(response.content))

    # Create the figure here so the requested size applies to the figure that is
    # actually drawn (sizing a figure in the defining cell has no effect on it).
    _, ax = plt.subplots(figsize=figsize)
    ax.imshow(img)

    for region in regions:
        ax.add_patch(Rectangle((region.x, region.y),
                               region.width,
                               region.height,
                               fill=False,
                               edgecolor='red',
                               linewidth=3))
        ax.text(region.x, region.y, region.phrase, style='italic',
                bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10})

    # Booleans are required here: the string 'off' is truthy and leaves labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title)
    plt.show()

In [8]:
visualize_regions(image, regions[0][:8])  # regions[0] holds the regions of image 1 (index 0); [:8] keeps the first 8

In [9]:
# NOTE(review): this repeats the region-description load done in an earlier cell with the
# same path; presumably it resets `regions` to a fresh copy. Confirm, otherwise the reload is redundant.
regions = vg.get_all_region_descriptions("VisualGenome-Test/visual_genome/data/annotations")

# Extracting Classes from Dataset

In [10]:
import json

## Sample: Getting a set of objects that contains every possible class

In [11]:
# Count how many times each object class (the first entry of an object's
# "names" list) appears in the first `num_images` images of objects.json.
classes = dict()
num_images = 10000 # Only go up to this image id

# The context manager closes the file handle once parsing is done; JSON is
# UTF-8 by specification, so do not rely on the platform default encoding.
with open("VisualGenome-Test/visual_genome/data/annotations/objects.json", encoding="utf-8") as input_json:
    loading = json.load(input_json)

for x, data in enumerate(loading, start=1):
    for obj in data.get("objects") or []:
        names = obj.get("names")
        if not names:
            # Skip objects without a usable name instead of failing on names[0].
            continue
        label = names[0]
        classes[label] = classes.get(label, 0) + 1
    if x >= num_images:
        print("Reached image", x, " - Stopping here")
        break

print("Classes length (appear 1+ time):", len(classes))

Reached image 10000  - Stopping here
Classes length (appear 1+ time): 12244


## Write to text file

Only entries that appear at least a minimum number of times (e.g. 5) are included, to get rid of most duplicates (spelling errors) and very specific entries.

In [12]:
# Clear any old text in the text file. Opening in 'w' mode truncates it, and
# the context manager closes the handle again straight away.
with open('vis_genome_classes.txt', 'w'):
    pass

In [13]:
min_appear = 5  # Minimum number of occurrences for a class to be written out

# Keep only the classes that appear at least `min_appear` times.
frequent_classes = [key for key in classes if classes[key] >= min_appear]

# 'w' (not 'a') makes this cell idempotent: re-running it rewrites the file
# instead of appending a second copy of every class. The context manager
# closes the file even if a write fails.
with open('vis_genome_classes.txt', 'w', encoding='utf-8') as class_list:
    class_list.writelines(key + '\n' for key in frequent_classes)

class_count = len(frequent_classes)
print("Classes that appear", min_appear, "+ times:", class_count)

Classes that appear 5 + times: 2470


# Testing FiftyOne (https://docs.voxel51.com/index.html)

# Testing AlexNet

In [14]:
from torchvision import models # Pre-trained models
from torchvision import transforms # Image pre-processing
import torch
from PIL import Image # Import Pillow
from torchvision.models import AlexNet_Weights
from torchvision.models import ResNet152_Weights

In [None]:
resnet = models.re