# Notebook for creating bounding-box predictions and exporting them to a Label Studio-friendly format

In [146]:
import os
import json
from ultralytics import YOLO
from PIL import Image

In [148]:
# Checkpoint from the "train13" run. The "train72" checkpoint
# (./runs/detect/train72/weights/best.pt) is kept here as an alternative.
weights_path = "./runs/detect/train13/weights/best.pt"
trained_model = YOLO(weights_path)

In [153]:
# Directory of screenshots to pre-annotate.
# NOTE(review): machine-specific absolute path; point it at your own copy of the export.
pred_images_dir = "/Users/connorparish/code/hindsight_parsing/data/label_studio/twitter-2024-09-12-21-06-e4a60d42/images"

# os.listdir returns entries in arbitrary order and includes hidden files and
# subdirectories. Sorting keeps the batch order reproducible; skipping hidden
# entries (e.g. macOS .DS_Store) and non-files stops Image.open from raising.
images = list()
for f in sorted(os.listdir(pred_images_dir)):
    image_path = os.path.join(pred_images_dir, f)
    if f.startswith(".") or not os.path.isfile(image_path):
        continue
    im = Image.open(image_path)
    images.append(im)

In [154]:
# Run the detector on the whole list of loaded images in one call; the export
# cell further down iterates over `results` to build the Label Studio tasks.
results = trained_model(images)


0: 640x320 4 tweets, 189.2ms
1: 640x320 3 tweets, 189.2ms
2: 640x320 1 plus_button, 4 tweets, 189.2ms
3: 640x320 3 tweets, 189.2ms
4: 640x320 3 tweets, 189.2ms
5: 640x320 3 tweets, 189.2ms
6: 640x320 1 discover_new_communities, 3 tweets, 189.2ms
7: 640x320 3 tweets, 189.2ms
8: 640x320 3 tweets, 189.2ms
9: 640x320 3 tweets, 189.2ms
10: 640x320 3 tweets, 189.2ms
11: 640x320 1 more_posted, 3 tweets, 1 tweet_ad, 189.2ms
12: 640x320 1 plus_button, 3 tweets, 189.2ms
13: 640x320 1 plus_button, 5 tweets, 189.2ms
14: 640x320 3 tweets, 189.2ms
15: 640x320 1 quoted_tweet, 4 tweets, 2 partial_tweets, 189.2ms
16: 640x320 3 tweets, 189.2ms
17: 640x320 1 more_posted, 3 tweets, 189.2ms
18: 640x320 1 more_posted, 3 tweets, 189.2ms
19: 640x320 1 more_posted, 1 plus_button, 3 tweets, 189.2ms
20: 640x320 1 new_posts, 5 tweets, 189.2ms
21: 640x320 1 more_posted, 4 tweets, 189.2ms
22: 640x320 1 tweet, 1 tweet_ad, 189.2ms
23: 640x320 1 new_posts, 2 tweets, 189.2ms
24: 640x320 1 more_posted, 2 tweets, 1 twee

In [155]:
# Accumulator for the Label Studio task dicts, one per image.
all_preds = []

In [156]:
# Values that tie the export to one Label Studio project and one training run.
UPLOAD_URL_PREFIX = "/data/upload/2"  # NOTE(review): "2" is presumably the Label Studio project id; confirm
MODEL_VERSION = "train13"             # keep in sync with the checkpoint loaded into trained_model


def result_to_label_studio_task(result, model_version=MODEL_VERSION, upload_url_prefix=UPLOAD_URL_PREFIX):
    """Convert one detection result into a Label Studio task carrying a single prediction.

    Args:
        result: One element of the model output; must expose ``path``,
            ``orig_shape`` (height, width), ``boxes`` and ``names``.
        model_version: Value stored under the prediction's "model_version" key.
        upload_url_prefix: URL prefix that is joined with the image file name
            to form the task's "image" value.

    Returns:
        dict: ``{"data": {"image": ...}, "predictions": [{...}]}``. Each region
        stores x/y/width/height as percentages of the image size, taken from
        the normalized top-left and bottom-right corners, plus the box
        confidence under "score".
    """
    image_name = os.path.basename(result.path)
    org_height, org_width = result.orig_shape[0], result.orig_shape[1]

    regions = []
    confidences = []
    for i, box in enumerate(result.boxes):
        # Read the normalized corners once instead of indexing the tensor per field.
        x_min, y_min, x_max, y_max = (float(coord) for coord in box.xyxyn[0])
        confidence = float(box.conf[0])
        confidences.append(confidence)
        regions.append({
            "type": "rectanglelabels",
            "from_name": "label", "to_name": "image",
            "original_width": org_width, "original_height": org_height,
            "image_rotation": 0,
            "id": f"result{i}",
            "score": confidence,
            "value": {
                "rotation": 0,
                "x": x_min * 100,
                "y": y_min * 100,
                "width": (x_max - x_min) * 100,
                "height": (y_max - y_min) * 100,
                "rectanglelabels": [result.names[int(box.cls[0])]],
            },
        })

    # Overall score is the mean box confidence (0.0 when nothing was detected),
    # so tasks can be ranked by how sure the model actually was.
    mean_confidence = sum(confidences) / len(confidences) if confidences else 0.0
    prediction = {"model_version": model_version, "score": mean_confidence, "result": regions}
    return {"data": {"image": f"{upload_url_prefix}/{image_name}"}, "predictions": [prediction]}


# Rebuild the list from scratch so re-running this cell never appends duplicates.
all_preds = [result_to_label_studio_task(result) for result in results]

In [157]:
# Serialize all collected tasks as a single JSON array on disk.
with open("first_preds.json", 'w') as fh:
    fh.write(json.dumps(all_preds))

# Testing

In [None]:
# Screenshot used for the single-image check. Another sample from the same annotation set:
# /Users/connorparish/code/hindsight_parsing/data/annotations/twitter/twitter-2024-09-10-16-26-edf2ddd4/images/72312233-com-twitter-android_1725246837795.jpg
test_image_path = '/Users/connorparish/code/hindsight_parsing/data/annotations/twitter/twitter-2024-09-10-16-26-edf2ddd4/images/202dbdd9-com-twitter-android_1719163900778.jpg'
im = Image.open(test_image_path)

In [149]:
# Single-image smoke test.
# NOTE: this rebinds `results`, replacing the batch output assigned earlier in the notebook.
results = trained_model(im)


0: 640x320 4 tweets, 62.9ms
Speed: 2.7ms preprocess, 62.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 320)


In [150]:
# Take the first entry; when `results` comes from the single-image call it is the only one.
result = results[0]

In [151]:
# Visual check of the prediction for the test image.
result.show()

In [135]:
# Single-image version of the Label Studio export, used to check the JSON layout
# on one result before converting a whole batch.
image_preds_d = {}
image_name = os.path.basename(result.path)
image_preds_d['data'] = {"image" : f"/data/upload/2/{image_name}"}
# NOTE(review): "one" / 0.55 look like placeholder metadata for this format check; confirm.
predictions_d = {"model_version": "one", "score": 0.55}
# orig_shape is (height, width).
org_width = result.orig_shape[1]
org_height = result.orig_shape[0]
result_d_template = {"type": "rectanglelabels",
        "from_name": "label", "to_name": "image",
        "original_width": org_width, "original_height": org_height,
        "image_rotation": 0}
converted_results = list()
for i, box in enumerate(result.boxes):
    result_d = result_d_template.copy()
    result_d['id'] = f"result{i}"
    # Use the normalized corner coordinates (xyxyn) scaled to percentages.
    # The pixel xywh values are centre-based, so they cannot be used directly
    # as a top-left x/y. Read the tensor row once instead of once per field.
    x_min, y_min, x_max, y_max = (float(coord) for coord in box.xyxyn[0])
    value_d = {"rotation" : 0,
               "x" : x_min * 100,
               "y" : y_min * 100,
               "width" : (x_max - x_min) * 100,
               "height": (y_max - y_min) * 100,
               # Look up the predicted class name rather than assuming every box is a
               # "tweet", matching what the batch export cell does.
               "rectanglelabels": [result.names[int(box.cls[0])]]}
    result_d['value'] = value_d
    converted_results.append(result_d)

predictions_d["result"] = converted_results
image_preds_d['predictions'] = [predictions_d]

In [136]:
# Inspect the xywh tensor for the test image (pixel units, one row per detected box).
result.boxes.xywh

tensor([[ 559.5109,  691.9374, 1056.9783,  827.5319],
        [ 551.9160, 1976.3483, 1072.1681,  847.3035],
        [ 549.3012, 1351.8394, 1077.3976,  384.0614],
        [ 548.9635,  139.1507, 1078.0730,  278.3014]])

In [137]:
# Dump the full Boxes object to compare the coordinate formats it exposes
# (xywh, xywhn, xyxy, ...) alongside cls and conf.
result.boxes

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0., 0., 0., 0.])
conf: tensor([1.0000, 0.9999, 0.9944, 0.9898])
data: tensor([[3.1022e+01, 2.7817e+02, 1.0880e+03, 1.1057e+03, 1.0000e+00, 0.0000e+00],
        [1.5832e+01, 1.5527e+03, 1.0880e+03, 2.4000e+03, 9.9990e-01, 0.0000e+00],
        [1.0602e+01, 1.1598e+03, 1.0880e+03, 1.5439e+03, 9.9444e-01, 0.0000e+00],
        [9.9271e+00, 0.0000e+00, 1.0880e+03, 2.7830e+02, 9.8982e-01, 0.0000e+00]])
id: None
is_track: False
orig_shape: (2400, 1088)
shape: torch.Size([4, 6])
xywh: tensor([[ 559.5109,  691.9374, 1056.9783,  827.5319],
        [ 551.9160, 1976.3483, 1072.1681,  847.3035],
        [ 549.3012, 1351.8394, 1077.3976,  384.0614],
        [ 548.9635,  139.1507, 1078.0730,  278.3014]])
xywhn: tensor([[0.5143, 0.2883, 0.9715, 0.3448],
        [0.5073, 0.8235, 0.9854, 0.3530],
        [0.5049, 0.5633, 0.9903, 0.1600],
        [0.5046, 0.0580, 0.9909, 0.1160]])
xyxy: tensor([[  31.0217,  278.1715, 1088.0000, 1105.70

In [138]:
# Wrap the single test task in a list, the same list-of-tasks shape the batch export writes.
# NOTE: this rebinds `all_preds`, discarding any batch predictions built earlier.
all_preds = [image_preds_d]

In [139]:
# Write the one-task test export to disk as a JSON array.
with open("test_preds.json", 'w') as fh:
    fh.write(json.dumps(all_preds))

In [77]:
# Read the test export back in to confirm it parses as JSON.
with open("test_preds.json", 'r') as fh:
    test_d = json.loads(fh.read())

In [127]:
# Re-display the prediction for the current `result` (same call as the earlier visual check).
result.show()