In [105]:
import os
import json

import pandas as pd
from PIL import Image

import sys
sys.path.insert(0, "../../")
sys.path.insert(0, "../../../hindsight/hindsight_server/")
from annotation_helpers import visualize_annotations, annotations_to_label_studio

In [120]:
project_name = "tweet_parse-2024-09-21-01-13-dc3535d9"

In [121]:
PROJECTS_DIR = os.path.abspath("/Users/connorparish/code/hindsight_parsing/data/label_studio/")
project_dir = os.path.join(PROJECTS_DIR, project_name)

In [122]:
with open(os.path.join(project_dir, "notes.json"), 'r') as infile:
    project_notes = json.load(infile)

id_to_entity = {}
for cat in project_notes['categories']:
    id_to_entity[cat['id']] = cat['name']

In [123]:
labels_dir = os.path.join(project_dir, "labels")
labels_list = list()
for f in os.listdir(labels_dir):
    with open(os.path.join(labels_dir, f), 'r') as infile:
        for line in infile.readlines():
            line_s = line.split(" ")
            d = {"labels_f" : f,
                 "label_id" : int(line_s[0]),
                 "x_mid_n" : float(line_s[1]),
                 "y_mid_n" : float(line_s[2]),
                 "width_n" : float(line_s[3]),
                 "height_n" : float(line_s[4])}
            labels_list.append(d)

In [124]:
images_dir = os.path.join(project_dir, "images")
im_to_width = {}
im_to_height = {}
for f in os.listdir(images_dir):
    im_path = os.path.join(images_dir, f)
    im_name = f.split(".")[0]
    im = Image.open(im_path)
    im_to_width[im_name] = im.width
    im_to_height[im_name] = im.height

In [125]:
labels_df = pd.DataFrame(labels_list)

In [126]:
labels_df['label'] = labels_df['label_id'].map(id_to_entity)
labels_df['image_basename'] = labels_df['labels_f'].apply(lambda x: x.split('.')[0])
labels_df['image_width'] = labels_df['image_basename'].map(im_to_width)
labels_df['image_height'] = labels_df['image_basename'].map(im_to_height)
labels_df['x'] = labels_df.apply(lambda row: (row['x_mid_n'] - (row['width_n'] / 2)) * row['image_width'], axis=1)
labels_df['y'] = labels_df.apply(lambda row: (row['y_mid_n'] - (row['height_n'] / 2)) * row['image_height'], axis=1)
labels_df['w'] = labels_df.apply(lambda row: row['width_n'] * row['image_width'], axis=1)
labels_df['h'] = labels_df.apply(lambda row: row['height_n'] * row['image_height'], axis=1)

# Try to find issues with labels

In [127]:
check_tweets = set()
for im_basename in set(labels_df['image_basename']):
    tweet_annotations = labels_df.loc[labels_df['image_basename'] == im_basename]

    if "quoted_tweet" not in set(tweet_annotations['label']):
        multi_labels = tweet_annotations.groupby(['label']).x.count().sort_values(ascending=False).iloc[0]
        if multi_labels > 1:
            check_tweets.add(im_basename)

In [128]:
len(check_tweets)

1

In [57]:
check_labels = labels_df.loc[labels_df['image_basename'].isin(check_tweets)]

# Create tasks to fix annotations

In [87]:
an_df = pd.read_csv(os.path.join(project_dir, "tweet_parse-2024-09-21-00-16-dc28421c.csv"))
an_df['image_basename'] = an_df['image'].apply(lambda x: os.path.basename(x).split(".")[0])
image_basename_to_image_path = {b : i.replace("/data/local-files/?d=", "") for b, i in zip(an_df['image_basename'], an_df['image'])}

In [102]:
check_labels['model_name'] = "conbot"
check_labels["model_version"] = "v0.0"
check_labels['index_value'] = check_labels.index
check_labels['index_value'] = check_labels['index_value'].astype("str")
check_labels["id"] = check_labels['image_basename'] + "_fix_manual_" + check_labels['index_value']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  check_labels['model_name'] = "conbot"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  check_labels["model_version"] = "v0.0"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  check_labels['index_value'] = check_labels.index
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [103]:
all_preds = list()
for image_basename in check_tweets:
    tweet_annotations = check_labels.loc[check_labels['image_basename'] == image_basename]
    im_path = "/" + image_basename_to_image_path[image_basename]
    im_path = im_path.replace("//", "/")
    im = Image.open(im_path)
    preds_d = annotations_to_label_studio(tweet_annotations, im_path, im.width, im.height)
    all_preds.append(preds_d)

with open("manual_annotation_fixes.json", 'w') as outfile:
    json.dump(all_preds, outfile)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annotations['model_name_version'] = annotations['model_name'] + "-" + annotations['model_version']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annotations['model_name_version'] = annotations['model_name'] + "-" + annotations['model_version']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annotat

# Validation

In [115]:
check_tweets

{'com-twitter-android_1725224165003-19860',
 'com-twitter-android_1725994180952_66'}

In [118]:
im_basename = "com-twitter-android_1725994180952_66"
im = Image.open(os.path.join(images_dir, f"{im_basename}.jpg"))
test_annotations = labels_df.loc[labels_df['image_basename'] == im_basename]
im_viz = visualize_annotations(im, annotations=test_annotations)

In [119]:
im_viz.show()

In [41]:
test_annotations

Unnamed: 0,labels_f,label_id,x_mid_n,y_mid_n,width_n,height_n,label,image_basename,image_width,image_height,x,y,w,h
23,com-twitter-android_1726169325841_1730.txt,18,0.226412,0.168029,0.164257,0.180747,username,com-twitter-android_1726169325841_1730,1088,283,156.9805,21.976579,178.711816,51.151488
24,com-twitter-android_1726169325841_1730.txt,3,0.707365,0.829871,0.129183,0.1953,impresssions,com-twitter-android_1726169325841_1730,1088,283,699.338032,207.218562,140.550644,55.269811
25,com-twitter-android_1726169325841_1730.txt,16,0.076823,0.2892,0.113896,0.404733,user_image,com-twitter-android_1726169325841_1730,1088,283,21.624352,24.573996,123.918969,114.539469
26,com-twitter-android_1726169325841_1730.txt,4,0.549671,0.823132,0.13798,0.199893,likes,com-twitter-android_1726169325841_1730,1088,283,522.981028,204.661356,150.122061,56.569718
27,com-twitter-android_1726169325841_1730.txt,11,0.67245,0.162738,0.075027,0.179951,time_since_post,com-twitter-android_1726169325841_1730,1088,283,690.811054,20.59185,81.62891,50.926122
28,com-twitter-android_1726169325841_1730.txt,9,0.207372,0.818607,0.129368,0.208566,replies,com-twitter-android_1726169325841_1730,1088,283,155.244778,202.153793,140.752003,59.024248
29,com-twitter-android_1726169325841_1730.txt,19,0.32983,0.163622,0.043577,0.155461,verified_check,com-twitter-android_1726169325841_1730,1088,283,335.148972,24.307307,47.412052,43.995564
30,com-twitter-android_1726169325841_1730.txt,15,0.506744,0.164793,0.222501,0.182234,user_handle,com-twitter-android_1726169325841_1730,1088,283,430.296515,20.850212,242.081488,51.572337
31,com-twitter-android_1726169325841_1730.txt,10,0.380385,0.829701,0.134038,0.202706,retweets,com-twitter-android_1726169325841_1730,1088,283,340.941879,206.122506,145.833508,57.36586
32,com-twitter-android_1726169325841_1730.txt,13,0.530855,0.487112,0.77834,0.396663,tweet_text,com-twitter-android_1726169325841_1730,1088,283,154.153825,81.724716,846.833625,112.255699
