In [1]:
# Import libraries

import os
import cv2
import json
import tensorflow as tf
import numpy as np
import pickle

2023-11-27 16:38:18.902923: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-27 16:38:18.939456: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Turn on memory growth for every physical GPU TensorFlow reports, then print
# how many physical and logical GPUs are visible.
# Nothing is configured or printed when no GPU is found.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    # The error is only printed; execution of the notebook continues.
    print(e)

1 Physical GPUs, 1 Logical GPUs


2023-11-27 16:38:20.340102: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1233 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Ti, pci bus id: 0000:65:00.0, compute capability: 8.6


In [3]:
# File names of every image loaded by Data_Loader.load_image_data, in load order.
image_list = []


class Data_Loader():
    """Load scatter-chart images together with their JSON annotations.

    Images are `.jpg` files in `data_file_path`; the annotation of `name.jpg`
    is expected at `label_file_path/name.json`.
    """

    # Hard upper bound on images loaded per call, independent of n_images.
    MAX_IMAGES = 3500

    def __init__(self, data_file_path, label_file_path, n_images):
        """Store the image directory, annotation directory and image quota."""
        self.data_file_path = data_file_path
        self.label_file_path = label_file_path
        self.n_images = n_images

    def load_image_data(self):
        """Read up to `n_images` scatter charts (never more than MAX_IMAGES).

        Files are skipped when they are not `.jpg`, have no annotation file,
        are annotated with a chart type other than 'scatter', or cannot be
        decoded by cv2. Every loaded file name is appended to the module-level
        `image_list`.

        Returns:
            (X, Y): X is the list of images as returned by cv2.imread, Y the
            list of annotation dicts, index-aligned with X.
        """
        X = []
        Y = []

        # A non-positive quota means there is nothing to load.
        if self.n_images <= 0:
            return X, Y

        for file in os.listdir(self.data_file_path):
            if len(X) >= self.MAX_IMAGES:
                break
            if not file.endswith('.jpg'):
                continue

            img_annotations = self.__load_annotations(file)
            # Test for None first: an image without an annotation file must be
            # skipped, not subscripted.
            if img_annotations is None or img_annotations.get('chart-type') != 'scatter':
                continue

            img = cv2.imread(os.path.join(self.data_file_path, file))
            if img is None:
                # cv2.imread signals an unreadable file by returning None;
                # keeping it would leave a hole in X.
                continue

            X.append(img)
            Y.append(img_annotations)
            image_list.append(file)

            if len(X) >= self.n_images:
                return X, Y

        print('Total images loaded', len(X))
        return X, Y

    def __load_annotations(self, image_file_name):
        """Return the parsed annotation JSON for an image, or None if absent."""
        file_name = os.path.splitext(image_file_name)[0]
        json_file_path = os.path.join(self.label_file_path, file_name + '.json')
        if not os.path.isfile(json_file_path):
            return None
        # Context manager so the file handle is closed after parsing.
        with open(json_file_path) as f:
            return json.load(f)
        

In [4]:
class Image_Processor():
    """Resize a collection of images to the smallest extents found among them.

    On construction the smallest size along axis 0 and along axis 1 is
    computed over all images, and every image is resized to that pair with
    tf.image.resize. The result is kept in `resized_images` as a numpy array.

    NOTE(review): `min_width` holds the smallest axis-0 size and `min_height`
    the smallest axis-1 size. For arrays laid out as (rows, cols, channels)
    that would make the names swapped; the pair is passed to tf.image.resize
    in (axis 0, axis 1) order. Confirm the layout before relying on the names.
    """

    def __init__(self, images):
        self.images = images
        smallest_extents = self.__find_smallest_image_width_and_height()
        self.min_width, self.min_height = smallest_extents
        self.resized_images = self.__resize_images()

    def __resize_images(self):
        """Return all images resized to (min_width, min_height) as one array."""
        target_size = (self.min_width, self.min_height)
        return np.array([tf.image.resize(img, size=target_size)
                         for img in self.images])

    def __find_smallest_image_width_and_height(self):
        """Return (smallest axis-0 size, smallest axis-1 size) over all images."""
        first_image = self.images[0]
        smallest_axis0 = np.size(first_image, 0)
        smallest_axis1 = np.size(first_image, 1)

        for img in self.images[1:]:
            smallest_axis0 = min(smallest_axis0, np.size(img, 0))
            smallest_axis1 = min(smallest_axis1, np.size(img, 1))

        return smallest_axis0, smallest_axis1

In [5]:
import os
# Show the working directory; the relative './...' paths used in this notebook
# are resolved against it.
os.getcwd()

'/home/hiteshbasantani/Documents/berkeley/MLproject'

In [6]:
# Training image directory and annotation directory, relative to the working
# directory.
data_file_path = './benetech-making-graphs-accessible/train/images'
label_file_path = './benetech-making-graphs-accessible/train/annotations'

In [7]:
def get_line_bboxes(v,ctype=None):
    """Build class-labelled boxes for the tick labels and data points of a chart.

    Args:
        v: annotation dict; reads v["text"], v["axes"][...]["ticks"] and
            v["data-series"].
        ctype: chart type; for "horizontal_bar" the x and y tick tables are
            swapped before the data series is mapped to chart coordinates.

    Returns:
        A list of [class_id, x_center, y_center, width, height] entries, with
        class 0 for x-axis tick labels, 1 for y-axis tick labels and 2 for
        data points. Tick-label entries come first, then the data points.
    """
    texts = v["text"]
    labelled_polygons = []
    ticks_by_axis = {"x-axis": {}, "y-axis": {}}

    # Class id 0 is the x-axis, class id 1 the y-axis.
    for class_id, axis_name in enumerate(("x-axis", "y-axis")):
        for tick in v["axes"][axis_name]["ticks"]:
            text_entry = get_id(texts, tick["id"])
            corners = convert_to_bbox(text_entry["polygon"])
            labelled_polygons.append([class_id] + corners)
            ticks_by_axis[axis_name][text_entry["text"]] = tick["tick_pt"]

    x_tickinfo = ticks_by_axis["x-axis"]
    y_tickinfo = ticks_by_axis["y-axis"]
    if ctype=="horizontal_bar":
        x_tickinfo, y_tickinfo = y_tickinfo, x_tickinfo

    chart_points = translate_dataseries_to_chart(v['data-series'], x_tickinfo, y_tickinfo, ctype)
    for x, y in chart_points:
        # Each data point becomes a small square so it can be boxed like a label.
        labelled_polygons.append([2] + create_square([x, y]))

    boxes = []
    for entry in labelled_polygons:
        boxes.append([entry[0]] + polygon_to_bbox(entry[1:]))
    return boxes
def convert_to_bbox(polygon):
    """Flatten a four-corner polygon dict into [x0, y0, x1, y1, x2, y2, x3, y3]."""
    flat = []
    for corner in range(4):
        flat.append(polygon[f'x{corner}'])
        flat.append(polygon[f'y{corner}'])
    return flat

def get_id(texts,i):
    """Return the first entry of `texts` whose "id" equals `i`, or None if none does."""
    return next((entry for entry in texts if i == entry["id"]), None)
        
def _build_axis_mapper(tickinfo, axis):
    """Return a callable mapping a data value to its chart coordinate on one axis.

    Args:
        tickinfo: dict from tick-label text to a tick point dict.
        axis: 'x' or 'y'; the key read from every tick point.
    """
    if not all(is_number(label) for label in tickinfo.keys()):
        # Categorical axis: a data value has to match a tick label exactly.
        label_to_coord = {label: info[axis] for label, info in tickinfo.items()}
        return lambda value: label_to_coord[value]

    # Numerical axis: tick values sorted ascending, paired with their coordinates.
    # Labels that parse to the same number collapse into one tick (last wins).
    ticks = dict(sorted({get_number(label): info[axis]
                         for label, info in tickinfo.items()}.items()))
    values = list(ticks.keys())
    coords = list(ticks.values())

    def to_coord(value):
        value = get_number(value)
        if len(values) < 2:
            # One tick (or none) does not define a scale; fail with a clear
            # message instead of a ZeroDivisionError / IndexError.
            raise ValueError(
                f"need at least two distinct numeric {axis}-axis ticks to "
                f"interpolate, got {len(values)}")

        # Interpolate linearly between the two neighbouring ticks.
        for i in range(len(values) - 1):
            low, high = values[i], values[i + 1]
            if low <= value <= high:
                ratio = (value - low) / (high - low)
                return coords[i] + ratio * (coords[i + 1] - coords[i])

        # Value lies outside the tick range: extrapolate along the line
        # through the first and last tick.
        ratio = (value - values[0]) / (values[-1] - values[0])
        return coords[0] + ratio * (coords[-1] - coords[0])

    return to_coord


def translate_dataseries_to_chart(dataseries, x_tickinfo, y_tickinfo,ctype):
    """Map every data point of a series to chart coordinates using the axis ticks.

    An axis whose tick labels are all numeric (per is_number) is treated as
    numerical: values are linearly interpolated between neighbouring ticks, or
    extrapolated from the first and last tick when out of range. Otherwise the
    axis is categorical and a value is looked up by its exact tick label.

    Args:
        dataseries: iterable of dicts with 'x' and 'y' entries.
        x_tickinfo: dict from x tick-label text to a tick point dict (reads 'x').
        y_tickinfo: dict from y tick-label text to a tick point dict (reads 'y').
        ctype: chart type; accepted for interface compatibility, not used here.

    Returns:
        List of (x_coord, y_coord) tuples, one per data point, in input order.

    Raises:
        ValueError: a numerical axis has fewer than two distinct ticks, or a
            value on a numerical axis cannot be parsed as a number.
        KeyError: a value on a categorical axis matches no tick label.
    """
    x_to_coord = _build_axis_mapper(x_tickinfo, 'x')
    y_to_coord = _build_axis_mapper(y_tickinfo, 'y')

    result = []
    for point in dataseries:
        result.append((x_to_coord(point['x']), y_to_coord(point['y'])))
    return result

#Check if a string is number
def is_number(s: str):
    """Return True if `s` can be read as a number, False otherwise.

    For strings, commas, dollar signs, percent signs and spaces are stripped
    before the check, the same clean-up get_number applies. Non-string input
    (e.g. an int, a float or None) is passed to float() as-is, so the answer
    always matches whether get_number would succeed on the same value.
    """
    if isinstance(s, str):
        s = re.sub('[,$% ]', '', s)
    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # TypeError covers values float() cannot accept at all, such as None.
        return False
    

    #Get number from string
def get_number(s):
    """Convert `s` to float; strings first lose commas, '$', '%' and spaces."""
    cleaned = re.sub('[,$% ]', '', s) if type(s)==str else s
    return float(cleaned)
    
#Convert a point to square
def create_square(point, k=10):
    """Return the corners of a square of side `k` centred on `point`.

    `point` is either a dict with "x" and "y" entries or an (x, y) pair.
    The result is the flat list
    [x-k/2, y-k/2, x+k/2, y-k/2, x+k/2, y+k/2, x-k/2, y+k/2].
    """
    if type(point)==dict:
        cx, cy = point["x"], point["y"]
    else:
        cx, cy = point

    reach = k / 2.0
    x_lo, x_hi = cx - reach, cx + reach
    y_lo, y_hi = cy - reach, cy + reach

    return [x_lo, y_lo, x_hi, y_lo, x_hi, y_hi, x_lo, y_hi]

def polygon_to_bbox(polygon):
    """Return the axis-aligned bounding box of a flat [x0, y0, x1, y1, ...] polygon.

    The box is given as [x_center, y_center, width, height].
    """
    # Even positions hold x values, odd positions hold y values.
    xs, ys = polygon[0::2], polygon[1::2]

    x_lo, x_hi = min(xs), max(xs)
    y_lo, y_hi = min(ys), max(ys)

    return [
        (x_lo + x_hi) / 2.0,
        (y_lo + y_hi) / 2.0,
        x_hi - x_lo,
        y_hi - y_lo,
    ]

# Integer index of each chart type. The conversion loop adds 3 to it to get the
# class id of the plot-area box (ids 0-2 are the tick-label and data-point
# classes produced by get_line_bboxes).
CINDEX={'line': 0, 'dot': 1, 'vertical_bar': 2, 'scatter': 3, 'horizontal_bar': 4}

In [8]:
#img = jsn[:-4]+'jpg'
# Absolute locations of the annotation JSON files and of the chart images.
# NOTE(review): these hard-code one machine's home directory. Given the working
# directory shown earlier in the notebook they appear to be the same folders as
# the relative label_file_path / data_file_path - confirm, and consider
# deriving them from those so the notebook runs elsewhere.
ANNOTATIONS_DIR = '/home/hiteshbasantani/Documents/berkeley/MLproject/benetech-making-graphs-accessible/train/annotations/'
IMAGE_DIR = '/home/hiteshbasantani/Documents/berkeley/MLproject/benetech-making-graphs-accessible/train/images/'

In [9]:
import re
import shutil
import cv2
import matplotlib.pyplot as plt
import random
# Create the output folders for the converted dataset; exist_ok avoids an error
# when they are already there.
os.makedirs("./dataset/train/images",exist_ok=True)
os.makedirs("./dataset/train/labels",exist_ok=True)
image_filenames = os.listdir(IMAGE_DIR)
# TTH is the per-image probability threshold compared against np.random.rand()
# in the conversion loop to decide whether an image's boxes get drawn; with
# 10/len(...) that is about 10 images in expectation if every file is processed.
# NOTE: raises ZeroDivisionError when IMAGE_DIR contains no files.
TTH=10/len(image_filenames)
# Only the last expression of the cell is displayed, so the bare TTH shows nothing.
TTH
len(image_filenames)

60578

In [None]:
# Convert chart annotations into label text files: one
# "class x_center y_center width height" line per box, with x/width divided by
# the image width and y/height divided by the image height. The image itself is
# copied next to the labels under ./dataset/train.

# How many files of IMAGE_DIR to convert. The notebook only ever converted the
# first one; set to None to convert the whole directory.
N_IMAGES_TO_CONVERT = 1

# List the same directory the images are read from.
for img in os.listdir(IMAGE_DIR)[:N_IMAGES_TO_CONVERT]:
    try:
        k = os.path.splitext(img)[0]
        # Context manager so the annotation file is closed after parsing.
        with open(os.path.join(ANNOTATIONS_DIR, k + ".json")) as annotation_file:
            v = json.load(annotation_file)

        imgpth = os.path.join(IMAGE_DIR, img)
        chart_img = cv2.imread(imgpth)
        if chart_img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            raise ValueError(f"could not read image {imgpth}")
        height, width = chart_img.shape[:2]

        polygon = get_line_bboxes(v, v['chart-type'])

        # assumes the "plot-bb" values are ordered height, width, x, y - TODO
        # confirm against the annotation schema; this relies on JSON key order.
        h, w, x, y = list(v["plot-bb"].values())
        # Plot-area box, converted from corner + size to centre + size.
        polygon.append([3 + CINDEX[v['chart-type']], x + w / 2, y + h / 2, w, h])

        label_lines = []
        for class_id, box_x, box_y, box_w, box_h in polygon:
            fields = [class_id, box_x / width, box_y / height,
                      box_w / width, box_h / height]
            label_lines.append(" ".join(str(field) for field in fields))

        label_path = f"./dataset/train/labels/{k}.txt"
        with open(label_path, "w") as f:
            f.write("\n".join(label_lines))
        shutil.copy(imgpth, "./dataset/train/images")

        if np.random.rand() < TTH:
            # NOTE(review): draw_boxes is not defined in the visible part of
            # this notebook; if it is missing, the NameError is reported by the
            # handler below (the label file and image copy are already done).
            draw_boxes(imgpth, label_path)
    except Exception as exc:
        # Best effort: skip images that fail, but say which one and why instead
        # of hiding the error.
        print(f"Skipping {img}: {type(exc).__name__}: {exc}")

In [None]:
# Path of one sample annotation file.
# NOTE(review): hard-coded absolute path on one machine.
jsn = '/home/hiteshbasantani/Documents/berkeley/MLproject/benetech-making-graphs-accessible/train/annotations/e17acd360f25.json'
#jsn
# Display imgpth, the image path assigned by the conversion loop.
imgpth

In [None]:
# File stem of the image whose label file is inspected next.
k = '0a07fa932e96'

In [None]:
# NOTE: this rebinds data_file_path, which earlier held the image directory,
# to the label text file of image k. Re-running earlier cells afterwards will
# see the new value.
data_file_path = f"./dataset/train/labels/{k}.txt"

In [None]:
# Image the label boxes are drawn on (file stem 0a07fa932e96).
# NOTE(review): hard-coded absolute path on one machine.
imgpth = '/home/hiteshbasantani/Documents/berkeley/MLproject/benetech-making-graphs-accessible/train/images/0a07fa932e96.jpg'

In [None]:
# Draw every box of the label file data_file_path onto the image imgpth and
# show the result.

# One random colour per class id; the label files use ids 0 through 7.
colors = [tuple(random.randint(0,255) for _ in range(3)) for _ in range(8)]

image = cv2.imread(imgpth)
image_height, image_width = image.shape[:2]
plt.figure(figsize=(10,10))

with open(data_file_path, 'r') as file:
    for line in file:
        # Each line reads: class x_center y_center width height (fractions of
        # the image size).
        object_class, x_center, y_center, width, height = map(float, line.strip().split())
        object_class = int(object_class)

        # Scale the fractions back to pixels.
        box_cx = x_center * image_width
        box_cy = y_center * image_height
        box_w = width * image_width
        box_h = height * image_height

        # Opposite corners of the box, truncated to whole pixels.
        corner_a = (int(box_cx-box_w/2), int(box_cy-box_h/2))
        corner_b = (int(box_cx + box_w/2), int(box_cy + box_h/2))

        cv2.rectangle(image, corner_a, corner_b, colors[object_class], 2)

# cv2 loads images as BGR while matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

plt.imshow(image)
plt.show()