In [1]:
# Import libraries

import os
import cv2
import json
import tensorflow as tf
import numpy as np
import pickle

In [2]:
class Data_Loader():

    def __init__(self, data_file_path, label_file_path, n_images, chart_type):
        self.data_file_path = data_file_path
        self.label_file_path = label_file_path
        self.n_images = n_images
        self.chart_type = chart_type  # Added chart_type as an attribute

    def load_image_data(self):
        X = []
        Y = []

        for file in os.listdir(self.data_file_path):
            file_path = os.path.join(self.data_file_path, file)

            if file.endswith('.jpg'):
                img_annotations = self.__load_annotations(file)

                # Check if annotations exist and if chart-type matches self.chart_type
                if img_annotations is not None and img_annotations.get('chart-type') == self.chart_type:
                    Y.append(img_annotations)
                    img = cv2.imread(file_path)
                    X.append(img)

                # Stop if enough images are loaded
                if len(X) >= self.n_images:
                    return X, Y

        return X, Y

    def __load_annotations(self, image_file_name):
        file_name = image_file_name.split('.jpg')[0]
        json_file_name = file_name + '.json'
        json_file_path = os.path.join(self.label_file_path, json_file_name)

        if os.path.isfile(json_file_path):
            with open(json_file_path) as f:
                return json.load(f)

        else:
            return None

In [3]:
class Image_Processor():

    def __init__(self, images):
        self.images = images
        self.min_width, self.min_height = self.__find_smallest_image_width_and_height()
        self.resized_images = self.__resize_images()


    def __resize_images(self):

        X_resized = []

        for img in self.images:
            X_resized.append(tf.image.resize(img,
                                             size=(self.min_width, self.min_height)))

        return np.array(X_resized)


    def __find_smallest_image_width_and_height(self):

        min_width = np.size(self.images[0], 0)
        min_height = np.size(self.images[0], 1)

        for img in self.images[1:]:

            if np.size(img, 0) < min_width:
                min_width = np.size(img, 0)

            if np.size(img, 1) < min_height:
                min_height = np.size(img, 1)

        return min_width, min_height

In [4]:
import os
os.getcwd()

'/content'

In [5]:
# mount your Google drive in Colab
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
#Load data from Kaggle
!pip install -U -q kaggle
!mkdir -p ~/.kaggle
!echo '{"username":"angel80423","key":"f5a347582d10b1f9f45bc7bb61ab390b"}' > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c benetech-making-graphs-accessible

Downloading benetech-making-graphs-accessible.zip to /content
100% 982M/982M [00:38<00:00, 28.3MB/s]
100% 982M/982M [00:38<00:00, 26.9MB/s]


In [7]:
!unzip -q benetech-making-graphs-accessible.zip -d ./

In [8]:
data_file_path = '/content/train/images'
label_file_path = '/content/train/annotations'

In [9]:
n_images = 700
chart_type='scatter'

# Load in raw data
X_raw, Y_raw = Data_Loader(data_file_path,
                           label_file_path, n_images,chart_type).load_image_data()

In [10]:
# Process images: resize and make it gray
X_processed = Image_Processor(X_raw)
X_resized = X_processed.resized_images
X_gray = X_resized / 255.0

In [11]:
#Process labels: make scatters saved as an array of (id,x,y) points

def process_labels(Y_raw):
    Y_processed = []
    for annotation in Y_raw:
        # Extract polygon（x,y) for each point from annotation
        text_annotations = annotation['text']
        for text_annotation in text_annotations:
            polygon = text_annotation['polygon']
            id = text_annotation.get('id', -1)  # Default to -1 if no id is present
            # Loop through each pair of coordinates
            num_points = len(polygon) // 2
            for i in range(num_points):
                        x_key = 'x' + str(i)
                        y_key = 'y' + str(i)
                        if x_key in polygon and y_key in polygon:
                            point = [id, polygon[x_key], polygon[y_key]]
                            Y_processed.append(point)

    return np.array(Y_processed)

Y_processed = process_labels(Y_raw)

In [12]:
# Shuffle and Split the Data
shuffle_indices = tf.random.shuffle(tf.range(tf.shape(X_gray)[0], dtype=tf.int32))
X_shuffled = tf.gather(X_gray, shuffle_indices)
Y_shuffled = tf.gather(Y_processed, shuffle_indices)

split_index = int(0.8 * len(X_shuffled))

X_train, X_val = X_shuffled[:split_index], X_shuffled[split_index:]
Y_train, Y_val = Y_shuffled[:split_index], Y_shuffled[split_index:]



In [18]:
print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("Y_train shape:", Y_train.shape)
print("Y_val shape:", Y_val.shape)

X_train shape: (560, 248, 447, 3)
X_val shape: (140, 248, 447, 3)
Y_train shape: (560, 3)
Y_val shape: (140, 3)


In [19]:
def build_model(input_shape):

    model = tf.keras.Sequential()

    # add first convolution layer to the model
    model.add(tf.keras.layers.Conv2D(
        filters=16,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        name='conv_1',
        activation='relu'))


    # add a max pooling layer with pool size (2,2) and strides of 2
    # (this will reduce the spatial dimensions by half)
    model.add(tf.keras.layers.MaxPool2D(
        pool_size=(2, 2),
        name='pool_1'))


    # add second convolutional layer
    # model.add(tf.keras.layers.Conv2D(
    #     filters=64,
    #     kernel_size=(5, 5),
    #     strides=(1, 1),
    #     padding='same',
    #     name='conv_2',
    #     activation='relu'))

    # # add second max pooling layer with pool size (2,2) and strides of 2
    # # (this will further reduce the spatial dimensions by half)
    # model.add(tf.keras.layers.MaxPool2D(
    #     pool_size=(2, 2), name='pool_2')
    # )


    # add a fully connected layer (need to flatten the output of the previous layers first)
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(
        units=128,
        name='fc_1',
        activation='relu'))

    # add dropout layer
    #model.add(tf.keras.layers.Dropout(
    #     rate=0.5))

    # add the last fully connected layer
    # this last layer sets the activation function to "None" in order to output the logits
    # note that passing activation = "sigmoid" will return class memembership probabilities but
    # in TensorFlow logits are prefered for numerical stability
    model.add(tf.keras.layers.Dense(
        units=1,
        name='fc_2',
        activation=None))

    tf.random.set_seed(1)
    model.build(input_shape=input_shape)
    model.summary()

    return model

In [20]:
model = build_model(
     input_shape=(None, X_processed.min_width, X_processed.min_height, 3))

model.compile(optimizer=tf.keras.optimizers.Adam(),
               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])

tf.random.set_seed(1234)
np.random.seed(1234)
history = model.fit(X_train, Y_train,
                     epochs=10,
                     batch_size=96,
                     validation_data=(X_val, Y_val) )

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_1 (Conv2D)             (None, 248, 447, 16)      448       
                                                                 
 pool_1 (MaxPooling2D)       (None, 124, 223, 16)      0         
                                                                 
 flatten_4 (Flatten)         (None, 442432)            0         
                                                                 
 fc_1 (Dense)                (None, 128)               56631424  
                                                                 
 fc_2 (Dense)                (None, 1)                 129       
                                                                 
Total params: 56632001 (216.03 MB)
Trainable params: 56632001 (216.03 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10

InvalidArgumentError: ignored

In [21]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found
