# **upload and save resnet50 model**

In [1]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

#save resnet model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)  # Add a dropout layer with 50% dropout rate
output = Dense(5, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=output)
model.save('resnet50.h5')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


# **train the first model for predict type of the chart**

In [2]:
import os
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import load_model

# More Horizontal Data Paths
hrz_images_dir = '/kaggle/input/sortrd-chart-data/selected_images_archive'
hrz_annotations_dir = '/kaggle/input/sortrd-chart-data/json_files/json_files'

dot_images_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/dot/images'
dot_annotations_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/dot/annotations'

line_images_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/line/images'
line_annotations_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/line/annotations'

scatter_images_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/scatter/images'
scatter_annotations_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/scatter/annotations'

vrt_images_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/vertical_bar/images'
vrt_annotations_dir = '/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/vertical_bar/annotations'

files_paths = [
    [hrz_images_dir,hrz_annotations_dir],
    [dot_images_dir,dot_annotations_dir],
    [line_images_dir,line_annotations_dir],
    [scatter_images_dir,scatter_annotations_dir],
    [vrt_images_dir,vrt_annotations_dir]
]

# Load and preprocess data
def load_data(files_paths):
    image_paths, labels = [], []
    chart_types = {
        "horizontal_bar": 0,
        "vertical_bar": 1,
        "dot": 2,
        "line": 3,
        "scatter": 4
    }
    max_iterations = 2000
    for charttype in files_paths:
        iteration_count = 0
        for filename in os.listdir(charttype[0]):
            if iteration_count >= max_iterations:
                break
            if filename.endswith('.jpg'):
                img_path = os.path.join(charttype[0], filename)
                json_path = os.path.join(charttype[1], filename.replace('.jpg', '.json'))
                
                with open(json_path, 'r') as f:
                    data = json.load(f)
                    chart_type = data.get('chart-type', None)
                    
                    # Handle missing or unknown chart types
                    if chart_type not in chart_types:
                        print(f"Unknown chart type '{chart_type}' in {json_path}")
                        continue
                    
                    label = chart_types[chart_type]
                    image_paths.append(img_path)
                    labels.append(label)
            iteration_count += 1
    return image_paths, labels
    
# Preprocess images
def preprocess_image(img_path, label, img_size=(224, 224)):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, img_size)
    img = img / 255.0
    return img, label

# Load dataset
image_paths, labels = load_data(files_paths)

# Split into train, validation, and test sets
train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, labels, test_size=0.3, random_state=42)

# Use a smaller subset of the training dataset for faster training
train_paths = train_paths[:5000]  # Use only 5000 images for training
train_labels = train_labels[:5000]  # Match train labels with train paths
val_paths = val_paths[:1000]      # Use only 1000 images for validation
val_labels = val_labels[:1000]    # Match validation labels with validation paths

# Compute class weights to handle imbalance
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels
)
class_weights = dict(enumerate(class_weights))

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_dataset = train_dataset.map(lambda x, y: preprocess_image(x, y)).batch(32).shuffle(1000).repeat()

val_dataset = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
val_dataset = val_dataset.map(lambda x, y: preprocess_image(x, y)).batch(32).repeat()

# Define the model
model = load_model('/kaggle/working/resnet50.h5')

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Freeze base model layers
for layer in base_model.layers:
    layer.trainable = False

# Train the model with limited steps per epoch
model.fit(
    train_dataset,
    validation_data=val_dataset,
    steps_per_epoch=100,  # Use only 100 batches per epoch
    validation_steps=30,  # Use only 30 batches for validation
    epochs=5
)

# Unfreeze top layers for fine-tuning
for layer in base_model.layers[:-10]:  # Freeze all layers except the last 10
    layer.trainable = False
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Compile the model for fine-tuning
model.compile(optimizer=tf.keras.optimizers.Adam(1e-6), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fine-tune the model
model.fit(
    train_dataset,
    validation_data=val_dataset,
    steps_per_epoch=100,
    validation_steps=30,
    epochs=10
)

# Save the model
model.save('chart_classification_model.h5')

# Evaluate on the validation set
val_loss, val_accuracy = model.evaluate(val_dataset, steps=30)
print(f"Validation Accuracy: {val_accuracy:.2f}, Validation Loss: {val_loss:.2f}")

Epoch 1/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 546ms/step - accuracy: 0.9061 - loss: 0.3802 - val_accuracy: 0.2031 - val_loss: 1.7457
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 351ms/step - accuracy: 0.9754 - loss: 0.1463 - val_accuracy: 0.2051 - val_loss: 1.6088
Epoch 3/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 312ms/step - accuracy: 0.9807 - loss: 0.0775 - val_accuracy: 0.1506 - val_loss: 1.6275
Epoch 4/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 328ms/step - accuracy: 0.9878 - loss: 0.0436 - val_accuracy: 0.2340 - val_loss: 1.5921
Epoch 5/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 325ms/step - accuracy: 0.9879 - loss: 0.0557 - val_accuracy: 0.3013 - val_loss: 3.6471
Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 510ms/step - accuracy: 0.9896 - loss: 0.0443 - val_accuracy: 0.5052 - val_loss: 1.1076
Epoch 2/10
[

# **Load original train data**

In [3]:
#paths
images_dir = '/kaggle/input/benetech-making-graphs-accessible/train/images'
annotations_dir = '/kaggle/input/benetech-making-graphs-accessible/train/annotations'

# Load and preprocess data
def load_data(images_dir,annotations_dir):
    image_paths, labels = [], []
    chart_types = {
        "horizontal_bar": 0,
        "vertical_bar": 1,
        "dot": 2,
        "line": 3,
        "scatter": 4
    }
    for filename in os.listdir(images_dir):
        if iteration_count >= max_iterations:
            break
        if filename.endswith('.jpg'):
            img_path = os.path.join(images_dir, filename)
            json_path = os.path.join(annotations_dir, filename.replace('.jpg', '.json'))
                
            with open(json_path, 'r') as f:
                data = json.load(f)
                chart_type = data.get('chart-type', None)
                    
                # Handle missing or unknown chart types
                if chart_type not in chart_types:
                    print(f"Unknown chart type '{chart_type}' in {json_path}")
                    continue
                    
                label = chart_types[chart_type]
                image_paths.append(img_path)
                labels.append(label)
    return image_paths, labels

# **check the model on all of the data**

In [4]:
import os
import sys
import json
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
model_path = "/kaggle/working/chart_classification_model.h5"
image_folder = "/kaggle/input/benetech-making-graphs-accessible/train/images"
json_folder = "/kaggle/input/benetech-making-graphs-accessible/train/annotations"

chart_types = {
    0: "horizontal_bar",
    1: "vertical_bar",
    2: "dot",
    3: "line",
    4: "scatter"
}

# Load the trained model
model = load_model(model_path)

# Initialize variables
correct_predictions = 0
total_images = 0

# Helper function to preprocess the image
def preprocess_image(image_path, target_size=(224, 224)):  # Adjust target size based on your model
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image) / 255.0
    return np.expand_dims(image, axis=0)

# Iterate through images and JSON
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg'):
        img_path = os.path.join(image_folder, filename)
        json_path = os.path.join(json_folder, filename.replace('.jpg', '.json'))
    
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    chart_type = data.get("chart-type")  # Adjust key if it's different
    
    if not os.path.exists(img_path):
        continue
    
    # Preprocess image
    image = preprocess_image(img_path)
    
    # Predict chart type
    predictions = model.predict(image,verbose = 0)
    predicted_chart_type_index = np.argmax(predictions, axis=1)[0]
    predicted_chart_type = chart_types.get(predicted_chart_type_index, "unknown")  # Map number to string
    
    
    # Compare prediction with ground truth
    if predicted_chart_type == chart_type:
        correct_predictions += 1
    total_images += 1
    accuracy = correct_predictions / total_images if total_images > 0 else 0
    sys.stdout.write(f"\r{total_images} accuracy={accuracy * 100:.2f}%")
    sys.stdout.flush()  # Flush to ensure the line is updated immediately

# Calculate accuracy
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"\nAccuracy: {accuracy * 100:.2f}% ({correct_predictions}/{total_images} correctly classified)")


60578 accuracy=99.01%
Accuracy: 99.01% (59981/60578 correctly classified)


# **Finetuning the model**

In [5]:
import os
import random
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Paths to the sorted data folders
data_paths = {
    "horizontal_bar": "/kaggle/input/benetech-extra-generated-data/graphs_h",
    "vertical_bar": "/kaggle/input/benetech-extra-generated-data/graphs_v",
    "dot": "/kaggle/input/benetech-extra-generated-data/graphs_d",
    "line": "/kaggle/input/benetech-extra-generated-data/graphs_l",
    "scatter": "/kaggle/input/benetech-extra-generated-data/graphs_s"
}

# Labels for each folder
chart_types = {
    "horizontal_bar": 0,
    "vertical_bar": 1,
    "dot": 2,
    "line": 3,
    "scatter": 4
}
max_chart = {
    "horizontal_bar":3000,
    "vertical_bar": 1400,
    "dot": 1400,
    "line": 3000,
    "scatter": 1400
}

# Function to load and label images
def load_images(data_paths, chart_types, max_images_per_class=2000):
    image_paths, labels = [], []
    for chart_type, path in data_paths.items():
        files = os.listdir(path)
        random.shuffle(files)
        for i, filename in enumerate(files):
            if i >= max_chart[chart_type]:  # Limit to max_images_per_class per folder
                break
            if filename.endswith('.jpg') or filename.endswith('.png'):  # Support common image formats
                image_paths.append(os.path.join(path, filename))
                labels.append(chart_types[chart_type])
    image_paths, labels = shuffle(image_paths, labels, random_state=0)
    return image_paths, labels

# Load and label the images
image_paths, labels = load_images(data_paths, chart_types)

# Shuffle the data
data = list(zip(image_paths, labels))
np.random.shuffle(data)
image_paths, labels = zip(*data)

# Preprocess images
def preprocess_image(img_path, label, img_size=(224, 224)):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, img_size)
    img = img / 255.0
    return img, label

# Split into training and validation sets
train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_dataset = train_dataset.map(lambda x, y: preprocess_image(x, y)).batch(32).shuffle(1000).repeat()

val_dataset = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
val_dataset = val_dataset.map(lambda x, y: preprocess_image(x, y)).batch(32).repeat()

# Load the pre-trained model
model = load_model('/kaggle/working/chart_classification_model.h5')

# Freeze base model layers
for layer in model.layers[:-10]:  # Freeze all layers except the last 10
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model with limited steps per epoch
model.fit(
    train_dataset,
    validation_data=val_dataset,
    steps_per_epoch=1000,
    validation_steps=300,
    epochs=5
)
model.save('fine_tuned_chart_classification_model_mid_v3_5.h5')

# Unfreeze all layers for fine-tuning
for layer in model.layers:
    layer.trainable = True

# Compile the model with a smaller learning rate for fine-tuning
model.compile(optimizer=tf.keras.optimizers.Adam(1e-6), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fine-tune the model
model.fit(
    train_dataset,
    validation_data=val_dataset,
    steps_per_epoch=1000,
    validation_steps=300,
    epochs=10
)

# Save the fine-tuned model
model.save('fine_tuned_chart_classification_model_v3_5.h5')

# Evaluate on the validation set
val_loss, val_accuracy = model.evaluate(val_dataset, steps=30)
print(f"Validation Accuracy: {val_accuracy:.2f}, Validation Loss: {val_loss:.2f}")

Epoch 1/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 134ms/step - accuracy: 0.9807 - loss: 0.0656 - val_accuracy: 0.9885 - val_loss: 0.0363
Epoch 2/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 132ms/step - accuracy: 0.9922 - loss: 0.0221 - val_accuracy: 0.9918 - val_loss: 0.0292
Epoch 3/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 132ms/step - accuracy: 0.9939 - loss: 0.0204 - val_accuracy: 0.9931 - val_loss: 0.0217
Epoch 4/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 133ms/step - accuracy: 0.9974 - loss: 0.0081 - val_accuracy: 0.9929 - val_loss: 0.0218
Epoch 5/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 131ms/step - accuracy: 0.9979 - loss: 0.0059 - val_accuracy: 0.9929 - val_loss: 0.0205
Epoch 1/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m389s[0m 329ms/step - accuracy: 0.9152 - loss: 1.0175 - val_accuracy: 0.9831 - val_loss: 0.06

# **check the new model on all data**

In [6]:
import os
import sys
import json
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
model_path = "fine_tuned_chart_classification_model_v3_5.h5"
image_folder = "/kaggle/input/benetech-making-graphs-accessible/train/images"
json_folder = "/kaggle/input/benetech-making-graphs-accessible/train/annotations"

chart_types = {
    0: "horizontal_bar",
    1: "vertical_bar",
    2: "dot",
    3: "line",
    4: "scatter"
}

# Load the trained model
model = load_model(model_path)

# Initialize variables
correct_predictions = 0
total_images = 0

# Helper function to preprocess the image
def preprocess_image(image_path, target_size=(224, 224)):  # Adjust target size based on your model
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image) / 255.0
    return np.expand_dims(image, axis=0)

# Iterate through images and JSON
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg'):
        img_path = os.path.join(image_folder, filename)
        json_path = os.path.join(json_folder, filename.replace('.jpg', '.json'))
    
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    chart_type = data.get("chart-type")  # Adjust key if it's different
    
    if not os.path.exists(img_path):
        continue
    
    # Preprocess image
    image = preprocess_image(img_path)
    
    # Predict chart type
    predictions = model.predict(image,verbose = 0)
    predicted_chart_type_index = np.argmax(predictions, axis=1)[0]
    predicted_chart_type = chart_types.get(predicted_chart_type_index, "unknown")  # Map number to string
    
    
    # Compare prediction with ground truth
    if predicted_chart_type == chart_type:
        correct_predictions += 1
    total_images += 1
    accuracy = correct_predictions / total_images if total_images > 0 else 0
    sys.stdout.write(f"\r{total_images} accuracy={accuracy * 100:.2f}%")
    sys.stdout.flush()  # Flush to ensure the line is updated immediately

# Calculate accuracy
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"\nAccuracy: {accuracy * 100:.2f}% ({correct_predictions}/{total_images} correctly classified)")

60578 accuracy=97.15%
Accuracy: 97.15% (58853/60578 correctly classified)


# **check new model on each type of graph**

**Horizontal bar**

In [7]:
import os
import sys
import json
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
model_path = "/kaggle/working/fine_tuned_chart_classification_model_v3_5.h5"
image_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/horizontal_bar/images"
json_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/horizontal_bar/annotations"

chart_types = {
    0: "horizontal_bar",
    1: "vertical_bar",
    2: "dot",
    3: "line",
    4: "scatter"
}

# Load the trained model
model = load_model(model_path)

# Initialize variables
correct_predictions = 0
total_images = 0

# Helper function to preprocess the image
def preprocess_image(image_path, target_size=(224, 224)):  # Adjust target size based on your model
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image) / 255.0
    return np.expand_dims(image, axis=0)

# Iterate through images and JSON
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg'):
        img_path = os.path.join(image_folder, filename)
        json_path = os.path.join(json_folder, filename.replace('.jpg', '.json'))
    
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    chart_type = data.get("chart-type")  # Adjust key if it's different
    
    if not os.path.exists(img_path):
        continue
    
    # Preprocess image
    image = preprocess_image(img_path)
    
    # Predict chart type
    predictions = model.predict(image,verbose = 0)
    predicted_chart_type_index = np.argmax(predictions, axis=1)[0]
    predicted_chart_type = chart_types.get(predicted_chart_type_index, "unknown")  # Map number to string
    
    
    # Compare prediction with ground truth
    if predicted_chart_type == chart_type:
        correct_predictions += 1
    total_images += 1
    accuracy = correct_predictions / total_images if total_images > 0 else 0
    sys.stdout.write(f"\r{total_images} accuracy={accuracy * 100:.2f}%")
    sys.stdout.flush()  # Flush to ensure the line is updated immediately

# Calculate accuracy
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"\nAccuracy: {accuracy * 100:.2f}% ({correct_predictions}/{total_images} correctly classified)")


73 accuracy=58.90%
Accuracy: 58.90% (43/73 correctly classified)


**vertical bar**

In [8]:
import os
import sys
import json
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
model_path = "/kaggle/working/fine_tuned_chart_classification_model_v3_5.h5"
image_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/vertical_bar/images"
json_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/vertical_bar/annotations"

chart_types = {
    0: "horizontal_bar",
    1: "vertical_bar",
    2: "dot",
    3: "line",
    4: "scatter"
}

# Load the trained model
model = load_model(model_path)

# Initialize variables
correct_predictions = 0
total_images = 0

# Helper function to preprocess the image
def preprocess_image(image_path, target_size=(224, 224)):  # Adjust target size based on your model
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image) / 255.0
    return np.expand_dims(image, axis=0)

# Iterate through images and JSON
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg'):
        img_path = os.path.join(image_folder, filename)
        json_path = os.path.join(json_folder, filename.replace('.jpg', '.json'))
    
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    chart_type = data.get("chart-type")  # Adjust key if it's different
    
    if not os.path.exists(img_path):
        continue
    
    # Preprocess image
    image = preprocess_image(img_path)
    
    # Predict chart type
    predictions = model.predict(image,verbose = 0)
    predicted_chart_type_index = np.argmax(predictions, axis=1)[0]
    predicted_chart_type = chart_types.get(predicted_chart_type_index, "unknown")  # Map number to string
    
    
    # Compare prediction with ground truth
    if predicted_chart_type == chart_type:
        correct_predictions += 1
    total_images += 1
    accuracy = correct_predictions / total_images if total_images > 0 else 0
    sys.stdout.write(f"\r{total_images} accuracy={accuracy * 100:.2f}%")
    sys.stdout.flush()  # Flush to ensure the line is updated immediately

# Calculate accuracy
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"\nAccuracy: {accuracy * 100:.2f}% ({correct_predictions}/{total_images} correctly classified)")


19189 accuracy=98.68%
Accuracy: 98.68% (18935/19189 correctly classified)


**dot**

In [9]:
import os
import sys
import json
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
model_path = "/kaggle/working/fine_tuned_chart_classification_model_v3_5.h5"
image_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/dot/images"
json_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/dot/annotations"

chart_types = {
    0: "horizontal_bar",
    1: "vertical_bar",
    2: "dot",
    3: "line",
    4: "scatter"
}

# Load the trained model
model = load_model(model_path)

# Initialize variables
correct_predictions = 0
total_images = 0

# Helper function to preprocess the image
def preprocess_image(image_path, target_size=(224, 224)):  # Adjust target size based on your model
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image) / 255.0
    return np.expand_dims(image, axis=0)

# Iterate through images and JSON
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg'):
        img_path = os.path.join(image_folder, filename)
        json_path = os.path.join(json_folder, filename.replace('.jpg', '.json'))
    
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    chart_type = data.get("chart-type")  # Adjust key if it's different
    
    if not os.path.exists(img_path):
        continue
    
    # Preprocess image
    image = preprocess_image(img_path)
    
    # Predict chart type
    predictions = model.predict(image,verbose = 0)
    predicted_chart_type_index = np.argmax(predictions, axis=1)[0]
    predicted_chart_type = chart_types.get(predicted_chart_type_index, "unknown")  # Map number to string
    
    
    # Compare prediction with ground truth
    if predicted_chart_type == chart_type:
        correct_predictions += 1
    total_images += 1
    accuracy = correct_predictions / total_images if total_images > 0 else 0
    sys.stdout.write(f"\r{total_images} accuracy={accuracy * 100:.2f}%")
    sys.stdout.flush()  # Flush to ensure the line is updated immediately

# Calculate accuracy
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"\nAccuracy: {accuracy * 100:.2f}% ({correct_predictions}/{total_images} correctly classified)")


5131 accuracy=100.00%
Accuracy: 100.00% (5131/5131 correctly classified)


**line**

In [10]:
import os
import sys
import json
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
model_path = "/kaggle/working/fine_tuned_chart_classification_model_v3_5.h5"
image_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/line/images"
json_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/line/annotations"

chart_types = {
    0: "horizontal_bar",
    1: "vertical_bar",
    2: "dot",
    3: "line",
    4: "scatter"
}

# Load the trained model
model = load_model(model_path)

# Initialize variables
correct_predictions = 0
total_images = 0

# Helper function to preprocess the image
def preprocess_image(image_path, target_size=(224, 224)):  # Adjust target size based on your model
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image) / 255.0
    return np.expand_dims(image, axis=0)

# Iterate through images and JSON
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg'):
        img_path = os.path.join(image_folder, filename)
        json_path = os.path.join(json_folder, filename.replace('.jpg', '.json'))
    
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    chart_type = data.get("chart-type")  # Adjust key if it's different
    
    if not os.path.exists(img_path):
        continue
    
    # Preprocess image
    image = preprocess_image(img_path)
    
    # Predict chart type
    predictions = model.predict(image,verbose = 0)
    predicted_chart_type_index = np.argmax(predictions, axis=1)[0]
    predicted_chart_type = chart_types.get(predicted_chart_type_index, "unknown")  # Map number to string
    
    
    # Compare prediction with ground truth
    if predicted_chart_type == chart_type:
        correct_predictions += 1
    total_images += 1
    accuracy = correct_predictions / total_images if total_images > 0 else 0
    sys.stdout.write(f"\r{total_images} accuracy={accuracy * 100:.2f}%")
    sys.stdout.flush()  # Flush to ensure the line is updated immediately

# Calculate accuracy
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"\nAccuracy: {accuracy * 100:.2f}% ({correct_predictions}/{total_images} correctly classified)")


24942 accuracy=96.20%
Accuracy: 96.20% (23993/24942 correctly classified)


**scatter**

In [11]:
import os
import sys
import json
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
model_path = "/kaggle/working/fine_tuned_chart_classification_model_v3_5.h5"
image_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/scatter/images"
json_folder = "/kaggle/input/sortrd-chart-data/sorted_data/sorted_data/scatter/annotations"


chart_types = {
    0: "horizontal_bar",
    1: "vertical_bar",
    2: "dot",
    3: "line",
    4: "scatter"
}

# Load the trained model
model = load_model(model_path)

# Initialize variables
correct_predictions = 0
total_images = 0

# Helper function to preprocess the image
def preprocess_image(image_path, target_size=(224, 224)):  # Adjust target size based on your model
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image) / 255.0
    return np.expand_dims(image, axis=0)

# Iterate through images and JSON
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg'):
        img_path = os.path.join(image_folder, filename)
        json_path = os.path.join(json_folder, filename.replace('.jpg', '.json'))
    
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    chart_type = data.get("chart-type")  # Adjust key if it's different
    
    if not os.path.exists(img_path):
        continue
    
    # Preprocess image
    image = preprocess_image(img_path)
    
    # Predict chart type
    predictions = model.predict(image,verbose = 0)
    predicted_chart_type_index = np.argmax(predictions, axis=1)[0]
    predicted_chart_type = chart_types.get(predicted_chart_type_index, "unknown")  # Map number to string
    
    
    # Compare prediction with ground truth
    if predicted_chart_type == chart_type:
        correct_predictions += 1
    total_images += 1
    accuracy = correct_predictions / total_images if total_images > 0 else 0
    sys.stdout.write(f"\r{total_images} accuracy={accuracy * 100:.2f}%")
    sys.stdout.flush()  # Flush to ensure the line is updated immediately

# Calculate accuracy
accuracy = correct_predictions / total_images if total_images > 0 else 0
print(f"\nAccuracy: {accuracy * 100:.2f}% ({correct_predictions}/{total_images} correctly classified)")


11243 accuracy=95.62%
Accuracy: 95.62% (10751/11243 correctly classified)
