In [7]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [8]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [9]:
# Define file paths
DATA_DIR = "/data/hdd1/users/kmparmp/DocFigure/annotation"
IMAGE_DIR = "/data/hdd1/users/kmparmp/DocFigure/images"

train_labels_file = os.path.join(DATA_DIR, "train.txt")
test_labels_file = os.path.join(DATA_DIR, "test.txt")

# Function to load labels into a DataFrame
def load_labels(file_path: str) -> pd.DataFrame:
    with open(file_path, "r") as file:
        data = [line.strip().split(", ") for line in file if ", " in line]
    
    return pd.DataFrame(data, columns=["Image_Name", "Label"])

# Load train and test labels
train_df = load_labels(train_labels_file)
test_df = load_labels(test_labels_file)

# Add dataset type
train_df["Set"] = "train"
test_df["Set"] = "test"

# Encode labels using the same encoder for both sets
label_encoder = LabelEncoder()
train_df["Label_Encoded"] = label_encoder.fit_transform(train_df["Label"])
test_df["Label_Encoded"] = label_encoder.transform(test_df["Label"])  # Use same encoding

# Print label mapping
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Label Mapping:", label_mapping)

# Add full image paths
train_df["Image_Path"] = train_df["Image_Name"].apply(lambda x: os.path.join(IMAGE_DIR, x))
test_df["Image_Path"] = test_df["Image_Name"].apply(lambda x: os.path.join(IMAGE_DIR, x))

# Print to verify
print("Train DataFrame:\n", train_df.head())
print("\nTest DataFrame:\n", test_df.head())


Label Mapping: {'3D objects': np.int64(0), 'Algorithm': np.int64(1), 'Area chart': np.int64(2), 'Bar plots': np.int64(3), 'Block diagram': np.int64(4), 'Box plot': np.int64(5), 'Bubble Chart': np.int64(6), 'Confusion matrix': np.int64(7), 'Contour plot': np.int64(8), 'Flow chart': np.int64(9), 'Geographic map': np.int64(10), 'Graph plots': np.int64(11), 'Heat map': np.int64(12), 'Histogram': np.int64(13), 'Mask': np.int64(14), 'Medical images': np.int64(15), 'Natural images': np.int64(16), 'Pareto charts': np.int64(17), 'Pie chart': np.int64(18), 'Polar plot': np.int64(19), 'Radar chart': np.int64(20), 'Scatter plot': np.int64(21), 'Sketches': np.int64(22), 'Surface plot': np.int64(23), 'Tables': np.int64(24), 'Tree Diagram': np.int64(25), 'Vector plot': np.int64(26), 'Venn Diagram': np.int64(27)}
Train DataFrame:
                              Image_Name       Label    Set  Label_Encoded  \
0   2014_06909662-Figure5-1subFig-2.png  3D objects  train              0   
1   2013_06619122-F

In [10]:
# Load pre-trained VGG model
vgg_model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
vgg_model.eval()  # Set to evaluation mode

# Remove the last fully connected layer to get FC-CNN features
fc_cnn_model = torch.nn.Sequential(*list(vgg_model.children())[:-1])

In [11]:
# Function to extract FC-CNN features
def extract_fc_cnn_features(image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image_tensor = preprocess(image).unsqueeze(0)  # Add batch dimension

    # Extract features
    with torch.no_grad():
        features = fc_cnn_model(image_tensor)
    
    return features.squeeze().numpy()

# Function to extract FV-CNN features (simplified version)
def extract_fv_cnn_features(image_path):
    # For simplicity, we use the same VGG model but extract features from the last convolutional layer
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image_tensor = preprocess(image).unsqueeze(0)

    # Extract features from the last convolutional layer
    with torch.no_grad():
        conv_features = vgg_model.features(image_tensor)
    
    # Flatten and return the features
    return conv_features.squeeze().numpy().flatten()

In [12]:
# Extract features for all images
train_df["FC_CNN_Features"] = train_df["Image_Path"].apply(extract_fc_cnn_features)
train_df["FV_CNN_Features"] = train_df["Image_Path"].apply(extract_fv_cnn_features)

test_df["FC_CNN_Features"] = test_df["Image_Path"].apply(extract_fc_cnn_features)
test_df["FV_CNN_Features"] = test_df["Image_Path"].apply(extract_fv_cnn_features)



In [13]:
# Combine FC-CNN and FV-CNN features
def combine_features(row):
    fc_features = row["FC_CNN_Features"].flatten()
    fv_features = row["FV_CNN_Features"].flatten()
    return np.concatenate((fc_features, fv_features))


train_df["Combined_Features"] = train_df.apply(combine_features, axis=1)
test_df["Combined_Features"] = test_df.apply(combine_features, axis=1)

In [14]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score

# Prepare data for training
X_train = np.array(train_df["Combined_Features"].tolist())
y_train = train_df["Label_Encoded"]

X_test = np.array(test_df["Combined_Features"].tolist())
y_test = test_df["Label_Encoded"]

# Train SVM classifier
svm_classifier = OneVsRestClassifier(SVC(kernel='linear', C=1))
svm_classifier.fit(X_train, y_train)

# Predict on test set
y_pred = svm_classifier.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 93.76%


In [15]:
import joblib

model_path = "svm_gvv_model.pkl"
joblib.dump(svm_classifier, model_path)
print(f"Model saved to {model_path}")

Model saved to svm_gvv_model.pkl
