In [1]:
import os

import cv2
import pandas as pd
import numpy as np
import random

from skimage.feature import local_binary_pattern
from sklearn.decomposition import PCA

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

from sklearn.svm import SVC

import xgboost as xgb


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was too old on your system - pyarrow 10.0.1 is the current minimum supported version as of this release.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Paths are initialized

In [3]:
#wikiart (large dataset) will be added later

# Root folders of the train/test splits; each is listed per painter by later cells.
kaggle_train_path = "../data/train"
kaggle_test_path = "../data/test"

features_path = "../features"

# One sub-directory per painter. os.listdir returns entries in arbitrary order, so the
# names are sorted to make the painter -> id mapping reproducible between runs, and
# stray non-directory entries are skipped because later cells call os.listdir on
# os.path.join(kaggle_train_path, painter).
kaggle_painter_names = sorted(
    entry
    for entry in os.listdir(kaggle_train_path)
    if os.path.isdir(os.path.join(kaggle_train_path, entry))
)

In [4]:
# Map each painter name to a consecutive integer label (0, 1, 2, ...) following the
# order of kaggle_painter_names. enumerate replaces the hand-rolled counter, which was
# called `id` and therefore shadowed the builtin id() for the rest of the notebook.
painter_ids = {
    painter_name: label
    for label, painter_name in enumerate(kaggle_painter_names)
}

Feature 1: HOG

In [None]:
# Text file from which the feature-concatenation cell reads the per-painter HOG row.
hog_path = "../features/hog.txt"

In [None]:
#Computing HOG over an image
def compute_hog(image):
    """Return the HOG descriptors of every HSV channel of ``image`` as one array.

    The image is converted from BGR to HSV and resized to 128x128. A
    default-constructed ``cv2.HOGDescriptor`` is then evaluated on each channel
    separately and the per-channel results are concatenated in channel order.
    """
    resized_hsv = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2HSV), (128, 128))
    descriptor = cv2.HOGDescriptor()

    per_channel = [
        descriptor.compute(resized_hsv[:, :, index])
        for index in range(resized_hsv.shape[2])
    ]
    return np.concatenate(per_channel)

#Computing average of HOG for each painter's paintings distinctively
# and storing it in hog_path. Previously the average was computed and then dropped,
# so hog_path was never written even though the feature-concatenation cell reads it.
# Layout per painter: one line with the painter name, then one line with the
# space-separated averaged descriptor (the layout the concatenation cell parses).
with open(hog_path, "w") as hog_file:
    for painter in kaggle_painter_names:
        paintings_path = os.path.join(kaggle_train_path, painter)

        hog_features_list = []
        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            hog_features_list.append(compute_hog(image))

        # Stack to (n_images, ...) and average over the image axis.
        hog_features_array = np.array(hog_features_list)
        avg_hog_features = np.mean(hog_features_array, axis=0)

        hog_file.write(painter)
        hog_file.write("\n")
        # np.ravel flattens a possibly column-shaped descriptor so that savetxt
        # receives a 2-D (1, n_values) array and emits exactly one line.
        np.savetxt(hog_file, [np.ravel(avg_hog_features)], fmt="%.6f")

Feature 2: LBP

In [10]:
# Text file the LBP cell writes per-painter features to and the concatenation cell reads back.
lbp_path = "../features/lbp.txt"

In [20]:
def compute_lbp(image):
    """Return a normalised histogram of local binary pattern codes for ``image``.

    The third channel of the HSV conversion of the (BGR) image is used as the
    texture source. Codes are computed with radius 3 and 8 * radius sampling
    points using the "uniform" method, counted into integer-edged bins
    0 .. n_points + 2, and the counts are divided by their sum.
    """
    radius = 3
    n_points = 8 * radius

    value_channel = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)[:, :, 2] #to obtain texture information
    codes = local_binary_pattern(value_channel, n_points, radius, method="uniform")

    bin_edges = np.arange(0, n_points + 3)
    counts, _ = np.histogram(codes.ravel(), bins=bin_edges, range=(0, n_points + 2))
    counts = counts.astype(float)
    return counts / np.sum(counts)

In [None]:
with open(lbp_path, "w") as lbp_file:
    #Computing standard deviation of LBP for each painter's paintings distinctively
    # Layout per painter: one line with the painter name, then one line with the
    # space-separated per-bin standard deviation (read back by the concatenation cell).
    for painter in kaggle_painter_names:
        lbp_file.write(painter)
        lbp_file.write("\n")

        paintings_path = os.path.join(kaggle_train_path, painter)
        painter_lbp_features = []

        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            lbp_feature = compute_lbp(image)
            painter_lbp_features.append(lbp_feature)

        # Reduce the (n_images, n_bins) stack to a single row. Passing the unreduced
        # list wrapped in another list hands np.savetxt a 3-D array, which it rejects
        # (only 1-D and 2-D input is supported).
        lbp_std = np.std(np.array(painter_lbp_features), axis=0)
        np.savetxt(lbp_file, [lbp_std], fmt="%.6f")

Feature 3: Gray Histogram

In [11]:
# Text file the gray-histogram cell writes per-painter features to and the concatenation cell reads back.
gray_hist_path = "../features/gray_hist.txt"

In [21]:
def compute_gray_histogram(image):
    """Count the grayscale intensities of a BGR image into 256 equal-width bins over [0, 256]."""
    intensities = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).ravel()
    counts = np.histogram(intensities, bins=256, range=[0,256])[0]
    return counts

In [None]:
# Write one averaged grayscale histogram per painter to gray_hist_path.
# Layout per painter: one line with the painter name, then one line with the
# space-separated histogram (read back by the concatenation cell).
with open(gray_hist_path, "w") as gray_hist_file:
    for painter in kaggle_painter_names:
        gray_hist_file.write(painter)
        gray_hist_file.write("\n")

        paintings_path = os.path.join(kaggle_train_path, painter)
        painter_gray_hist = []

        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            gray_hist = compute_gray_histogram(image)
            painter_gray_hist.append(gray_hist)

        # Average over the painter's images so a single row is written. The unreduced
        # list wrapped in another list is a 3-D array, which np.savetxt rejects.
        # The mean mirrors the per-painter averaging used for the HOG feature.
        avg_gray_hist = np.mean(np.array(painter_gray_hist), axis=0)
        np.savetxt(gray_hist_file, [avg_gray_hist])

Feature 4: Edges Histogram

In [12]:
# Text file the edge-histogram cell writes per-painter features to and the concatenation cell reads back.
edges_hist_path = "../features/edges_hist.txt"

In [22]:
def compute_edge_histogram(image):
    """Return a 256-bin histogram of the Canny edge map of a BGR image.

    The image is converted to grayscale, passed through ``cv2.Canny`` with
    thresholds 50 and 150, and the resulting edge-map values are counted into
    256 equal-width bins over [0, 256].
    """
    edge_map = cv2.Canny(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 50, 150)
    counts = np.histogram(edge_map.ravel(), bins=256, range=[0,256])[0]
    return counts

In [None]:
# Write one averaged edge histogram per painter to edges_hist_path.
# Layout per painter: one line with the painter name, then one line with the
# space-separated histogram (read back by the concatenation cell).
with open(edges_hist_path, "w") as edges_hist_file:
    for painter in kaggle_painter_names:
        edges_hist_file.write(painter)
        edges_hist_file.write("\n")

        paintings_path = os.path.join(kaggle_train_path, painter)
        painter_edge_hist = []

        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            edge_hist = compute_edge_histogram(image)
            painter_edge_hist.append(edge_hist)

        # Average over the painter's images so a single row is written. The unreduced
        # list wrapped in another list is a 3-D array, which np.savetxt rejects.
        # The mean mirrors the per-painter averaging used for the HOG feature.
        avg_edge_hist = np.mean(np.array(painter_edge_hist), axis=0)
        np.savetxt(edges_hist_file, [avg_edge_hist], fmt="%.6f")

Feature 5: Histogram for S Component From HSV Image

In [14]:
# Text file the S-channel histogram cell writes per-painter features to and the concatenation cell reads back.
s_path = "../features/s_hsv.txt"

In [24]:
def compute_s_histogram(image):
    """Return a 256-bin histogram of channel index 1 of the HSV conversion of a BGR image."""
    saturation = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)[:,:,1]
    counts = np.histogram(saturation.ravel(), bins=256, range=[0,256])[0]
    return counts

In [None]:
# Write one averaged S-channel histogram per painter to s_path.
# Layout per painter: one line with the painter name, then one line with the
# space-separated histogram (read back by the concatenation cell).
with open(s_path, "w") as s_file:
    for painter in kaggle_painter_names:
        s_file.write(painter)
        s_file.write("\n")

        paintings_path = os.path.join(kaggle_train_path, painter)
        painter_s_hist = []

        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            s_hist = compute_s_histogram(image)
            painter_s_hist.append(s_hist)

        # Average over the painter's images so a single row is written. The unreduced
        # list wrapped in another list is a 3-D array, which np.savetxt rejects.
        # The mean mirrors the per-painter averaging used for the HOG feature.
        avg_s_hist = np.mean(np.array(painter_s_hist), axis=0)
        np.savetxt(s_file, [avg_s_hist], fmt="%.6f")

Feature 6: Histogram for B Component From RGB Image

In [16]:
# Text file the B-channel histogram cell writes per-painter features to and the concatenation cell reads back.
b_path = "../features/b_rgb.txt"

In [25]:
def compute_b_histogram(image):
    """Return a 256-bin histogram of the first channel produced by ``cv2.split(image)``.

    The image is split into exactly three channels (treated as B, G, R); only
    the first one is counted, into 256 equal-width bins over [0, 256].
    """
    blue, _green, _red = cv2.split(image)
    counts = np.histogram(blue.ravel(), bins=256, range=[0,256])[0]
    return counts

In [None]:
# Write one averaged B-channel histogram per painter to b_path.
# Layout per painter: one line with the painter name, then one line with the
# space-separated histogram (read back by the concatenation cell).
with open(b_path, "w") as b_file:
    for painter in kaggle_painter_names:
        b_file.write(painter)
        b_file.write("\n")

        paintings_path = os.path.join(kaggle_train_path, painter)
        painter_b_hist = []

        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            b_hist = compute_b_histogram(image)
            # Append the histogram just computed; the previous code appended `s_hist`,
            # a leftover variable from the S-channel cell, so b_rgb.txt never held
            # B-channel data.
            painter_b_hist.append(b_hist)

        # Average over the painter's images so a single row is written. The unreduced
        # list wrapped in another list is a 3-D array, which np.savetxt rejects.
        # The mean mirrors the per-painter averaging used for the HOG feature.
        avg_b_hist = np.mean(np.array(painter_b_hist), axis=0)
        np.savetxt(b_file, [avg_b_hist], fmt="%.6f")

Feature 7: MSER

In [None]:
# Text file the MSER cell writes per-painter features to and the concatenation cell reads back.
mser_path = "../features/mser.txt"

In [None]:
def compute_mser(image):
    """Return every point of every MSER region detected in ``image`` as one flat list.

    The BGR image is converted to grayscale and passed to a default
    ``cv2.MSER_create()`` detector. The points of all detected regions are
    collected in detection order, so the length of the returned list depends
    on the image content.
    """
    detector = cv2.MSER_create()
    regions, _ = detector.detectRegions(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

    return [point for region in regions for point in region]

In [None]:
# For each painter, write a line with the painter name to mser_path and then try to
# write the MSER data gathered from all of that painter's training images.
# NOTE(review): compute_mser returns a list whose length depends on the image, so
# painter_mser is a nested list with rows of differing lengths. np.savetxt only
# accepts 1-D or 2-D numeric arrays, so the call below looks like it cannot succeed
# as written. Confirm, and decide on a fixed-length MSER summary (the concatenation
# cell expects exactly one numeric row after each painter name).
with open(mser_path, "w") as mser_file:
    for painter in kaggle_painter_names:
        mser_file.write(painter)
        mser_file.write("\n")

        paintings_path = os.path.join(kaggle_train_path, painter)
        # One entry per image; each entry is the list returned by compute_mser.
        painter_mser = []
        
        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            mser = compute_mser(image)
            painter_mser.append(mser)
            
        np.savetxt(mser_file, [painter_mser], fmt="%.6f")

Function to Generate Test Data

In [None]:
def get_basic_features(test_image_path):
    """Load the image at ``test_image_path`` and return all its features in one list.

    The feature extractors run in a fixed order (HOG, LBP, gray histogram, edge
    histogram, S histogram, B histogram, MSER) and the elements of each result
    are appended to a single list in that order. The MSER part contributes one
    entry per detected region point, so the list length depends on the image.
    """
    test_image = cv2.imread(test_image_path)
    #test_image = cv2.resize(test_image, (500, 500))

    extractors = (
        compute_hog,
        compute_lbp,
        compute_gray_histogram,
        compute_edge_histogram,
        compute_s_histogram,
        compute_b_histogram,
        compute_mser,
    )

    features_for_test = []
    for extract in extractors:
        features_for_test.extend(extract(test_image))

    return features_for_test

Train Data: Using the computed features

In [18]:
concatenated_features_path = "../features/concatenated_features.txt"

# Per-painter feature files, in the order their rows are joined into one vector.
feature_file_paths = [
    hog_path,
    lbp_path,
    gray_hist_path,
    edges_hist_path,
    s_path,
    b_path,
    mser_path,
]


def _read_painter_row(feature_path, painter):
    """Return the numeric row stored for ``painter`` in ``feature_path``.

    The file is scanned for the first line equal to the painter name; the line
    right after it is parsed as space-separated floats. An empty array is
    returned when the painter does not appear in the file, which matches the
    previous behaviour of silently contributing no values.
    """
    with open(feature_path, "r") as feature_file:
        for line in feature_file:
            if line.strip() == painter:
                # next() continues on the same iterator, so it yields the row
                # following the name line ("" if the file ends there).
                return np.fromstring(next(feature_file, "").strip(), sep=" ")
    return np.array([])


concatenated_features = {}

#Concatenate features for each painter
# One helper call per feature file replaces seven copy-pasted read loops, one of
# which rebound its open file handle name to the parsed array.
for painter in kaggle_painter_names:
    painter_rows = [_read_painter_row(path, painter) for path in feature_file_paths]

    #Add the corresponding features for that painter
    concatenated_features[painter] = np.concatenate(painter_rows)

#Save concatenated features to the file
# Layout per painter: one line with the painter name, then one line of values.
with open(concatenated_features_path, "w") as concatenated_file:
    for painter, features in concatenated_features.items():
        concatenated_file.write(painter + "\n")
        np.savetxt(concatenated_file, [features], fmt="%.6f")

Train Data: Obtain the Data

In [27]:
# Build the training matrix and label vector from the concatenated-features file.
# The file alternates a painter-name line with a line of space-separated values.
X_train = []
y_train = []

with open(concatenated_features_path, "r") as concatenated_file:
    lines = concatenated_file.readlines()

# Pair every name line with the row after it; zip stops at the shorter slice, so a
# trailing name line without a row no longer causes an index error.
for name_line, feature_line in zip(lines[0::2], lines[1::2]):
    painter_name = name_line.strip()
    if painter_name in kaggle_painter_names:
        X_train.append(np.fromstring(feature_line.strip(), sep=" "))
        # Append the label together with its row so X_train and y_train stay aligned
        # even if a painter is missing from the file. Labels are plain ints, giving a
        # 1-D y_train instead of an (n, 1) column that the estimators must flatten.
        y_train.append(painter_ids[painter_name])

X_train = np.array(X_train)
y_train = np.array(y_train)

Test Data

In [None]:
# Build the test matrix and label vector: one feature list and one painter id per
# image found under each painter's folder in kaggle_test_path.
X_test, y_test = [], []

for painter in kaggle_painter_names:
    test_paintings_path = os.path.join(kaggle_test_path, painter)

    for test_image_name in os.listdir(test_paintings_path):
        test_image_path = os.path.join(test_paintings_path, test_image_name)
        X_test.append(get_basic_features(test_image_path))
        y_test.append(painter_ids[painter])

X_test, y_test = np.array(X_test), np.array(y_test)

ML Model 1: MLP

In [28]:
# Train an MLP classifier on the training data, predict the test set, and store the
# classification report followed by the accuracy in results_mlp_path.
results_mlp_path = "../results/results_mlp.txt"

mlp_model = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    max_iter=500,
    random_state=42,
)
mlp_model.fit(X_train, y_train)
y_pred_mlp = mlp_model.predict(X_test)

report_mlp = classification_report(y_test, y_pred_mlp)
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)

with open(results_mlp_path, "w") as results_mlp:
    results_mlp.writelines([report_mlp, "\n", f"Accuracy: {accuracy_mlp}"])

  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ML Model 2: SVM

In [29]:
# Train a linear-kernel SVM on the training data, predict the test set, and store the
# classification report followed by the accuracy in results_svm_path.
results_svm_path = "../results/results_svm.txt"

svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

report_svm = classification_report(y_test, y_pred_svm)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

with open(results_svm_path, "w") as results_svm:
    for chunk in (report_svm, "\n", f"Accuracy: {accuracy_svm}"):
        results_svm.write(chunk)

  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ML Model 3: XGBoost

In [30]:
# Train an XGBoost classifier on the training data, predict the test set, and store
# the classification report followed by the accuracy in results_xgb_path_1.
results_xgb_path_1 = "../results/results_xgb.txt"

xgb_model = xgb.XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

report_xgb = classification_report(y_test, y_pred_xgb)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)

with open(results_xgb_path_1, "w") as results_xgb:
    results_xgb.write("".join([report_xgb, "\n", f"Accuracy: {accuracy_xgb}"]))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
