In [1]:
import os

import cv2
import pandas as pd
import numpy as np

from skimage.feature import local_binary_pattern
from sklearn.decomposition import PCA

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

from sklearn.svm import SVC

import xgboost as xgb


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was too old on your system - pyarrow 10.0.1 is the current minimum supported version as of this release.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Paths are initialized

In [2]:
#wikiart (large dataset) will be added later

kaggle_train_path = "../data/train"
kaggle_test_path = "../data/test"

features_path = "../features"

kaggle_painter_names = os.listdir(kaggle_train_path)

In [3]:
painter_ids = {}
id = 0
for painter in kaggle_painter_names:
    painter_ids[painter] = id
    id += 1

Feature 1: LBP

In [4]:
lbp_path = "../features/lbp.txt"

In [5]:
def compute_lbp(image):
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    gray_image = hsv_image[:, :, 2] #to obtain texture information
    gray_image = cv2.resize(gray_image, (128, 128))

    radius = 3
    n_points = 8 * radius
    features = local_binary_pattern(gray_image, n_points, radius, method="uniform")
    lbp_hist, _ = np.histogram(features.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    lbp_hist = lbp_hist.astype(float)
    lbp_hist /= np.sum(lbp_hist)
    return lbp_hist

In [6]:
with open(lbp_path, "w") as lbp_file:
    for painter in kaggle_painter_names:
        paintings_path = os.path.join(kaggle_train_path, painter)
        images_list = [image for image in os.listdir(paintings_path)]

        for image_name in images_list:
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            lbp_feature = compute_lbp(image)
            np.savetxt(lbp_file, [lbp_feature], fmt="%.6f")

Feature 2: Gray Histogram

In [7]:
gray_hist_path = "../features/gray_hist.txt"

In [8]:
def compute_gray_histogram(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_image = cv2.resize(gray_image, (128, 128))
    hist, _ = np.histogram(gray_image.ravel(), bins=256, range=[0,256])
    return hist

In [9]:
with open(gray_hist_path, "w") as gray_hist_file:
    for painter in kaggle_painter_names:
        paintings_path = os.path.join(kaggle_train_path, painter)

        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            gray_hist = compute_gray_histogram(image)
            np.savetxt(gray_hist_file, [gray_hist])

Feature 3: Edges Histogram

In [10]:
edges_hist_path = "../features/edges_hist.txt"

In [11]:
def compute_edge_histogram(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_image = cv2.resize(gray_image, (128, 128))
    edges = cv2.Canny(gray_image, 50, 150)
    hist, _ = np.histogram(edges.ravel(), bins=256, range=[0,256])
    return hist

In [12]:
with open(edges_hist_path, "w") as edges_hist_file:
    for painter in kaggle_painter_names:
        paintings_path = os.path.join(kaggle_train_path, painter)

        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            edge_hist = compute_edge_histogram(image)
            np.savetxt(edges_hist_file, [edge_hist], fmt="%.6f")

Feature 4: Histogram for S Component From HSV Image

In [13]:
s_path = "../features/s_hsv.txt"

In [14]:
def compute_s_histogram(image):
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    s_component = hsv_image[:,:,1]
    hist, _ = np.histogram(s_component.ravel(), bins=256, range=[0,256])
    return hist

In [15]:
with open(s_path, "w") as s:
    for painter in kaggle_painter_names:
        paintings_path = os.path.join(kaggle_train_path, painter)
        
        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            s_hist = compute_s_histogram(image)
            np.savetxt(s, [s_hist], fmt="%.6f")

Feature 5: Histogram for B Component From RGB Image

In [16]:
b_path = "../features/b_rgb.txt"

In [17]:
def compute_b_histogram(image):
    b, g, r = cv2.split(image)
    hist, _ = np.histogram(b.ravel(), bins=256, range=[0,256])
    return hist

In [18]:
with open(b_path, "w") as b:
    for painter in kaggle_painter_names:
        paintings_path = os.path.join(kaggle_train_path, painter)
        painter_b_hist = []
        
        for image_name in os.listdir(paintings_path):
            image_path = os.path.join(paintings_path, image_name)
            image = cv2.imread(image_path)
            b_hist = compute_b_histogram(image)
            np.savetxt(b, [b_hist], fmt="%.6f")

Function to Generate Test Data

In [19]:
def get_basic_features(test_image_path):
    test_image = cv2.imread(test_image_path)

    test_lbp = compute_lbp(test_image)
    test_gray_hist = compute_gray_histogram(test_image)
    test_edge_hist = compute_edge_histogram(test_image)
    test_s_hist = compute_s_histogram(test_image)
    test_b_hist = compute_b_histogram(test_image)

    features_for_test = []
    features_for_test.extend(test_lbp) 
    features_for_test.extend(test_gray_hist) 
    features_for_test.extend(test_edge_hist)
    features_for_test.extend(test_s_hist)
    features_for_test.extend(test_b_hist)

    return features_for_test

Train Data: Using the computed features

In [20]:
concatenated_features_path = "../features/concatenated_features.txt"

file_paths = [lbp_path, gray_hist_path, edges_hist_path, s_path, b_path] #hog_path

feature_files = [open(file_path, "r") for file_path in file_paths]

concatenated_file = open(concatenated_features_path, "w")

for lines in zip(*feature_files): #concatenates all the features for the specific image and saves it to the file
    concatenated_line = " ".join(line.strip() for line in lines)
    concatenated_file.write(concatenated_line + "\n")

concatenated_file.close()
for file in feature_files:
    file.close()

Train Data: Obtain the Data

In [21]:
y_train = []
image_count = 0

for painter in kaggle_painter_names:
    paintings_path = os.path.join(kaggle_train_path, painter)
    for image_name in os.listdir(paintings_path):
        y_train.append([painter_ids[painter]])
        image_count += 1

In [22]:
X_train = []

with open(concatenated_features_path, "r") as concatenated_file:
    lines = concatenated_file.readlines()
    for i in range(len(lines)):
        features = np.fromstring(lines[i].strip(), sep=" ")
        X_train.append(features)

X_train = np.array(X_train)
y_train = np.array(y_train)

Test Data

In [23]:
X_test = []
y_test = []

for painter in kaggle_painter_names:
    test_paintings_path = os.path.join(kaggle_test_path, painter)
    
    for test_image_name in os.listdir(test_paintings_path):
        test_image_path = os.path.join(test_paintings_path, test_image_name)
        test_features = get_basic_features(test_image_path)
        X_test.append(test_features)
        y_test.append(painter_ids[painter])

X_test = np.array(X_test)
y_test = np.array(y_test)

ML Model 1: MLP

In [24]:
mlp_model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)

mlp_model.fit(X_train, y_train)
y_pred_mlp = mlp_model.predict(X_test)

results_mlp_path = "../results/results_mlp.txt"

accuracy_mlp = accuracy_score(y_test, y_pred_mlp)
report_mlp = classification_report(y_test, y_pred_mlp)

with open(results_mlp_path, "w") as results_mlp:
    results_mlp.write(report_mlp)
    results_mlp.write("\n")
    results_mlp.write(f"Accuracy: {accuracy_mlp}")

  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ML Model 2: SVM

In [25]:
svm_model = SVC(kernel='linear', random_state=42)

svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm)

results_svm_path = "../results/results_svm.txt"

with open(results_svm_path, "w") as results_svm:
    results_svm.write(report_svm)
    results_svm.write("\n")
    results_svm.write(f"Accuracy: {accuracy_svm}")

  y = column_or_1d(y, warn=True)


ML Model 3: XGBoost

In [26]:
xgb_model = xgb.XGBClassifier(random_state=42)

xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
report_xgb = classification_report(y_test, y_pred_xgb)

results_xgb_path_1 = "../results/results_xgb.txt"

with open(results_xgb_path_1, "w") as results_xgb:
    results_xgb.write(report_xgb)
    results_xgb.write("\n")
    results_xgb.write(f"Accuracy: {accuracy_xgb}")

# Saving Models

In [27]:
import pickle

pickle.dump(mlp_model, open('../models/mlp_model', 'wb'))
pickle.dump(svm_model, open('../models/svm_model', 'wb'))
pickle.dump(xgb_model, open('../models/xgb_model', 'wb'))