In [1]:
#Import all the packages
import cv2
import numpy as np
import matplotlib.pyplot as plt
from urllib.request import urlopen
from scipy import ndimage
from ripser import ripser, lower_star_img
from persim import plot_diagrams 
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import tree
from sklearn.metrics import confusion_matrix
from matplotlib.patches import Rectangle
from joblib import dump, load
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

In [2]:
#Some Common Functions to see the results

def print_results(X,y,model,type_data='test'):
    y_pred=model.predict(X)
    M=confusion_matrix(y,y_pred,normalize='all')
    M=pd.DataFrame(M,index=['True Other','True Van Gogh'],columns=['Pred. Other','Pred. Van Gog'])
    display(M.style.format('{:.2%}'))    
    print(('Score on '+type_data+' data: {:.2%}').format(model.score(X,y)))
    M2=confusion_matrix(y,y_pred,normalize='pred')
    print(('P[True Van|Pred Van]= {:.2%}').format(M2[1,1]))

In [3]:
Catalog0=pd.read_pickle('Catalogs/Catalog_train_w_strokes_and_channels_and_faces_and_hists.pkl')
Catalog_train,Catalog_test,y_train,y_test=train_test_split(Catalog0,Catalog0.Vangogh,test_size=0.2,stratify=Catalog0.Vangogh)
Catalog_train=Catalog_train.reset_index(drop=True)
Catalog_test=Catalog_test.reset_index(drop=True)

In [4]:
#MLP On the Whole image
#Here we play with the parameters trying to avoid overfitting
mlp2=MLPClassifier(hidden_layer_sizes=(50,100,50),max_iter=500)

X_train=np.concatenate([np.array(list(Catalog_train['Normalized_Image_B'])),
                        np.array(list(Catalog_train['Normalized_Image_G'])),
                        np.array(list(Catalog_train['Normalized_Image_R']))],axis=1)

X_test=np.concatenate([np.array(list(Catalog_test['Normalized_Image_B'])),
                        np.array(list(Catalog_test['Normalized_Image_G'])),
                        np.array(list(Catalog_test['Normalized_Image_R']))],axis=1)

mlp2.fit(X_train,y_train)
print_results(X_train,y_train,mlp2,'train')
print_results(X_test,y_test,mlp2)

Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,73.47%,2.00%
True Van Gogh,6.60%,17.92%


Score on train data: 91.39%
P[True Van|Pred Van]= 89.94%


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,65.57%,9.91%
True Van Gogh,14.15%,10.38%


Score on test data: 75.94%
P[True Van|Pred Van]= 51.16%


In [5]:
Catalog_train['Pred_Whole']=mlp2.predict_proba(X_train)[:,1]
Catalog_test['Pred_Whole']=mlp2.predict_proba(X_test)[:,1]

In [6]:
#Machine Learning on faces
mlp3=MLPClassifier(hidden_layer_sizes=(50,100,50),max_iter=500)

X_train=np.array(Catalog_train.loc[Catalog_train.Face_Patch.notnull(),'Face_Patch'].tolist())
y_train_sub=Catalog_train.loc[Catalog_train.Face_Patch.notnull(),'Vangogh']

X_test=np.array(Catalog_test.loc[Catalog_test.Face_Patch.notnull(),'Face_Patch'].tolist())
y_test_sub=Catalog_test.loc[Catalog_test.Face_Patch.notnull(),'Vangogh']


mlp3.fit(X_train,y_train_sub)
print_results(X_train,y_train_sub,mlp3,'train')
print_results(X_test,y_test_sub,mlp3)

Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,76.59%,2.95%
True Van Gogh,18.41%,2.05%


Score on train data: 78.64%
P[True Van|Pred Van]= 40.91%


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,76.00%,2.40%
True Van Gogh,20.80%,0.80%


Score on test data: 76.80%
P[True Van|Pred Van]= 25.00%


In [7]:
Catalog_train['Pred_Face']=5
Catalog_test['Pred_Face']=5
Catalog_train.loc[Catalog_train.Face_Patch.notnull(),'Pred_Face']=mlp3.predict_proba(X_train)[:,1]
Catalog_test.loc[Catalog_test.Face_Patch.notnull(),'Pred_Face']=mlp3.predict_proba(X_test)[:,1]

In [8]:
#KNN On Face Histograms
# Function to calculate Chi-distace 
def chi2_distance(A, B): 
    A=np.array(A)
    B=np.array(B)
    # compute the chi-squared distance
    chi=0
    for (a, b) in zip(A, B):
        if a != b:     
            chi=chi+((a - b) ** 2) / (a + b)
    chi=0.5*chi 
    return chi

X_train=np.array([x.ravel() for x in Catalog_train.loc[Catalog_train.Face_Patch.notnull(),'Hist_Face']])
y_train_sub=Catalog_train.loc[Catalog_train.Face_Patch.notnull(),'Vangogh']

X_test=np.array([x.ravel() for x in Catalog_test.loc[Catalog_test.Face_Patch.notnull(),'Hist_Face']])
y_test_sub=Catalog_test.loc[Catalog_test.Face_Patch.notnull(),'Vangogh']

knn=KNeighborsClassifier(n_neighbors=3, algorithm='ball_tree', metric = chi2_distance)

knn.fit(X_train,y_train_sub)
print_results(X_train,y_train_sub,knn,'train')
print_results(X_test,y_test_sub,knn)

Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,77.05%,2.50%
True Van Gogh,11.59%,8.86%


Score on train data: 85.91%
P[True Van|Pred Van]= 78.00%


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,72.00%,6.40%
True Van Gogh,16.00%,5.60%


Score on test data: 77.60%
P[True Van|Pred Van]= 46.67%


In [9]:
Catalog_train['Pred_Face_Hist']=-1
Catalog_test['Pred_Face_Hist']=-1
Catalog_train.loc[Catalog_train.Face_Patch.notnull(),'Pred_Face_Hist']=knn.predict(X_train)
Catalog_test.loc[Catalog_test.Face_Patch.notnull(),'Pred_Face_Hist']=knn.predict(X_test)

In [10]:
#KNN On Histograms
X_train=np.array(list(Catalog_train.GNHist))
X_test=np.array(list(Catalog_test.GNHist))

knn2=KNeighborsClassifier(n_neighbors=3, algorithm='ball_tree', metric = chi2_distance)

knn2.fit(X_train,y_train)
print_results(X_train,y_train,knn2,'train')
print_results(X_test,y_test,knn2)

Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,71.34%,4.13%
True Van Gogh,8.61%,15.92%


Score on train data: 87.26%
P[True Van|Pred Van]= 79.41%


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,14.15%,10.38%


Score on test data: 76.42%
P[True Van|Pred Van]= 52.38%


In [11]:
Catalog_train['Pred_Hist']=knn2.predict(X_train)
Catalog_test['Pred_Hist']=knn2.predict(X_test)

In [12]:
#Decision Tree for Mean_Strokes

X_train=Catalog_train[['Mean_Strokes']]
X_test=Catalog_test[['Mean_Strokes']]

tree_cl=DecisionTreeClassifier(max_depth=2)

tree_cl.fit(X_train,y_train)
print_results(X_train,y_train,tree_cl,'train')
print_results(X_test,y_test,tree_cl)

Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,72.05%,3.42%
True Van Gogh,11.67%,12.85%


Score on train data: 84.91%
P[True Van|Pred Van]= 78.99%


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,15.57%,8.96%


Score on test data: 79.72%
P[True Van|Pred Van]= 65.52%


In [13]:
#Decision Tree for No_Faces

X_train=Catalog_train[['No_Faces']]
X_test=Catalog_test[['No_Faces']]

tree_cl2=DecisionTreeClassifier(max_depth=2)

tree_cl2.fit(X_train,y_train)
print_results(X_train,y_train,tree_cl2,'train')
print_results(X_test,y_test,tree_cl2)

Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,75.47%,0.00%
True Van Gogh,24.53%,0.00%


Score on train data: 75.47%
P[True Van|Pred Van]= 0.00%


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,75.47%,0.00%
True Van Gogh,24.53%,0.00%


Score on test data: 75.47%
P[True Van|Pred Van]= 0.00%


In [14]:
# This function was modified from stackexchange user hughdbrown 
# at this link, 
# https://stackoverflow.com/questions/1482308/how-to-get-all-subsets-of-a-set-powerset

# This returns the power set of a set minus the empty set
def powerset_no_empty(s):
    power_set = []
    x = len(s)
    for i in range(1 << x):
        power_set.append([s[j] for j in range(x) if (i & (1 << j))])
            
    return power_set[1:]

In [15]:
#Combine all Models

all_features= ['Pred_Whole','Pred_Face','Pred_Face_Hist','Pred_Hist','No_Faces','Mean_Strokes']
subsets=powerset_no_empty(all_features)

for s in subsets:
    tree=DecisionTreeClassifier(max_depth=len(s)+1)
    X_train=Catalog_train[s]
    X_test=Catalog_test[s]
    
    print(s)
    tree.fit(X_train,y_train)
    #print_results(X_train,y_train,tree,'train')
    print_results(X_test,y_test,tree)
    print('--------------------------')

['Pred_Whole']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,64.15%,11.32%
True Van Gogh,13.21%,11.32%


Score on test data: 75.47%
P[True Van|Pred Van]= 50.00%
--------------------------
['Pred_Face']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,75.47%,0.00%
True Van Gogh,24.53%,0.00%


Score on test data: 75.47%
P[True Van|Pred Van]= 0.00%
--------------------------
['Pred_Whole', 'Pred_Face']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,68.40%,7.08%
True Van Gogh,15.57%,8.96%


Score on test data: 77.36%
P[True Van|Pred Van]= 55.88%
--------------------------
['Pred_Face_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,71.70%,3.77%
True Van Gogh,21.23%,3.30%


Score on test data: 75.00%
P[True Van|Pred Van]= 46.67%
--------------------------
['Pred_Whole', 'Pred_Face_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,64.62%,10.85%
True Van Gogh,13.68%,10.85%


Score on test data: 75.47%
P[True Van|Pred Van]= 50.00%
--------------------------
['Pred_Face', 'Pred_Face_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,72.17%,3.30%
True Van Gogh,21.23%,3.30%


Score on test data: 75.47%
P[True Van|Pred Van]= 50.00%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,65.57%,9.91%
True Van Gogh,14.15%,10.38%


Score on test data: 75.94%
P[True Van|Pred Van]= 51.16%
--------------------------
['Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,14.15%,10.38%


Score on test data: 76.42%
P[True Van|Pred Van]= 52.38%
--------------------------
['Pred_Whole', 'Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.98%,8.49%
True Van Gogh,14.15%,10.38%


Score on test data: 77.36%
P[True Van|Pred Van]= 55.00%
--------------------------
['Pred_Face', 'Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,14.15%,10.38%


Score on test data: 76.42%
P[True Van|Pred Van]= 52.38%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,13.21%,11.32%


Score on test data: 77.36%
P[True Van|Pred Van]= 54.55%
--------------------------
['Pred_Face_Hist', 'Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,62.74%,12.74%
True Van Gogh,12.26%,12.26%


Score on test data: 75.00%
P[True Van|Pred Van]= 49.06%
--------------------------
['Pred_Whole', 'Pred_Face_Hist', 'Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,13.68%,10.85%


Score on test data: 76.89%
P[True Van|Pred Van]= 53.49%
--------------------------
['Pred_Face', 'Pred_Face_Hist', 'Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,15.09%,9.43%


Score on test data: 80.19%
P[True Van|Pred Van]= 66.67%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist', 'Pred_Hist']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,67.92%,7.55%
True Van Gogh,13.21%,11.32%


Score on test data: 79.25%
P[True Van|Pred Van]= 60.00%
--------------------------
['No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,75.47%,0.00%
True Van Gogh,24.53%,0.00%


Score on test data: 75.47%
P[True Van|Pred Van]= 0.00%
--------------------------
['Pred_Whole', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,64.62%,10.85%
True Van Gogh,13.21%,11.32%


Score on test data: 75.94%
P[True Van|Pred Van]= 51.06%
--------------------------
['Pred_Face', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,75.47%,0.00%
True Van Gogh,24.53%,0.00%


Score on test data: 75.47%
P[True Van|Pred Van]= 0.00%
--------------------------
['Pred_Whole', 'Pred_Face', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,14.15%,10.38%


Score on test data: 76.42%
P[True Van|Pred Van]= 52.38%
--------------------------
['Pred_Face_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,72.17%,3.30%
True Van Gogh,21.70%,2.83%


Score on test data: 75.00%
P[True Van|Pred Van]= 46.15%
--------------------------
['Pred_Whole', 'Pred_Face_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,63.68%,11.79%
True Van Gogh,12.74%,11.79%


Score on test data: 75.47%
P[True Van|Pred Van]= 50.00%
--------------------------
['Pred_Face', 'Pred_Face_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,72.64%,2.83%
True Van Gogh,21.23%,3.30%


Score on test data: 75.94%
P[True Van|Pred Van]= 53.85%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,64.62%,10.85%
True Van Gogh,13.21%,11.32%


Score on test data: 75.94%
P[True Van|Pred Van]= 51.06%
--------------------------
['Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.98%,8.49%
True Van Gogh,14.15%,10.38%


Score on test data: 77.36%
P[True Van|Pred Van]= 55.00%
--------------------------
['Pred_Whole', 'Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,14.15%,10.38%


Score on test data: 76.42%
P[True Van|Pred Van]= 52.38%
--------------------------
['Pred_Face', 'Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.98%,8.49%
True Van Gogh,14.15%,10.38%


Score on test data: 77.36%
P[True Van|Pred Van]= 55.00%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.51%,8.96%
True Van Gogh,14.15%,10.38%


Score on test data: 76.89%
P[True Van|Pred Van]= 53.66%
--------------------------
['Pred_Face_Hist', 'Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,63.68%,11.79%
True Van Gogh,12.74%,11.79%


Score on test data: 75.47%
P[True Van|Pred Van]= 50.00%
--------------------------
['Pred_Whole', 'Pred_Face_Hist', 'Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.51%,8.96%
True Van Gogh,13.68%,10.85%


Score on test data: 77.36%
P[True Van|Pred Van]= 54.76%
--------------------------
['Pred_Face', 'Pred_Face_Hist', 'Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,15.09%,9.43%


Score on test data: 80.19%
P[True Van|Pred Van]= 66.67%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist', 'Pred_Hist', 'No_Faces']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,67.45%,8.02%
True Van Gogh,13.68%,10.85%


Score on test data: 78.30%
P[True Van|Pred Van]= 57.50%
--------------------------
['Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,15.57%,8.96%


Score on test data: 79.72%
P[True Van|Pred Van]= 65.52%
--------------------------
['Pred_Whole', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.98%,8.49%
True Van Gogh,13.21%,11.32%


Score on test data: 78.30%
P[True Van|Pred Van]= 57.14%
--------------------------
['Pred_Face', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,14.62%,9.91%


Score on test data: 80.66%
P[True Van|Pred Van]= 67.74%
--------------------------
['Pred_Whole', 'Pred_Face', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,13.21%,11.32%


Score on test data: 77.36%
P[True Van|Pred Van]= 54.55%
--------------------------
['Pred_Face_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.28%,5.19%
True Van Gogh,13.21%,11.32%


Score on test data: 81.60%
P[True Van|Pred Van]= 68.57%
--------------------------
['Pred_Whole', 'Pred_Face_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,65.57%,9.91%
True Van Gogh,13.21%,11.32%


Score on test data: 76.89%
P[True Van|Pred Van]= 53.33%
--------------------------
['Pred_Face', 'Pred_Face_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,69.34%,6.13%
True Van Gogh,15.57%,8.96%


Score on test data: 78.30%
P[True Van|Pred Van]= 59.38%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,14.62%,9.91%


Score on test data: 75.94%
P[True Van|Pred Van]= 51.22%
--------------------------
['Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.28%,5.19%
True Van Gogh,12.74%,11.79%


Score on test data: 82.08%
P[True Van|Pred Van]= 69.44%
--------------------------
['Pred_Whole', 'Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,63.68%,11.79%
True Van Gogh,10.85%,13.68%


Score on test data: 77.36%
P[True Van|Pred Van]= 53.70%
--------------------------
['Pred_Face', 'Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,68.87%,6.60%
True Van Gogh,11.32%,13.21%


Score on test data: 82.08%
P[True Van|Pred Van]= 66.67%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.98%,8.49%
True Van Gogh,14.15%,10.38%


Score on test data: 77.36%
P[True Van|Pred Van]= 55.00%
--------------------------
['Pred_Face_Hist', 'Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,68.40%,7.08%
True Van Gogh,12.26%,12.26%


Score on test data: 80.66%
P[True Van|Pred Van]= 63.41%
--------------------------
['Pred_Whole', 'Pred_Face_Hist', 'Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,67.92%,7.55%
True Van Gogh,13.68%,10.85%


Score on test data: 78.77%
P[True Van|Pred Van]= 58.97%
--------------------------
['Pred_Face', 'Pred_Face_Hist', 'Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,13.21%,11.32%


Score on test data: 82.08%
P[True Van|Pred Van]= 70.59%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist', 'Pred_Hist', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,67.45%,8.02%
True Van Gogh,12.26%,12.26%


Score on test data: 79.72%
P[True Van|Pred Van]= 60.47%
--------------------------
['No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,71.23%,4.25%
True Van Gogh,16.04%,8.49%


Score on test data: 79.72%
P[True Van|Pred Van]= 66.67%
--------------------------
['Pred_Whole', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,13.21%,11.32%


Score on test data: 77.36%
P[True Van|Pred Van]= 54.55%
--------------------------
['Pred_Face', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,71.23%,4.25%
True Van Gogh,14.62%,9.91%


Score on test data: 81.13%
P[True Van|Pred Van]= 70.00%
--------------------------
['Pred_Whole', 'Pred_Face', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.98%,8.49%
True Van Gogh,14.62%,9.91%


Score on test data: 76.89%
P[True Van|Pred Van]= 53.85%
--------------------------
['Pred_Face_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,68.87%,6.60%
True Van Gogh,13.21%,11.32%


Score on test data: 80.19%
P[True Van|Pred Van]= 63.16%
--------------------------
['Pred_Whole', 'Pred_Face_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,14.62%,9.91%


Score on test data: 75.94%
P[True Van|Pred Van]= 51.22%
--------------------------
['Pred_Face', 'Pred_Face_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,68.87%,6.60%
True Van Gogh,13.21%,11.32%


Score on test data: 80.19%
P[True Van|Pred Van]= 63.16%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,65.09%,10.38%
True Van Gogh,12.26%,12.26%


Score on test data: 77.36%
P[True Van|Pred Van]= 54.17%
--------------------------
['Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,13.21%,11.32%


Score on test data: 82.08%
P[True Van|Pred Van]= 70.59%
--------------------------
['Pred_Whole', 'Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.51%,8.96%
True Van Gogh,14.15%,10.38%


Score on test data: 76.89%
P[True Van|Pred Van]= 53.66%
--------------------------
['Pred_Face', 'Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,68.87%,6.60%
True Van Gogh,13.21%,11.32%


Score on test data: 80.19%
P[True Van|Pred Van]= 63.16%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,66.04%,9.43%
True Van Gogh,12.26%,12.26%


Score on test data: 78.30%
P[True Van|Pred Van]= 56.52%
--------------------------
['Pred_Face_Hist', 'Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,64.62%,10.85%
True Van Gogh,13.21%,11.32%


Score on test data: 75.94%
P[True Van|Pred Van]= 51.06%
--------------------------
['Pred_Whole', 'Pred_Face_Hist', 'Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,65.57%,9.91%
True Van Gogh,11.79%,12.74%


Score on test data: 78.30%
P[True Van|Pred Van]= 56.25%
--------------------------
['Pred_Face', 'Pred_Face_Hist', 'Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,69.81%,5.66%
True Van Gogh,14.15%,10.38%


Score on test data: 80.19%
P[True Van|Pred Van]= 64.71%
--------------------------
['Pred_Whole', 'Pred_Face', 'Pred_Face_Hist', 'Pred_Hist', 'No_Faces', 'Mean_Strokes']


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,65.57%,9.91%
True Van Gogh,13.21%,11.32%


Score on test data: 76.89%
P[True Van|Pred Van]= 53.33%
--------------------------


In [17]:
#The best configuration seems to be:3 features No_Faces, Mean_Strokes, and Pred_Hist 

tree=DecisionTreeClassifier(max_depth=4)
X_train=Catalog_train[['No_Faces','Mean_Strokes','Pred_Hist']]
X_test=Catalog_test[['No_Faces','Mean_Strokes','Pred_Hist']]
    
tree.fit(X_train,y_train)
print_results(X_train,y_train,tree,'train')
print_results(X_test,y_test,tree)

Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,73.23%,2.24%
True Van Gogh,8.37%,16.16%


Score on train data: 89.39%
P[True Van|Pred Van]= 87.82%


Unnamed: 0,Pred. Other,Pred. Van Gog
True Other,70.75%,4.72%
True Van Gogh,13.21%,11.32%


Score on test data: 82.08%
P[True Van|Pred Van]= 70.59%
