We will create a Unified Catalog that contains the following columns:
    1. AUTHOR
    2. Vangogh
    3. URL
    4. Image
    5. Mean_Strokes
    6. Stroke_Patch (the patch with the max number of strokes) (Normalized 100x100)
    7. Normalized_Image_B (100x100)
    8. Normalized_Image_G
    9. Normalized_Image_R
    10. No_Faces
    11. No_Eyes
    12. Face_Patch (100x100 grayscale not normalized)
    13. GNHist
    14. OGNHist
    15. Hist_Face

The idea is to later use the following methods:
    1. Logistic regression for the Mean_Strokes
    2. MLP for the Stroke_Patch
    3. MLP for the 3 color channels together
Finally, on top of those, we'll build an ensemble learner.

In [1]:
#Import all the packages
import cv2
import numpy as np
import matplotlib.pyplot as plt
from urllib.request import urlopen
from scipy import ndimage
from ripser import ripser, lower_star_img
from persim import plot_diagrams 
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import tree
from sklearn.metrics import confusion_matrix
from matplotlib.patches import Rectangle
from joblib import dump, load

In [2]:
#General purpose functions
def get_image(url):
    """Download an image and decode it with OpenCV.

    Parameters
    ----------
    url : str
        Address handed to ``urlopen``.

    Returns
    -------
    The value returned by ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` for the
    downloaded bytes.
    NOTE(review): OpenCV presumably yields ``None`` when the payload is not
    a decodable image -- callers do not check for that; confirm.
    """
    # The context manager closes the HTTP response even if read() raises,
    # instead of leaving the connection open until garbage collection.
    with urlopen(url) as resp:
        payload = resp.read()

    encoded = np.frombuffer(payload, dtype=np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)

def plot_image(image):
    """Display a three-channel image with matplotlib, without axis ticks.

    The channels are unpacked as (b, g, r) and re-merged as (r, g, b) before
    being handed to ``plt.imshow``.
    """
    plt.figure(figsize=(15, 8))

    # Swap first and last channel: b,g,r -> r,g,b
    blue, green, red = cv2.split(image)
    rgb_view = cv2.merge([red, green, blue])
    plt.imshow(rgb_view)

    # Hide tick values on both axes
    plt.xticks([])
    plt.yticks([])

def preprocess_image_gray(image):
    """Convert a BGR image to a smoothed, slightly noisy float64 grayscale.

    Steps: BGR -> gray, 5x5 uniform (box) filter, then additive Gaussian
    noise with standard deviation 0.01. The result is therefore not
    deterministic across calls.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).astype(np.float64)
    smoothed = ndimage.uniform_filter(gray, size=5)
    # NOTE(review): the tiny jitter presumably breaks ties between equal
    # pixel values for the persistence computation downstream -- confirm.
    jitter = 0.01 * np.random.randn(*smoothed.shape)
    smoothed += jitter
    return smoothed

def normalize_image(image, st_scaler=None, minmax_scaler=None):
    """Resize an image to 100x100 and rescale its values to uint8 in [0, 200].

    Parameters
    ----------
    image : array
        Image to normalize; it is not modified.
    st_scaler : StandardScaler-like, optional
        Object exposing ``fit_transform``; it is re-fitted on every call.
        A fresh ``StandardScaler`` is created when omitted.
    minmax_scaler : MinMaxScaler-like, optional
        Object exposing ``fit_transform``; it is re-fitted on every call.
        A fresh ``MinMaxScaler`` is created when omitted.

    Returns
    -------
    1-D ``uint8`` array holding the flattened, rescaled pixels.
    """
    # Build the scalers per call: instances used as default arguments are
    # created once at definition time and shared by every call.
    if st_scaler is None:
        st_scaler = StandardScaler()
    if minmax_scaler is None:
        minmax_scaler = MinMaxScaler()

    # cv2.resize allocates its own output, so no defensive copy is needed.
    resized = cv2.resize(image, (100, 100))

    # All pixels are treated as one feature column: standardize, then
    # squeeze into [0, 1].
    column = resized.reshape(-1, 1)
    unit_range = minmax_scaler.fit_transform(st_scaler.fit_transform(column))

    # The scale factor is 200 (not 255), kept from the original pipeline.
    return (unit_range * 200).astype(np.uint8).ravel()

In [3]:
#Functions for the Strokes Parts
def get_patches(img, patch_size=(200, 200)):
    """Cut five square patches out of an image and resize them.

    The patch side is one fifth of the shorter image side. Four patches come
    from the image corners (left column first: top, bottom; then right
    column: top, bottom) and the fifth from a random interior position.

    Parameters
    ----------
    img : array
        Source image, indexed as ``img[row, col]``; it is not modified.
    patch_size : tuple of int
        Target size passed to ``cv2.resize`` for every patch.

    Returns
    -------
    list
        Five resized patches: the four corners followed by the random one.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    side = int(min(n_rows, n_cols) / 5)

    # Corner patches. Plain slices are enough: cv2.resize below allocates
    # fresh output arrays, so the source image is never written to.
    patches = [
        img[top:top + side, left:left + side]
        for left in (0, n_cols - side)
        for top in (0, n_rows - side)
    ]

    # One random patch away from the borders (column drawn first, then row,
    # which keeps the random stream consumption order of the original code).
    left = np.random.randint(side, n_cols - 2 * side)
    top = np.random.randint(side, n_rows - 2 * side)
    patches.append(img[top:top + side, left:left + side])

    return [cv2.resize(patch, patch_size) for patch in patches]

def get_max_strokes_patch(url, thresh=10):
    """Estimate brush-stroke counts on patches of the image found at ``url``.

    The image is downloaded, converted to smoothed grayscale and cut into
    patches. For every patch the lower-star persistence diagram of the
    negated patch is computed; each persistence class living longer than
    ``thresh`` is mapped back to a pixel, and the number of distinct pixels
    is taken as the stroke count of that patch.

    Parameters
    ----------
    url : str
        Location of the image.
    thresh : number
        Minimum lifetime ``|death - birth|`` for a class to be counted.

    Returns
    -------
    tuple
        ``(mean stroke count over the patches, normalized patch with the
        highest stroke count)``.
    """
    # Read and pre-process the image, then cut it into patches
    image = preprocess_image_gray(get_image(url))
    patches = get_patches(image)

    stroke_counts = []
    for patch in patches:
        # Lower-star persistence of the negated patch, using ripser
        # Cite Tom Needham
        dgm = lower_star_img(-patch)

        # Indices of the persistence classes with life > thresh
        lifetimes = np.abs(dgm[:, 1] - dgm[:, 0])
        persistent = np.flatnonzero(lifetimes > thresh)

        # Translate each class back to the flat index of the pixel whose
        # negated value is closest to the class birth value; several classes
        # may hit the same pixel, so count distinct indices only.
        birth_pixels = {
            int(np.argmin(np.abs(patch + dgm[idx, 0]))) for idx in persistent
        }
        stroke_counts.append(len(birth_pixels))

    # Mean number of strokes and the patch with the most strokes
    busiest = patches[np.argmax(stroke_counts)]
    return np.mean(stroke_counts), normalize_image(busiest)

def add_no_strokes_and_patch(Catalog):
    """Add the ``Stroke_Patch`` and ``Mean_Strokes`` columns to ``Catalog``.

    Every entry of ``Catalog.URL`` is processed with
    ``get_max_strokes_patch(url, 10)``. The frame is modified in place and
    also returned.

    Parameters
    ----------
    Catalog : pandas.DataFrame
        Must contain a ``URL`` column.

    Returns
    -------
    pandas.DataFrame
        The same object, with the two new columns.
    """
    # Iterate over the values directly: looking rows up by label would
    # return a Series instead of a single URL when index labels repeat.
    results = [get_max_strokes_patch(url, 10) for url in Catalog.URL]

    # Assigning the final lists directly makes placeholder columns
    # unnecessary; the column order (patch first) is kept.
    Catalog['Stroke_Patch'] = [patch for _, patch in results]
    Catalog['Mean_Strokes'] = [mean for mean, _ in results]

    return Catalog


In [4]:
#Function to Normalize each color channel
def add_normalized_colors(Catalog):
    """Add one normalized, flattened column per color channel to ``Catalog``.

    For every image in ``Catalog.Image`` the channels 0, 1 and 2 are passed
    through ``normalize_image`` and stored in ``Normalized_Image_B``,
    ``Normalized_Image_G`` and ``Normalized_Image_R`` respectively. The frame
    is modified in place and also returned.

    Parameters
    ----------
    Catalog : pandas.DataFrame
        Must contain an ``Image`` column of arrays indexable as
        ``image[:, :, channel]``.

    Returns
    -------
    pandas.DataFrame
        The same object, with the three new columns.
    """
    columns = ('Normalized_Image_B', 'Normalized_Image_G', 'Normalized_Image_R')
    per_channel = {column: [] for column in columns}

    # The scalers are re-fitted inside normalize_image on every call, so one
    # pair can be reused for the whole catalog.
    st_scaler = StandardScaler()
    minmax_scaler = MinMaxScaler()

    # Iterate over the stored images themselves: ``Catalog.Image[i]`` with
    # ``i in range(N)`` is a label lookup and fails (or picks the wrong row)
    # whenever the index is not exactly 0..N-1.
    for image in Catalog.Image:
        for channel, column in enumerate(columns):
            per_channel[column].append(
                normalize_image(image[:, :, channel], st_scaler, minmax_scaler)
            )

    for column in columns:
        Catalog[column] = per_channel[column]

    return Catalog

In [5]:
#Functions to get Histograms
def gray_histogram(image, preprocess=True):
    """Return a 256-bin intensity histogram scaled so that its peak is 1.

    Parameters
    ----------
    image : array
        A color image when ``preprocess`` is true (it is then converted with
        ``preprocess_image_gray``); otherwise it is used as given.
    preprocess : bool
        Whether to run ``preprocess_image_gray`` first.

    Returns
    -------
    The array produced by ``cv2.calcHist`` divided by its maximum.
    """
    if preprocess:
        image = preprocess_image_gray(image)

    # Stretch the intensities to [0, 255]; all pixels form one feature column
    pixels = image.reshape(-1, 1)
    standardized = StandardScaler().fit_transform(pixels)
    unit_range = MinMaxScaler().fit_transform(standardized)
    stretched = np.float32(np.multiply(unit_range, 255))

    # Histogram, normalized by its tallest bin
    counts = cv2.calcHist([stretched], [0], None, [256], [0, 256])
    return np.divide(counts, counts.max())

def ordered_histogram(hist):
    """Sort histogram values in ascending order and scale them by the maximum.

    Parameters
    ----------
    hist : array-like
        Histogram values in any shape; they are flattened first.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(n_bins, 1)`` whose last entry is 1.
    """
    ascending = np.sort(np.asarray(hist).reshape(-1))
    # The largest value is the last one after sorting; using it instead of a
    # fixed index 255 makes this work for any number of bins.
    return np.divide(ascending, ascending[-1]).reshape(-1, 1)

In [6]:
#Function to append histogram to an imported Catalog
def append_histograms(Catalog):
    """Add the ``GNHist`` and ``OGNHist`` columns to ``Catalog``.

    ``GNHist`` holds the 256-value output of ``gray_histogram`` for each
    image in ``Catalog.Image``; ``OGNHist`` holds the same values after
    ``ordered_histogram``. Both are stored as plain Python lists. The frame
    is modified in place and also returned.

    Parameters
    ----------
    Catalog : pandas.DataFrame
        Must contain an ``Image`` column.

    Returns
    -------
    pandas.DataFrame
        The same object, with the two new columns.
    """
    N = Catalog.shape[0]
    histogram = np.zeros((N, 256))
    o_histogram = np.zeros((N, 256))

    # Use the running position, not the index label, to address the numpy
    # buffers: labels only coincide with positions for a default 0..N-1 index.
    for row, image in enumerate(Catalog.Image):
        hist = gray_histogram(image).reshape(1, -1)
        histogram[row] = hist.ravel()
        o_histogram[row] = ordered_histogram(hist).ravel()

    Catalog['GNHist'] = histogram.tolist()
    Catalog['OGNHist'] = o_histogram.tolist()
    return Catalog

In [7]:
#Functions to extract faces
# Haar cascade detectors shared by extract_faces_eyes: frontal faces,
# profile faces and eyes.
# NOTE(review): the XML paths are relative, so the files presumably have to
# sit in the working directory -- confirm where they are expected to live.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
face2_cascade=cv2.CascadeClassifier('haarcascade_profileface.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

def extract_faces_eyes(image):
    """Detect faces and eyes in a BGR image and describe the largest face.

    Frontal and profile detections are pooled. Eyes are searched over the
    whole image, not inside the detected faces.

    Returns
    -------
    tuple
        ``(number of faces, number of eyes, face patch, face histogram)``.
        The patch is the largest detected face, resized to 100x100 grayscale
        and flattened; the histogram is ``gray_histogram`` of that patch
        without pre-processing. Both are ``None`` when no face is found.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    frontal = face_cascade.detectMultiScale(gray, 1.3, 3)
    profile = face2_cascade.detectMultiScale(gray, 1.3, 4)
    eyes = eye_cascade.detectMultiScale(gray, 1.3, 4)

    detections = [*frontal, *profile]
    if not detections:
        return 0, len(eyes), None, None

    # Keep the detection with the largest width * height
    areas = [box[2] * box[3] for box in detections]
    left, top, width, height = detections[np.argmax(areas)]

    crop = gray[top:(top + height), left:(left + width)]
    crop = cv2.resize(crop, (100, 100))
    face_hist = gray_histogram(crop, False)

    return len(detections), len(eyes), crop.ravel(), face_hist

In [8]:
def add_Faces_Eyes_Catalog(Catalog):
    """Add face and eye detection results to ``Catalog``.

    Each image is downloaded again from ``Catalog.URL`` and passed to
    ``extract_faces_eyes``. Four columns are written: ``Face_Patch``,
    ``No_Faces``, ``No_Eyes`` and ``Hist_Face``. The number of rows to
    process is printed first. The frame is modified in place and also
    returned.

    Parameters
    ----------
    Catalog : pandas.DataFrame
        Must contain a ``URL`` column.

    Returns
    -------
    pandas.DataFrame
        The same object, with the four new columns.
    """
    N = Catalog.shape[0]
    print('Need:', N)

    # Iterate over the values directly: looking rows up by label would
    # return a Series instead of a single URL when index labels repeat.
    results = [extract_faces_eyes(get_image(url)) for url in Catalog.URL]

    # Each result is (no_faces, no_eyes, face_patch, hist). Assigning the
    # final lists directly makes placeholder columns unnecessary; the column
    # order of the original implementation is kept.
    Catalog['Face_Patch'] = [res[2] for res in results]
    Catalog['No_Faces'] = [res[0] for res in results]
    Catalog['No_Eyes'] = [res[1] for res in results]
    Catalog['Hist_Face'] = [res[3] for res in results]

    return Catalog

In [9]:
#Compute Patches and Strokes
#Catalog=pd.read_pickle('Catalog_train.pkl')
#Catalog=add_no_strokes_and_patch(Catalog)
#pd.to_pickle(Catalog,'Catalogs/Catalog_train_w_strokes.pkl',protocol=3)

In [10]:
#Normalize channels and save
#Catalog=pd.read_pickle('Catalogs/Catalog_train_w_strokes.pkl')
#Catalog=add_normalized_colors(Catalog)

#del Catalog['URL_small']
#pd.to_pickle(Catalog,'Catalogs/Catalog_train_w_strokes_and_channels.pkl',protocol=3)

In [11]:
# Count faces (training set): load the catalog that already has the stroke
# and color-channel columns and add the face/eye columns to it.
# NOTE(review): read_pickle unpickles arbitrary objects -- only load catalog
# files produced by this pipeline.
Catalog=pd.read_pickle('Catalogs/Catalog_train_w_strokes_and_channels.pkl')
Catalog=add_Faces_Eyes_Catalog(Catalog)

Need: 1060


TypeError: to_pickle() missing 1 required positional argument: 'filepath_or_buffer'

In [12]:
# Persist the training catalog with the face/eye columns (pickle protocol 3).
pd.to_pickle(Catalog,'Catalogs/Catalog_train_w_strokes_and_channels_and_faces.pkl',protocol=3)

In [13]:
# Compute histograms (training set): reload the catalog saved after face
# detection, add GNHist/OGNHist and save the result under a new name.
Catalog=pd.read_pickle('Catalogs/Catalog_train_w_strokes_and_channels_and_faces.pkl')
Catalog=append_histograms(Catalog)
pd.to_pickle(Catalog,'Catalogs/Catalog_train_w_strokes_and_channels_and_faces_and_hists.pkl',protocol=3)

# Now the Test Sets

In [14]:
# Compute patches and strokes for the test set. Every URL is downloaded and
# the patch selection and pre-processing are randomized, so re-running this
# cell does not reproduce the saved file exactly.
Catalog=pd.read_pickle('Catalog_test.pkl')
Catalog=add_no_strokes_and_patch(Catalog)
pd.to_pickle(Catalog,'Catalogs/Catalog_test_w_strokes.pkl',protocol=3)

In [15]:
# Normalize the color channels of the test set and save.
Catalog=pd.read_pickle('Catalogs/Catalog_test_w_strokes.pkl')
Catalog=add_normalized_colors(Catalog)

# The URL_small column is dropped before saving.
del Catalog['URL_small']
pd.to_pickle(Catalog,'Catalogs/Catalog_test_w_strokes_and_channels.pkl',protocol=3)

In [16]:
# Count faces (test set): add the face/eye columns and save. The images are
# downloaded again from their URLs inside add_Faces_Eyes_Catalog.
Catalog=pd.read_pickle('Catalogs/Catalog_test_w_strokes_and_channels.pkl')
Catalog=add_Faces_Eyes_Catalog(Catalog)
pd.to_pickle(Catalog,'Catalogs/Catalog_test_w_strokes_and_channels_and_faces.pkl',protocol=3)

Need: 265


In [17]:
# Compute histograms (test set): add GNHist/OGNHist and save the final
# test catalog.
Catalog=pd.read_pickle('Catalogs/Catalog_test_w_strokes_and_channels_and_faces.pkl')
Catalog=append_histograms(Catalog)
pd.to_pickle(Catalog,'Catalogs/Catalog_test_w_strokes_and_channels_and_faces_and_hists.pkl',protocol=3)