In [1]:
import numpy as np
import cv2
from sklearn.metrics import confusion_matrix

In [None]:
def patch_prob_ensemble(data, patch_idx, grpn, grpsize, total_trials):
    '''
    Function: compute the ensemble prediction of one trial group for a given patch index.

    The rows of `data` whose first column equals `patch_idx` are treated as the
    trials for that patch, in file order. Trial t belongs to ensemble
    t // grpsize. The probabilities (columns 1-4) of the trials in ensemble
    `grpn` are averaged, and the class with the highest mean is the prediction.

    Input Parameters:
        data: 2-D array with rows of the format [index probability_art_1 probability_art_2 probability_art_3 probability_art_4]
        patch_idx: patch index that we need the ensemble prediction for
        grpn: ensemble number (0-based group of trials)
        grpsize: ensemble size (trials per ensemble)
        total_trials: number of trials per patch; it must equal the number of
            rows matching patch_idx, otherwise the boolean trial mask does not fit
    Output:
        patch_idx: the patch index, passed through unchanged
        max_prob: highest mean probability, i.e. the average confidence in the prediction
        max_prob_pid: position of that highest mean within columns 1-4 (0 = art_1)
    '''
    trials_for_patch = data[data[:, 0] == patch_idx]

    # Ensemble membership of every trial: 0,0,...,1,1,...
    trial_group = np.arange(total_trials) // grpsize
    in_ensemble = trial_group == grpn

    mean_probs = trials_for_patch[in_ensemble, 1:5].mean(axis=0)
    return patch_idx, mean_probs.max(), mean_probs.argmax()

In [None]:
'''
MAIN:
Inputs probability data files and computes ensemble accuracy
'''
psizes = [120,140,224]

for patch_size in psizes:
    # One probability file per artist (p1..p4); each row is
    # [patch index, probability_art_1, ..., probability_art_4].
    data1 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p1_ps'+repr(patch_size)+'_9010.csv')
    data2 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p2_ps'+repr(patch_size)+'_9010.csv')
    data3 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p3_ps'+repr(patch_size)+'_9010.csv')
    data4 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p4_ps'+repr(patch_size)+'_9010.csv')

    # Sorted distinct patch indices, taken from the already loaded p1 array
    # (the file used to be parsed a second time just for this).
    all_idx = np.unique(data1[:,0])

    # Trials per patch = rows / patches (rounded up). Ensembles hold 20 trials
    # each; trials beyond the last complete ensemble are not evaluated.
    total_trials = len(data1[::len(all_idx)])
    grpsize = 20
    grpnmbr = total_trials//grpsize

    # Output row layout: [patch_size, accuracy of ensemble 0, ensemble 1, ...]
    fi_acc = np.zeros(grpnmbr+1)
    fi_acc[0] = patch_size

    for j in range(grpnmbr):
        # Per patch: (patch index, mean confidence, predicted class)
        prob_list_p1 = np.zeros((len(all_idx),3))
        prob_list_p2 = np.zeros((len(all_idx),3))
        prob_list_p3 = np.zeros((len(all_idx),3))
        prob_list_p4 = np.zeros((len(all_idx),3))

        # Look patches up by their actual index value rather than by loop
        # position, so files whose indices do not start at 0 also work.
        for i, patch_idx in enumerate(all_idx):
            prob_list_p1[i] = patch_prob_ensemble(data1, patch_idx, j, grpsize, total_trials)
            prob_list_p2[i] = patch_prob_ensemble(data2, patch_idx, j, grpsize, total_trials)
            prob_list_p3[i] = patch_prob_ensemble(data3, patch_idx, j, grpsize, total_trials)
            prob_list_p4[i] = patch_prob_ensemble(data4, patch_idx, j, grpsize, total_trials)

        # A prediction from file pK counts as right when the predicted class is K-1.
        total_right = (np.count_nonzero(prob_list_p1[:,2]==0)
                       + np.count_nonzero(prob_list_p2[:,2]==1)
                       + np.count_nonzero(prob_list_p3[:,2]==2)
                       + np.count_nonzero(prob_list_p4[:,2]==3))
        acc = total_right/( len(all_idx) *4)
        fi_acc[j+1] = acc

    # NOTE: the file is opened in append mode, so re-running this cell adds
    # another row per patch size instead of replacing earlier results.
    res = np.reshape(fi_acc,(1,grpnmbr+1))
    with open('ensemble_analysis_100_minus.csv','a') as f:
        np.savetxt(f, res, fmt='%s', delimiter=',')
    print(fi_acc)


In [None]:
def patch_prob_ensemble_f(data, patch_idx, grpn, grpsize, total_trials):
    '''
    Function: compute the ensemble prediction of one trial group for a given patch index
    Input Parameters:
        data: 2-D array with rows of the format [index probability_art_1 probability_art_2 probability_art_3 probability_art_4]
        patch_idx: patch index that we need the ensemble prediction for
        grpn: ensemble number (0-based; trial t belongs to ensemble t // grpsize)
        grpsize: ensemble size
        total_trials: total number of trials we have in the data for that particular patch;
            must equal the number of rows matching patch_idx, otherwise the boolean mask does not fit
    Output:
        max_prob_pid: ensemble prediction for that particular patch, i.e. the position
            (0 = art_1) of the highest mean probability over the ensemble's trials
    '''
    # All trials recorded for this patch, in file order
    pred_list = data[data[:,0] == patch_idx]
    # Average the probability columns over the trials that fall in ensemble grpn
    patch_pred_mean = np.mean(pred_list[np.arange(total_trials)//grpsize==grpn,1:5],axis=0)
    return np.argmax(patch_pred_mean)

In [None]:
'''
MAIN:
Computes Confusion Matrices for Individual Artists
and subsequently does
-F1 SCORE ANALYSIS
-Recall SCORE ANALYSIS
-Precision SCORE ANALYSIS
'''

#patch sizes to be analyzed
psizes = [10,20,40,80,100,120,140,160,180,200,224,250] 

for patch_size in psizes:
    #load probability files (one per artist, p1..p4); each row is
    #[patch index, probability_art_1, ..., probability_art_4]
    data1 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p1_ps'+repr(patch_size)+'_9010.csv')
    data2 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p2_ps'+repr(patch_size)+'_9010.csv')
    data3 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p3_ps'+repr(patch_size)+'_9010.csv')
    data4 = np.loadtxt('/home/gxs372/ml_art_hist/height_SI_fig/PS'+repr(patch_size)+'/heapmap_p4_ps'+repr(patch_size)+'_9010.csv')

    #sorted distinct patch indices, taken from the already loaded p1 array
    #(the file used to be parsed a second time just for this)
    all_idx = np.unique(data1[:,0])

    #trials per patch = rows / patches (rounded up); patch size 10 uses
    #ensembles of 10 trials, every other size ensembles of 20
    total_trials = len(data1[::len(all_idx)])
    if patch_size==10 :
        grpsize = 10
    else :
        grpsize = 20

    grpnmbr = total_trials//grpsize

    #Initiate arrays: one row per artist, laid out as
    #[patch_size, score of ensemble 0, score of ensemble 1, ...]
    f1 = np.zeros((4,grpnmbr+1))
    f1[:,0] = patch_size
    precision = np.zeros((4,grpnmbr+1))
    precision[:,0] = patch_size
    recall = np.zeros((4,grpnmbr+1))
    recall[:,0] = patch_size

    for j in range(grpnmbr):
        #predicted class per patch, one array per artist file
        prob_list_p1 = np.zeros((len(all_idx),1))
        prob_list_p2 = np.zeros((len(all_idx),1))
        prob_list_p3 = np.zeros((len(all_idx),1))
        prob_list_p4 = np.zeros((len(all_idx),1))

        #look patches up by their actual index value rather than by loop
        #position, so files whose indices do not start at 0 also work
        for i, patch_idx in enumerate(all_idx):
            prob_list_p1[i] = patch_prob_ensemble_f(data1, patch_idx, j, grpsize, total_trials)
            prob_list_p2[i] = patch_prob_ensemble_f(data2, patch_idx, j, grpsize, total_trials)
            prob_list_p3[i] = patch_prob_ensemble_f(data3, patch_idx, j, grpsize, total_trials)
            prob_list_p4[i] = patch_prob_ensemble_f(data4, patch_idx, j, grpsize, total_trials)

        #true label of every patch from file pK is K-1; predictions are
        #concatenated in the same p1..p4 order
        y_true = np.repeat(np.array([0,1,2,3]),len(prob_list_p1))
        y_pred = np.concatenate((prob_list_p1.ravel(),prob_list_p2.ravel(),prob_list_p3.ravel(),prob_list_p4.ravel()))
        cm = confusion_matrix(y_true, y_pred)

        #cm[t,p] counts patches with true label t that were predicted as p,
        #so a ROW holds everything that truly is an artist and a COLUMN
        #everything that was predicted as that artist
        for artist in range(4):
            tp = cm[artist,artist]
            fn = np.sum(cm[artist]) - tp      #truly this artist, predicted as another
            fp = np.sum(cm[:,artist]) - tp    #predicted as this artist, truly another

            f1[artist,j+1] = tp/(tp+0.5*(fp+fn))
            #precision is nan (with a numpy warning) if the artist is never predicted
            precision[artist,j+1] = tp/(tp+fp)
            recall[artist,j+1] = tp/(tp+fn)

    #Append one row per artist to each metric's CSV.
    #NOTE: files are opened in append mode, so re-running this cell adds
    #duplicate rows. Rows written before the fp/fn fix have precision and
    #recall interchanged.
    for metric_name, scores in (('rev_f1',f1),('precision',precision),('recall',recall)):
        for artist in range(4):
            row = np.reshape(scores[artist],(1,grpnmbr+1))
            with open(metric_name+'_ensemble_art_'+repr(artist+1)+'.csv','a') as f:
                np.savetxt(f, row, fmt='%s', delimiter=',')