In [99]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from scipy.stats import multivariate_normal as norm
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [100]:
file_data_b_csv=r'/home/jacobo15defrutos/AVS9/5-Pruebas/total_imgs.csv'
df= pd.read_csv(file_data_b_csv)
df_train, df_test = train_test_split(df, test_size=0.2,random_state=42)
print("Train shape: ",df_train.shape)
print("Test shape: ",df_test.shape)

Train shape:  (268, 2)
Test shape:  (67, 2)


In [101]:
labels_train= df_train['Label']
train_list=[]
target_size = (224, 224)
folder= '/home/jacobo15defrutos/AVS9/5-Pruebas/Total_images/'
for name in df_train['Name']:
    path= folder + name
    img=cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img= cv2.resize(img,target_size)
    img=img.ravel()
    train_list.append(img)
train= np.array(train_list)
train.shape

(268, 50176)

In [102]:
lda=LDA(n_components=2)
lda.fit(train,labels_train)

In [103]:
train_SANO_list=[]
train_CMV_list=[]
train_SURV_list=[]
target_size = (224, 224)
folder= '/home/jacobo15defrutos/AVS9/5-Pruebas/Total_images/'
for name in df['Name']:
    path= folder + name
    img=cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img= cv2.resize(img,target_size)
    img=img.ravel()
    if 'SANO' in name:
        train_SANO_list.append(img)
    elif 'CMV' in name:
        train_CMV_list.append(img)
    elif 'SURV' in name:
        train_SURV_list.append(img)
train_SANO= np.array(train_SANO_list)
train_CMV= np.array(train_CMV_list)
train_SURV= np.array(train_SURV_list)
train_SURV.shape

(92, 50176)

In [104]:
x_sano=lda.transform(train_SANO)
x_cmv=lda.transform(train_CMV)
x_surv=lda.transform(train_SURV)

In [105]:
mean_sano=np.mean(x_sano,axis=0)
mean_cmv=np.mean(x_cmv,axis=0)
mean_surv=np.mean(x_surv,axis=0)

cvar_sano=np.cov(x_sano.T)
cvar_cmv=np.cov(x_cmv.T)
cvar_surv=np.cov(x_surv.T)

rv_c0= norm(mean_sano,cvar_sano)
rv_c1= norm(mean_cmv,cvar_cmv)
rv_c2= norm(mean_surv,cvar_surv)

In [106]:
labels_test= df_test['Label']
test_list=[]
target_size = (224, 224)
folder= '/home/jacobo15defrutos/AVS9/5-Pruebas/Total_images/'
for name in df_test['Name']:
    path= folder + name
    img=cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img= cv2.resize(img,target_size)
    img_dense = img.ravel()
    test_list.append(img_dense)
test= np.array(test_list)
test.shape

(67, 50176)

In [107]:
test_SANO_list=[]
test_CMV_list=[]
test_SURV_list=[]
target_size = (224, 224)
folder= '/home/jacobo15defrutos/AVS9/5-Pruebas/Total_images/'
for name in df_test['Name']:
    path= folder + name
    img=cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img= cv2.resize(img,target_size)
    img=img.ravel()
    if 'SANO' in name:
        test_SANO_list.append(img)
    elif 'CMV' in name:
        test_CMV_list.append(img)
    elif 'SURV' in name:
        test_SURV_list.append(img)
test_SANO= np.array(test_SANO_list)
test_CMV= np.array(test_CMV_list)
test_SURV= np.array(test_SURV_list)
label_test_sano=0*np.ones(len(test_SANO))
label_test_cmv=1*np.ones(len(test_CMV))
label_test_surv=2*np.ones(len(test_SURV))

### Calculate the total test accuracy

In [108]:
P_c0= train_SANO.shape[0]/train.shape[0]
P_c1= train_CMV.shape[0]/train.shape[0]
P_c2= train_SURV.shape[0]/train.shape[0]
print(P_c0,P_c1,P_c2)

0.5783582089552238 0.3283582089552239 0.34328358208955223


In [110]:
count=0
count0=0
count1=0
count2=0
x_test=lda.transform(test)
rv_x=np.zeros(len(test))
for i in range (len(test)):
    rv_c0_0= rv_c0.pdf(x_test[i]) 
    rv_c1_1= rv_c1.pdf(x_test[i])
    rv_c2_2= rv_c2.pdf(x_test[i])
    if (rv_c0_0*P_c0)>(rv_c1_1*P_c1) and (rv_c0_0*P_c0)>(rv_c2_2*P_c2) :
        rv_x[i]=0
        if np.array(labels_test)[i]==0:
            count=count+1
            count0=count0+1
    elif (rv_c1_1*P_c1)>(rv_c0_0*P_c0) and (rv_c1_1*P_c1)>(rv_c2_2*P_c2):  
        rv_x[i]=1
        if np.array(labels_test)[i]==1:
            count=count+1
            count1=count1+1
    elif (rv_c2_2*P_c2)>(rv_c0_0*P_c0) and (rv_c2_2*P_c2)>(rv_c1_1*P_c1):  
        rv_x[i]=2
        if np.array(labels_test)[i]==2:
            count=count+1
            count2=count2+1

In [111]:
accuracy=count/len(labels_test)
print("Total Accuracy: ",accuracy*100,"%")
accuracy0=count0/len(label_test_sano)
accuracy1=count1/len(label_test_cmv)
accuracy2=count2/len(label_test_surv)
print("Class 0 Accuracy: ",accuracy0*100,"%")
print("Class 1 Accuracy: ",accuracy1*100,"%")
print("Class 2 Accuracy: ",accuracy2*100,"%")

Total Accuracy:  38.80597014925373 %
Class 0 Accuracy:  62.5 %
Class 1 Accuracy:  30.76923076923077 %
Class 2 Accuracy:  17.647058823529413 %


In [112]:
x_test=lda.transform(test)
p_sano = rv_c0.pdf(x_test)
p_cmv = rv_c1.pdf(x_test)
p_surv = rv_c2.pdf(x_test)

In [113]:
classes= np.array([0,1,2])
pred= np.argmax(np.c_[p_sano,p_cmv,p_surv],axis=1)
pred

array([1, 0, 0, 2, 0, 1, 0, 2, 1, 1, 0, 0, 2, 0, 0, 2, 2, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 0, 2, 1, 1, 2, 1, 0,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 2, 0, 2,
       1])

In [114]:
from sklearn.metrics import accuracy_score
acc_total= accuracy_score(labels_test,pred)
print("Total Accuracy: ",acc_total*100,"%")

Total Accuracy:  43.28358208955223 %


### Calculate each class accuracy

In [115]:
acc_sano = np.sum(pred[labels_test == 0] == 0)/len(label_test_sano) * 100
acc_cmv = np.sum(pred[labels_test == 1] == 1)/len(label_test_cmv) * 100
acc_surv = np.sum(pred[labels_test == 2] == 2)/len(label_test_surv) * 100
print("Class SANO Accuracy: {:.2f} %".format(acc_sano))
print("Class CMV Accuracy: {:.2f} %".format(acc_cmv))
print("Class SURV Accuracy: {:.2f} %".format(acc_surv))

Class SANO Accuracy: 54.17 %
Class CMV Accuracy: 46.15 %
Class SURV Accuracy: 23.53 %
