## Standard-library and third-party libraries

In [3]:
import time
import itertools
import sys
import xlsxwriter as xl
import random
from random import randint
import pandas
from pandas import read_csv

import cv2
import numpy as np
import pickle
from sklearn.cluster import KMeans
from sklearn import model_selection
from sklearn.model_selection import train_test_split


## Modules for visualization

In [4]:
from matplotlib import pyplot as plt

In [5]:
from functional_modules import feature_computation_module as fc

from functional_modules import file_locations_module as flocate
from functional_modules import pca_module

from functional_modules import DTreeModel as dtree
from functional_modules import GaussianNBmodel as gauss
from functional_modules import KNeighborModel as knbr
from functional_modules import LDAmodel as lda
from functional_modules import LogRegModel as log
from functional_modules import RandForestModel as rf
from functional_modules import SVCmodel as svc

# VLAD

In [4]:
# Number of ORB descriptors kept per image slice: descriptor sets are truncated
# or zero-padded to this many rows. The value is also embedded in the names of
# the intermediate files saved and loaded by the VLAD pipeline functions.
feature_count = 100



In [5]:
def VLAD(X, visualDictionary):
    '''
    Encode a set of local descriptors as a single VLAD vector.

    :param X: 2-D array of shape (m, d), one descriptor per row
    :param visualDictionary: fitted clustering model exposing ``predict``,
        ``cluster_centers_`` and ``n_clusters`` (e.g. the estimator returned
        by kMeansDictionary)
    :return: 1-D array of length k*d holding the per-cluster residual sums
        after power normalisation and L2 normalisation; an all-zero vector
        when every residual is zero
    '''
    predictedLabels = visualDictionary.predict(X)
    centers = visualDictionary.cluster_centers_
    k = visualDictionary.n_clusters

    _, d = X.shape
    V = np.zeros([k, d])

    # accumulate, for every visual word, the residuals of the descriptors
    # assigned to it; words without any descriptor keep a zero row
    for i in range(k):
        members = predictedLabels == i
        if np.any(members):
            V[i] = np.sum(X[members, :] - centers[i], axis=0)

    V = V.flatten()
    # power normalization, also called square-rooting normalization
    V = np.sign(V) * np.sqrt(np.abs(V))

    # L2 normalization; skipped for a zero vector, where dividing by the
    # norm would turn every component into NaN
    norm = np.sqrt(np.dot(V, V))
    if norm > 0:
        V = V / norm
    return V


In [6]:
def getVLADDescriptors(path,visualDictionary,low,high,n):
    '''
    Compute one VLAD vector for each of the files data1.npy .. data{n}.npy.

    Every slice in range(low, high) of a file contributes exactly one
    fixed-length row: its first ``feature_count`` ORB descriptors, flattened
    row by row and zero-padded when fewer descriptors (or none) are found.
    The rows of one file are then encoded together with VLAD.

    :param path: directory prefix of the .npy files (file name is appended directly)
    :param visualDictionary: fitted clustering model handed to VLAD
    :param low: first slice index (inclusive)
    :param high: last slice index (exclusive)
    :param n: number of files to process
    :return: array with one VLAD vector per file
    '''
    descriptors = list()
    for i in range(n):
        print('Data-{}'.format(i+1))
        img = np.load(path + 'data{}.npy'.format(i+1), allow_pickle=True)
        l, h = fc.get_high_low_gray_level(img, i+1)
        img = fc.change_image_dynamic_range(img, i+1, l, h)

        final_des = list()
        for j in range(low, high):
            # same write/read round trip through 'photo.jpg' that
            # all_descriptors applies before running ORB
            cv2.imwrite('photo.jpg', img[j])
            img1 = cv2.imread('photo.jpg', 0)
            kp, des = describeORB(img1)

            if des is not None:
                # keep at most feature_count descriptors, zero-pad the rest
                padded = np.zeros((feature_count, des.shape[1]), dtype=des.dtype)
                kept = min(des.shape[0], feature_count)
                padded[:kept] = des[:kept]
            else:
                # no keypoints on this slice: all-zero row of 32-value descriptors
                padded = np.zeros((feature_count, 32), dtype=np.uint8)

            # exactly one row per slice, whatever branch was taken
            final_des.append(padded.ravel())

        final_des = np.asarray(final_des)
        print('des calculated..')

        print('VLAD-method called ..')
        v = VLAD(final_des, visualDictionary)
        print('VLAD recieved...')
        descriptors.append(v)

    #list to array
    descriptors = np.asarray(descriptors)
    return descriptors



In [7]:
def kMeansDictionary(training, k):
    '''
    Build the visual dictionary by clustering descriptors with k-means.

    :param training: descriptor matrix (from SIFT, ORB or similar), one sample per row
    :param k: number of clusters, i.e. visual words
    :return: the fitted KMeans estimator
    '''
    print('Inside kMeansDictionary function.')
    clusterer = KMeans(n_clusters=k, init='k-means++', tol=0.0001, verbose=1)
    # fit() returns the estimator itself, so the fitted object is what we hand back
    clusterer = clusterer.fit(training)
    print('Exiting kMeansDictionary')
    return clusterer



In [8]:
def describeORB(image):
    '''
    Detect ORB keypoints in an image and compute their descriptors.

    ORB combines the FAST keypoint detector with the BRIEF descriptor and is
    an efficient alternative to SIFT or SURF; see
    http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_feature2d/py_orb/py_orb.html

    :param image: image to describe (callers in this notebook pass a JPEG
        re-read in grayscale through cv2.imread(..., 0))
    :return: (keypoints, descriptors) exactly as produced by detectAndCompute
    '''
    detector = cv2.ORB_create()
    keypoints, descriptor_matrix = detector.detectAndCompute(image, None)
    return keypoints, descriptor_matrix



In [9]:
def all_descriptors(loc, low, high, n, descriptors, group):
    '''
    Collect fixed-length ORB descriptor rows for every file of one group.

    For each slice in range(low, high) of each file, the first
    ``feature_count`` ORB descriptors are flattened row by row into a single
    vector and zero-padded when fewer descriptors (or none) are found.

    :param loc: Where the files are (prefix; 'data{i}.npy' is appended directly)
    :param low: lowest number of the slice that would be selected - 40
    :param high: highest number of the slice that would be selected - 150+1
    :param n: numbers of files in the location
    :param descriptors: list to extend; one (high-low, row_length) array is
        appended per file. The list is modified in place.
    :param group: label used only in the progress messages
    :return: the same ``descriptors`` list that was passed in
    '''
    for i in range(n):
        print('{}-Data-{}'.format(group, i+1))
        img = np.load(loc+'data{}.npy'.format(i+1), allow_pickle=True)
        l, h = fc.get_high_low_gray_level(img, i+1)
        img = fc.change_image_dynamic_range(img, i+1, l, h)

        final_des = list()
        for j in range(low, high):
            # the slice is written to disk and re-read in grayscale before ORB
            cv2.imwrite('photo.jpg', img[j])
            img1 = cv2.imread('photo.jpg', 0)
            kp, des = describeORB(img1)

            if des is not None:
                # keep at most feature_count descriptors, zero-pad the rest
                padded = np.zeros((feature_count, des.shape[1]), dtype=des.dtype)
                kept = min(des.shape[0], feature_count)
                padded[:kept] = des[:kept]
            else:
                # no keypoints on this slice: all-zero row of 32-value descriptors
                padded = np.zeros((feature_count, 32), dtype=np.uint8)

            final_des.append(padded.ravel())

        final_des = np.asarray(final_des)
        descriptors.append(final_des)

    return descriptors



In [10]:
#################### 1. Making Ready for All_features ########
def get_all_descriptors(low, high):
    '''
    Step 1: extract the ORB descriptor rows of the AD, CN and MCI groups and
    save them, stacked into one uint8 matrix, for the dictionary training step.

    :param low: first slice index (inclusive) taken from every file
    :param high: last slice index (exclusive) taken from every file
    :return: None; the matrix is written to VLAD_{feature_count}_uint_feat.npy
    '''
    # raw string: a plain literal would contain invalid escapes such as \T and \A
    base = r'E:\THESIS\ADNI_data\ADNI1_Annual_2_Yr_3T_306_WORK'
    # files used per group (earlier notes next to CN / MCI mention 115 / 133)
    n = 54

    des = list()
    for group in ('AD', 'CN', 'MCI'):
        print('#######################')
        loc = f'{base}\\{group}_mainNPY\\'
        des = all_descriptors(loc, low, high, n, des, group)

    # one array per file -> one flat list of per-slice rows
    des = list(itertools.chain.from_iterable(des)) #Flatten
    des = np.asarray(des).astype('uint8')

    np.save(f'{base}\\vlad\\VLAD_{feature_count}_uint_feat.npy', des)

    print()
    return



In [12]:
############# 2. Making Visual Words #############
def get_visual_dict():
    '''
    Step 2: train the k-means visual dictionary (256 words) on the descriptor
    matrix saved by get_all_descriptors and pickle the fitted model.

    :return: None; the model is written to KMean{feature_count}_model.sav
    '''
    # raw string: a plain literal would contain invalid escapes such as \T and \A
    vlad_dir = r'E:\THESIS\ADNI_data\ADNI1_Annual_2_Yr_3T_306_WORK\vlad'

    vlad_data_file = np.load(f'{vlad_dir}\\VLAD_{feature_count}_uint_feat.npy', allow_pickle=True)
    # quick sanity dump of what was loaded
    print(vlad_data_file.shape)
    print(np.max(vlad_data_file))
    print(vlad_data_file.dtype)
    print(vlad_data_file[0, -1])
    visualDict = kMeansDictionary(vlad_data_file, 256)
    print('Visual Dictionary obtained.')

    model_file = f'{vlad_dir}\\KMean{feature_count}_model.sav'
    # context manager guarantees the file is flushed and closed
    with open(model_file, 'wb') as model_handle:
        pickle.dump(visualDict, model_handle)
    print('Visual Dictionary model saved.')
    return



In [13]:
############# 3. Getting the VLAD descriptors #############
def get_vlad_desc(low, high, groups=('CN', 'MCI')):
    '''
    Step 3: compute and save the VLAD descriptors of the requested groups
    using the visual dictionary pickled by get_visual_dict.

    :param low: first slice index (inclusive) taken from every file
    :param high: last slice index (exclusive) taken from every file
    :param groups: group names to process, any of 'AD', 'CN', 'MCI'. The
        default matches the previous behaviour, where the AD part was
        disabled; pass ('AD', 'CN', 'MCI') to regenerate everything.
    :return: None; one vlad{feature_count}_{group}.npy file is written per group
    '''
    n = 54

    # raw string: a plain literal would contain invalid escapes such as \T and \A
    base = r'E:\THESIS\ADNI_data\ADNI1_Annual_2_Yr_3T_306_WORK'

    # pickle.load can execute arbitrary code: only load model files that
    # were produced locally by get_visual_dict
    with open(f'{base}\\vlad\\KMean{feature_count}_model.sav', 'rb') as model_handle:
        visualDict = pickle.load(model_handle)

    for group in groups:
        loc = f'{base}\\{group.upper()}_mainNPY\\'
        vlad = getVLADDescriptors(loc, visualDict, low, high, n)

        np.save(f'{base}\\vlad\\vlad{feature_count}_{group.lower()}.npy', vlad)

        print()
        # pauses until the user confirms, as the per-group code did before
        input('{} complete. Enter to continue >>'.format(group.upper()))

    return



In [14]:
def merge_vlads():
    '''
    Step 4: load the per-group VLAD matrices, append a target column
    (1 for AD, 2 for CN, 3 for MCI) and save all cases as one matrix.

    :return: None; the result is written to vlad_all_cases_{feature_count}.npy
    '''
    # raw string: a plain literal would contain invalid escapes such as \T and \A
    vlad_dir = r'E:\THESIS\ADNI_data\ADNI1_Annual_2_Yr_3T_306_WORK\vlad'

    labelled = []
    for name, target in (('AD', 1), ('CN', 2), ('MCI', 3)):
        vlad = np.load(f'{vlad_dir}\\vlad{feature_count}_{name.lower()}.npy', allow_pickle=True)
        print('{}-VLAD loaded'.format(name))

        # size the target column from this group's own row count, so groups
        # of different sizes can be merged
        targets = np.full((vlad.shape[0],), target, dtype='uint8')
        print('{}-VLAD-T generated'.format(name))

        labelled.append(np.column_stack((vlad, targets)))
        print('{}-T appended'.format(name))

    vlad_all_cases = np.concatenate(labelled, axis=0)
    print('All cases merged')

    np.save(f'{vlad_dir}\\vlad_all_cases_{feature_count}.npy', vlad_all_cases)
    print('Saved')



In [15]:
# Driver for the VLAD pipeline. The four steps are meant to be un-commented
# and run one at a time: each one loads the file(s) saved by the step before.
start_time = time.time()
low, high = 40, 151   # slice range handed to steps 1 and 3
# step - 1
#get_all_descriptors(low, high) # complete - 50, 100, 200 
# step - 2
#get_visual_dict()   # complete - 50, 100
# step - 3
#get_vlad_desc(low,high)
# step - 4
#merge_vlads()
elapsed = time.time() - start_time
e = int(elapsed)   # whole seconds, kept in `e` for the (disabled) report below
#print('Time elapsed- {:02d}:{:02d}:{:02d}'.format(e //3600, (e % 3600 // 60), e % 60))

Time elapsed- 00:01:22


# Train-Classify-Test

In [41]:
def prepare_data(data_path):
    '''
    Load a saved case matrix and split it into features and targets.

    :param data_path: path of a .npy file holding a 2-D array whose last
        column is the target
    :return: (all columns but the last, the last column)
    '''
    dataset = np.load(data_path, allow_pickle=True)
    print('Data shape >> ', dataset.shape)
    features, targets = dataset[:, :-1], dataset[:, -1]
    return features, targets

def create_excel(excel_loc, title, classifier):
    '''
    Create the result workbook and write the classifier's headers in row 1.

    :param excel_loc: directory prefix of the workbook (title is appended directly)
    :param title: file name without the '.xlsx' extension
    :param classifier: object exposing ``headers``, the column titles
    :return: (workbook, worksheet); the caller has to close the workbook
    '''
    headers = classifier.headers

    outWorkbook = xl.Workbook(excel_loc+title+'.xlsx')
    outSheet = outWorkbook.add_worksheet()
    # (row, col) addressing instead of 'A1'..'Z1' strings, so more than
    # 26 headers no longer run past the end of the alphabet
    for col, header in enumerate(headers):
        outSheet.write(0, col, header)
    print('Excel created >> '+excel_loc+title+'.xlsx')
    return outWorkbook, outSheet

In [42]:
def train_model(classifier, X, Y, book, sheet, line=1, serial=1, doCompo=False):
    '''
    Try a random selection of the classifier's parameter combinations on one
    train/test split and log every new best score to the worksheet.

    :param classifier: model module exposing ``combos``, ``headers`` and
        ``make_model(index, train_X, train_Y, test_X, test_Y)``
    :param X: feature matrix
    :param Y: target vector
    :param book: workbook owning ``sheet`` (not used here)
    :param sheet: worksheet receiving one row per new best score
    :param line: worksheet row to write the next result to
    :param serial: component / feature-set number, used for PCA and logging
    :param doCompo: when True, X is first reduced with pca_module.applyPCA(X, serial)
    :return: (next free worksheet row, list of the successive best scores)
    '''
    combo_list = classifier.combos  # call function here
    number_of_combos = len(combo_list)
    print('Number of total combinations >> ', number_of_combos)
    shuffle = 75
    # NOTE(review): random.choices draws with replacement, so the same combo
    # can be selected more than once - confirm this is intended
    combo_list = random.choices(combo_list, k=shuffle)
    print('Number of selected combos %d'%shuffle)
    headers = classifier.headers
    # the last three columns hold score, serial and score in percent
    param_cols = len(headers) - 3

    print('Processing compo #', serial)
    x = X
    if doCompo:
        x = pca_module.applyPCA(X, serial)
        print('PCA successfully applied for component #%d' % (serial+1))

    success = 0
    best_score = 0
    fail = 0
    scores = []

    train_X, test_X, train_Y, test_Y = train_test_split(x, Y, test_size=0.3)

    for c in range(shuffle):
        print('Entering combo #', c+1)
        try:
            print(combo_list[c])
            # NOTE(review): make_model receives the position c, while the combo
            # printed and logged comes from the re-sampled combo_list; verify
            # that make_model does not index classifier.combos with c
            score_model = classifier.make_model(c, train_X, train_Y, test_X, test_Y)
            print('for Compo #{} - Combo #{} - #{} Combos Successful!\nScore: {}'.format(serial, c+1, success+1, score_model))
            success += 1
            if score_model > best_score:
                print('New highest accuracy:',score_model, '>', best_score)
                best_score = score_model
                scores.append(best_score)
                for i in range(param_cols):
                    sheet.write(line, i, combo_list[c][i])

                sheet.write(line, (len(headers)-1), best_score*100)
                sheet.write(line, (len(headers)-2), serial)
                sheet.write(line, (len(headers)-3), best_score)
                print('Line #{} --- Component #{}'.format(line, serial))
                line += 1
        except Exception as exc:
            # a failing combo must not stop the sweep, but the reason is
            # reported; KeyboardInterrupt / SystemExit are no longer swallowed
            print('Compo #',serial,' - Combo failed at #', c+1)
            print('Reason >> {}: {}'.format(type(exc).__name__, exc))
            fail += 1
        print('Exiting compo #%d - combo #%d'% (serial, c+1))
        print()

    print('Compo %d - all done.'%serial)
    print('Total combinations: ', number_of_combos)
    print('Total success: ', success)
    print('Total failure:', fail)
    return line,scores



In [43]:
def classify_glcm(model, book, sheet, limit, path):
    '''
    Run train_model with PCA enabled for every serial in 1 .. limit-1 on the
    data stored at ``path`` and print the best scores collected per serial.

    :param model: classifier module forwarded to train_model
    :param book: workbook forwarded to train_model
    :param sheet: worksheet forwarded to train_model
    :param limit: exclusive upper bound of the serial numbers
    :param path: .npy file read through prepare_data
    '''
    features, targets = prepare_data(path)
    next_row = 1
    per_serial_scores = []
    serial = 1
    while serial < limit:
        # train_model hands back the next free worksheet row, carried over to the next serial
        next_row, best = train_model(model, features, targets, book, sheet, next_row, serial, True)
        per_serial_scores.append(best)
        print('Serial #', serial, 'done.')
        serial += 1
    print(per_serial_scores)



In [44]:
def classify_hog(model, book, sheet, limit):
    '''
    Run train_model (no PCA) for every serial in 1 .. limit-1, each on its own
    data file, and print the best scores collected per serial.

    :param model: classifier module forwarded to train_model
    :param book: workbook forwarded to train_model
    :param sheet: worksheet forwarded to train_model
    :param limit: exclusive upper bound of the serial numbers
    '''
    next_row = 1
    per_serial_scores = []
    serial = 1
    while serial < limit:
        # the file location is a format string filled in with the serial
        features, targets = prepare_data(flocate.HOG_all_case_feats_form.format(serial))
        next_row, best = train_model(model, features, targets, book, sheet, next_row, serial, False)
        per_serial_scores.append(best)
        print('Serial #', serial, 'done.')
        serial += 1
    print(per_serial_scores)



In [45]:
def classify_vlad(model, book, sheet, path):
    '''
    Run a single train_model pass (default row, serial and no PCA) on the
    data stored at ``path`` and print the best scores it collected.

    :param model: classifier module forwarded to train_model
    :param book: workbook forwarded to train_model
    :param sheet: worksheet forwarded to train_model
    :param path: .npy file read through prepare_data
    '''
    features, targets = prepare_data(path)
    print('Data distribution complete.')
    # the returned worksheet row is not needed after a single pass
    _, best_scores = train_model(model, features, targets, book, sheet)
    print(best_scores)
    return


In [47]:
start_time = time.time()

# Classifier selection: leave exactly one assignment un-commented.
#model = dtree
#model = gauss
#model = knbr
#model = svc
#model = rf     # time consuming - 36 combos
model = lda    # time consuming - 210 combos
#model = log     # time consuming - 336 //924 combos
    
#title = model.title+'_glcm'
#title = model.title+'_hog'
n = 100
title = model.title +f'_vlad{n}'

excel_loc = r'E:\THESIS\ADNI_data\ADNI1_Annual_2_Yr_3T_306_WORK\FiftyFour\excels\\'
book, sheet = create_excel(excel_loc, title, model)

try:
    #limit = 161

    # function for handling glcm
    #glcm_path = r"E:\THESIS\\ADNI_data\\ADNI1_Annual_2_Yr_3T_306_WORK\FiftyFour\GLCM54feats54.npy"
    #classify_glcm(model, book, sheet, limit, glcm_path)

    # function for handling hog
    #classify_hog(model, book, sheet, limit)

    # function for handling vlad
    # raw f-string: same path as before, without invalid escapes such as \T and \A
    vlad_path = rf"E:\THESIS\ADNI_data\ADNI1_Annual_2_Yr_3T_306_WORK\vlad\vlad_all_cases_{n}.npy"
    #csv_file_path = r"F:\\AI-ML-DL_works\Irist-dataset-ML\iris-Copy.csv"
    classify_vlad(model, book, sheet, vlad_path)
    #classify_vlad(model, book, sheet, csv_file_path)
finally:
    # close the workbook even when classification raises, so rows already
    # written are saved to the .xlsx file
    book.close()
print()

e = int(time.time() - start_time)
print('{:02d}:{:02d}:{:02d}'.format(e // 3600, (e % 3600 // 60), e % 60))

Excel created >> E:\THESIS\ADNI_data\ADNI1_Annual_2_Yr_3T_306_WORK\FiftyFour\excels\\LDA__vlad100.xlsx
Data shape >>  (162, 819201)
Data distribution complete.
Number of total combinations >>  210
Number of selected combos 75
Processing compo # 1
Entering combo # 1
['lsqr', 'auto', False, 0.2]
Compo # 1  - Combo failed at # 1
Exiting compo #1 - combo #1

Entering combo # 2
['lsqr', 0.75, True, 0.1]
Compo # 1  - Combo failed at # 2
Exiting compo #1 - combo #2

Entering combo # 3
['lsqr', 0.5, True, 0.001]
Compo # 1  - Combo failed at # 3
Exiting compo #1 - combo #3

Entering combo # 4
['lsqr', 'auto', True, 0.2]
