# This file lives in the `notebooks` folder inside the project directory.

In [220]:
import numpy as np
from sklearn.svm import OneClassSVM
import random


from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA, TruncatedSVD, LatentDirichletAllocation, NMF
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from scipy.spatial.distance import mahalanobis, jaccard

import sys
sys.path.append('../')

from utils import get_all_vectors, filter_images
from feature_reduction.feature_reduction import reducer
from metric.distance import distance

In [221]:
# Experiment configuration shared by the cells below.
models = ['sift']   # feature models; each one is passed to get_all_vectors()
label = 'dorsal'    # label handed to filter_images() to select the "positive" images
k = 20              # passed to reducer(); presumably the number of latent components
frt = 'pca'         # passed to reducer(); presumably the feature-reduction technique

In [222]:
# Build the labelled feature matrices: `dorsal_vectors` holds the vectors of
# the images whose path is returned by filter_images(label), `palmar_vectors`
# the vectors of every other image. With several models, each model's vectors
# are appended column-wise (axis=1).
# NOTE(review): column-wise stacking assumes get_all_vectors() returns rows in
# the same image order for every model -- confirm.
dorsal_paths = filter_images(label)
dorsal_vectors = np.array([])
palmar_vectors = np.array([])
for model in models:

    # Images whose path IS in the dorsal list.
    paths, temp = get_all_vectors(model, f={'path': {'$in': dorsal_paths}})
    dorsal_vectors = (
        np.concatenate((dorsal_vectors, temp), axis=1)
        if dorsal_vectors.size
        else temp
    )

    # Images whose path is NOT in the dorsal list.
    _, temp = get_all_vectors(model, f={'path': {'$nin': dorsal_paths}})
    palmar_vectors = (
        np.concatenate((palmar_vectors, temp), axis=1)
        if palmar_vectors.size
        else temp
    )

In [223]:
# Query data taken from the unlabelled database (unlabelled_db=True). These
# arrays are what test() stacks into its test set, so they act as evaluation
# data rather than training data.
q_dorsal_paths = filter_images(label, unlabelled_db=True)
q_dorsal_vectors = np.array([])
q_palmar_vectors = np.array([])
for model in models:

    # Query images whose path IS in the dorsal list.
    _, temp = get_all_vectors(model, f={'path': {'$in': q_dorsal_paths}}, unlabelled_db=True)
    q_dorsal_vectors = (
        np.concatenate((q_dorsal_vectors, temp), axis=1)
        if q_dorsal_vectors.size
        else temp
    )

    # Query images whose path is NOT in the dorsal list.
    _, temp = get_all_vectors(model, f={'path': {'$nin': q_dorsal_paths}}, unlabelled_db=True)
    q_palmar_vectors = (
        np.concatenate((q_palmar_vectors, temp), axis=1)
        if q_palmar_vectors.size
        else temp
    )

# Ground-truth labels for the query rows: 1 = dorsal, 0 = palmar.
q_dorsal_class = np.array([1] * len(q_dorsal_vectors))
q_palmar_class = np.array([0] * len(q_palmar_vectors))

In [224]:
def _mahalanobis_to_class(query_vector, scaler, pca, class_vectors):
    """Return the Mahalanobis distance from a query to one class's point cloud.

    The query is pushed through the class's own ``scaler`` and ``pca``
    (both only need a ``transform`` method) and compared against the mean and
    covariance of ``class_vectors``, the already-reduced vectors of that class.
    """
    projected = pca.transform(scaler.transform(query_vector))
    # scipy's mahalanobis() only accepts 1-D vectors, while transform() is fed
    # a single-row 2-D array here; flatten the result before measuring.
    projected = np.ravel(projected)

    class_mean = np.mean(class_vectors, axis=0)
    # np.cov() yields a 0-d array when there is a single component;
    # atleast_2d keeps the inversion valid in that case.
    inv_cov = np.linalg.inv(np.atleast_2d(np.cov(class_vectors.T)))
    return mahalanobis(projected, class_mean, inv_cov)


def predict_label(query_vector, dorsal_scaler, dorsal_pca, pca_dorsal_vectors, palmar_scaler, palmar_pca, pca_palmar_vectors):
    """Classify one feature vector as dorsal (1) or palmar (0).

    The query is projected into each class's reduced space and its Mahalanobis
    distance to that class's reduced training vectors is computed; the class
    with the smaller distance wins.

    Parameters
    ----------
    query_vector : array-like
        A single raw feature vector; it is reshaped to one row internally.
    dorsal_scaler, dorsal_pca : objects exposing ``transform``
        Scaler and reducer used for the dorsal class.
    pca_dorsal_vectors : ndarray
        Reduced dorsal vectors, one row per sample.
    palmar_scaler, palmar_pca : objects exposing ``transform``
        Scaler and reducer used for the palmar class.
    pca_palmar_vectors : ndarray
        Reduced palmar vectors, one row per sample.

    Returns
    -------
    int
        1 when the dorsal distance is strictly smaller, otherwise 0
        (ties therefore go to palmar).
    """
    query_vector = query_vector.reshape(1, -1)

    dorsal_dist = _mahalanobis_to_class(
        query_vector, dorsal_scaler, dorsal_pca, pca_dorsal_vectors
    )
    palmar_dist = _mahalanobis_to_class(
        query_vector, palmar_scaler, palmar_pca, pca_palmar_vectors
    )

    if dorsal_dist < palmar_dist:
        return 1
    return 0

In [225]:
def test():
    """Score the Mahalanobis classifier on the query set.

    Reads the module-level arrays (`dorsal_vectors`, `palmar_vectors`, the
    `q_*` query arrays and their labels) together with `k` and `frt`, fits one
    reducer per class, classifies every query row with predict_label() and
    returns the resulting accuracy.
    """
    # Evaluation set: dorsal query rows first, then palmar ones, with labels
    # concatenated in the same order.
    test_data = np.vstack((q_dorsal_vectors, q_palmar_vectors))
    test_labels = np.concatenate((q_dorsal_class, q_palmar_class))

    # Per-class reduction. Only the first and the last two values returned by
    # reducer() are used: the reduced vectors, the scaler and the fitted model.
    pca_dorsal_vectors, _, _, dorsal_scaler, dorsal_pca = reducer(
        dorsal_vectors, k, frt, get_scaler_model=True
    )
    pca_palmar_vectors, _, _, palmar_scaler, palmar_pca = reducer(
        palmar_vectors, k, frt, get_scaler_model=True
    )

    pred_labels = [
        predict_label(
            sample,
            dorsal_scaler, dorsal_pca, pca_dorsal_vectors,
            palmar_scaler, palmar_pca, pca_palmar_vectors,
        )
        for sample in test_data
    ]
    return accuracy_score(pred_labels, test_labels)

In [230]:
def get_initial_centroid(points, k):
    """Pick up to `k` starting centroids at random from `points`.

    A copy of `points` is shuffled along its first axis with NumPy's global
    random state and its first `k` entries are returned; `points` itself is
    left untouched. If `k` exceeds the number of points, all of them come back.
    """
    pool = points.copy()
    np.random.shuffle(pool)
    return pool[:k]

def get_closest(points, centroids, return_min=False):
    """Measure Euclidean distances between every point and every centroid.

    With ``return_min=False`` (default) the result is, for each point, the
    index of its nearest centroid. With ``return_min=True`` the result is the
    single smallest distance found over all point/centroid pairs.
    """
    # Inserting an axis after the centroid axis lets broadcasting produce one
    # difference vector per (centroid, point) pair.
    offsets = points - centroids[:, np.newaxis]
    distances = np.sqrt((offsets**2).sum(axis=2))

    if return_min:
        return np.min(distances)
    return np.argmin(distances, axis=0)

def get_mean_centroids(points, centroids, closest):
    """Recompute each centroid as the mean of the points assigned to it.

    `closest[i]` is the centroid index assigned to `points[i]`. Centroids
    that received no points are dropped, so the result can have fewer rows
    than `centroids`.
    """
    members_per_cluster = (
        points[closest == idx] for idx in range(centroids.shape[0])
    )
    return np.array(
        [members.mean(axis=0) for members in members_per_cluster if members.size]
    )

def get_final_centroids(points, c, max_iter=1000):
    """Cluster `points` with k-means and return the centroids and assignment.

    Starts from `c` randomly chosen points and alternates between assigning
    every point to its nearest centroid and moving each centroid to the mean
    of its points. Stops once the centroids no longer change or after
    `max_iter` rounds.

    Parameters
    ----------
    points : ndarray
        Data to cluster, one row per point.
    c : int
        Number of initial centroids. Empty clusters are dropped by
        get_mean_centroids(), so fewer centroids may be returned.
    max_iter : int, optional
        Upper bound on the number of update rounds (default 1000).

    Returns
    -------
    (ndarray, ndarray)
        The final centroids and, for each point, the index of its nearest
        centroid among the centroids that are returned.
    """
    centroids = get_initial_centroid(points, c)

    for _ in range(max_iter):
        closest = get_closest(points, centroids)
        new_centroids = get_mean_centroids(points, centroids, closest)
        converged = np.array_equal(centroids, new_centroids)
        centroids = new_centroids
        if converged:
            # Centroids did not move, so `closest` already matches them.
            return centroids, closest

    # Iteration budget exhausted: the last assignment was made against the
    # previous centroids (possibly a different number of them), so recompute
    # it to keep the returned pair consistent.
    return centroids, get_closest(points, centroids)

In [None]:
def test(dorsal_vectors, palmar_vectors, test_data, test_labels, c):
    """Score the nearest-centroid (k-means) classifier.

    Clusters the dorsal and the palmar vectors separately, starting from `c`
    centroids each, then labels every row of `test_data` as 1 (dorsal) when
    its nearest dorsal centroid is strictly closer than its nearest palmar
    centroid, and 0 (palmar) otherwise. Returns the accuracy against
    `test_labels`.
    """
    # Only the centroids are needed; the per-point assignments are discarded.
    dorsal_centroids, _ = get_final_centroids(dorsal_vectors, c)
    palmar_centroids, _ = get_final_centroids(palmar_vectors, c)

    def classify(sample):
        # get_closest() expects a 2-D array, so wrap the sample as one row.
        row = sample.reshape(1, -1)
        to_dorsal = get_closest(row, dorsal_centroids, return_min=True)
        to_palmar = get_closest(row, palmar_centroids, return_min=True)
        return 1 if to_dorsal < to_palmar else 0

    pred_labels = [classify(sample) for sample in test_data]
    return accuracy_score(pred_labels, test_labels)