In [1]:

import numpy as np
from numpy.linalg import eig

In [76]:


def _prep_vector(d):
    return [np.array([i]).transpose() for i in d]


def projection_vector(data_vectors, d_sum):
    
    S_w = None
    m_diff = None
    for d in data_vectors:
        # mean vector; track mean difference        
        vm = sum(d)/len(d)
        if m_diff is None:
            m_diff = vm
        else:
            m_diff -= vm
        
        # difference between x_k and mean vector            
        xk = [i-vm for i in d]
        # weighted vector
        S_k = np.sum([i.dot(i.T) for i in xk], 0)
        if S_w is None:
            S_w = S_k
        else:
            S_w += S_k

    print(S_w)
    # calculate eigenvalue/vectors    
    Sb = np.dot(m_diff, m_diff.T)    
    e_val, e_vec = eig(np.dot(S_w.T, Sb))
    print(e_val)

    # dot prod of original data vectors and weighted
    w = np.amax(e_vec, 0)    
    return d_sum.dot(w)


def classify(d1, d2):
    
    d_sum = np.array(d1+d2)    
    data_vectors = [_prep_vector(d) for d in [d1, d2]]
    
    projection = projection_vector(data_vectors, d_sum)
    n = len(projection)

    class2_m = np.mean(projection)
    c2_proj = [((i - class2_m)**2)/n for i in projection]
    class2_sd = np.sum(c2_proj)

    den = np.sum([np.abs(x-1) for x in projection])
    class1_m = -n/(2*den)
    c1_proj = [np.abs((i - class1_m)**2-1) for i in projection]
    class1_d = -n/2*(np.sum(c1_proj))
    
    return ((class1_m, class1_d), (class2_m, class2_sd))



In [74]:

# training samples D = {D1,D2}

# d1 = [[12,7,3],[8,10,7],[10,11,9],[7,12,13],[11,9,10]]
# d2 = [[1,4,5],[4,6,6],[1,7,5],[2,8,7],[3,2,5]]

d1 = [[1,2], [-3,-1], [4,5], [-1,1]]
d2 = [[0,-2], [5,2], [-1,-4], [3,1]]


In [55]:

d = _prep_vector(d1+d2)
m = sum(d)/len(d)

xk_m = np.subtract(d, m)
S_k = [i.dot(i.T) for i in xk_m]
S_w = np.sum(S_k, 0)

e_val, e_vec = eig(S_w)
e_top = np.amax(e_vec, 0)

ak = np.dot(e_top.T, xk_m)
pca_classifier = m.T + ak*e_top

pca_classifier


array([[ 7.14168837,  8.64047176,  8.05617319],
       [ 8.22574945,  9.54885987,  8.97826948],
       [10.79366415, 11.70064191, 11.16252336],
       [11.495351  , 12.28861984, 11.75937428],
       [10.89249447, 11.7834567 , 11.24658789],
       [ 0.45894378,  3.04067135,  2.371868  ],
       [ 3.58543212,  5.66050972,  5.0312415 ],
       [ 1.8810962 ,  4.23236282,  3.5815429 ],
       [ 3.88328351,  5.91009405,  5.28459223],
       [ 0.64229695,  3.19431199,  2.52782718]])

In [77]:

weighted_classes = classify(d1, d2)
for c in weighted_classes:
    print(c)
    

[[49.5 44.5]
 [44.5 41.5]]
[ 0. 37.]
(np.float64(-0.17421713457678437), np.float64(-264.26447294584693))
(np.float64(-0.8680491460208003), np.float64(8.526946503261474))


In [64]:

# d = [_prep_vector(i) for i in [d1, d2]]
# m = [sum(i)/len(i) for i in d]
d = _prep_vector(d1+d2)
m = sum(d)/len(d)

# # mean vector
# m1, m2 = sum(d1)/len(d1), sum(d2)/len(d2)
# difference between x_k and mean vector
xk_m = np.subtract(d, m)
# xk_m1 = [i-m1 for i in d1]
# xk_m2 = [i-m2 for i in d2]

Sk = [np.sum([i.dot(i.T) for i in x],0) for x in xk_m]
# Sw = Sk(xk_m1) + Sk(xk_m2)
Sw = sum(Sk)

md = m[0] - m[1]
Sb = np.dot(md,md.T)

e_val, e_vec = eig(np.dot(Sw.T,Sb))
print(e_val)


LinAlgError: 0-dimensional array given. Array must be at least two-dimensional

In [None]:

# d = [_prep_vector(i) for i in [d1, d2]]
# m = [sum(i)/len(i) for i in d]

d_train = [d1, d2]
data = [_prep_vector(d) for d in d_train]
m = [sum(d)/len(d) for d in d_train]

# # mean vector
# m1, m2 = sum(d1)/len(d1), sum(d2)/len(d2)
# difference between x_k and mean vector
xk_m = [[i-mk for i in d] for d, mk in zip(data, m)]
# xk_m1 = [i-m1 for i in d1]
# xk_m2 = [i-m2 for i in d2]

Sk = [np.sum([i.dot(i.T) for i in x],0) for x in xk_m]
# Sw = Sk(xk_m1) + Sk(xk_m2)
Sw = sum(Sk)

md = m[0] - m[1]
Sb = np.dot(md,md.T)

e_val, e_vec = eig(np.dot(Sw.T,Sb))
print(e_val)


ValueError: shapes (1,2,1) and (1,2,1) not aligned: 1 (dim 2) != 2 (dim 1)

In [67]:

## ORIGINAL ##

d_original = np.array(d1+d2)

d = lambda x: [np.array([i]).transpose() for i in x]

d1 = d(d1)
d2 = d(d2)

# mean vector
m1, m2 = sum(d1)/len(d1), sum(d2)/len(d2)
# difference between x_k and mean vector
xk_m1 = [i-m1 for i in d1]
xk_m2 = [i-m2 for i in d2]

Sk = lambda x: np.sum([i.dot(i.T) for i in x],0)
Sw = Sk(xk_m1) + Sk(xk_m2)
print(Sw)

md = m1 - m2
Sb = np.dot(md,md.T)

e_val, e_vec = eig(np.dot(Sw.T,Sb))
print(e_val)
w = np.amax(e_vec,0)

projection = d_original.dot(w)
n = len(projection)

class2_m = np.mean(projection)
class2_sd = np.sum([((i - class2_m)**2)/n for i in projection])

den = np.sum([np.abs(x-1) for x in projection])
class1_m = -n/(2*den)
class1_d = -n/2*(np.sum([np.abs((i - class1_m)**2-1) for i in projection]))

print((class1_m, class1_d), (class2_m, class2_sd))


[[49.5 44.5]
 [44.5 41.5]]
[ 0. 37.]
(np.float64(-0.17421713457678437), np.float64(-264.26447294584693)) (np.float64(-0.8680491460208003), np.float64(8.526946503261474))
