# CH 4 Clustering

In [9]:
def group_assignment(data,centroids):
    grouping_vec_c = np.zeros(len(data))
    for i in range(len(data)):
        dist = np.zeros(len(centroids))
        for j in range(len(centroids)):
            dist[j] = np.linalg.norm(data[i] - centroids[j])
        min_dist = min(dist)
        for j in range(len(centroids)):
            if min_dist == dist[j]:
                grouping_vec_c[i] = j+1
    return grouping_vec_c

def update_centroid(data, grouping, centroids):
    new_centroids = [];
    for i in range(len(centroids)):
        cent = np.zeros(len(data[0]))
        count = 0
        for j in range(len(data)):
            if grouping[j] == (i+1):
                cent = cent+data[j]
                count += 1
        group_average = cent/count
        new_centroids.append(group_average)
    return new_centroids

def clustering_objective(data, grouping, centroids):
    J_obj = 0
    for i in range(len(data)):
        for j in range(len(centroids)):
            if grouping[i] == (j+1):
                J_obj += np.linalg.norm(data[i] - centroids[j])**2
    J_obj = J_obj/len(data)
    return J_obj


def Kmeans_alg(data, centroids):
    iteration = 0
    J_obj_vector = []
    Stop = False
    while Stop == False:
        grouping = group_assignment(data, centroids)
        new_centroids = update_centroid(data, grouping, centroids)
        J_obj = clustering_objective(data, grouping, new_centroids)
        J_obj_vector.append(J_obj)
        iteration += 1
        if np.linalg.norm(np.array(new_centroids) - np.array(centroids)) < 1e-6:
            Stop = True
        else:
            centroids = new_centroids
    return new_centroids, grouping, J_obj_vector, iteration

## Examples Of The Algorithm (K Means)

In [10]:
v = np.array([[5,3,2,1],[2,4,2,4],[1,5,5,1],[3,2,3,2]])
c = np.array([[5,3,2,1],[2,4,2,4],[1,5,5,1]])
Kmeans_alg(v,c)

([array([4. , 2.5, 2.5, 1.5]),
  array([2., 4., 2., 4.]),
  array([1., 5., 5., 1.])],
 array([1., 2., 3., 1.]),
 [0.8750000000000001, 0.8750000000000001],
 2)

In [6]:
from sklearn.cluster import KMeans
import numpy as np
kmeans = KMeans(n_clusters=4, random_state=0).fit(X)
labels = kmeans.labels_
group_representative = kmeans.cluster_centers_
J_clust = kmeans.inertia_

In [5]:
X = np.concatenate([[0.3*np.random.randn(2) for i in range(100)],
[[1,1] + 0.3*np.random.randn(2) for i in range(100)], [[1,-1]
+ 0.3* np.random.randn(2) for i in range(100)]])

In [7]:
print(kmeans)

KMeans(n_clusters=4, random_state=0)


In [9]:
group_representative

array([[ 1.03091461,  1.0811424 ],
       [ 0.70626261, -0.67824375],
       [-0.01607847,  0.01072911],
       [ 1.15041416, -1.03575898]])

In [10]:
J_clust

45.22914480150691

# CH 5 Linear Independence

In [3]:
import numpy as np
def gram_schmidt(a):
    q = []
    for i in range(len(a)):
        #orthogonalization
        q_tilde = a[i]
        for j in range(len(q)):
            q_tilde = q_tilde - (q[j] @ a[i])*q[j]
        #Test for dependennce
        if np.sqrt(sum(q_tilde**2)) <= 1e-10:
            print('Vectors are linearly dependent.')
            print('GS algorithm terminates at iteration ', i+1)
            return q
        #Normalization
        else:
            q_tilde = q_tilde / np.sqrt(sum(q_tilde**2))
            q.append(q_tilde)
    print('Vectors are linearly independent.')
    return q

## Example Of The Algorithm (Gram Schmidt)

In [4]:
q = np.array([[1,-2,1,-1],[1,1,3,-1],[-3,7,1,3]])
print(gram_schmidt(q), "\n")

#Test orthonormality
print('Norm of q[0] :', (sum(q[0]**2))**0.5, "\n")
print('Inner product of q[0] and q[1] :', q[0] @ q[1], "\n")
print('Inner product of q[0] and q[2] :', q[0] @ q[2], "\n")
print('Norm of q[1] :', (sum(q[1]**2))**0.5, "\n")
print('Inner product of q[1] and q[2] :', q[1] @ q[2], "\n")
print('Norm of q[2] :', (sum(q[2]**2))**0.5, "\n")

Vectors are linearly independent.
[array([ 0.37796447, -0.75592895,  0.37796447, -0.37796447]), array([ 0.17457431,  0.56736651,  0.7855844 , -0.17457431]), array([-0.57154761, -0.32659863,  0.48989795,  0.57154761])] 

Norm of q[0] : 2.6457513110645907 

Inner product of q[0] and q[1] : 3 

Inner product of q[0] and q[2] : -19 

Norm of q[1] : 3.4641016151377544 

Inner product of q[1] and q[2] : 4 

Norm of q[2] : 8.246211251235321 



In [5]:
q = np.array([[1,-1,1,-1],[1,1,3,-1],[-3,7,1,3]])
print(gram_schmidt(q))

Vectors are linearly dependent.
GS algorithm terminates at iteration  3
[array([ 0.5, -0.5,  0.5, -0.5]), array([0.        , 0.70710678, 0.70710678, 0.        ])]
