In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv("features.csv")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,question_id,correctly_answered,incorrectly_answered,not_answered,avg_marks_correct,avg_marks_incorrect,avg_marks_na,f1,f2,perc_corr,perc_na,perc_incorr
0,0,1,144,91,34,30.4167,17.7253,24.6855,0.007733,0.278695,25,6,1
1,1,2,114,88,53,29.8246,15.0341,19.9795,0.006053,0.2082,13,0,2
2,2,3,67,132,48,31.1045,21.3409,26.5354,0.003645,0.13186,11,11,12
3,3,4,146,65,54,34.8288,15.3692,22.0449,0.0085,0.340585,41,6,2
4,4,5,82,97,63,31.6951,21.3093,27.0053,0.004511,0.178583,21,18,8


In [4]:
df.drop("Unnamed: 0",axis=1,inplace=True)

In [5]:
col_vals = list(df.columns.values)

In [6]:
ss = StandardScaler()
data = ss.fit_transform(np.array(df[col_vals[1:12]]))
data[0]

array([ 0.14981564, -0.39007751,  0.08718786, -0.27251688, -0.09923819,
        0.7006844 ,  0.1194738 ,  0.09623047,  0.19515755,  0.1560662 ,
       -0.78863677])

In [7]:
n_clusters = 3
features = len(data[0])
total_data = len(data)
W = []

In [8]:
def get_weights():
    '''
    function for initialize random values in the weight vectors for the neural network to be used.
    uses the no of features to initialize a vector.
    '''
    y = np.random.random()*(2.0/np.sqrt(total_data))
    return 0.5 - (1/np.sqrt(total_data)) + y 

In [9]:
'''
Using the above function to initialize the weight vectors
'''
for i in range(n_clusters):
    W.append(list())
    for j in range(features):
        W[i].append(get_weights() * 0.5)

In [10]:
W

[[0.24563295116255257,
  0.2522009839267108,
  0.25183118388529535,
  0.2572810678416092,
  0.23845388788555577,
  0.25375327917333274,
  0.25005915031013415,
  0.24651535465908425,
  0.2414807867255396,
  0.2568018318813328,
  0.25805478724640785],
 [0.23880605658477908,
  0.2539657614633512,
  0.24900854682473988,
  0.2399875475104729,
  0.23979217769911196,
  0.24593556061782262,
  0.24401413387580723,
  0.24606344923534412,
  0.26034406285882705,
  0.2548219681288769,
  0.26139519167844427],
 [0.25120009956036116,
  0.26095748177747136,
  0.25275386168379793,
  0.25908746636683705,
  0.25023061498659005,
  0.24144279013611647,
  0.2548427172176907,
  0.2420453504535711,
  0.24598346554546327,
  0.24832362272796762,
  0.2552806875216018]]

In [11]:
def compute_distance(w,x):
    '''
    function for computing the distance between the x(data) and w(Weight) vector
    takes in two arguments 
    w: weights
    x: features
    '''
    distance=0
    for i in range(len(w)):
        distance = distance + (w[i] - x[i])*(w[i] - x[i])
    distance = np.sqrt(distance)
    return distance

In [12]:
def find_closest_to_x(W,x):
    '''
    function to calculate the closest x vectors to the w vectors
    takes in two arguments
    w: weights
    x: features
    '''
    w = W[0]
    dist = compute_distance(w,x)
    i = 0
    i_n = i
    for w_ in W:
        if compute_distance(w_,x)<dist:
            dist = compute_distance(w_, x)
            w = w_
            i_n = i
        i = i + 1
    return (w,i_n)

In [13]:
la = 0.3    # λ coefficient
dla = 0.05  # Δλ

while la >= 0:
    for k in range(10):
        for x in data:
            wm = find_closest_to_x(W, x)[0]
            for i in range(len(wm)):
                wm[i] = wm[i] + la * (x[i] - wm[i]) 

    la = la - dla

In [14]:
W

[[-1.064214165140594,
  1.3853163529358439,
  0.0016766424508773544,
  0.5789122575773908,
  1.1144445766811,
  0.2167472313380367,
  -1.0589362186060067,
  -1.0121577286544032,
  -0.7753649203576659,
  -0.038092470427336045,
  1.2257182484992892],
 [-0.3528971459336031,
  0.42575321877142136,
  -0.03148088342443953,
  0.9255331810582358,
  0.19833068225758907,
  0.24631345706492322,
  -0.2516682727907761,
  -0.07993904397014939,
  0.35668636402892173,
  -0.06816882496000091,
  -0.11670170798689698],
 [0.8651374213098317,
  -0.5185625226167399,
  -0.6773644354513707,
  -0.1316687152272904,
  -0.3401827524700091,
  -0.6480909559681056,
  0.8714921376445651,
  0.8483295396339349,
  0.6746099659062161,
  -0.6169105319399472,
  -0.5785433025130824]]