In [1]:
import pandas as pd
import numpy as np


In [2]:
class LinearDiscriminant(object):
    """Linear discriminant analysis in its scatter-matrix formulation.

    ``fit`` builds the within-class scatter matrix S_W and the between-class
    scatter matrix S_B of a labelled table and eigendecomposes
    ``inv(S_W) . S_B``.  Every result is printed, and the same arrays are
    kept on the instance so they can be reused after the call.

    Attributes set by ``fit``:
        within_class_scatter_:  S_W, shape (n_features, n_features).
        between_class_scatter_: S_B, shape (n_features, n_features).
        eigen_values_:  eigenvalues of inv(S_W) . S_B, in the (unsorted)
            order returned by ``np.linalg.eig``.
        eigen_vectors_: matrix whose column ``i`` is the eigenvector that
            belongs to ``eigen_values_[i]``.
    """

    def fit(self, data,
            feature_columns=("sepal_length", "sepal_width",
                             "petal_height", "petal_width"),
            label_column="category"):
        """Compute and print the scatter matrices and their eigenpairs.

        Args:
            data: pandas DataFrame holding the feature columns and the
                label column.
            feature_columns: names of the numeric columns to use as features.
                Defaults to the four Iris measurement columns.
            label_column: name of the column holding the class label.

        Returns:
            None.  Results are printed and stored on the instance (see the
            class docstring).

        Raises:
            KeyError: if a requested column is missing from ``data``.
            numpy.linalg.LinAlgError: if S_W is singular and cannot be
                inverted.
        """
        feature_columns = list(feature_columns)

        # Only the feature columns are read, so the non-numeric label column
        # never takes part in a mean (pandas >= 2.0 raises TypeError for that).
        # Casting to float keeps integer-valued columns from truncating.
        X_vector = data[feature_columns].values.astype(float)
        feature_count = X_vector.shape[1]
        mean_of_all_samples = X_vector.mean(axis=0)

        withinClassScatter = np.zeros((feature_count, feature_count))
        betweenClassScatter = np.zeros((feature_count, feature_count))

        # GroupBy.indices maps each class label to the positional row numbers
        # of its samples, which index straight into X_vector.
        rows_per_class = data.groupby(label_column).indices
        for row_positions in rows_per_class.values():
            class_samples = X_vector[row_positions]
            class_mean = class_samples.mean(axis=0)

            # Sum over the class of (x - m)(x - m)^T, done as one product.
            centered = class_samples - class_mean
            withinClassScatter += centered.T.dot(centered)

            # N_c * (m_c - m)(m_c - m)^T
            mean_vec_diff = (class_mean - mean_of_all_samples).reshape(feature_count, 1)
            betweenClassScatter += len(row_positions) * mean_vec_diff.dot(mean_vec_diff.T)

        print ("Within-class scatter matrix S_W \n",withinClassScatter)
        print('between-class Scatter Matrix:\n', betweenClassScatter)

        # Solving the eigenvalue problem for S_W^-1 S_B gives the linear discriminants.
        eigen_values, eigen_vectors = np.linalg.eig(
            np.linalg.inv(withinClassScatter).dot(betweenClassScatter))
        print("\n")

        for position, value in enumerate(eigen_values):
            eigvec_sc = eigen_vectors[:, position].reshape(feature_count, 1)
            print("Eigenvector {}: \n{}".format(position + 1, eigvec_sc))
            print("Eigenvalue {:}: {:.2e}\n".format(position + 1, value))

        # Keep the results so callers can use them after fitting.
        self.within_class_scatter_ = withinClassScatter
        self.between_class_scatter_ = betweenClassScatter
        self.eigen_values_ = eigen_values
        self.eigen_vectors_ = eigen_vectors
            

In [3]:
# Load the Iris table.  Because `names` is supplied, pandas reads the file as
# header-less and labels the five comma-separated fields as listed below.
# The four measurements are in cm:
#   sepal length, sepal width, petal length, petal width
# (the petal-length field is stored under the column name "petal_height").
data = pd.read_csv(
    "iris.data",
    sep=",",
    names=["sepal_length", "sepal_width", "petal_height", "petal_width", "category"],
)

# Run the analysis on the loaded table.
linearDiscriminant = LinearDiscriminant()
linearDiscriminant.fit(data)


Within-class scatter matrix S_W 
 [[38.9562 13.683  24.614   5.6556]
 [13.683  17.035   8.12    4.9132]
 [24.614   8.12   27.22    6.2536]
 [ 5.6556  4.9132  6.2536  6.1756]]
between-class Scatter Matrix:
 [[ 63.21213333 -19.534      165.16466667  71.36306667]
 [-19.534       10.9776     -56.0552     -22.4924    ]
 [165.16466667 -56.0552     436.64373333 186.90813333]
 [ 71.36306667 -22.4924     186.90813333  80.60413333]]


Eigenvector 1: 
[[ 0.20490976]
 [ 0.38714331]
 [-0.54648218]
 [-0.71378517]]
Eigenvalue 1: 3.23e+01

Eigenvector 2: 
[[-0.00898234]
 [-0.58899857]
 [ 0.25428655]
 [-0.76703217]]
Eigenvalue 2: 2.78e-01

Eigenvector 3: 
[[-0.63708194]
 [-0.02277869]
 [-0.08950882]
 [ 0.76524238]]
Eigenvalue 3: -3.91e-15

Eigenvector 4: 
[[ 0.29549455]
 [-0.41592018]
 [-0.47272235]
 [ 0.71848935]]
Eigenvalue 4: 5.01e-15

