## Importing Libraries

In [1]:
import numpy as np
import sympy as sp
import pandas as pd
import matplotlib.pyplot as plt

## Defined Classes and Functions

In [None]:
class LDA:
    """Linear Discriminant Analysis used as a supervised dimensionality reducer.

    The projection matrix is built from the leading eigenvectors of
    ``inv(Sw) . Sb`` where ``Sw`` is the within-class scatter matrix and
    ``Sb`` is the between-class scatter matrix.

    Parameters
    ----------
    n : int or float, default 0
        * ``n >= 1``  -> number of discriminant components to keep.
        * ``0 < n < 1`` -> keep the smallest number of leading eigenvalues whose
          cumulative sum reaches the fraction ``n`` of the eigenvalue total.
        * ``n <= 0``  -> same as above with a fraction of 0.99.

    Notes
    -----
    ``fit`` overwrites ``self.d1`` with the number of components that were
    actually selected, so after fitting ``d1`` is always an integer count.
    """

    def __init__(self, n=0):
        self.d1 = n

    def fit(self, X, y):
        """Estimate the scatter matrices and the projection matrix.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric feature matrix.
        y : array-like of shape (n_samples,)
            Class label of every row of ``X``.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the within-class scatter matrix ``Sw`` is singular.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.d = X.shape[1]
        self.classes = np.unique(y)
        self.no_of_classes = len(self.classes)
        # Kept as a list of 2-D arrays: classes generally have different sizes,
        # so stacking them into one ndarray would produce a ragged array.
        self.split_X = [X[y == c] for c in self.classes]

        # Step-1: Finding Mean Vectors for each class
        self.mean_vectors = np.array(
            [X_class.mean(axis=0) for X_class in self.split_X]
        )

        # Step-2: Computing Scatter Matrices
        # Part-A: Within class Scatter Matrix
        # The per-class scatter equals (n_c - 1) * sample covariance; computing it
        # directly avoids NaNs from np.cov when a class has a single sample.
        self.Sw = np.zeros((self.d, self.d))
        covariance_matrices = []
        for X_class, class_mean in zip(self.split_X, self.mean_vectors):
            centered = X_class - class_mean
            scatter = centered.T.dot(centered)
            self.Sw += scatter
            covariance_matrices.append(scatter / max(len(X_class) - 1, 1))
        self.covariance_matrices = np.array(covariance_matrices)
        # Part-B: Between class Scatter Matrix
        self.overall_mean = X.mean(axis=0)
        self.Sb = np.zeros((self.d, self.d))
        for X_class, class_mean in zip(self.split_X, self.mean_vectors):
            diff = (class_mean - self.overall_mean).reshape(self.d, 1)
            self.Sb += len(X_class) * diff.dot(diff.T)

        # Step-3: Finding Eigen Values and Vectors
        eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(self.Sw).dot(self.Sb))
        # inv(Sw).Sb is not symmetric, so eig may hand back a complex dtype with
        # round-off sized imaginary parts; only the real parts are used.
        self.eig_vals = np.real(eig_vals)
        self.eig_vecs = np.real(eig_vecs)

        # Step-4: Sorting Eigen Values and deciding on d'
        ind = np.argsort(self.eig_vals)[::-1]
        self.sorted_eig_vals = self.eig_vals[ind]
        # Eigenvectors are the COLUMNS of eig_vecs, so reorder columns, not rows.
        self.sorted_eig_vecs = self.eig_vecs[:, ind]
        self.total_variance = np.sum(self.sorted_eig_vals)
        if self.d1 < 1:
            fraction = self.d1 if self.d1 > 0 else 0.99
            threshold = fraction * self.total_variance
            cum_variance = 0.0
            count = 0
            # The bound on count protects against running past the last
            # eigenvalue if round-off keeps the running sum below the threshold.
            while count < len(self.sorted_eig_vals) and cum_variance < threshold:
                cum_variance += self.sorted_eig_vals[count]
                count += 1
            self.d1 = count
        else:
            # Accept float counts such as 2.0 and never ask for more than d columns.
            self.d1 = min(int(self.d1), self.d)
        self.selected_eig_values = self.sorted_eig_vals[:self.d1]
        self.final_eig_vecs = self.sorted_eig_vecs[:, :self.d1]

    def transform(self, X):
        """Project ``X`` (n_samples, n_features) onto the selected eigenvectors.

        Returns an array of shape (n_samples, d1). ``fit`` must be called first.
        """
        X1 = np.dot(X, self.final_eig_vecs)
        return X1

## Importing Dataset

In [3]:
# Load the data from 'gender.csv' (path is relative to the working directory).
dataset = pd.read_csv('gender.csv')
# Bare expression: the notebook renders the frame as this cell's output.
dataset

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,1,male,-0.066420,0.151611,0.027740,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,...,0.025989,-0.001087,0.027260,-0.046754,-0.118619,-0.163774,-0.000590,-0.076400,0.107497,0.001567
1,2,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.019530,-0.119905,...,0.044229,-0.023900,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.080610,-0.015930
2,3,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,4,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,...,0.100793,-0.002644,-0.023388,0.029497,-0.139830,-0.119243,0.005306,-0.015100,0.161575,0.062462
4,5,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.076700,0.004966,0.028171,0.026041,0.084135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,796,female,-0.164731,0.064301,0.058630,-0.017420,-0.157600,-0.022536,0.002864,-0.072739,...,0.095115,0.007198,-0.004655,0.023957,-0.170753,-0.136630,0.041614,0.031600,0.019064,0.004384
796,797,female,-0.095308,0.051095,0.092913,-0.101745,-0.083153,-0.028159,0.009090,-0.114513,...,0.056078,0.119846,0.087470,0.017481,-0.096594,-0.084553,0.037709,0.030732,-0.083713,0.064970
797,798,female,-0.202852,0.037039,0.079731,-0.047156,-0.140062,-0.080246,0.057668,-0.122083,...,0.066954,0.035684,-0.023112,-0.030452,-0.154243,-0.188270,0.071086,0.037384,-0.006257,0.039977
798,799,female,-0.088300,0.063530,0.049627,-0.026011,-0.172773,0.086218,0.042710,-0.161852,...,0.039460,0.067547,0.040426,0.028007,-0.154515,-0.127736,0.046967,0.009701,-0.016942,0.048071


## Test Train Split

In [4]:
# Stratified hold-out split: for every distinct label in column 1, the first
# N_TEST_PER_CLASS rows go to the test set and the remaining rows to the
# training set. Pieces are collected in lists and concatenated once, instead of
# growing a DataFrame with pd.concat inside the loop (quadratic copying, and
# concatenating onto an empty DataFrame is deprecated in recent pandas).
N_TEST_PER_CLASS = 10
types = dataset.iloc[:, 1].unique()
test_parts = []
train_parts = []
for t in types:
    type_df = dataset[dataset.iloc[:, 1] == t]
    train_parts.append(type_df.iloc[N_TEST_PER_CLASS:])
    test_parts.append(type_df.iloc[:N_TEST_PER_CLASS])
# pd.concat refuses an empty list, so fall back to an empty frame in that case.
train_df = pd.concat(train_parts) if train_parts else pd.DataFrame()
test_df = pd.concat(test_parts) if test_parts else pd.DataFrame()

In [5]:
# Display the held-out rows (the first 10 rows of each class).
test_df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,1,male,-0.06642,0.151611,0.02774,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,...,0.025989,-0.001087,0.02726,-0.046754,-0.118619,-0.163774,-0.00059,-0.0764,0.107497,0.001567
1,2,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.01953,-0.119905,...,0.044229,-0.0239,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.08061,-0.01593
2,3,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,4,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,...,0.100793,-0.002644,-0.023388,0.029497,-0.13983,-0.119243,0.005306,-0.0151,0.161575,0.062462
4,5,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.0767,0.004966,0.028171,0.026041,0.084135
5,6,male,-0.149119,0.125288,0.142323,-0.009087,-0.031394,-0.123533,0.043598,-0.063999,...,0.060833,0.089529,-0.034872,0.05708,-0.137162,-0.072522,0.052731,-0.14146,0.019018,0.085765
6,7,male,-0.139035,0.073513,-0.00177,-0.034225,-0.10161,0.065105,-0.01442,-0.054993,...,0.081007,-0.002164,0.060377,0.080294,-0.139369,-0.150245,0.078657,0.024194,0.06218,0.036039
7,8,male,-0.074126,-0.000669,0.004166,-0.082413,-0.096091,-0.021992,0.009714,-0.056961,...,0.050497,0.038932,0.02352,-0.09026,-0.147692,-0.008296,0.007609,-0.026687,-0.017523,-0.03831
8,9,male,-0.16622,0.042769,-0.031647,-0.036892,-0.143837,-0.040566,0.042541,-0.122923,...,0.014732,-0.049135,0.08177,-0.027199,-0.096941,-0.094661,0.057797,-0.101063,0.061373,0.062176
9,10,male,-0.18577,0.154008,0.073184,-0.070829,-0.144617,-0.019732,-0.019418,-0.004675,...,0.093317,0.035101,-0.147997,-0.04601,-0.087777,-0.10066,0.03619,0.012158,0.032304,0.085996


In [6]:
# Display the training rows (every row after the first 10 of each class).
train_df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
10,11,male,-0.101760,0.095119,0.022390,0.033455,-0.028316,-0.071314,-0.076263,-0.173371,...,0.103842,0.064531,-0.038534,0.045669,-0.195098,-0.065993,0.086835,0.045227,0.134832,0.053776
11,12,male,-0.126957,0.065444,-0.014750,-0.062769,0.006243,0.033722,-0.069378,-0.109074,...,0.079223,0.102630,0.014118,0.011191,-0.158518,-0.084066,-0.004959,-0.025286,-0.003429,0.057033
12,13,male,0.021787,0.047769,0.031156,-0.036925,-0.125392,0.009113,-0.014069,-0.153379,...,0.057198,0.043197,-0.046054,0.062767,-0.116895,-0.179019,-0.045612,-0.052743,0.034252,0.046343
13,14,male,-0.091019,0.042462,-0.061052,-0.070249,-0.050925,-0.114522,-0.001090,-0.061084,...,0.042027,-0.003301,0.002241,-0.001005,-0.095180,-0.107603,0.031764,-0.026397,0.049204,-0.050450
14,15,male,-0.082929,0.058382,0.008007,-0.010675,-0.099150,-0.102433,0.037710,-0.125727,...,0.037042,-0.006108,-0.022526,-0.046081,-0.123925,-0.124878,-0.028671,-0.026378,0.048825,-0.025185
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,796,female,-0.164731,0.064301,0.058630,-0.017420,-0.157600,-0.022536,0.002864,-0.072739,...,0.095115,0.007198,-0.004655,0.023957,-0.170753,-0.136630,0.041614,0.031600,0.019064,0.004384
796,797,female,-0.095308,0.051095,0.092913,-0.101745,-0.083153,-0.028159,0.009090,-0.114513,...,0.056078,0.119846,0.087470,0.017481,-0.096594,-0.084553,0.037709,0.030732,-0.083713,0.064970
797,798,female,-0.202852,0.037039,0.079731,-0.047156,-0.140062,-0.080246,0.057668,-0.122083,...,0.066954,0.035684,-0.023112,-0.030452,-0.154243,-0.188270,0.071086,0.037384,-0.006257,0.039977
798,799,female,-0.088300,0.063530,0.049627,-0.026011,-0.172773,0.086218,0.042710,-0.161852,...,0.039460,0.067547,0.040426,0.028007,-0.154515,-0.127736,0.046967,0.009701,-0.016942,0.048071


In [7]:
# Column 1 holds the class label; columns from position 2 onward are the features.
# Each split is unpacked into its (features, labels) pair of NumPy arrays.
X_train, y_train = train_df.iloc[:, 2:].values, train_df.iloc[:, 1].values
X_test, y_test = test_df.iloc[:, 2:].values, test_df.iloc[:, 1].values

In [8]:
# Show the training feature matrix (NumPy abbreviates large arrays with "...").
print(X_train)

[[-0.10175994  0.09511936  0.02239008 ...  0.04522717  0.13483205
   0.0537758 ]
 [-0.12695727  0.06544437 -0.01474994 ... -0.02528606 -0.00342875
   0.05703329]
 [ 0.02178704  0.0477692   0.03115616 ... -0.05274343  0.03425189
   0.04634342]
 ...
 [-0.20285167  0.0370395   0.07973114 ...  0.03738441 -0.00625749
   0.03997689]
 [-0.08829999  0.06353012  0.04962703 ...  0.00970074 -0.01694169
   0.04807128]
 [-0.15620135  0.05516458  0.14271647 ... -0.0102984  -0.02885648
   0.0753232 ]]


In [9]:
# Show the test feature matrix (NumPy abbreviates large arrays with "...").
print(X_test)

[[-0.06641996  0.15161145  0.02773961 ... -0.07640016  0.10749723
   0.00156654]
 [-0.03061386  0.04966652  0.00808374 ...  0.0176384   0.08060966
  -0.01592966]
 [-0.09617768  0.06112669  0.03532604 ...  0.01739147  0.057652
   0.08611634]
 ...
 [-0.1029727   0.046464    0.01968378 ... -0.08885815  0.04931188
   0.01900873]
 [-0.13482405  0.0933139   0.10350525 ... -0.1021332   0.01416106
   0.0113144 ]
 [-0.08694977  0.1049448   0.09312473 ... -0.0812363   0.0733347
   0.05688613]]


In [10]:
# Show the training labels while they are still strings (before encoding).
print(y_train)

['male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male

In [11]:
# Show the test labels while they are still strings (before encoding).
print(y_test)

['male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'female' 'female' 'female' 'female' 'female' 'female' 'female' 'female'
 'female' 'female']


## Encoding the Dependent Variable

In [13]:
from sklearn.preprocessing import LabelEncoder
# Learn the label -> integer mapping from the training labels only, then reuse
# that same mapping for the test labels. Calling fit_transform on the test set
# would re-fit the encoder, and the mapping could differ from the training one
# whenever the test set does not contain exactly the same set of classes.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [14]:
# Show the integer-encoded test labels.
print(y_test)

[1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0]


In [15]:
# Show the integer-encoded training labels.
print(y_train)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

## LDA

In [None]:
# Default n=0: fit() picks the number of discriminant components itself, keeping
# leading eigenvalues until their running sum reaches 99% of the eigenvalue total.
lda = LDA()
lda.fit(X_train, y_train)