In [50]:
import warnings
warnings.filterwarnings("ignore")

In [71]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import pandas as pd

In [72]:
df = pd.read_excel("demo_LDA_working.xlsx", names=['X1','X2','y'], sheet_name='Sheet1')
df

Unnamed: 0,X1,X2,y
0,1,2,1
1,2,1,1
2,1,1,1
3,3,4,1
4,4,3,2
5,4,4,2
6,1,5,2


In [73]:
m1 = df[df.y==1][['X1','X2']].mean()
n1 = df[df.y==1].shape[0]
m2 = df[df.y==2][['X1','X2']].mean()
n2 = df[df.y==2].shape[0]
m = df[['X1','X2']].mean()
n = df.shape[0]

In [74]:
prior1 = n1/n # prior for class 1
prior2 = n2/n # prior for class 2

In [75]:
# compute centered feature data
df['X1c'] = np.zeros(df.shape[0])
df['X2c'] = np.zeros(df.shape[0])

In [76]:
for i in np.arange(df.shape[0]):
    if df.y[i] == 1:
        df.loc[i,['X1c','X2c']] = list(df.loc[i,['X1','X2']] - m1)
    else:
        df.loc[i,['X1c','X2c']] = list(df.loc[i,['X1','X2']] - m2)

In [77]:
k = 2 # features
C = 1/(n-k) * np.matrix(df[['X1c','X2c']]).T @ np.matrix(df[['X1c','X2c']])
C

matrix([[ 1.75000000e+00, -5.55111512e-17],
        [ 5.55111512e-17,  1.60000000e+00]])

In [78]:
InvC = C.I
InvC

matrix([[ 5.71428571e-01,  1.98254112e-17],
        [-1.98254112e-17,  6.25000000e-01]])

In [79]:
# computation of likelihood for each class for each row
term1 = 1/(2*np.pi * np.sqrt(np.linalg.det(C)))
term1

0.0951132706502103

In [80]:
# for class 1
df['f1'] = np.zeros(df.shape[0])

In [81]:
# for class 1
df['f1'] = np.zeros(df.shape[0])
for i in np.arange(df.shape[0]):
    df.loc[i,['X1c','X2c']] = list(df.loc[i,['X1','X2']] - m1)
    df.f1[i]=term1 * np.exp(-0.5*(np.matrix(df.loc[i,['X1c','X2c']]) @ InvC) @ np.matrix(df.loc[i,['X1c','X2c']]).T)

In [82]:
# for class 2
df['f2'] = np.zeros(df.shape[0])
for i in np.arange(df.shape[0]):
    df.loc[i,['X1c','X2c']] = list(df.loc[i,['X1','X2']] - m2)
    df.f2[i]=term1 * np.exp(-0.5*(np.matrix(df.loc[i,['X1c','X2c']]) @ InvC) @ np.matrix(df.loc[i,['X1c','X2c']]).T)
df

Unnamed: 0,X1,X2,y,X1c,X2c,f1,f2
0,1,2,1,-2.0,-2.0,0.080992,0.00869
1,2,1,1,-1.0,-3.0,0.068355,0.004292
2,1,1,1,-2.0,-3.0,0.059255,0.001822
3,3,4,1,0.0,0.0,0.017438,0.095113
4,4,3,2,1.0,-1.0,0.016381,0.052293
5,4,4,2,1.0,0.0,0.006415,0.071475
6,1,5,2,-2.0,1.0,0.004864,0.022192


In [83]:
# computation of the Bayes Theorem numerator for each class in each row
df['p1f1'] = prior1*df['f1']
df['p2f2'] = prior2*df['f2']

In [84]:
# compute posterior probability for each row, each class
df['postr_class1'] = df['p1f1'] / (df['p1f1'] + df['p2f2'])
df['postr_class2'] = df['p2f2'] / (df['p1f1'] + df['p2f2'])
df

Unnamed: 0,X1,X2,y,X1c,X2c,f1,f2,p1f1,p2f2,postr_class1,postr_class2
0,1,2,1,-2.0,-2.0,0.080992,0.00869,0.046281,0.003724,0.92552,0.07448
1,2,1,1,-1.0,-3.0,0.068355,0.004292,0.03906,0.00184,0.955021,0.044979
2,1,1,1,-2.0,-3.0,0.059255,0.001822,0.03386,0.000781,0.977464,0.022536
3,3,4,1,0.0,0.0,0.017438,0.095113,0.009964,0.040763,0.196432,0.803568
4,4,3,2,1.0,-1.0,0.016381,0.052293,0.009361,0.022411,0.294624,0.705376
5,4,4,2,1.0,0.0,0.006415,0.071475,0.003666,0.030632,0.106878,0.893122
6,1,5,2,-2.0,1.0,0.004864,0.022192,0.002779,0.009511,0.226151,0.773849


In [85]:
# verification
X = df[['X1','X2']]
y = df['y']

In [86]:
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
clf.predict(X)

array([1, 1, 1, 2, 2, 2, 2], dtype=int64)

In [87]:
clf.predict_proba(X)

array([[0.92551999, 0.07448001],
       [0.95502107, 0.04497893],
       [0.9774635 , 0.0225365 ],
       [0.19643184, 0.80356816],
       [0.29462375, 0.70537625],
       [0.10687828, 0.89312172],
       [0.22615079, 0.77384921]])

In [88]:
df[['postr_class1',  'postr_class2']]

Unnamed: 0,postr_class1,postr_class2
0,0.92552,0.07448
1,0.955021,0.044979
2,0.977464,0.022536
3,0.196432,0.803568
4,0.294624,0.705376
5,0.106878,0.893122
6,0.226151,0.773849


In [89]:
# log p(y = 1 | x) - log p(y = 0 | x)
clf.decision_function(X)

array([-2.51982493, -3.05553922, -3.76982493,  1.4087465 ,  0.87303221,
        2.12303221,  1.23017507])

In [90]:
np.log(df.postr_class2) - np.log(df.postr_class1)

0   -2.519825
1   -3.055539
2   -3.769825
3    1.408746
4    0.873032
5    2.123032
6    1.230175
dtype: float64