In [232]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.metrics import confusion_matrix

In [233]:
# Load the glass dataset from a CSV file located in the working directory.
data = pd.read_csv('glass.csv')
# Show the frame as the cell output.
# NOTE(review): the rendered preview lists an 'Unnamed: 0' column. If the CSV really
# stores a saved row index there, it is later kept in X as a feature — confirm, and
# consider reading the file with index_col=0 if so.
data

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [234]:
# Feature matrix: every column of the frame except the 'Type' label.
X = data.drop(columns=['Type'])
# Target vector: the 'Type' label column.
Y = data['Type']

In [238]:
class LinearDiscriminantAnalysis:
    """Linear discriminant analysis classifier with one shared covariance matrix.

    For each class ``c`` a linear score ``beta_c . x + gamma_c`` is computed and
    the class with the highest score is predicted, where

    * ``beta_c  = covariance^-1 @ mu_c``
    * ``gamma_c = -0.5 * mu_c . beta_c + log(prior_c)``

    Attributes (all ``None`` until :meth:`fit` is called):
        classes_:    sorted unique class labels, shape (n_classes,).
        priors_:     class frequencies used as prior probabilities, shape (n_classes,).
        covariance_: prior-weighted sum of per-class covariance matrices,
                     shape (n_features, n_features).
        mu_c:        per-class feature means as an ndarray,
                     shape (n_classes, n_features); rows follow ``classes_``.
        beta:        per-class linear coefficients, shape (n_classes, n_features).
        gamma:       per-class intercepts, shape (n_classes,).
    """

    def __init__(self):
        self.classes_ = None     # Class labels
        self.priors_ = None      # Class frequencies (prior probability of each class)
        self.covariance_ = None  # Shared covariance matrix
        self.mu_c = None         # Per-class feature means
        self.beta = None         # Linear coefficients of the discriminant functions
        self.gamma = None        # Intercepts of the discriminant functions

    def validate_data(self, X, y=None):
        """Convert pandas (or array-like) inputs to NumPy arrays.

        Args:
            X: feature matrix as a DataFrame, ndarray or nested sequence.
            y: optional labels as a Series, DataFrame, ndarray or sequence.

        Returns:
            ``X`` as an ndarray when ``y`` is None, otherwise the tuple ``(X, y)``
            with both converted to ndarrays.
        """
        X = X.values if isinstance(X, DataFrame) else np.asarray(X)
        if y is None:
            return X
        y = y.values if isinstance(y, (DataFrame, Series)) else np.asarray(y)
        return X, y

    def fit(self, X, Y):
        """Estimate priors, class means, shared covariance and linear coefficients.

        Everything is computed from the ``X`` and ``Y`` arguments only, so the
        estimator does not depend on any notebook-level variable.

        Args:
            X: feature matrix, shape (n_samples, n_features).
            Y: class labels, one per row of ``X``.

        Returns:
            The fitted estimator (``self``).

        Raises:
            ValueError: if ``X`` and ``Y`` have different numbers of samples, or a
                class has fewer than two samples.
            numpy.linalg.LinAlgError: if the shared covariance matrix is singular.
        """
        X, y = self.validate_data(X, Y)
        y = np.ravel(y)  # a single-column DataFrame arrives as (n, 1)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and Y must contain the same number of samples, got "
                f"{X.shape[0]} and {y.shape[0]}"
            )

        self.classes_, counts = np.unique(y, return_counts=True)
        self.priors_ = counts / counts.sum()
        # np.unique returns sorted labels, so the rows of mu_c line up with classes_.
        self.mu_c = np.vstack([X[y == cls].mean(axis=0) for cls in self.classes_])
        self.covariance_ = self._get_weighted_covariance(X, y)
        # Solve covariance @ beta_c = mu_c instead of forming an explicit inverse.
        self.beta = np.linalg.solve(self.covariance_, self.mu_c.T).T
        self.gamma = -0.5 * np.sum(self.mu_c * self.beta, axis=1) + np.log(self.priors_)
        return self

    def _get_weighted_covariance(self, X, Y):
        """Return the prior-weighted sum of per-class covariance matrices.

        Relies on ``self.classes_`` and ``self.priors_`` having been set already.
        Each per-class covariance uses ``np.cov`` with its default normalisation.

        Raises:
            ValueError: if a class has fewer than two samples, for which the
                covariance estimate is undefined (it would otherwise be NaN).
        """
        n_features = X.shape[1]
        covariance_ = np.zeros(shape=(n_features, n_features))
        for prior, cls in zip(self.priors_, self.classes_):
            X_cls = X[Y == cls]
            if X_cls.shape[0] < 2:
                raise ValueError(
                    f"class {cls!r} has fewer than 2 samples; cannot estimate covariance"
                )
            # atleast_2d keeps the single-feature case (0-d result) well-shaped.
            covariance_ += prior * np.atleast_2d(np.cov(X_cls, rowvar=False))
        return covariance_

    def decision_function(self, X):
        """Return the linear discriminant scores, shape (n_samples, n_classes)."""
        X = self.validate_data(X)
        return X @ self.beta.T + self.gamma

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Args:
            X: feature matrix, shape (n_samples, n_features).

        Returns:
            A list with one predicted label per row.
        """
        scores = self.decision_function(X)
        # Softmax is monotone, so the argmax of the raw scores selects the same class.
        return list(self.classes_[np.argmax(scores, axis=1)])

    def softmax(self, x):
        """Numerically stable softmax of a 1-D score vector.

        Kept for callers that want normalised scores; ``predict`` does not need it.
        """
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)

In [239]:
# Fit the LDA classifier defined in this notebook on the full dataset, then predict
# labels for the same rows — the predictions are in-sample (no held-out split here).
lda = LinearDiscriminantAnalysis()
lda.fit(X,Y)
lda_predict = lda.predict(X)

In [240]:
# Confusion matrix of the true labels (Y) against the LDA predictions; scikit-learn
# places true classes on rows and predicted classes on columns.
confusion_matrix(Y, lda_predict)

array([[52, 15,  3,  0,  0,  0],
       [17, 54,  0,  3,  2,  0],
       [11,  6,  0,  0,  0,  0],
       [ 0,  5,  0,  7,  0,  1],
       [ 1,  2,  0,  0,  6,  0],
       [ 1,  2,  0,  1,  0, 25]], dtype=int64)