### Linear Discriminant Analysis is a Dimensionality Reduction method. 
- The idea of LDA is to reduce the total columns present in the data
- Same like PCA, LDA also reduces the columns by deriving a new set columns from the available data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
data = sns.load_dataset('iris')
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Perform Encoding on species column

In [3]:
data['species'].value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

In [4]:
dic = {'setosa' : 0, 'versicolor' : 1, 'virginica' : 2}
data['species'] = data['species'].replace(dic)
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## Seperate X and y

In [5]:
X = data.drop('species', axis = 1)
y = data['species']

## Split the data into train and test set

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [7]:
X_train.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
61,5.9,3.0,4.2,1.5
92,5.8,2.6,4.0,1.2
112,6.8,3.0,5.5,2.1
2,4.7,3.2,1.3,0.2
141,6.9,3.1,5.1,2.3


## Apply LDA on the train set

In [9]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components = 2)  ## n_components = 2 indicates derive 2 new columns from the existing 4 columns
lda

In [11]:
lda_data = lda.fit_transform(X_train, y_train)
lda_data

array([[-1.81095809e+00, -2.47729664e-01],
       [-1.10642015e+00,  1.21790315e+00],
       [-5.59012534e+00, -7.45515039e-01],
       [ 7.75102575e+00,  1.77607041e-01],
       [-4.89465024e+00, -1.95682246e+00],
       [ 6.64564592e+00, -1.30207879e+00],
       [ 8.72658311e+00, -7.74168329e-01],
       [-1.03248089e+00,  2.63816399e+00],
       [-5.13691352e+00,  1.61493134e-01],
       [-6.93678105e+00, -2.24692425e+00],
       [-4.66760411e+00,  2.20622424e+00],
       [-6.25512478e+00,  1.45396902e+00],
       [-9.51836256e-01,  1.63848443e+00],
       [-6.52948089e+00, -8.61172190e-01],
       [-2.47113191e+00, -6.92566522e-01],
       [-9.48322968e-01,  1.75672964e+00],
       [-4.25588925e+00,  1.76778000e-01],
       [-3.81019362e+00,  1.02946136e+00],
       [-4.94826198e+00,  7.35453262e-01],
       [-5.02513892e+00,  4.08186133e-01],
       [-1.18836521e+00, -7.45168767e-02],
       [-4.93648848e+00, -7.19334229e-01],
       [-2.21735772e+00, -1.04476996e+00],
       [ 7.

In [12]:
lda_data = pd.DataFrame(lda_data, columns = ['LD1', 'LD2'])
lda_data['species'] = y
lda_data

Unnamed: 0,LD1,LD2,species
0,-1.810958,-0.247730,0
1,-1.106420,1.217903,0
2,-5.590125,-0.745515,0
3,7.751026,0.177607,0
4,-4.894650,-1.956822,0
...,...,...,...
107,7.648322,0.832147,2
108,-5.754603,0.513072,2
109,-0.685153,1.683232,2
110,-6.766429,-1.569343,2


## Seperate X and y

In [13]:
X = lda_data.drop('species', axis = 1)
y = lda_data['species']

## Split the data into train and test set

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

## Apply Logistic Regression on the train set

In [15]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr

In [16]:
lr.fit(X_train, y_train)

In [17]:
y_pred = lr.predict(X_test)

In [18]:
from sklearn.metrics import accuracy_score
accuracy_score(y_pred, y_test)

0.6071428571428571

In [19]:
from sklearn.model_selection import cross_val_score
cross_val_score(lr, X, y, cv = 10)

array([0.5       , 0.41666667, 0.72727273, 0.72727273, 0.45454545,
       0.54545455, 0.45454545, 0.45454545, 0.27272727, 0.45454545])