# Linear Discriminant Analysis

In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the bundled iris dataset and assemble the four measurements plus the
# integer class label into a single DataFrame.
iris = load_iris()
features = pd.DataFrame(data=iris.data, columns=iris.feature_names)  # type: ignore
df = features.assign(target=iris.target)  # type: ignore
# Preview the first rows as the cell's output.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
# Structural summary of the frame: column dtypes, non-null counts, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 6.0 KB


In [4]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


In [5]:
# List the column labels; the class label "target" is the last column.
df.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')

In [6]:
# Split the frame into a feature matrix and a label vector, both as NumPy arrays.
# "target" is the last column, so dropping it leaves exactly the measurements.
X = df.drop(columns="target").to_numpy()
y = df["target"].to_numpy()

In [7]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# shuffle (and therefore every downstream output) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [8]:
# Learn the scaling parameters from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences the fit.
# NOTE: X_train / X_test are rebound to their scaled versions here, so re-running
# this cell without first re-running the split cell scales already-scaled data.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Before Performing LDA


In [9]:
# Inspect the standardized training features (four columns) before the LDA projection.
# NOTE(review): this echoes the whole array; a slice such as X_train[:5] would keep the output short.
X_train

array([[-1.02366372, -2.37846268, -0.18295039, -0.29318114],
       [ 0.69517462, -0.10190314,  0.93066067,  0.7372463 ],
       [ 0.92435306,  0.58106472,  1.04202177,  1.63887031],
       [ 0.1222285 , -1.92315077,  0.6522579 ,  0.35083601],
       [ 0.92435306, -1.24018291,  1.09770233,  0.7372463 ],
       [-0.33612839, -1.24018291,  0.03977182, -0.16437771],
       [ 2.07024529, -0.10190314,  1.26474398,  1.38126345],
       [ 0.46599617,  0.58106472,  0.48521625,  0.47963944],
       [-0.45071761, -1.46783886, -0.01590873, -0.16437771],
       [ 0.46599617, -0.784871  ,  0.59657735,  0.7372463 ],
       [ 0.46599617, -0.55721505,  0.70793846,  0.35083601],
       [-1.13825295, -1.24018291,  0.37385514,  0.60844287],
       [ 0.46599617, -1.24018291,  0.6522579 ,  0.86604973],
       [ 1.26812073,  0.35340877,  0.48521625,  0.22203258],
       [ 0.69517462, -0.10190314,  0.76361901,  0.99485316],
       [ 0.1222285 ,  0.80872067,  0.37385514,  0.47963944],
       [-1.25284217,  0.

In [10]:
# Inspect the standardized test features (four columns) before the LDA projection.
# NOTE(review): this echoes the whole array; a slice such as X_test[:5] would keep the output short.
X_test

array([[-0.10694994, -0.55721505,  0.70793846,  1.51006688],
       [ 0.1222285 , -1.92315077,  0.09545238, -0.29318114],
       [-0.45071761,  2.6299683 , -1.35224199, -1.32360858],
       [ 1.6118884 , -0.32955909,  1.37610509,  0.7372463 ],
       [-1.02366372,  0.80872067, -1.29656144, -1.32360858],
       [ 0.46599617,  0.58106472,  1.20906343,  1.63887031],
       [-1.02366372,  1.03637663, -1.40792255, -1.19480515],
       [ 0.92435306,  0.12575281,  0.48521625,  0.35083601],
       [ 1.03894229, -0.55721505,  0.5408968 ,  0.22203258],
       [ 0.23681773, -0.55721505,  0.09545238,  0.09322915],
       [ 0.23681773, -1.01252695,  0.98634122,  0.22203258],
       [ 0.5805854 ,  0.35340877,  0.37385514,  0.35083601],
       [ 0.23681773, -0.55721505,  0.48521625, -0.03557428],
       [ 0.69517462, -0.55721505,  0.42953569,  0.35083601],
       [ 0.23681773, -0.32955909,  0.48521625,  0.22203258],
       [-1.13825295,  1.26403258, -1.35224199, -1.45241201],
       [ 0.1222285 , -0.

## Reducing to 2D


In [11]:
# Fit a two-component LDA on the training features and their class labels,
# then project both splits with that fitted model.
# NOTE: X_train / X_test are rebound from four columns to two here, so this
# cell is not idempotent; re-run the split and scaling cells before re-running it.
lda = LinearDiscriminantAnalysis(n_components=2)
lda.fit(X_train, y_train)
X_train = lda.transform(X_train)
X_test = lda.transform(X_test)

## After Performing LDA


In [12]:
# Inspect the training data after the LDA projection (two columns per sample).
# NOTE(review): this echoes the whole array; a slice such as X_train[:5] would keep the output short.
X_train

array([[-0.93534728,  2.56340654],
       [-4.9854655 ,  0.12984179],
       [-6.71818728, -2.28054655],
       [-4.50416279,  2.1562057 ],
       [-6.05586017,  1.42531684],
       [-0.85339282,  1.58864502],
       [-6.24525673, -0.85251282],
       [-2.35585784, -0.7209366 ],
       [-0.84540183,  1.70265893],
       [-4.07198666,  0.13954656],
       [-3.67867911,  0.99586764],
       [-4.85461429,  0.63957594],
       [-4.815563  ,  0.36717819],
       [-1.03351774, -0.06914412],
       [-4.74968477, -0.75150098],
       [-2.12692883, -1.0841014 ],
       [ 6.96274732,  0.6311119 ],
       [-5.49216896,  0.15037761],
       [-1.90661334,  0.93348185],
       [-0.15965983, -0.10313754],
       [-1.76011418,  1.14487246],
       [-2.18415219,  0.38057744],
       [-5.10785032,  0.13432765],
       [ 8.5552578 , -0.77296709],
       [ 7.72225853,  0.67356318],
       [-5.21505839,  0.26102083],
       [-1.17102799,  0.59183631],
       [ 7.83631832,  0.02869204],
       [ 9.35553499,

In [13]:
# Inspect the test data after the LDA projection (two columns per sample).
# NOTE(review): this echoes the whole array; a slice such as X_test[:5] would keep the output short.
X_test

array([[-6.68570079, -1.46633989],
       [-0.66618058,  2.51798076],
       [ 9.64172871, -1.92452449],
       [-6.07978603,  1.04122003],
       [ 7.82024209, -0.11522561],
       [-7.99924466, -1.89829795],
       [ 8.17911864, -0.81112357],
       [-1.80116131, -0.09944405],
       [-2.00550058,  0.87980334],
       [-0.63354696,  0.43721934],
       [-5.17206675,  2.24334971],
       [-1.57223229, -0.46260885],
       [-2.14291659,  1.4566736 ],
       [-2.15266362,  0.43642697],
       [-2.55163649,  0.68842063],
       [ 8.43330126, -0.32622003],
       [-2.43776518,  0.21176984],
       [-2.39600706,  1.64601702],
       [ 7.09264774,  0.34043384],
       [ 8.89753561, -1.97093371],
       [-5.39432675, -0.51224592],
       [-2.8453458 ,  0.07570874],
       [ 6.54722283,  0.33898181],
       [ 6.71769497,  0.89241415],
       [-3.83501953, -0.15165943],
       [ 8.79213069, -0.9800629 ],
       [ 6.87351864, -1.08701263],
       [-1.14753041,  0.53367193],
       [-0.08589062,