## Lecture 2 - Methods of Classification on High Dimensional Data

In [2]:
import numpy as np
import pandas as pd
import sympy as sp
from nptyping import NDArray, Int, Shape
from scipy.linalg import fractional_matrix_power

### 3. Bayes Rule, Quadratic and Linear Discriminant Analysis

#### Review Bayes Rule

In [3]:
# Bayes' rule for a diagnostic test.
# Notation: I = infected, NI = not infected, P = test reports positive.

# P(I): share of the population that is infected
p_infected = 0.05
# P(NI) = 1 - P(I)
p_not_infected = 1 - p_infected

# P(P|I): the test reports positive when the patient is infected
p_positive_infected = 0.99
# P(P|NI): the test reports positive when the patient is not infected
p_positive_not_infected = 0.10

# P(P|I) * P(I): numerator of Bayes' rule, also the first term of P(P)
p_positive_and_infected = p_positive_infected * p_infected

# Law of total probability: P(P) = P(P|I) * P(I) + P(P|NI) * P(NI)
p_positive = p_positive_and_infected + p_positive_not_infected * p_not_infected

# Posterior: P(I|P) = P(P|I) * P(I) / P(P)
p_infected_given_positive = p_positive_and_infected / p_positive

report = (
    f'Probability that the patient is infected given that the test is positive: '
    f'{p_infected_given_positive}'
)
print(report)

Probability that the patient is infected given that the test is positive: 0.34256055363321797


### 5. Quadratic and Linear Discriminant Analysis in Higher Dimensions

#### Two-Class LDA in 2 Dimensions: Computation

In [9]:
# Two unit-length, mutually orthogonal directions; they become the columns of V.
v_sigma = np.array([
    [1/np.sqrt(2)],
    [1/np.sqrt(2)]
])

v_1 = np.array([
    [1/np.sqrt(2)],
    [-1/np.sqrt(2)]
])

# Symbolic entry of A (displayed as "sigma"); the final print treats it as
# the variance and substitutes 1 for it.
var = sp.Symbol("sigma")

# Concatenate the v_sigma and v_1 vectors horizontally: V = [v_sigma | v_1]
V = np.concatenate((v_sigma, v_1), axis=1)

# Create the A matrix: diagonal, with `var` paired with v_sigma and 1 with v_1
A = sp.Matrix([
    [var, 0],
    [0, 1]
])

# Calculate the covariance matrix Sigma = V A V^T.
# V is a NumPy array and A is a SymPy matrix, so the product is evaluated by
# SymPy and the result is a symbolic sp.Matrix. `@` is used (as for `w` below)
# so the expression reads as a matrix product rather than an element-wise one.
Sigma = V @ A @ V.T
print("Sigma: ", Sigma)

# Mean of each distribution (column vectors)
mu_0 = np.array([[0, 0]]).T
mu_1 = np.array([[1, 0]]).T

# Calculate the normal vector as the row vector (mu_0 - mu_1)^T Sigma^{-1}
w = (mu_0 - mu_1).T @ Sigma.inv()
print("Normal vector (w): ", w)
# With var = 1, A is the identity, so Sigma = V V^T and w reduces to (mu_0 - mu_1)^T
print("Normal vector when the variance = 1: ", w.subs(var, 1))

Sigma:  Matrix([[0.5*sigma + 0.5, 0.5*sigma - 0.5], [0.5*sigma - 0.5, 0.5*sigma + 0.5]])
Normal vector (w):  Matrix([[-1.0*(0.5*sigma + 0.5)/sigma, -1.0*(0.5 - 0.5*sigma)/sigma]])
Normal vector when the variance = 1:  Matrix([[-1.00000000000000, 0]])


### 6. Transformation to Spherical Gaussians

#### Transformation to Spherical Gaussians

In [20]:
# Create the covariance matrix Sigma = mat1 @ mat2 @ mat1.
# mat1 is symmetric with orthonormal columns, so this is an eigendecomposition
# of Sigma whose eigenvalues are the diagonal entries of mat2 (2 and 1).
mat1 = (1/np.sqrt(2)) * np.array([
    [1, 1],
    [1, -1]
])
mat2 = np.array([
    [2, 0],
    [0, 1]
])
Sigma = mat1 @ mat2 @ mat1

# Get the eigenvalues and eigenvectors of the covariance matrix
# (column i of `eigenvectors` belongs to eigenvalues[i])
eigenvalues, eigenvectors = np.linalg.eig(Sigma)

# Create a diagonal matrix with the eigenvalues
Lambda = np.diag(eigenvalues)

# Create an alias V for the eigenvectors
V = eigenvectors

# Create the P matrix using Lambda and V.
# `case` selects which candidate formula is tried; any value other than
# 0, 1 or 2 selects V @ Lambda^(-1/2).
case = 3
if case == 0:
    P = V @ Lambda
elif case == 1:
    P = fractional_matrix_power(Lambda, 1/2) @ V
elif case == 2:
    P = V @ fractional_matrix_power(Lambda, 1/2)
else:
    P = V @ fractional_matrix_power(Lambda, -1/2)

# Reverse the row order of P.
# NOTE(review): this leaves P.T @ Sigma @ P unchanged only because the Sigma
# built above is unchanged when both its rows and columns are reversed; it is
# not valid for a general covariance matrix. Presumably done to match the
# sign/layout of a reference answer -- confirm.
P = np.flip(P, axis=0)
print("P: ", P)
print()

# Identity of matching size, used as the comparison target below
identity = np.identity(Sigma.shape[0])

# Calculate P.T @ Sigma @ P and round the result to 3 decimal places
P_Sigma_P = np.around(P.T @ Sigma @ P, decimals=3)
print("P.T @ Sigma @ P: ", P_Sigma_P)
print()

# Check if P.T @ Sigma @ P is equal to the identity matrix
if np.allclose(P_Sigma_P, identity):
    print("P.T @ Sigma @ P is equal to the identity matrix")
else:
    print("P.T @ Sigma @ P is not equal to the identity matrix")

# Compute P.T @ P once; it is printed and then tested twice
P_T_P = P.T @ P
print()
print("P.T @ P: ", P_T_P)

# Check the following conditions. Comparisons are tolerance-based because
# P.T @ P comes out of floating-point matrix products, where exact equality
# can fail on rounding error alone.
# 1. P.T @ P = I
if np.allclose(P_T_P, identity):
    print("P.T @ P is equal to the identity matrix")
else:
    print("P.T @ P is not equal to the identity matrix")

# 2. P.T @ P is diagonal (equal to itself with the off-diagonal entries zeroed)
if np.allclose(P_T_P, np.diag(np.diag(P_T_P))):
    print("P.T @ P is diagonal")
else:
    print("P.T @ P is not diagonal")

P:  [[ 0.5         0.70710678]
 [ 0.5        -0.70710678]]

P.T @ Sigma @ P:  [[1. 0.]
 [0. 1.]]

P.T @ Sigma @ P is equal to the identity matrix

P.T @ P:  [[0.5 0. ]
 [0.  1. ]]
P.T @ P is not equal to the identity matrix
P.T @ P is diagonal
