### Question 1: Iris dataset

In [20]:
# Part (a): Linear and Quadratic Discriminant Analysis
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score
import pandas as pd

# Read the iris table; 'Species_name' is the class label and every other
# column is used as a predictor.
iris_data = pd.read_csv('iris.csv')
y = iris_data['Species_name']  # Target
X = iris_data.drop(columns=['Species_name'])  # Features

# Stratified 90% / 10% hold-out split; the fixed seed keeps the split (and the
# numbers printed below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42
)

# Fit both discriminant models on the same training fold.
# No priors are passed, so each estimator falls back to its own default.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)
qda = QuadraticDiscriminantAnalysis().fit(X_train, y_train)

# Predictions on the training fold and on the held-out fold.
y_pred_lda_train, y_pred_lda_test = lda.predict(X_train), lda.predict(X_test)
y_pred_qda_train, y_pred_qda_test = qda.predict(X_train), qda.predict(X_test)

# Report misclassification rate (= 1 - accuracy) for each model and fold.
print("\n".join([
    "Linear Discriminant Analysis (LDA):",
    f"Train Misclassification Rate: {1 - accuracy_score(y_train, y_pred_lda_train):.4f}",
    f"Test Misclassification Rate: {1 - accuracy_score(y_test, y_pred_lda_test):.4f}",
    "\nQuadratic Discriminant Analysis (QDA):",
    f"Train Misclassification Rate: {1 - accuracy_score(y_train, y_pred_qda_train):.4f}",
    f"Test Misclassification Rate: {1 - accuracy_score(y_test, y_pred_qda_test):.4f}",
]))


Linear Discriminant Analysis (LDA):
Train Misclassification Rate: 0.0296
Test Misclassification Rate: 0.0000

Quadratic Discriminant Analysis (QDA):
Train Misclassification Rate: 0.0370
Test Misclassification Rate: 0.0000


In [21]:
# Part (b): Bayes Classifier using Kernel Density Estimation
from sklearn.neighbors import KernelDensity
import numpy as np
from sklearn.metrics import accuracy_score

# KDE-based Bayes Classifier
def kde_bayes_classifier(X_train, y_train, X_test, bandwidth=1.0, priors="empirical"):
    """Classify ``X_test`` with a Bayes rule built on per-class kernel density estimates.

    One ``KernelDensity`` model is fitted per class on the rows of ``X_train``
    carrying that label. Each test row is assigned the class maximising
    ``log p(x | class) + log P(class)``.

    Parameters
    ----------
    X_train : training features; must support boolean-mask row selection with
        ``y_train == cls`` (e.g. a pandas DataFrame).
    y_train : pandas Series of class labels (``.unique()`` is called on it).
    X_test : rows to classify, in the same feature layout as ``X_train``.
    bandwidth : bandwidth forwarded unchanged to every ``KernelDensity``.
    priors : how ``P(class)`` is obtained.
        ``"empirical"`` (default) uses the class frequencies in ``y_train``;
        ``"uniform"`` gives every class the same weight;
        a mapping ``{class: prior}`` supplies explicit positive weights (they
        need not sum to one, since only the arg-max is used).

    Returns
    -------
    numpy.ndarray with one predicted label per row of ``X_test``.

    Raises
    ------
    ValueError
        If ``priors`` is an unknown string, or a mapping that lacks a class or
        contains a non-positive weight.
    """
    classes = y_train.unique()

    # Resolve the prior specification into one weight per class.
    if isinstance(priors, str):
        if priors == "empirical":
            class_priors = {cls: (y_train == cls).mean() for cls in classes}
        elif priors == "uniform":
            class_priors = {cls: 1 / len(classes) for cls in classes}
        else:
            raise ValueError(
                f"priors must be 'empirical', 'uniform', or a mapping; got {priors!r}"
            )
    else:
        missing = [cls for cls in classes if cls not in priors]
        if missing:
            raise ValueError(f"priors is missing entries for classes: {missing}")
        class_priors = {cls: priors[cls] for cls in classes}
        if any(weight <= 0 for weight in class_priors.values()):
            raise ValueError("every class prior must be strictly positive")

    # One density model per class, fitted only on that class's training rows.
    densities = {
        cls: KernelDensity(bandwidth=bandwidth).fit(X_train[y_train == cls])
        for cls in classes
    }

    # Rows follow the order of `classes`, columns the rows of X_test.
    log_posteriors = np.vstack([
        densities[cls].score_samples(X_test) + np.log(class_priors[cls])
        for cls in classes
    ])

    # argmax keeps the first maximum, so ties resolve to the earliest class in
    # `classes`, exactly as the previous per-row max() over the dict did.
    winners = log_posteriors.argmax(axis=0)
    return np.array([classes[j] for j in winners])

# Score the KDE-based Bayes classifier on the training fold and on the
# held-out fold. Each call refits the per-class densities on the training fold.
y_pred_kde_test = kde_bayes_classifier(X_train, y_train, X_test)
y_pred_kde_train = kde_bayes_classifier(X_train, y_train, X_train)

# Report misclassification rate (= 1 - accuracy) for both folds.
print("\n".join([
    "\nBayes Classifier using KDE:",
    f"Train Misclassification Rate: {1 - accuracy_score(y_train, y_pred_kde_train):.4f}",
    f"Test Misclassification Rate: {1 - accuracy_score(y_test, y_pred_kde_test):.4f}",
]))



Bayes Classifier using KDE:
Train Misclassification Rate: 0.0000
Test Misclassification Rate: 0.0000


### Question 2: Wine dataset

In [25]:
# Part (a): Linear and Quadratic Discriminant Analysis
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score
import pandas as pd

# Read the wine table; 'Type' is the class label and every other column is
# used as a predictor.
wine_data = pd.read_csv('wine_italy.csv')
y = wine_data['Type']
X = wine_data.drop(columns=['Type'])

# Stratified 90% / 10% hold-out split; the fixed seed keeps the split (and the
# numbers printed below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42
)

# Fit both discriminant models on the same training fold.
# No priors are passed, so each estimator falls back to its own default.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)
qda = QuadraticDiscriminantAnalysis().fit(X_train, y_train)

# Predictions on the training fold and on the held-out fold.
y_pred_lda_train, y_pred_lda_test = lda.predict(X_train), lda.predict(X_test)
y_pred_qda_train, y_pred_qda_test = qda.predict(X_train), qda.predict(X_test)

# Report misclassification rate (= 1 - accuracy) for each model and fold.
print("\n".join([
    "Linear Discriminant Analysis (LDA):",
    f"Train Misclassification Rate: {1 - accuracy_score(y_train, y_pred_lda_train):.4f}",
    f"Test Misclassification Rate: {1 - accuracy_score(y_test, y_pred_lda_test):.4f}",
    "\nQuadratic Discriminant Analysis (QDA):",
    f"Train Misclassification Rate: {1 - accuracy_score(y_train, y_pred_qda_train):.4f}",
    f"Test Misclassification Rate: {1 - accuracy_score(y_test, y_pred_qda_test):.4f}",
]))


Linear Discriminant Analysis (LDA):
Train Misclassification Rate: 0.0000
Test Misclassification Rate: 0.0000

Quadratic Discriminant Analysis (QDA):
Train Misclassification Rate: 0.0062
Test Misclassification Rate: 0.0000


In [28]:
# Part (b): Bayes Classifier using Kernel Density Estimation
from sklearn.neighbors import KernelDensity
import numpy as np
from sklearn.metrics import accuracy_score
import pandas as pd

# Re-read the wine table so this cell rebuilds its own inputs.
wine_data = pd.read_csv('wine_italy.csv')

# 'Type' is the class label; all remaining columns are predictors.
y = wine_data['Type']
X = wine_data.drop(columns=['Type'])

# Same stratified 90% / 10% split and seed as part (a), so the folds match.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42
)

# Define function for KDE-based Bayes Classifier
def kde_bayes_classifier(X_train, y_train, X_test, bandwidth=1.0, priors="uniform"):
    """Classify ``X_test`` with a Bayes rule built on per-class kernel density estimates.

    One ``KernelDensity`` model is fitted per class on the rows of ``X_train``
    carrying that label. Each test row is assigned the class maximising
    ``log p(x | class) + log P(class)``.

    NOTE: this definition rebinds the name ``kde_bayes_classifier`` that an
    earlier cell of the notebook also defines. This version defaults to equal
    class priors; pass ``priors="empirical"`` for class-frequency priors.

    Parameters
    ----------
    X_train : training features; must support boolean-mask row selection with
        ``y_train == cls`` (e.g. a pandas DataFrame).
    y_train : pandas Series of class labels (``.unique()`` is called on it).
    X_test : rows to classify, in the same feature layout as ``X_train``.
    bandwidth : bandwidth forwarded unchanged to every ``KernelDensity``.
    priors : how ``P(class)`` is obtained.
        ``"uniform"`` (default) gives every class the same weight;
        ``"empirical"`` uses the class frequencies in ``y_train``;
        a mapping ``{class: prior}`` supplies explicit positive weights (they
        need not sum to one, since only the arg-max is used).

    Returns
    -------
    numpy.ndarray with one predicted label per row of ``X_test``.

    Raises
    ------
    ValueError
        If ``priors`` is an unknown string, or a mapping that lacks a class or
        contains a non-positive weight.
    """
    classes = y_train.unique()

    # Resolve the prior specification into one weight per class.
    if isinstance(priors, str):
        if priors == "uniform":
            class_priors = {cls: 1 / len(classes) for cls in classes}  # Equal priors
        elif priors == "empirical":
            class_priors = {cls: (y_train == cls).mean() for cls in classes}
        else:
            raise ValueError(
                f"priors must be 'uniform', 'empirical', or a mapping; got {priors!r}"
            )
    else:
        missing = [cls for cls in classes if cls not in priors]
        if missing:
            raise ValueError(f"priors is missing entries for classes: {missing}")
        class_priors = {cls: priors[cls] for cls in classes}
        if any(weight <= 0 for weight in class_priors.values()):
            raise ValueError("every class prior must be strictly positive")

    # One density model per class, fitted only on that class's training rows.
    # NOTE(review): the features are passed to KernelDensity exactly as given,
    # with one shared bandwidth. The recorded run of this cell shows 0.0000
    # train vs 0.2222 test error, which suggests the fixed bandwidth overfits
    # here -- consider standardising the features or tuning `bandwidth`.
    densities = {
        cls: KernelDensity(bandwidth=bandwidth).fit(X_train[y_train == cls])
        for cls in classes
    }

    # Rows follow the order of `classes`, columns the rows of X_test.
    log_posteriors = np.vstack([
        densities[cls].score_samples(X_test) + np.log(class_priors[cls])
        for cls in classes
    ])

    # argmax keeps the first maximum, so ties resolve to the earliest class in
    # `classes`, exactly as the previous per-row max() over the dict did.
    winners = log_posteriors.argmax(axis=0)
    return np.array([classes[j] for j in winners])

# Score the KDE-based Bayes classifier on the training fold and on the
# held-out fold. Each call refits the per-class densities on the training fold.
y_pred_kde_test = kde_bayes_classifier(X_train, y_train, X_test)
y_pred_kde_train = kde_bayes_classifier(X_train, y_train, X_train)

# Report misclassification rate (= 1 - accuracy) for both folds.
print("\n".join([
    "\nBayes Classifier using KDE:",
    f"Train Misclassification Rate: {1 - accuracy_score(y_train, y_pred_kde_train):.4f}",
    f"Test Misclassification Rate: {1 - accuracy_score(y_test, y_pred_kde_test):.4f}",
]))



Bayes Classifier using KDE:
Train Misclassification Rate: 0.0000
Test Misclassification Rate: 0.2222
