<a href="https://colab.research.google.com/github/ghazicc/ML_assignment3/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ML Assignment 3

In [16]:
# Load the Iris Dataset
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report
)

In [2]:

iris = load_iris()

# split the dataset
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

In [5]:
# Define distance metrics and K values
distance_metrics = ['euclidean', 'manhattan', 'cosine']
k_values = [3, 5, 7, 9, 11, 13, 15]
best_k = 0
best_score = 0

# Experiment with distance metrics and cross-validation
for metric in distance_metrics:
    print(f"\nTesting KNN with {metric} distance")
    for k in k_values:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
        scores = cross_val_score(knn, X_train, y_train, cv=5, scoring='accuracy')
        print(f"K={k}, Accuracy={scores.mean():.4f}")
        if scores.mean() > best_score:
            best_k = k
            best_score = scores.mean()








Testing KNN with euclidean distance
K=3, Accuracy=0.9583
K=5, Accuracy=0.9417
K=7, Accuracy=0.9417
K=9, Accuracy=0.9333
K=11, Accuracy=0.9500
K=13, Accuracy=0.9417
K=15, Accuracy=0.9417

Testing KNN with manhattan distance
K=3, Accuracy=0.9583
K=5, Accuracy=0.9417
K=7, Accuracy=0.9417
K=9, Accuracy=0.9417
K=11, Accuracy=0.9500
K=13, Accuracy=0.9500
K=15, Accuracy=0.9500

Testing KNN with cosine distance
K=3, Accuracy=0.9833
K=5, Accuracy=0.9833
K=7, Accuracy=0.9667
K=9, Accuracy=0.9667
K=11, Accuracy=0.9750
K=13, Accuracy=0.9750
K=15, Accuracy=0.9750


# Logistic Regression


In [15]:
# Define hyperparameters for L1 and L2 regularization
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.01, 0.1, 1, 10],  # Regularization strength
    'solver': ['liblinear']  # 'liblinear' supports L1 and L2 penalties
}

# Perform GridSearchCV to tune hyperparameters
log_reg = LogisticRegression(max_iter=1000)
grid_search = GridSearchCV(log_reg, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best Logistic Regression Model
best_log_reg = grid_search.best_estimator_
print("Best Logistic Regression Parameters:", grid_search.best_params_)

# Predict probabilities for ROC-AUC
y_pred_log = best_log_reg.predict(X_test)
y_pred_prob_log = best_log_reg.predict_proba(X_test)[:, 1]

# Logistic Regression Metrics
log_accuracy = accuracy_score(y_test, y_pred_log)
log_precision = precision_score(y_test, y_pred_log, average='weighted')
log_recall = recall_score(y_test, y_pred_log, average='weighted')
log_f1 = f1_score(y_test, y_pred_log, average='weighted')
# log_roc_auc = roc_auc_score(y_test, y_pred_prob_log, multi_class='ovr', average='weighted')

print("\nLogistic Regression Performance:")
print(f"Accuracy: {log_accuracy:.4f}")
print(f"Precision: {log_precision:.4f}")
print(f"Recall: {log_recall:.4f}")
print(f"F1-Score: {log_f1:.4f}")
# print(f"ROC-AUC: {log_roc_auc:.4f}")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_log))
print("Classification Report:\n", classification_report(y_test, y_pred_log))

Best Logistic Regression Parameters: {'C': 10, 'penalty': 'l1', 'solver': 'liblinear'}

Logistic Regression Performance:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [20]:

# ----------------- Train SVM with Different Kernels -----------------
kernels = ['linear', 'poly', 'rbf']
results = {}

for kernel in kernels:
    print(f"\nTraining SVM with {kernel} kernel...")
    svm = SVC(kernel=kernel, probability=True, random_state=42)  # Enable probability=True for ROC-AUC
    svm.fit(X_train, y_train)

    # Predictions
    y_pred = svm.predict(X_test)
    y_proba = svm.predict_proba(X_test)[:, 1]  # Probability estimates for ROC-AUC

    # Compute evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

    # Store results
    results[kernel] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'ROC-AUC': roc_auc
    }

    # Print classification report and confusion matrix
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

# ----------------- Compare Kernel Performance -----------------
print("\nKernel Performance Comparison:")
for kernel, metrics in results.items():
    print(f"\nKernel: {kernel}")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

# Best Kernel
best_kernel = max(results, key=lambda k: results[k]['Accuracy'])
print(f"\nBest Kernel based on Accuracy: {best_kernel}")


Training SVM with linear kernel...


AxisError: axis 1 is out of bounds for array of dimension 1