<a href="https://colab.research.google.com/github/Varshini-svnit/ML_LABS/blob/main/lab8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import KFold
import copy

In [None]:
def accuracy_from_scratch(y_true, y_pred):
    """Return the fraction of predictions that exactly match the true labels.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        The accuracy as a float in ``[0, 1]``; ``0.0`` when there are no
        samples (instead of dividing by zero).

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different shapes.
    """
    # Plain Python lists compare as a whole with ``==`` (yielding a single
    # bool), so convert to arrays to get an element-wise comparison.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}."
        )
    if y_true.size == 0:
        return 0.0
    return np.sum(y_true == y_pred) / len(y_true)

def precision_recall_f1_from_scratch(y_true, y_pred, average='macro'):
    """Compute multi-class precision, recall and F1 without scikit-learn.

    Every label that appears in either ``y_true`` or ``y_pred`` is treated as
    a class. A ratio whose denominator is zero is defined as ``0``.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.
        average: How the per-class results are combined:
            ``'macro'`` - unweighted mean over classes (default);
            ``'weighted'`` - mean over classes weighted by the number of true
            samples of each class;
            ``'micro'`` - computed from the counts pooled over all classes.

    Returns:
        A ``(precision, recall, f1)`` tuple; ``(0.0, 0.0, 0.0)`` when there
        are no samples.

    Raises:
        ValueError: If ``average`` is not one of the supported modes or the
            inputs have different shapes.
    """
    if average not in ('macro', 'micro', 'weighted'):
        raise ValueError(
            f"Unsupported average {average!r}; expected 'macro', 'micro' or 'weighted'."
        )

    # Lists would make ``y_true == cls`` a single bool instead of a mask.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}."
        )
    if y_true.size == 0:
        return 0.0, 0.0, 0.0

    def _ratio(numerator, denominator):
        # Zero-division convention: an undefined ratio counts as 0.
        return numerator / denominator if denominator > 0 else 0.0

    def _f1(precision, recall):
        return _ratio(2 * (precision * recall), precision + recall)

    classes = np.unique(np.concatenate((y_true, y_pred)))
    tps, fps, fns = [], [], []
    for cls in classes:
        is_true = y_true == cls
        is_pred = y_pred == cls
        tps.append(np.sum(is_true & is_pred))
        fps.append(np.sum(~is_true & is_pred))
        fns.append(np.sum(is_true & ~is_pred))

    if average == 'micro':
        tp, fp, fn = sum(tps), sum(fps), sum(fns)
        precision = _ratio(tp, tp + fp)
        recall = _ratio(tp, tp + fn)
        return precision, recall, _f1(precision, recall)

    precisions = [_ratio(tp, tp + fp) for tp, fp in zip(tps, fps)]
    recalls = [_ratio(tp, tp + fn) for tp, fn in zip(tps, fns)]
    f1s = [_f1(p, r) for p, r in zip(precisions, recalls)]

    if average == 'weighted':
        # Support = number of true samples per class; its total is > 0 here
        # because the empty case returned early.
        support = [tp + fn for tp, fn in zip(tps, fns)]
        return (
            np.average(precisions, weights=support),
            np.average(recalls, weights=support),
            np.average(f1s, weights=support),
        )

    return np.mean(precisions), np.mean(recalls), np.mean(f1s)

def evaluate_classifier_from_scratch(y_true, y_pred):
    """Score a set of predictions with all four from-scratch metrics.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        A ``(accuracy, precision, recall, f1)`` tuple, where the last three
        come from ``precision_recall_f1_from_scratch`` with its default
        averaging.
    """
    metrics = [accuracy_from_scratch(y_true, y_pred)]
    metrics.extend(precision_recall_f1_from_scratch(y_true, y_pred))
    return tuple(metrics)

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    The minimised objective is the mean log-loss plus an optional penalty
    scaled by ``1 / (C * n_samples)``. The data term is averaged over the
    samples, so the penalty must be divided by ``n_samples`` as well for ``C``
    to act as the usual inverse regularization strength; without that scaling
    the penalty is ``n_samples`` times stronger than intended and shrinks the
    weights towards zero.

    Args:
        learning_rate: Gradient-descent step size.
        n_iters: Number of gradient-descent iterations.
        penalty: One of ``'none'`` (or ``None``), ``'l1'``, ``'l2'`` or
            ``'elastic_net'``.
        C: Inverse of regularization strength; must be positive.
        l1_ratio: Share of the L1 term in the elastic-net penalty (the L2
            term gets ``1 - l1_ratio``). Only used for ``'elastic_net'``.

    Raises:
        ValueError: If ``penalty`` is not supported or ``C`` is not positive.
    """

    _PENALTIES = ('none', 'l1', 'l2', 'elastic_net')

    def __init__(self, learning_rate=0.01, n_iters=1000, penalty='none', C=1.0, l1_ratio=0.5):
        if penalty is None:
            penalty = 'none'
        # Reject typos up front; an unknown penalty used to silently train an
        # unregularized model.
        if penalty not in self._PENALTIES:
            raise ValueError(
                f"Unsupported penalty {penalty!r}; expected one of {self._PENALTIES}."
            )
        if C <= 0:
            raise ValueError(f"C must be positive, got {C!r}.")
        self.lr = learning_rate
        self.n_iters = n_iters
        self.penalty = penalty
        self.C = C  # Inverse of regularization strength
        self.l1_ratio = l1_ratio
        self.weights = None
        self.bias = None

    def _sigmoid(self, z):
        """Return the logistic function of ``z`` element-wise."""
        z = np.array(z, dtype=float)
        # Clip so that np.exp cannot overflow for extreme logits.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` binary targets (0 or 1).

        Raises:
            ValueError: If ``X`` contains no samples.
        """
        # Cast once: object-dtype input (e.g. a mixed bool/float DataFrame's
        # ``.values``) would otherwise go through slow object arithmetic.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")

        self.weights = np.zeros(n_features)
        self.bias = 0.0
        # Penalty scaled like the averaged data term (see class docstring).
        reg_scale = 1 / (self.C * n_samples)

        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.weights) + self.bias
            error = self._sigmoid(linear_model) - y

            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # The bias is never regularized.
            if self.penalty == 'l2':
                dw += reg_scale * self.weights
            elif self.penalty == 'l1':
                # Subgradient of |w|; np.sign gives 0 at w == 0.
                dw += reg_scale * np.sign(self.weights)
            elif self.penalty == 'elastic_net':
                dw += reg_scale * (
                    self.l1_ratio * np.sign(self.weights)
                    + (1 - self.l1_ratio) * self.weights
                )

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """Return the estimated probability of class 1 for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        linear_model = np.dot(X, self.weights) + self.bias
        return self._sigmoid(linear_model)

    def predict(self, X):
        """Return 0/1 labels; a row is labelled 1 when its probability exceeds 0.5."""
        return (self.predict_proba(X) > 0.5).astype(int)


In [None]:
class OneVsRestClassifier:
    """Multi-class wrapper that trains one binary classifier per class.

    For every distinct label a deep copy of the prototype is fitted on a
    "this class vs. everything else" target. Prediction picks the class whose
    classifier reports the highest ``predict_proba`` score.

    Args:
        base_classifier: Unfitted prototype exposing ``fit(X, y)`` and
            ``predict_proba(X)`` (one score per sample). It is never fitted
            itself; only deep copies of it are.
    """

    def __init__(self, base_classifier):
        self.base_classifier_prototype = base_classifier
        self.classifiers, self.classes = [], []

    def fit(self, X, y):
        """Fit one copy of the prototype per distinct label in ``y``.

        Args:
            X: Array-like feature matrix, convertible to float.
            y: Array-like of labels. Labels keep their original dtype, so
                strings and integers are both supported.
        """
        X = np.asarray(X, dtype=float)
        # Do not cast labels to float: that breaks non-numeric labels and
        # makes predict() return floats for integer classes.
        y = np.asarray(y)
        self.classes = np.unique(y)
        self.classifiers = []
        for cls in self.classes:
            y_binary = np.where(y == cls, 1, 0)
            # Deep copy so every class gets an independent, fresh model.
            classifier = copy.deepcopy(self.base_classifier_prototype)
            classifier.fit(X, y_binary)
            self.classifiers.append(classifier)

    def predict(self, X):
        """Return, for each row of ``X``, the label with the highest score.

        Ties are resolved in favour of the first class in sorted order.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if not self.classifiers:
            raise RuntimeError("OneVsRestClassifier must be fitted before calling predict().")
        X = np.asarray(X, dtype=float)
        # Shape (n_samples, n_classes): column j holds the scores of class j.
        probabilities = np.column_stack([clf.predict_proba(X) for clf in self.classifiers])
        return self.classes[np.argmax(probabilities, axis=1)]


In [None]:
class KNeighborsClassifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Args:
        k: Number of neighbours that vote; must be at least 1. If the
            training set holds fewer than ``k`` samples, all of them vote.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        # k < 1 would leave no voters and fail later with an opaque IndexError.
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}.")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Memorise the training data (kNN has no training step).

        Args:
            X: Array-like of shape ``(n_samples, n_features)``, convertible
                to float.
            y: Array-like of ``n_samples`` labels.

        Raises:
            ValueError: If ``X`` and ``y`` have different lengths.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}."
            )
        self.X_train = X
        self.y_train = y

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two points."""
        return np.sqrt(np.sum((x1 - x2)**2))

    def _predict_single(self, x):
        """Return the majority label among the ``k`` training points nearest to ``x``."""
        # One vectorized pass over the training matrix instead of a Python
        # loop calling _euclidean_distance once per training row.
        distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))
        # Stable sort: equidistant neighbours keep their training order, so
        # results are deterministic.
        k_indices = np.argsort(distances, kind='stable')[:self.k]
        # On a vote tie Counter returns the label seen first, i.e. the one
        # belonging to the nearest neighbour among the tied labels.
        return Counter(self.y_train[k_indices]).most_common(1)[0][0]

    def predict(self, X):
        """Return the predicted label for each row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.X_train is None:
            raise RuntimeError("KNeighborsClassifier must be fitted before calling predict().")
        X = np.asarray(X, dtype=float)
        return np.array([self._predict_single(x) for x in X])


In [None]:
# Load the dataset and one-hot encode the categorical columns. drop_first=True
# drops one dummy per category to avoid redundant columns.
# dtype=float: recent pandas versions emit bool dummy columns by default,
# which would turn the mixed bool/float feature matrix below into a slow
# object-dtype array.
df = pd.read_csv('drug_200.csv')
df = pd.get_dummies(df, columns=['Sex', 'BP', 'Cholesterol'], drop_first=True, dtype=float)

# Encode the target as integer class ids, numbered in order of first appearance.
drug_mapping = {drug: i for i, drug in enumerate(df['Drug'].unique())}
df['Drug'] = df['Drug'].map(drug_mapping)

# Z-score standardize the two numeric features.
# NOTE(review): the mean/std are computed on the full dataset before the
# cross-validation split, so test folds influence the scaling (mild leakage).
for col in ['Age', 'Na_to_K']:
    mean, std = df[col].mean(), df[col].std()
    df[col] = (df[col] - mean) / std

# Explicit float matrix so downstream numpy math never sees object dtype.
X = df.drop('Drug', axis=1).to_numpy(dtype=float)
y = df['Drug'].values

# Shared 5-fold splitter; the fixed seed gives every model the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)


In [None]:
print("--- Task 1: Logistic Regression ---")
# Display name -> penalty keyword passed to LogisticRegression.
penalties = {
    'No Regularization': 'none',
    'Lasso (L1)': 'l1',
    'Ridge (L2)': 'l2',
    'Elastic Net': 'elastic_net',
}
for name, penalty in penalties.items():
    # Unfitted template; OneVsRestClassifier deep-copies it for each class.
    lr_prototype = LogisticRegression(learning_rate=0.1, n_iters=1000, penalty=penalty, C=1.0)
    scores = []
    for train_index, test_index in kf.split(X):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        # A new wrapper per fold so no fitted state carries over between folds.
        ovr = OneVsRestClassifier(base_classifier=lr_prototype)
        ovr.fit(X_train, y_train)
        predictions = ovr.predict(X_test)
        scores.append(evaluate_classifier_from_scratch(y_test, predictions))
    # Column-wise mean over folds: [accuracy, precision, recall, f1].
    avg_scores = np.mean(scores, axis=0)
    print(f"\nResults for {name}:\n  Average Accuracy: {avg_scores[0]:.4f}\n  Average Precision: {avg_scores[1]:.4f}\n  Average Recall: {avg_scores[2]:.4f}\n  Average F1-score: {avg_scores[3]:.4f}")


--- Task 1: Logistic Regression ---

Results for No Regularization:
  Average Accuracy: 0.9350
  Average Precision: 0.9500
  Average Recall: 0.9159
  Average F1-score: 0.9187

Results for Lasso (L1):
  Average Accuracy: 0.4550
  Average Precision: 0.0965
  Average Recall: 0.2100
  Average F1-score: 0.1318

Results for Ridge (L2):
  Average Accuracy: 0.4550
  Average Precision: 0.0965
  Average Recall: 0.2100
  Average F1-score: 0.1318

Results for Elastic Net:
  Average Accuracy: 0.4550
  Average Precision: 0.0965
  Average Recall: 0.2100
  Average F1-score: 0.1318


In [None]:
print("\n\n--- Task 2: K-Nearest Neighbors ---")
# Neighbourhood sizes to compare under the same cross-validation folds.
k_values = [1, 3, 5]
for k in k_values:
    knn = KNeighborsClassifier(k=k)
    scores = []
    for train_index, test_index in kf.split(X):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        # fit() replaces the stored training data, so one instance can be
        # reused across folds.
        knn.fit(X_train, y_train)
        predictions = knn.predict(X_test)
        scores.append(evaluate_classifier_from_scratch(y_test, predictions))

    # Column-wise mean over folds: [accuracy, precision, recall, f1].
    avg_scores = np.mean(scores, axis=0)
    print(f"\nResults for K={k}:")
    print(f"  Average Accuracy: {avg_scores[0]:.4f}")
    print(f"  Average Precision: {avg_scores[1]:.4f}")
    print(f"  Average Recall: {avg_scores[2]:.4f}")
    print(f"  Average F1-score: {avg_scores[3]:.4f}")




--- Task 2: K-Nearest Neighbors ---

Results for K=1:
  Average Accuracy: 0.8900
  Average Precision: 0.8637
  Average Recall: 0.9239
  Average F1-score: 0.8755

Results for K=3:
  Average Accuracy: 0.8600
  Average Precision: 0.7853
  Average Recall: 0.8511
  Average F1-score: 0.8007

Results for K=5:
  Average Accuracy: 0.8900
  Average Precision: 0.8382
  Average Recall: 0.8516
  Average F1-score: 0.8316
