In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Fetch the Iris data: feature matrix X and integer class targets y
iris = load_iris()
X, y = iris.data, iris.target
print("Original target classes:", np.unique(iris.target))

# Binary relabelling: original target 0 becomes Class I (label 1); every
# other target is pooled into Class II (label 0)
y_binary = np.where(y == 0, 1, 0)

# Class I: keep 40 samples for training and hold out 10 for testing
X_class_I = X[y_binary == 1]
y_class_I = y_binary[y_binary == 1]

(X_train_class_I, X_test_class_I,
 y_train_class_I, y_test_class_I) = train_test_split(
    X_class_I, y_class_I, test_size=10, train_size=40, random_state=42)

# Class II: keep 80 samples for training and hold out 20 for testing
X_class_II = X[y_binary == 0]
y_class_II = y_binary[y_binary == 0]

(X_train_class_II, X_test_class_II,
 y_train_class_II, y_test_class_II) = train_test_split(
    X_class_II, y_class_II, test_size=20, train_size=80, random_state=42)

# Stack the per-class pieces (Class I rows first, then Class II rows)
X_train = np.vstack((X_train_class_I, X_train_class_II))
y_train = np.hstack((y_train_class_I, y_train_class_II))

X_test = np.vstack((X_test_class_I, X_test_class_II))
y_test = np.hstack((y_test_class_I, y_test_class_II))

# Helper functions for logistic regression
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` elementwise.

    The naive formula overflows in ``exp(-z)`` for large negative ``z``.
    This version only ever exponentiates a non-positive number, so it never
    overflows: for ``z >= 0`` it uses ``1 / (1 + exp(-z))`` and for ``z < 0``
    the equivalent ``exp(z) / (1 + exp(z))``.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``z``: a NumPy float scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # -|z| <= 0, so exp() lies in (0, 1] and cannot overflow.
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and is a no-op view
    # for arrays with one or more dimensions.
    return result[()]

def logistic_regression_least_squares(X, y):
    """Fit a linear score ``z = X @ w + b`` by least squares for sigmoid prediction.

    The labels are recoded from {0, 1} to {-1, +1} before fitting. The score
    is later passed through a sigmoid and thresholded at 0.5, which is the
    same as thresholding ``z`` at 0; fitting the raw {0, 1} labels would
    instead place the decision boundary near ``z = 0.5`` and misclassify
    most negative samples.

    The system is solved with ``np.linalg.lstsq`` rather than by inverting
    ``X^T X``, so a rank-deficient design matrix yields the minimum-norm
    solution instead of an error or a numerically meaningless inverse.

    Args:
        X: 2-D array of shape (n_samples, n_features).
        y: 1-D array of binary labels in {0, 1}, one per row of ``X``.

    Returns:
        A tuple ``(weights, bias)``: the per-feature coefficients and the
        scalar intercept.
    """
    # Append a constant column so the last coefficient acts as the bias.
    X_bias = np.c_[X, np.ones(X.shape[0])]

    # {0, 1} -> {-1, +1}: puts the least-squares decision boundary at z = 0.
    targets = 2.0 * np.asarray(y, dtype=float) - 1.0

    weights = np.linalg.lstsq(X_bias, targets, rcond=None)[0]

    return weights[:-1], weights[-1]  # feature weights, then the bias

def predict_logistic(X, weights, bias):
    """Predict binary labels by thresholding the sigmoid of a linear score.

    Args:
        X: Feature matrix, one sample per row.
        weights: Per-feature coefficients.
        bias: Scalar intercept added to every score.

    Returns:
        Array of 0/1 labels; 1 where the sigmoid output is at least 0.5.
    """
    scores = bias + np.dot(X, weights)
    probabilities = sigmoid(scores)
    is_positive = probabilities >= 0.5
    return np.where(is_positive, 1, 0)

def logistic_loss(X, y, weights, bias):
    """Return the mean binary cross-entropy of a linear-sigmoid model.

    With ``z = X @ weights + bias`` and ``p = sigmoid(z)``, the per-sample
    cross-entropy ``-(y*log(p) + (1-y)*log(1-p))`` equals
    ``log(1 + exp(z)) - y*z``. The second form is evaluated here through
    ``np.logaddexp``, so a saturated sigmoid (``p`` rounding to 0 or 1)
    no longer produces ``log(0)`` and an ``inf``/``nan`` loss.

    Args:
        X: 2-D feature matrix of shape (n_samples, n_features).
        y: 1-D array of binary labels in {0, 1}.
        weights: Per-feature coefficients.
        bias: Scalar intercept.

    Returns:
        The cross-entropy averaged over the rows of ``X``.

    Raises:
        ZeroDivisionError: If ``X`` has no rows.
    """
    m = X.shape[0]
    z = np.dot(X, weights) + bias
    # logaddexp(0, z) == log(1 + exp(z)), computed without overflow.
    per_sample = np.logaddexp(0, z) - np.asarray(y) * z
    loss = 1 / m * np.sum(per_sample)
    return loss

# Helper functions for linear regression
def linear_regression_least_squares(X, y):
    """Fit ordinary least-squares linear regression with an intercept.

    The system is solved with ``np.linalg.lstsq`` rather than by forming and
    inverting ``X^T X``. The result is the same for a well-conditioned
    design matrix; for a rank-deficient one (e.g. duplicated features) it
    returns the minimum-norm solution instead of raising or returning a
    numerically meaningless inverse.

    Args:
        X: 2-D array of shape (n_samples, n_features).
        y: 1-D array of regression targets, one per row of ``X``.

    Returns:
        A tuple ``(weights, bias)``: the per-feature coefficients and the
        scalar intercept.
    """
    # Append a constant column so the last coefficient acts as the bias.
    X_bias = np.c_[X, np.ones(X.shape[0])]

    weights = np.linalg.lstsq(X_bias, y, rcond=None)[0]

    return weights[:-1], weights[-1]  # feature weights, then the bias

def predict_linear(X, weights, bias):
    """Predict binary labels by thresholding a linear score at 0.5.

    Args:
        X: Feature matrix, one sample per row.
        weights: Per-feature coefficients.
        bias: Scalar intercept added to every score.

    Returns:
        Array of 0/1 labels; 1 where ``X @ weights + bias`` is at least 0.5.
    """
    scores = bias + np.dot(X, weights)
    is_positive = scores >= 0.5
    return np.where(is_positive, 1, 0)

def linear_loss(X, y, weights, bias):
    """Return half the mean squared error of a linear model.

    Args:
        X: Feature matrix, one sample per row.
        y: Target values, one per row of ``X``.
        weights: Per-feature coefficients.
        bias: Scalar intercept.

    Returns:
        ``sum((X @ weights + bias - y) ** 2) / (2 * n_samples)``.
    """
    n_samples = X.shape[0]
    residuals = (np.dot(X, weights) + bias) - y
    squared_error_total = np.sum(np.square(residuals))
    scale = 1 / (2 * n_samples)
    return scale * squared_error_total

def evaluate_model(X_train, X_test, y_train, y_test, model_type='logistic'):
    """Fit a model on the training split and score it on the test split.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training labels.
        y_test: Test labels.
        model_type: ``'logistic'`` or ``'linear'``; selects which fit,
            predict and loss helpers are used.

    Returns:
        A tuple ``(accuracy, loss)``: the fraction of test predictions equal
        to ``y_test`` and the selected model's loss on the test split.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
            Previously an unknown name fell through both branches and failed
            later with an ``UnboundLocalError`` on ``y_pred``.
    """
    if model_type == 'logistic':
        weights, bias = logistic_regression_least_squares(X_train, y_train)
        y_pred = predict_logistic(X_test, weights, bias)
        loss = logistic_loss(X_test, y_test, weights, bias)
    elif model_type == 'linear':
        weights, bias = linear_regression_least_squares(X_train, y_train)
        y_pred = predict_linear(X_test, weights, bias)
        loss = linear_loss(X_test, y_test, weights, bias)
    else:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'logistic' or 'linear'"
        )

    accuracy = np.mean(y_pred == y_test)
    return accuracy, loss

# Train and evaluate model for each case

def train_and_evaluate_class(X, y, class_label, model_type='logistic', *,
                             class_train_size=40, class_test_size=10,
                             other_train_size=80, other_test_size=20,
                             random_state=42):
    """Train and score a one-vs-rest classifier for ``class_label``.

    Samples whose target equals ``class_label`` are labelled 1 and all others
    0. Each group is split separately with ``train_test_split`` so the
    train and test sets contain a fixed number of samples from each group,
    then the pieces are concatenated (target-class rows first).

    Args:
        X: Feature matrix, one sample per row.
        y: Array of original class targets, one per row of ``X``.
        class_label: The target value treated as the positive class.
        model_type: Passed through to ``evaluate_model``.
        class_train_size: Number of positive samples used for training.
        class_test_size: Number of positive samples held out for testing.
        other_train_size: Number of negative samples used for training.
        other_test_size: Number of negative samples held out for testing.
        random_state: Seed passed to both ``train_test_split`` calls.

    Returns:
        The ``(accuracy, loss)`` tuple returned by ``evaluate_model``.
    """
    y_class_vs_others = np.where(y == class_label, 1, 0)

    # Separate the positive (target class) and negative (everything else) rows
    is_target = y_class_vs_others == 1
    X_class = X[is_target]
    y_class = y_class_vs_others[is_target]

    X_other = X[~is_target]
    y_other = y_class_vs_others[~is_target]

    # Integer sizes are absolute sample counts, not fractions
    X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
        X_class, y_class, test_size=class_test_size,
        train_size=class_train_size, random_state=random_state)
    X_train_other, X_test_other, y_train_other, y_test_other = train_test_split(
        X_other, y_other, test_size=other_test_size,
        train_size=other_train_size, random_state=random_state)

    # Merge the per-group pieces into the final training and testing sets
    X_train = np.concatenate((X_train_class, X_train_other), axis=0)
    y_train = np.concatenate((y_train_class, y_train_other))

    X_test = np.concatenate((X_test_class, X_test_other), axis=0)
    y_test = np.concatenate((y_test_class, y_test_other))

    accuracy, loss = evaluate_model(X_train, X_test, y_train, y_test, model_type=model_type)
    return accuracy, loss

# Class I (original target 0) against everything else, scored with both models
print("--- Class I vs. Class II ---")
logistic_accuracy_1, logistic_loss_1 = train_and_evaluate_class(
    X, y, 0, model_type='logistic'
)
linear_accuracy_1, linear_loss_1 = train_and_evaluate_class(
    X, y, 0, model_type='linear'
)
print(
    f"Logistic Regression Accuracy for class 1 vs. others: {logistic_accuracy_1}, Loss: {logistic_loss_1}"
)
print(
    f"Linear Regression Accuracy for class 1 vs. others: {linear_accuracy_1}, Loss: {linear_loss_1}"
)
print("--------------------------------------------------------")

# One vs All approach for classifying IRIS class 2 and class 3

# IRIS class 2 (original target 1) against all other samples
print("--- Class 2 vs. All ---")
# Positive label 1 for class 2, 0 for every other sample
y_class_2_vs_all = np.where(y == 1, 1, 0)

# Random 80/20 split of the whole dataset (not per-class like Class I above)
(X_train_2vsall, X_test_2vsall,
 y_train_2vsall, y_test_2vsall) = train_test_split(
    X, y_class_2_vs_all, test_size=0.2, random_state=42)

logistic_accuracy_2vsall, logistic_loss_2vsall = evaluate_model(
    X_train_2vsall, X_test_2vsall, y_train_2vsall, y_test_2vsall,
    model_type='logistic',
)
linear_accuracy_2vsall, linear_loss_2vsall = evaluate_model(
    X_train_2vsall, X_test_2vsall, y_train_2vsall, y_test_2vsall,
    model_type='linear',
)
print(
    f"Logistic Regression Accuracy for class 2 vs. all: {logistic_accuracy_2vsall}, Loss: {logistic_loss_2vsall}"
)
print(
    f"Linear Regression Accuracy for class 2 vs. all: {linear_accuracy_2vsall}, Loss: {linear_loss_2vsall}"
)
print("--------------------------------------------------------")

# IRIS class 3 (original target 2) against all other samples
print("--- Class 3 vs. All ---")
# Positive label 1 for class 3, 0 for every other sample
y_class_3_vs_all = np.where(y == 2, 1, 0)

# Random 80/20 split of the whole dataset, same seed as the class 2 split
(X_train_3vsall, X_test_3vsall,
 y_train_3vsall, y_test_3vsall) = train_test_split(
    X, y_class_3_vs_all, test_size=0.2, random_state=42)

logistic_accuracy_3vsall, logistic_loss_3vsall = evaluate_model(
    X_train_3vsall, X_test_3vsall, y_train_3vsall, y_test_3vsall,
    model_type='logistic',
)
linear_accuracy_3vsall, linear_loss_3vsall = evaluate_model(
    X_train_3vsall, X_test_3vsall, y_train_3vsall, y_test_3vsall,
    model_type='linear',
)
print(
    f"Logistic Regression Accuracy for class 3 vs. all: {logistic_accuracy_3vsall}, Loss: {logistic_loss_3vsall}"
)
print(
    f"Linear Regression Accuracy for class 3 vs. all: {linear_accuracy_3vsall}, Loss: {linear_loss_3vsall}"
)
print("--------------------------------------------------------")


Original target classes: [0 1 2]
--- Class I vs. Class II ---
Logistic Regression Accuracy for class 1 vs. others: 0.6333333333333333, Loss: 0.589001557943503
Linear Regression Accuracy for class 1 vs. others: 1.0, Loss: 0.0073191142154467445
--------------------------------------------------------
--- Class 2 vs. All ---
Logistic Regression Accuracy for class 2 vs. all: 0.3333333333333333, Loss: 0.7313791101105375
Linear Regression Accuracy for class 2 vs. all: 0.8333333333333334, Loss: 0.0760839857889136
--------------------------------------------------------
--- Class 3 vs. All ---
Logistic Regression Accuracy for class 3 vs. all: 0.6666666666666666, Loss: 0.6179558793166736
Linear Regression Accuracy for class 3 vs. all: 0.9666666666666667, Loss: 0.0348007492666801
--------------------------------------------------------
