In [None]:
from sklearn.datasets import load_iris
import random
import numpy as np

# Load the Iris dataset: X is the feature matrix, y the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Pair every feature row with its label so that shuffling keeps them aligned.
iris_data = list(zip(X, y))

# Fraction of the samples assigned to the training split.
split_ratio = 0.8

# Seed for the shuffle below, so a fresh "Restart & Run All" reproduces
# exactly the same train/test split (and therefore the same results).
RANDOM_SEED = 42

num_samples = len(iris_data)
num_train_samples = int(num_samples * split_ratio)
num_test_samples = num_samples - num_train_samples

# Shuffle in place with a dedicated, seeded generator: deterministic, and it
# leaves the global `random` state untouched for any other cell.
random.Random(RANDOM_SEED).shuffle(iris_data)

train_data = iris_data[:num_train_samples]
test_data = iris_data[num_train_samples:]

# Unzip the (features, label) pairs back into NumPy arrays.
X_train = np.array([features for features, _label in train_data])
y_train = np.array([label for _features, label in train_data])

X_test = np.array([features for features, _label in test_data])
y_test = np.array([label for _features, label in test_data])

# Sanity check: the two splits partition the dataset.
assert len(X_train) == num_train_samples
assert len(X_test) == num_test_samples
assert len(y_train) == len(X_train) and len(y_test) == len(X_test)


In [None]:
def softmax(x, axis=1):
    """Numerically stable softmax.

    Parameters
    ----------
    x : array-like
        Logits. Typically a 2-D array of shape (num_samples, num_classes);
        a 1-D vector of logits for a single sample is also accepted.
    axis : int, default 1
        Axis along which the values are normalised to sum to 1. For 1-D
        input the only available axis (0) is used regardless of this value.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    logits = np.asanyarray(x)
    if logits.ndim == 1:
        # A lone logit vector has no axis 1; normalise over its single axis.
        axis = 0

    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large logits.
    exp_x = np.exp(logits - np.max(logits, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

def train_logistic_regression(X_train, y_train, learning_rate, num_epochs):
    """Fit multinomial (softmax) logistic regression with batch gradient descent.

    Weights and biases start at zero and are updated once per epoch using the
    gradient of the mean cross-entropy loss over the whole training set.

    Parameters
    ----------
    X_train : array-like of shape (num_samples, num_features)
        Training features.
    y_train : array-like of shape (num_samples,)
        Non-negative integer class labels. Each label is used directly as a
        column index into the weight matrix.
    learning_rate : float
        Step size applied to every gradient update.
    num_epochs : int
        Number of full-batch update steps.

    Returns
    -------
    W : numpy.ndarray of shape (num_features, num_classes)
        Learned weights, where ``num_classes`` is ``max(y_train) + 1``.
    b : numpy.ndarray of shape (num_classes,)
        Learned biases.

    Raises
    ------
    ValueError
        If ``X_train`` is not 2-D, the training set is empty, ``y_train`` does
        not hold one label per sample, or the labels are not non-negative
        integers.
    """
    features = np.asarray(X_train, dtype=float)
    labels = np.asarray(y_train)

    if features.ndim != 2:
        raise ValueError("X_train must be a 2-D array of shape (num_samples, num_features)")
    num_samples, num_features = features.shape
    if num_samples == 0:
        raise ValueError("X_train must contain at least one sample")
    if labels.shape != (num_samples,):
        raise ValueError("y_train must be 1-D with one label per row of X_train")
    if not np.issubdtype(labels.dtype, np.integer):
        raise ValueError("y_train must contain integer class labels")
    if labels.min() < 0:
        raise ValueError("y_train class labels must be non-negative")

    # Size the model from the largest label rather than the count of distinct
    # labels: labels index columns directly, so a gap in the label values
    # (e.g. only classes 0 and 2 present) must not shrink the weight matrix.
    num_classes = int(labels.max()) + 1

    W = np.zeros((num_features, num_classes))
    b = np.zeros(num_classes)

    # One-hot encoding of the targets; constant across epochs, so build it once.
    one_hot = np.zeros((num_samples, num_classes))
    one_hot[np.arange(num_samples), labels] = 1.0

    for _ in range(num_epochs):
        # Forward pass: class probabilities for every training sample.
        probabilities = softmax(np.dot(features, W) + b)

        # Gradient of the mean cross-entropy loss with respect to the logits.
        gradient = (probabilities - one_hot) / num_samples

        # Back-propagate to the parameters and take one descent step.
        dW = np.dot(features.T, gradient)
        db = np.sum(gradient, axis=0)

        W -= learning_rate * dW
        b -= learning_rate * db

    return W, b


# Gradient-descent hyperparameters: step size and number of update steps.
learning_rate, num_epochs = 0.01, 1000

# Fit on the whole training split; W receives the weights and b the biases.
W, b = train_logistic_regression(
    X_train,
    y_train,
    learning_rate=learning_rate,
    num_epochs=num_epochs,
)




In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score


# Number of cross-validation folds.
num_folds = 5

# Stratified splitter; shuffling is seeded so the fold assignment is repeatable.
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

# Validation accuracy of each fold, in the order the splitter yields them.
accuracy_scores = []

for train_index, val_index in skf.split(X_train, y_train):
    # Carve this fold's training and validation subsets out of the training split.
    X_fold_train = X_train[train_index]
    y_fold_train = y_train[train_index]
    X_fold_val = X_train[val_index]
    y_fold_val = y_train[val_index]

    # Train a fresh model on the fold's training portion only.
    W_fold, b_fold = train_logistic_regression(
        X_fold_train, y_fold_train, learning_rate, num_epochs
    )

    # Predict the most probable class for every held-out sample.
    logits_val = X_fold_val.dot(W_fold) + b_fold
    probabilities_val = softmax(logits_val)
    y_pred_val = probabilities_val.argmax(axis=1)

    # Record how well this fold's model did on its validation subset.
    fold_accuracy = accuracy_score(y_fold_val, y_pred_val)
    accuracy_scores.append(fold_accuracy)

# Report the accuracy averaged over all folds.
mean_accuracy = np.mean(accuracy_scores)
print(f'Mean Accuracy: {mean_accuracy * 100:.2f}%')


Mean Accuracy: 93.33%
