In [47]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold


class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iterations=5000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def fit(self, X, y):
        self.num_classes = len(np.unique(y))
        self.theta = np.zeros((self.num_classes, X.shape[1]))
        
        for class_label in range(self.num_classes):
            y_binary = (y == class_label).astype(int)
            
            for _ in range(self.num_iterations):
                z = np.dot(X, self.theta[class_label])
                h = self.sigmoid(z)
                gradient = np.dot(X.T, (h - y_binary)) / y_binary.size
                self.theta[class_label] -= self.learning_rate * gradient
    
    def predict_proba(self, X):
        return self.sigmoid(np.dot(X, self.theta.T))
    
    def predict(self, X):
        probabilities = self.predict_proba(X)
        return np.argmax(probabilities, axis=1)

iris = load_iris()
X = iris.data
y = iris.target

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into training (80%) and testing (20%) sets using a train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# Initialize the k-fold cross-validation
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

accuracies = []

for train_idx, val_idx in kf.split(X_train):
    X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
    y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

    # Initialize and fit the logistic regression model
    model = LogisticRegression(learning_rate=0.1, num_iterations=1000)
    model.fit(X_train_fold, y_train_fold)

    # Make predictions on the validation set
    y_val_pred = model.predict(X_val_fold)

    # Calculate accuracy for this fold
    accuracy = np.mean(y_val_pred == y_val_fold)
    accuracies.append(accuracy)

# Calculate and display the mean accuracy across all folds
mean_accuracy = np.mean(accuracies)
print("Mean Accuracy using k-fold CV:", mean_accuracy * 100)

# Now, evaluate the model's accuracy on the test set using the previously defined train-test split
y_test_pred = model.predict(X_test)
test_accuracy = np.mean(y_test_pred == y_test)
print("Accuracy on the test set:", test_accuracy * 100)

Mean Accuracy using k-fold CV: 82.5
Accuracy on the test set: 90.0
