# Lab 2 - Logistic regression on Iris using k-fold cross validation

In [1]:
# Import the required libraries
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score, cross_validate

In [2]:
# Fetch the Iris dataset bundled with scikit-learn and unpack it into the
# feature matrix (X) and the target label vector (y)
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Build the logistic regression classifier; max_iter is set to 1000 to give
# the solver more iterations to converge
log_reg = LogisticRegression(
    max_iter=1000,
)

In [4]:
# Cross-validation splitter: 5 folds, with the samples shuffled before they
# are split; the fixed random_state keeps the shuffle reproducible
k_fold = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [5]:
# Evaluate the model on every split produced by k_fold. No `scoring` argument
# is passed, so the estimator's default score is used (reported as accuracy)
scores = cross_val_score(estimator=log_reg, X=X, y=y, cv=k_fold)

# Show the score obtained on each individual fold
print("Accuracy for each fold:", scores)

Accuracy for each fold: [1.         1.         0.93333333 0.96666667 0.96666667]


In [6]:
# Summarise the per-fold scores: their average and their spread across folds
mean_accuracy = scores.mean()
accuracy_std = scores.std()
print("Mean Accuracy:", mean_accuracy)
print("Standard Deviation:", accuracy_std)

Mean Accuracy: 0.9733333333333334
Standard Deviation: 0.024944382578492935


In [7]:
# Define scoring metrics: display name -> scikit-learn scorer string.
# The *_macro scorers average the per-class score with equal class weight.
scoring = {'accuracy': 'accuracy',
           'precision': 'precision_macro',
           'recall': 'recall_macro',
           'f1': 'f1_macro'}

# Evaluate all metrics in a single cross-validation pass. cross_validate fits
# the model once per fold and applies every scorer to it, instead of refitting
# the same folds once per metric as repeated cross_val_score calls would.
# The results live in their own variable so the accuracy `scores` array from
# the earlier cell is not overwritten; otherwise re-running that cell's
# summary would report F1 statistics under the "Mean Accuracy" label.
cv_results = cross_validate(log_reg, X, y, cv=k_fold, scoring=scoring)

# Report per-fold values, mean and standard deviation for each metric
for metric in scoring:
    metric_scores = cv_results[f"test_{metric}"]  # per-fold test scores
    print(f"{metric.capitalize()} for each fold:", metric_scores)
    print(f"Mean {metric.capitalize()}: {metric_scores.mean()}")
    print(f"Standard Deviation: {metric_scores.std()}")
    print()

Accuracy for each fold: [1.         1.         0.93333333 0.96666667 0.96666667]
Mean Accuracy: 0.9733333333333334
Standard Deviation: 0.024944382578492935

Precision for each fold: [1.         1.         0.93333333 0.97435897 0.97222222]
Mean Precision: 0.9759829059829059
Standard Deviation: 0.02445161570739294

Recall for each fold: [1.         1.         0.93333333 0.96666667 0.97222222]
Mean Recall: 0.9744444444444443
Standard Deviation: 0.024745619390355685

F1 for each fold: [1.         1.         0.92592593 0.96912281 0.97101449]
Mean F1: 0.9732126451394187
Standard Deviation: 0.027176155815727295

