In [2]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold  # Import KFold from model_selection
from sklearn import datasets

# Load the digits dataset and pull out the feature matrix and class labels.
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

# 5-fold cross-validation; shuffling with a fixed seed keeps the folds reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Each iteration trains on four folds and evaluates on the held-out fifth.
for train_index, test_index in k_fold.split(X_digits, y_digits):
    train_labels, test_labels = y_digits[train_index], y_digits[test_index]

    # Fit the scaler on the training fold only, then apply that same scaling to
    # the held-out fold. Fitting it on the full dataset before splitting would
    # let the held-out samples' min/max influence the features the model is
    # trained on (preprocessing data leakage).
    scaler = MinMaxScaler()
    train_features = scaler.fit_transform(X_digits[train_index])
    test_features = scaler.transform(X_digits[test_index])

    # A fresh model per fold so nothing learned carries over between folds.
    logreg = LogisticRegression(max_iter=600, random_state=42)
    logreg.fit(train_features, train_labels)

    # Predict on the held-out fold.
    predictions = logreg.predict(test_features)

    # Report this fold's accuracy.
    accuracy = accuracy_score(test_labels, predictions)
    print(f'Accuracy: {accuracy}')

Accuracy: 0.9666666666666667
Accuracy: 0.9777777777777777
Accuracy: 0.9610027855153204
Accuracy: 0.9610027855153204
Accuracy: 0.9442896935933147


In [4]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold
from sklearn import datasets

# Load the digits dataset and pull out the feature matrix and class labels.
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

# 3-fold cross-validation; shuffling with a fixed seed keeps the folds reproducible.
k_fold = KFold(n_splits=3, shuffle=True, random_state=42)

# Per-fold metrics, collected so they can be averaged after the loop.
accuracies = []
precisions = []
recalls = []
f1_scores = []
conf_matrices = []

# Each iteration trains on two folds and evaluates on the held-out third.
for train_index, test_index in k_fold.split(X_digits, y_digits):
    train_labels, test_labels = y_digits[train_index], y_digits[test_index]

    # Fit the scaler on the training folds only, then apply that same scaling
    # to the held-out fold. Fitting it on the full dataset before splitting
    # would let the held-out samples' min/max influence the features the model
    # is trained on (preprocessing data leakage).
    scaler = MinMaxScaler()
    train_features = scaler.fit_transform(X_digits[train_index])
    test_features = scaler.transform(X_digits[test_index])

    # A fresh model per fold so nothing learned carries over between folds.
    logreg = LogisticRegression(max_iter=600, random_state=42)
    logreg.fit(train_features, train_labels)

    # Predict on the held-out fold.
    predictions = logreg.predict(test_features)

    # Score the fold. Precision, recall and F1 use average='weighted', i.e.
    # per-class scores weighted by each class's support in the held-out fold.
    accuracy = accuracy_score(test_labels, predictions)
    precision = precision_score(test_labels, predictions, average='weighted')
    recall = recall_score(test_labels, predictions, average='weighted')
    f1 = f1_score(test_labels, predictions, average='weighted')
    confusion_mat = confusion_matrix(test_labels, predictions)

    # Store metrics for this fold.
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    conf_matrices.append(confusion_mat)

    # Print the metrics for this fold.
    print(f'Fold Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')
    print(f'Confusion Matrix:\n{confusion_mat}\n')

# Unweighted mean of each metric across the folds.
avg_accuracy = sum(accuracies) / len(accuracies)
avg_precision = sum(precisions) / len(precisions)
avg_recall = sum(recalls) / len(recalls)
avg_f1 = sum(f1_scores) / len(f1_scores)

# Print average metrics
print(f'Average Accuracy: {avg_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1:.4f}')


Fold Accuracy: 0.9666, Precision: 0.9674, Recall: 0.9666, F1 Score: 0.9667
Confusion Matrix:
[[55  0  0  0  0  0  0  0  0  0]
 [ 0 52  2  0  0  0  0  0  1  2]
 [ 0  0 52  0  0  0  0  0  0  0]
 [ 0  0  0 54  0  1  0  0  2  0]
 [ 0  1  0  0 63  0  0  0  0  0]
 [ 0  0  0  0  0 69  1  0  0  3]
 [ 0  0  0  0  0  1 56  0  0  0]
 [ 0  0  0  0  0  0  0 61  0  1]
 [ 0  1  0  0  0  1  0  0 51  0]
 [ 0  0  0  0  0  1  0  0  2 66]]

Fold Accuracy: 0.9616, Precision: 0.9641, Recall: 0.9616, F1 Score: 0.9622
Confusion Matrix:
[[54  0  0  0  0  0  0  0  0  0]
 [ 0 61  0  0  0  0  0  0  2  0]
 [ 0  1 55  0  0  0  0  0  0  0]
 [ 0  0  0 64  0  0  0  0  4  1]
 [ 0  0  0  0 52  0  0  1  1  0]
 [ 0  0  0  1  0 53  0  0  0  3]
 [ 0  3  0  0  0  0 72  0  0  0]
 [ 0  0  0  0  0  0  0 61  0  0]
 [ 0  4  0  0  0  0  0  0 53  0]
 [ 0  1  0  0  0  0  0  0  1 51]]

Fold Accuracy: 0.9583, Precision: 0.9587, Recall: 0.9583, F1 Score: 0.9581
Confusion Matrix:
[[68  0  0  0  1  0  0  0  0  0]
 [ 0 57  0  1  0  0  1  

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold
from sklearn import datasets

# Load the digits dataset and pull out the feature matrix and class labels.
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

# Split the raw (unscaled) data 50/50 into training and test sets first, so the
# test half stays unseen by every fitted step, including the scaler.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_digits, y_digits, test_size=0.50, random_state=42
)

# Fit the scaler on the training half only and reuse it for the test half.
# Fitting it on the full dataset before splitting would let the test samples'
# min/max influence the features the models are trained on (data leakage).
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole dataset scaled with the train-fitted scaler; kept so the name remains
# defined for any other cell that refers to it.
X_digits_normalized = scaler.transform(X_digits)

# All four classifiers below are trained and scored on the same split, so their
# accuracies are directly comparable.

# Logistic Regression
logreg = LogisticRegression(max_iter=600, random_state=42)
logreg.fit(X_train, y_train)
predictions_logreg = logreg.predict(X_test)
accuracy_logreg = accuracy_score(y_test, predictions_logreg)
print(f'Logistic Regression Accuracy: {accuracy_logreg:.4f}')

# Random Forest
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
predictions_rf = rf_classifier.predict(X_test)
accuracy_rf = accuracy_score(y_test, predictions_rf)
print(f'Random Forest Accuracy: {accuracy_rf:.4f}')

# Support Vector Machine (SVM)
svm_classifier = SVC(random_state=42)
svm_classifier.fit(X_train, y_train)
predictions_svm = svm_classifier.predict(X_test)
accuracy_svm = accuracy_score(y_test, predictions_svm)
print(f'SVM Accuracy: {accuracy_svm:.4f}')

# k-Nearest Neighbors (k-NN), using the estimator's default settings
knn_classifier = KNeighborsClassifier()
knn_classifier.fit(X_train, y_train)
predictions_knn = knn_classifier.predict(X_test)
accuracy_knn = accuracy_score(y_test, predictions_knn)
print(f'k-NN Accuracy: {accuracy_knn:.4f}')


Logistic Regression Accuracy: 0.9588
Random Forest Accuracy: 0.9577
SVM Accuracy: 0.9722
k-NN Accuracy: 0.9744
