<a href="https://colab.research.google.com/github/chandureddy23/Face-Recognition-based-login-system/blob/main/Small_Project_2_write_a_ML_program_to_diagnose_breast_cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Load scikit-learn's bundled breast-cancer dataset.
data = load_breast_cancer()
X = data.data    # feature matrix
y = data.target  # class labels

# Hold out 20% of the samples for the final evaluation; the fixed seed keeps
# the train/test split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate classifiers, keyed by the display name used when reporting.
# The tree ensembles are randomized algorithms, so they are seeded here;
# without a seed the tuned hyperparameters and every reported score would
# change from one run to the next.
classifiers = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'AdaBoost': AdaBoostClassifier(random_state=42),
    'Logistic Regression': LogisticRegression(max_iter=1000),  # raised iteration cap to help the solver converge
    'KNN': KNeighborsClassifier()
}

# Tune, cross-validate and evaluate every candidate classifier.
#
# For each entry in ``classifiers`` this:
#   1. runs an exhaustive 5-fold grid search on the training split,
#   2. prints the winning hyperparameters,
#   3. re-scores the refitted best estimator with 5-fold cross-validation on
#      the training split,
#   4. prints hold-out metrics computed on the test split.

# Hyperparameter grids, keyed by the same display names as ``classifiers``.
param_grids = {
    'Random Forest': {
        'n_estimators': [50, 100, 150],
        'max_depth': [None, 5, 10, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    },
    'AdaBoost': {
        'n_estimators': [50, 100, 150],
        'learning_rate': [0.01, 0.1, 1.0],
    },
    'Logistic Regression': {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
    },
    'KNN': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance'],
    },
}

for name, classifier in classifiers.items():
    print(f"Classifier: {name}")

    # A classifier without a registered grid is searched over an empty grid,
    # i.e. it is evaluated once with its current parameters.
    param_grid = param_grids.get(name, {})

    if name == 'Logistic Regression':
        # Give the solver more iterations to converge. set_params() is the
        # estimator API for changing hyperparameters and rejects unknown
        # parameter names, unlike a plain attribute assignment.
        classifier.set_params(max_iter=10000)

    # Exhaustive search over the grid with 5-fold cross-validation.
    grid_search = GridSearchCV(classifier, param_grid, cv=5)
    grid_search.fit(X_train, y_train)

    # Best parameters
    best_params = grid_search.best_params_
    print("Best Parameters:", best_params)

    # Best estimator (refitted on the whole training split by GridSearchCV)
    best_classifier = grid_search.best_estimator_

    # Perform k-fold cross-validation of the tuned estimator on the training
    # split; cross_val_score fits clones, so best_classifier itself is unchanged.
    cv_scores = cross_val_score(best_classifier, X_train, y_train, cv=5)
    print("Cross-validation scores:", cv_scores)
    print("Average cross-validation score:", np.mean(cv_scores))

    # Evaluate on the held-out test set.
    # NOTE(review): these binary metrics treat label 1 as the positive class.
    # In scikit-learn's breast-cancer dataset label 1 is documented as
    # "benign" - confirm that this is the class the report should focus on.
    y_pred = best_classifier.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_precision = precision_score(y_test, y_pred)
    test_recall = recall_score(y_test, y_pred)
    test_f1_score = f1_score(y_test, y_pred)

    print("Test Accuracy:", test_accuracy)
    print("Precision:", test_precision)
    print("Recall:", test_recall)
    print("F1 Score:", test_f1_score)

    # Sensitivity (true-positive rate, identical to recall) and specificity
    # (true-negative rate). labels=[0, 1] pins the matrix to 2x2 so the
    # unpacking below cannot fail when a class is absent from the test labels
    # or predictions.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
    tn, fp, fn, tp = conf_matrix.ravel()

    # Guard against an empty class so a degenerate split reports 0.0 instead
    # of dividing by zero.
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0
    specificity = tn / (tn + fp) if (tn + fp) else 0.0

    print("Sensitivity:", sensitivity)
    print("Specificity:", specificity)

    print("\n")


Classifier: Random Forest
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 150}
Cross-validation scores: [0.96703297 0.93406593 0.97802198 0.95604396 0.94505495]
Average cross-validation score: 0.956043956043956
Test Accuracy: 0.9649122807017544
Precision: 0.958904109589041
Recall: 0.9859154929577465
F1 Score: 0.9722222222222222


Classifier: AdaBoost
Best Parameters: {'learning_rate': 1.0, 'n_estimators': 150}
Cross-validation scores: [1.         0.97802198 1.         0.97802198 0.93406593]
Average cross-validation score: 0.9780219780219781
Test Accuracy: 0.9736842105263158
Precision: 0.9722222222222222
Recall: 0.9859154929577465
F1 Score: 0.979020979020979


Classifier: Logistic Regression
Best Parameters: {'C': 100}
Cross-validation scores: [0.95604396 0.96703297 0.98901099 0.97802198 0.94505495]
Average cross-validation score: 0.9670329670329672
Test Accuracy: 0.956140350877193
Precision: 0.9459459459459459
Recall: 0.98591549295774