<a href="https://colab.research.google.com/github/cboomus/SAT5114/blob/main/Small_Project_2_SAT5114_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ChatGPT was used for generation and improvement of code for this assignment

# Import the datasets and svm (Support Vector Machine) modules from the scikit-learn (sklearn) library
from sklearn import datasets, svm

# Import from sklearn module model_selection train_test_split function and RandomizedSearchCV class
from sklearn.model_selection import train_test_split, RandomizedSearchCV

# Import the reciprocal (log-uniform) and uniform distributions from the scipy.stats module
from scipy.stats import reciprocal, uniform

# Import from sklearn metrics module confusion_matrix function
from sklearn.metrics import confusion_matrix

# Load the breast cancer dataset
cancer = datasets.load_breast_cancer()

# Hold out the test set BEFORE any hyperparameter tuning. Searching on the full
# dataset and splitting afterwards would let the test samples influence the
# chosen hyperparameters (data leakage) and inflate the reported test accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.2, random_state=42
)

# Define the SVM classifier
svm_classifier = svm.SVC()

# Define the hyperparameter distribution to search.
# NOTE: gamma is sampled for every candidate, but SVC ignores it when
# kernel='linear', so the reported gamma is meaningless for a linear winner.
param_dist = {
    'C': reciprocal(0.1, 100),
    'kernel': ['linear', 'rbf'],
    'gamma': reciprocal(0.001, 1),
}

# Create a RandomizedSearchCV object with the specified hyperparameter
# distribution and cross-validation settings (10 sampled candidates, 5 folds)
random_search = RandomizedSearchCV(
    svm_classifier,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
)

# Perform the random search on the training data only
random_search.fit(X_train, y_train)

# Print detailed information about the best estimator
print("Best Hyperparameters:")
print(random_search.best_params_)
print("\nBest Estimator:")
print(random_search.best_estimator_)

# Print the mean cross-validated score of the best estimator
print("\nBest Cross-Validation Accuracy:", random_search.best_score_)

# Evaluate performance of the best SVM classifier on the held-out test dataset.
# With the default refit=True, best_estimator_ has already been refit on the
# whole training split, so no additional fit() call is needed here.
best_svm_classifier = random_search.best_estimator_
test_accuracy = best_svm_classifier.score(X_test, y_test)
print("\nTest Set Accuracy:", test_accuracy)

# Calculate predictions on the test set
y_pred = best_svm_classifier.predict(X_test)

# Calculate confusion matrix. Passing labels explicitly guarantees a 2x2
# matrix (and therefore a 4-value ravel) even if one class is absent from
# y_test or y_pred.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Extract True Negatives, False Positives, False Negatives, and True Positives
# (row-major order of the 2x2 matrix, with label 1 treated as "positive").
# NOTE(review): in scikit-learn's breast cancer dataset label 1 appears to be
# 'benign' (check cancer.target_names), so the rates below treat benign as the
# positive class -- confirm this is the intended clinical reading.
TN, FP, FN, TP = conf_matrix.ravel()

# Calculate sensitivity (true positive rate)
sensitivity = TP / (TP + FN)

# Calculate specificity (true negative rate)
specificity = TN / (TN + FP)

# Print sensitivity and specificity
print("\nSensitivity (True Positive Rate):", sensitivity)
print("Specificity (True Negative Rate):", specificity)


Best Hyperparameters:
{'C': 0.10500232504231348, 'gamma': 0.0011727009450102248, 'kernel': 'linear'}

Best Estimator:
SVC(C=0.10500232504231348, gamma=0.0011727009450102248, kernel='linear')

Best Cross-Validation Accuracy: 0.9507995652848935

Test Set Accuracy: 0.9649122807017544

Sensitivity (True Positive Rate): 0.9859154929577465
Specificity (True Negative Rate): 0.9302325581395349
