### Import libraries and dataset

In [2]:
pip install seaborn

Collecting seaborn
  Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.3/293.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: seaborn
Successfully installed seaborn-0.12.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/homebrew/Cellar/jupyterlab/3.6.3/libexec/bin/python3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

In [3]:
cancer_df = pd.DataFrame(np.c_[cancer['data'], cancer['target']], columns = np.append(cancer['feature_names'], 'target'))
cancer_df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


### Prepare the dataset for training and testing

In [4]:
# Write code here to generate the train and test sets. (1 point)

X = cancer_df.drop('target', axis=1)  # Features
y = cancer_df['target']  # Target variable

# Splitting the dataset into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Normalize the data
StandardScaler rescales the data to have a mean of 0 and a standard deviation of 1.

In [6]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Train and test the model 

In [8]:
# Train the model
from sklearn.svm import SVC

svm_model = SVC()
svm_model.fit(X_train, y_train)

# Test the model
y_pred = svm_model.predict(X_test)

### Evaluating the model

### Generate the confusion matrix

In [9]:
from sklearn.metrics import confusion_matrix

# Generate the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[41  2]
 [ 0 71]]


### Print the classification report

In [10]:
from sklearn.metrics import classification_report

# Evaluate the model
classification_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(classification_report)

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.95      0.98        43
         1.0       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



### Improve the model by hyperparameter tuning. Use GridSearchCV.

In [26]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': [0.01, 0.1, 1, 10],
    'degree': [2, 3, 4],
    'coef0': [0.0, 0.5, 1.0]
}

grid_search = GridSearchCV(SVC(), param_grid, cv=5)

grid_search.fit(X_train, y_train)

### Show the best parameters found and use them to generate the predictions.

In [27]:
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Score:", best_score)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

Best Parameters: {'C': 0.1, 'coef0': 0.0, 'degree': 2, 'gamma': 0.01, 'kernel': 'linear'}
Best Score: 0.9736263736263737


### Display the confusion matrix 

In [28]:
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[41  2]
 [ 0 71]]


### Display the classification report

In [29]:
from sklearn.metrics import confusion_matrix, classification_report

report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.95      0.98        43
         1.0       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



### This time, use the Multilayer Perceptron classifier for the breast cancer dataset.

### Normalize the data

In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Train and test the Multilayer Perceptron classifier.

In [6]:
mlp = MLPClassifier(random_state=42, max_iter=1000, learning_rate='adaptive', solver='adam')
mlp.fit(X_train_scaled, y_train)
y_pred = mlp.predict(X_test_scaled)

### Print the classification report

In [7]:
classification_report_orig = classification_report(y_test, y_pred)
print("Classification Report (Original Model):")
print(classification_report_orig)

Classification Report (Original Model):
              precision    recall  f1-score   support

         0.0       0.98      0.95      0.96        43
         1.0       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



### Improve the model by hyperparameter tuning. Use GridSearchCV.

In [10]:
param_grid = {
    'activation': ['relu', 'tanh'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant']
}

### Use the best parameters found to generate new predictions.

In [11]:
mlp = MLPClassifier(random_state=42, max_iter=1000, learning_rate='adaptive', solver='adam')
mlp.fit(X_train_scaled, y_train)
y_pred = mlp.predict(X_test_scaled)

In [12]:
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test_scaled)

### Print the classification report.

In [13]:
classification_report_best = classification_report(y_test, y_pred_best)
print("Classification Report (Best Model):")
print(classification_report_best)

Classification Report (Best Model):
              precision    recall  f1-score   support

         0.0       0.98      0.95      0.96        43
         1.0       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



### Conclusion
After generating the results for both SVM and MLP, what can you say about the results? (2 points)

In [None]:
Based on the classification reports provided for both SVM and MLP models:

SVM:

- The SVM model achieved an overall accuracy of 0.98, indicating that it classified the breast cancer samples with high accuracy.
- The precision for both classes (0 and 1) is quite high, with 1.0 precision for class 0 and 0.97 precision for class 1. This indicates a low number of false positives.
- The recall is also high for both classes, with 0.95 recall for class 0 and 1.0 recall for class 1. This indicates a low number of false negatives.
- The F1-scores are high for both classes, with 0.98 for class 0 and 0.99 for class 1. This suggests a good balance between precision and recall.
- Overall, the SVM model performed very well, with high accuracy, precision, recall, and F1-scores for both classes.

MLP:

- The original MLP model achieved an overall accuracy of 0.97, indicating a high level of accuracy in classifying the breast cancer samples.
- Similar to the SVM model, the MLP model achieved high precision, recall, and F1-scores for both classes.
- The precision for both classes is high, with 0.98 precision for class 0 and 0.97 precision for class 1.
- The recall is also high for both classes, with 0.95 recall for class 0 and 0.99 recall for class 1.
- The F1-scores are also high for both classes, with 0.96 for class 0 and 0.98 for class 1.
- The MLP model demonstrates good performance in terms of accuracy, precision, recall, and F1-scores.
In conclusion, both the SVM and MLP models performed very well in classifying the breast cancer samples.