In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Load the scikit-learn breast cancer bunch and assemble a single frame:
# the feature columns first, followed by the class label as `target`.
data = load_breast_cancer()
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(target=data.target)
df

In [None]:
# Summary statistics for every numeric column of the frame.
summary_stats = df.describe()
summary_stats

In [None]:
# Pairwise relationship grid, coloured by the class label.
# Plotting every feature column against every other one makes the grid grow
# quadratically (the scikit-learn breast cancer data has 30 features, i.e. a
# 30x30 grid), which is slow to draw and unreadable. Only the first few
# features are shown here; widen the slice to inspect more.
pairplot_features = [col for col in df.columns if col != 'target'][:5]
sns.pairplot(df, vars=pairplot_features, hue='target');

In [None]:
# Correlation heatmap
# The large canvas leaves room for one annotated cell per column pair.
sns.set(rc={"figure.figsize": (40, 40)})
corr_matrix = df.corr()
sns_plot = sns.heatmap(corr_matrix, cmap="coolwarm", annot=True)

In [None]:
# Class balance: number of rows per value of `target`.
sns.set(rc={"figure.figsize": (6, 6)})
sns.countplot(data=df, x="target")

In [None]:
# Separate the label column from the feature matrix.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Random forest with 100 trees; the seed makes training repeatable.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_pred = model.predict(X_test)

In [None]:
# Classification metrics as a nested dict, turned into a frame and flipped
# so that each class / average becomes a row.
report_dict = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report_dict).T
report_df

In [None]:
# Confusion matrix for the held-out test set, drawn as an annotated heatmap.
sns.set(rc={'figure.figsize': (10, 8)})

# Pin the label order so the matrix is always 2x2 and laid out as
# [[TN, FP], [FN, TP]] (rows = true class, columns = predicted class), which
# is the order the hard-coded group names below rely on.
# NOTE(review): this treats class 1 as "positive"; in scikit-learn's breast
# cancer data class 1 is presumably "benign" -- confirm that is the intent.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print(cm)

# Each cell is annotated with its name, raw count and share of all samples.
group_names = ['True Negative', 'False Positive', 'False Negative', 'True Positive']
group_counts = [f"{value:0.0f}" for value in cm.ravel()]
group_percentages = [f"{value:.2%}" for value in cm.ravel() / cm.sum()]
labels = [
    f"{name}\n{count}\n{share}"
    for name, count, share in zip(group_names, group_counts, group_percentages)
]
labels = np.asarray(labels).reshape(2, 2)

# Set the font size for annotations
annot_fontsize = 12
annot_kws = {"fontsize": annot_fontsize}

# Plot the confusion matrix. `fmt=''` is required because the annotations are
# pre-formatted strings; `annot_kws` must be passed explicitly or the font
# size above has no effect.
ax = sns.heatmap(cm, annot=labels, fmt='', annot_kws=annot_kws)
ax.set(xlabel='Predicted label', ylabel='True label', title='Confusion matrix');