In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer

In [2]:
data = load_breast_cancer()

In [3]:
xData = data['data']
yData = data['target']

In [37]:
np.unique(yData, return_counts=True)

(array([0, 1]), array([212, 357]))

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [7]:
xTrain, xTest, yTrain, yTest = train_test_split(xData, yData, stratify=yData, random_state=42)

In [8]:
import xgboost as xgb
# pip install xgboost

In [13]:
model = xgb.XGBClassifier()
model.fit(xTrain, yTrain)
y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.98      0.92      0.95        53
           1       0.96      0.99      0.97        90

    accuracy                           0.97       143
   macro avg       0.97      0.96      0.96       143
weighted avg       0.97      0.97      0.96       143



In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [16]:
model = RandomForestClassifier()
model.fit(xTrain, yTrain)
y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.96      0.92      0.94        53
           1       0.96      0.98      0.97        90

    accuracy                           0.96       143
   macro avg       0.96      0.95      0.95       143
weighted avg       0.96      0.96      0.96       143



In [17]:
model = DecisionTreeClassifier()
model.fit(xTrain, yTrain)
y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.89      0.92      0.91        53
           1       0.95      0.93      0.94        90

    accuracy                           0.93       143
   macro avg       0.92      0.93      0.93       143
weighted avg       0.93      0.93      0.93       143



In [18]:
from sklearn.svm import SVC

In [19]:
model = SVC()
model.fit(xTrain, yTrain)
y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.96      0.83      0.89        53
           1       0.91      0.98      0.94        90

    accuracy                           0.92       143
   macro avg       0.93      0.90      0.92       143
weighted avg       0.93      0.92      0.92       143



In [20]:
from sklearn.model_selection import GridSearchCV

In [21]:
parameters = {
    'kernel':('linear', 'rbf'),
    'C':[1, 10],
    'gamma': ["auto", "scale"]
}

In [33]:
model = SVC()
model

In [22]:
clf = GridSearchCV(model, parameters)

In [24]:
clf.fit(xTrain, yTrain)

In [29]:
best_params = clf.best_params_

In [30]:
best_params

{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}

In [34]:
model = SVC(**best_params)

In [35]:
model

In [36]:
model.fit(xTrain, yTrain)
y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.98      0.89      0.93        53
           1       0.94      0.99      0.96        90

    accuracy                           0.95       143
   macro avg       0.96      0.94      0.95       143
weighted avg       0.95      0.95      0.95       143



In [38]:
xTrain.shape

(426, 30)

In [39]:
np.unique(yTrain, return_counts=True)

(array([0, 1]), array([159, 267]))

In [76]:
model = DecisionTreeClassifier()
model.fit(xTrain, yTrain)
y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.84      0.92      0.88        53
           1       0.95      0.90      0.93        90

    accuracy                           0.91       143
   macro avg       0.90      0.91      0.90       143
weighted avg       0.91      0.91      0.91       143



In [77]:
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek
# pip install imblearn

In [78]:
ov = SMOTE(random_state=42)

In [79]:
xTrain_resample, yTrain_resample = ov.fit_resample(xTrain, yTrain)

In [80]:
xTrain_resample.shape

(534, 30)

In [81]:
np.unique(yTrain_resample, return_counts=True)

(array([0, 1]), array([267, 267]))

In [82]:
model = DecisionTreeClassifier()
model.fit(xTrain_resample, yTrain_resample)

y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.92      0.89        53
           1       0.95      0.91      0.93        90

    accuracy                           0.92       143
   macro avg       0.91      0.92      0.91       143
weighted avg       0.92      0.92      0.92       143



In [83]:
ov = SMOTETomek(random_state=42)

In [84]:
xTrain_resample, yTrain_resample = ov.fit_resample(xTrain, yTrain)

In [85]:
np.unique(yTrain_resample, return_counts=True)

(array([0, 1]), array([261, 261]))

In [86]:
model = DecisionTreeClassifier()
model.fit(xTrain_resample, yTrain_resample)

y_pred = model.predict(xTest)
print(classification_report(yTest, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.92      0.92        53
           1       0.96      0.96      0.96        90

    accuracy                           0.94       143
   macro avg       0.94      0.94      0.94       143
weighted avg       0.94      0.94      0.94       143

