In [31]:
import pandas as pd

In [32]:
# Read the dataset from a CSV file in the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer="breast_cancer.csv")
df.head()  # head() shows five rows by default

Unnamed: 0,Sample code number,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


In [33]:
# Feature matrix: every column except the first and the last.
# NOTE(review): the preview above suggests the first column is an identifier
# ("Sample code number") and the last one is the label ("Class") - confirm against the CSV.
x = df.iloc[:, 1:-1].to_numpy()
# Target vector: the last column.
y = df.iloc[:, -1].to_numpy()

#### Splitting the dataset for training and testing

In [34]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

#### Applying feature scaling (fitted on the training set, applied to both sets)

In [35]:
from sklearn.preprocessing import StandardScaler
# The scaler is fitted on the training rows only; the held-out rows are then
# transformed with that same fitted scaler rather than being fitted separately.
sc = StandardScaler()
x_train, x_test = sc.fit_transform(x_train), sc.transform(x_test)

#### Dimensionality Reduction

In [36]:
from sklearn.decomposition import PCA
# Project the scaled features onto their first two principal components.
# PCA is unsupervised: its fit_transform ignores any target argument, so the
# labels are not passed in (passing them suggested a supervised projection).
pca = PCA(n_components=2, random_state=42)
x_train = pca.fit_transform(x_train)  # components are learned from the training rows only
x_test = pca.transform(x_test)  # the test rows reuse the projection learned above

#### K-fold cross-validation and model fit

In [38]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Score a logistic-regression classifier with 10-fold cross-validation on the
# training data, then fit that estimator on the whole training set.
# NOTE(review): x_train was scaled and PCA-projected using all training rows
# before this cell, so every validation fold has already influenced the
# preprocessing and the CV score may be slightly optimistic. Wrapping the
# scaler, PCA and classifier in a Pipeline would avoid that - left unchanged here.
model = LogisticRegression()
cvs = cross_val_score(
    estimator=model,
    X=x_train,
    y=y_train,
    scoring="accuracy",
    cv=10,
    n_jobs=-1,
)
model.fit(x_train, y_train)

In [39]:
# Summarise the per-fold accuracies as a mean and a standard deviation, in percent.
cv_mean, cv_std = cvs.mean(), cvs.std()
print("Accuracy: {:.2f} %".format(cv_mean * 100))
print("Standard Deviation: {:.2f} %".format(cv_std * 100))

Accuracy: 97.46 %
Standard Deviation: 2.32 %


In [41]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
# Evaluate the fitted model on the held-out test set.
# scikit-learn metrics take the ground truth first: metric(y_true, y_pred).
# Swapping the two transposes the confusion matrix, exchanging false
# positives with false negatives.
y_pred = model.predict(x_test)
# Rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[103   4]
 [  4  60]]
0.9532163742690059
              precision    recall  f1-score   support

           2       0.96      0.96      0.96       107
           4       0.94      0.94      0.94        64

    accuracy                           0.95       171
   macro avg       0.95      0.95      0.95       171
weighted avg       0.95      0.95      0.95       171



In [27]:
# Disabled experiment: an XGBoost classifier trained and scored on the same
# x_train / x_test arrays as the model above.
# NOTE(review): the class labels in this notebook are 2 and 4 (see the
# classification report); recent xgboost releases are understood to require
# 0-based class labels, so y may need re-encoding before re-enabling - confirm.
# from xgboost import XGBClassifier
# boosting_model = XGBClassifier()
# boosting_model.fit(x_train, y_train)

# y_pred_boost = boosting_model.predict(x_test)
# print(f"Boosting Test Accuracy: {accuracy_score(y_test, y_pred_boost):.4f}")
