In [1]:
# Install any libraries if need be. Comment out after installing
# !pip install xgboost

# ***Load data***

In [2]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from google.colab import files

# Prompt for a file through Colab's upload helper. The loading cell below treats
# the result as a dict keyed by filename, so the name `uploaded` must be kept.
uploaded = files.upload()

Saving breast-cancer-wisconsin.csv to breast-cancer-wisconsin.csv


In [25]:
import pandas as pd
import io

def _read_csv_source(source):
  """Read a CSV into a DataFrame from one of the supported source kinds.

  Parameters
  ----------
  source : str, file-like object, or bytes
      - str: treated as a path and handed straight to ``pd.read_csv``.
      - object with a ``read`` method (e.g. an uploaded-file object or a
        BytesIO): its contents are read and wrapped in a BytesIO.
      - bytes: wrapped in a BytesIO (the kind of value this notebook expects
        from Colab's ``files.upload()`` dict).

  Returns
  -------
  pandas.DataFrame

  Raises
  ------
  ValueError
      If ``source`` is none of the supported kinds.
  """
  if isinstance(source, str):
    return pd.read_csv(source)
  if hasattr(source, 'read'):
    return pd.read_csv(io.BytesIO(source.read()))
  if isinstance(source, bytes):
    return pd.read_csv(io.BytesIO(source))
  raise ValueError("Unsupported file source type.")


# A cancelled upload leaves nothing to load; fail with a clear message instead
# of an IndexError from indexing into an empty list of keys.
if not uploaded:
  raise ValueError("No file was uploaded; re-run the upload cell and choose a CSV file.")

# Only the first uploaded file is used.
filename = next(iter(uploaded))
df = _read_csv_source(uploaded[filename])


# ***Recode Class from 2 and 4 to 0 and 1, as XGBoost requires class labels that start at 0***

In [26]:
# Recode the target column: label 2 becomes 0 and label 4 becomes 1.
# Values that are neither 2 nor 4 pass through `replace` unchanged.
label_recode = {2: 0, 4: 1}
df['Class'] = df['Class'].replace(label_recode)

# ***Set up x & y, scale x, and split the data for training and testing***

In [27]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Use all numeric features to train the models; 'Class' is the target.
# NOTE(review): every numeric column other than 'Class' becomes a feature,
# including any ID-like or label-derived column the CSV may contain - confirm
# the column list before trusting the scores below.
features = df.select_dtypes(include='number').drop('Class', axis=1)
y = df['Class']

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on the full data leaks test information).
x_train, x_test, y_train, y_test = train_test_split(
  features, y, test_size=0.25, random_state=5
)

# Fit the scaler on the training rows only, then apply that same transform
# to both splits.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Keep `x` available as the scaled full feature matrix, as it was before,
# now expressed with the training-set statistics.
x = scaler.transform(features)

# ***Create and fit Logistic Regression***

In [28]:
from sklearn.linear_model import LogisticRegression
# Build a logistic-regression classifier with default settings and train it
# on the training split (fit returns the fitted estimator).
lgr = LogisticRegression().fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = lgr.predict(x_test)
print(
  f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}",
  f"Accuracy: {accuracy_score(y_test, y_pred)}",
  f"Classification report:\n{classification_report(y_test, y_pred)}",
  sep="\n",
)

Confusion Matrix: 
[[113   0]
 [  0  62]]
Accuracy: 1.0
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       1.00      1.00      1.00        62

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175



# ***Create and fit the K-Neighbors Classifier***


In [29]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 5-nearest-neighbours classifier and train it on the training split
# (fit returns the fitted estimator).
knc = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = knc.predict(x_test)
print(
  f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}",
  f"Accuracy: {accuracy_score(y_test, y_pred)}",
  f"Classification report:\n{classification_report(y_test, y_pred)}",
  sep="\n",
)

Confusion Matrix: 
[[110   3]
 [  0  62]]
Accuracy: 0.9828571428571429
Classification report:
              precision    recall  f1-score   support

           0       1.00      0.97      0.99       113
           1       0.95      1.00      0.98        62

    accuracy                           0.98       175
   macro avg       0.98      0.99      0.98       175
weighted avg       0.98      0.98      0.98       175



# ***Create and fit SVC with kernel = "linear"***

In [30]:
from sklearn.svm import SVC

# Build a support-vector classifier with a linear kernel and train it on the
# training split (fit returns the fitted estimator).
svc = SVC(kernel="linear").fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = svc.predict(x_test)
print(
  f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}",
  f"Accuracy: {accuracy_score(y_test, y_pred)}",
  f"Classification report:\n{classification_report(y_test, y_pred)}",
  sep="\n",
)


Confusion Matrix: 
[[113   0]
 [  0  62]]
Accuracy: 1.0
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       1.00      1.00      1.00        62

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175



# ***Create and fit SVC with kernel = "rbf"***

In [31]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Build a support-vector classifier with an RBF kernel and train it on the
# training split. Note that this rebinds `svc`, so the linear-kernel model
# from the previous cell is no longer reachable under that name.
svc = SVC(kernel="rbf").fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = svc.predict(x_test)
print(
  f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}",
  f"Accuracy: {accuracy_score(y_test, y_pred)}",
  f"Classification report:\n{classification_report(y_test, y_pred)}",
  sep="\n",
)

Confusion Matrix: 
[[113   0]
 [  0  62]]
Accuracy: 1.0
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       1.00      1.00      1.00        62

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175



# ***Create and fit Naive Bayes (GaussianNB)***

In [32]:
from sklearn.naive_bayes import GaussianNB

# Build a Gaussian Naive Bayes classifier with default settings and train it
# on the training split (fit returns the fitted estimator).
gnb = GaussianNB().fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = gnb.predict(x_test)
print(
  f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}",
  f"Accuracy: {accuracy_score(y_test, y_pred)}",
  f"Classification report:\n{classification_report(y_test, y_pred)}",
  sep="\n",
)

Confusion Matrix: 
[[113   0]
 [  0  62]]
Accuracy: 1.0
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       1.00      1.00      1.00        62

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175



# ***Create and fit Decision Trees***

In [33]:
from sklearn.tree import DecisionTreeClassifier
# Build and train a decision tree. The tree's tie-breaking and feature
# permutation are randomized, so pin the seed (same value as the train/test
# split) to make the fitted tree and its scores reproducible across re-runs.
dtc = DecisionTreeClassifier(random_state=5)
dtc.fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = dtc.predict(x_test)
print(f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Classification report:\n{classification_report(y_test, y_pred)}")

Confusion Matrix: 
[[113   0]
 [  0  62]]
Accuracy: 1.0
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       1.00      1.00      1.00        62

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175



# ***Create and fit Random Forest***

In [34]:
from sklearn.ensemble import RandomForestClassifier
# Build and train a random forest. Bootstrap sampling and per-split feature
# selection are random, so pin the seed (same value as the train/test split)
# to make the fitted forest and its scores reproducible across re-runs.
rfc = RandomForestClassifier(random_state=5)
rfc.fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = rfc.predict(x_test)
print(f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Classification report:\n{classification_report(y_test, y_pred)}")

Confusion Matrix: 
[[113   0]
 [  0  62]]
Accuracy: 1.0
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       1.00      1.00      1.00        62

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175



# ***Create and fit XGBoost***

In [35]:
from xgboost import XGBClassifier

# Build an XGBoost classifier with default hyper-parameters and train it on
# the training split. The target was recoded to 0/1 earlier in the notebook.
xgbc = XGBClassifier()
xgbc.fit(x_train, y_train)

# Predict the held-out split and report confusion matrix, accuracy and the
# per-class precision/recall/F1 table.
y_pred = xgbc.predict(x_test)
print(
  f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}",
  f"Accuracy: {accuracy_score(y_test, y_pred)}",
  f"Classification report:\n{classification_report(y_test, y_pred)}",
  sep="\n",
)

Confusion Matrix: 
[[113   0]
 [  0  62]]
Accuracy: 1.0
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       1.00      1.00      1.00        62

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175

