In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [2]:
model_df = pd.read_csv("/Users/limu/Desktop/DS_bootcamp/Project4/clean_brain_tumor.csv")
model_df.head()

Unnamed: 0,Class,Mean,Variance,Standard Deviation,Entropy,Skewness,Kurtosis,Contrast,Energy,ASM,Homogeneity,Dissimilarity,Correlation
0,0,6.535339,619.587845,24.891522,0.109059,4.276477,18.900575,98.613971,0.293314,0.086033,0.530941,4.473346,0.981939
1,0,8.749969,805.957634,28.389393,0.266538,3.718116,14.464618,63.858816,0.475051,0.225674,0.651352,3.220072,0.988834
2,1,7.341095,1143.808219,33.820234,0.001467,5.06175,26.479563,81.867206,0.031917,0.001019,0.268275,5.9818,0.978014
3,1,5.958145,959.711985,30.979219,0.001477,5.677977,33.428845,151.229741,0.032024,0.001026,0.243851,7.700919,0.964189
4,0,7.315231,729.540579,27.010009,0.146761,4.283221,19.079108,174.988756,0.343849,0.118232,0.50114,6.834689,0.972789


In [3]:
x = model_df.drop("Class",axis=1)
y = model_df["Class"]

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state = 11)

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
x_train = pd.DataFrame(x_train_scaled)
x_test = pd.DataFrame(x_test_scaled)

In [4]:
x_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.079106,0.041627,0.179282,0.338734,0.085335,0.015661,0.010516,0.535263,0.30772,0.736859,0.064929,0.947897
1,0.457278,0.229085,0.46159,0.159222,0.007494,0.000715,0.021154,0.344895,0.13822,0.610337,0.087989,0.913132
2,0.185386,0.09118,0.279634,0.455777,0.038068,0.005386,0.010049,0.635749,0.423925,0.776836,0.057195,0.962478
3,0.773765,0.771897,0.874543,0.041022,0.017241,0.002117,0.034329,0.152291,0.034202,0.457881,0.148888,0.962573
4,0.247005,0.11886,0.32365,0.376297,0.031027,0.004595,0.016652,0.569886,0.345673,0.703035,0.081602,0.930127


In [5]:
x_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.523561,0.264672,0.498555,0.172364,0.004241,0.000365,0.021759,0.361706,0.15052,0.607504,0.095809,0.926496
1,0.11248,0.098961,0.292606,0.320284,0.104216,0.021481,0.059149,0.519749,0.291425,0.582679,0.177207,0.872559
2,0.198082,0.315627,0.547409,0.01417,0.093455,0.017468,0.012561,0.074071,0.011335,0.395205,0.113295,0.410851
3,0.129287,0.064829,0.231035,0.338833,0.053542,0.008215,0.015263,0.535527,0.308002,0.698959,0.080807,0.912253
4,0.194521,0.125479,0.333396,0.41179,0.046188,0.006756,0.036077,0.599909,0.380359,0.657766,0.140573,0.87994


RandomForestClassifier

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

np.random.seed(11)

rf = RandomForestClassifier(random_state=11, n_estimators=300)
rf.fit(x_train, y_train)

random_forest_pred = rf.predict(x_test)

accuracy = accuracy_score(y_test, random_forest_pred)
precision = precision_score(y_test, random_forest_pred)
recall = recall_score(y_test, random_forest_pred)
f1 = f1_score(y_test, random_forest_pred)

metrics_rfc = pd.DataFrame({
    'Accuracy': [accuracy],
    'Precision': [precision],
    'Recall': [recall],
    'F1 Score': [f1]
})

print(metrics_rfc)

   Accuracy  Precision    Recall  F1 Score
0  0.992032        1.0  0.982808  0.991329


LogisticRegression

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

np.random.seed(11)

lr = LogisticRegression(random_state=11, C=10)
lr.fit(x_train, y_train)

lr_prediction = lr.predict(x_test)

accuracy = accuracy_score(y_test, lr_prediction)
precision = precision_score(y_test, lr_prediction)
recall = recall_score(y_test, lr_prediction)
f1 = f1_score(y_test, lr_prediction)

metrics_lr = pd.DataFrame({
    'Accuracy': [accuracy],
    'Precision': [precision],
    'Recall': [recall],
    'F1 Score': [f1]
})

rounded_accuracy = round(accuracy * 100, 1)

print(metrics_lr)

   Accuracy  Precision    Recall  F1 Score
0  0.984064   0.994135  0.971347  0.982609


SVC

In [8]:
np.random.seed(11)

model = SVC(C=15, kernel="rbf", random_state=11)
model.fit(x_train, y_train)

svc_prediction = model.predict(x_test)

accuracy = accuracy_score(y_test, svc_prediction)
precision = precision_score(y_test, svc_prediction)
recall = recall_score(y_test, svc_prediction)
f1 = f1_score(y_test, svc_prediction)

metrics_svc = pd.DataFrame({
    'Accuracy': [accuracy],
    'Precision': [precision],
    'Recall': [recall],
    'F1 Score': [f1]
})

rounded_accuracy = round(accuracy * 100, 1)

print(metrics_svc)

   Accuracy  Precision    Recall  F1 Score
0   0.98672   0.994169  0.977077  0.985549


XGboost

In [9]:
np.random.seed(11)

xgb_classifier = xgb.XGBClassifier(random_state=11, n_estimators=300)
xgb_classifier.fit(x_train, y_train)

xgb_prediction = xgb_classifier.predict(x_test)

accuracy = accuracy_score(y_test, xgb_prediction)
precision = precision_score(y_test, xgb_prediction)
recall = recall_score(y_test, xgb_prediction)
f1 = f1_score(y_test, xgb_prediction)

metrics_xgb = pd.DataFrame({
    'Accuracy': [accuracy],
    'Precision': [precision],
    'Recall': [recall],
    'F1 Score': [f1]
})

rounded_accuracy = round(accuracy * 100, 1)

print(metrics_xgb)

   Accuracy  Precision    Recall  F1 Score
0  0.992032   0.994236  0.988539  0.991379


KNeighborsClassifier

In [10]:
from sklearn.neighbors import KNeighborsClassifier

knn_classifier = KNeighborsClassifier()
knn_classifier.fit(x_train, y_train)
knn_prediction = knn_classifier.predict(x_test)

knn_accuracy = accuracy_score(y_test, knn_prediction)
knn_precision = precision_score(y_test, knn_prediction)
knn_recall = recall_score(y_test, knn_prediction)
knn_f1 = f1_score(y_test, knn_prediction)

metrics_knn = pd.DataFrame({
    'Accuracy': [knn_accuracy],
    'Precision': [knn_precision],
    'Recall': [knn_recall],
    'F1 Score': [knn_f1]
})

print("\nKNN Classifier Metrics:")
print(metrics_knn)



KNN Classifier Metrics:
   Accuracy  Precision    Recall  F1 Score
0  0.981408   0.997033  0.962751  0.979592


RidgeClassifier

In [11]:
from sklearn.linear_model import RidgeClassifier

ridge_classifier = RidgeClassifier()
ridge_classifier.fit(x_train, y_train)
ridge_prediction = ridge_classifier.predict(x_test)

ridge_accuracy = accuracy_score(y_test, ridge_prediction)
ridge_precision = precision_score(y_test, ridge_prediction)
ridge_recall = recall_score(y_test, ridge_prediction)
ridge_f1 = f1_score(y_test, ridge_prediction)

metrics_ridge = pd.DataFrame({
    'Accuracy': [ridge_accuracy],
    'Precision': [ridge_precision],
    'Recall': [ridge_recall],
    'F1 Score': [ridge_f1]
})

print("\nRidge Classifier Metrics:")
print(metrics_ridge)


Ridge Classifier Metrics:
   Accuracy  Precision    Recall  F1 Score
0  0.964143        1.0  0.922636  0.959762


DecisionTreeClassifier

In [12]:
from sklearn.tree import DecisionTreeClassifier

dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(x_train, y_train)
dt_prediction = dt_classifier.predict(x_test)

dt_accuracy = accuracy_score(y_test, dt_prediction)
dt_precision = precision_score(y_test, dt_prediction)
dt_recall = recall_score(y_test, dt_prediction)
dt_f1 = f1_score(y_test, dt_prediction)

metrics_dt = pd.DataFrame({
    'Accuracy': [dt_accuracy],
    'Precision': [dt_precision],
    'Recall': [dt_recall],
    'F1 Score': [dt_f1]
})

print("\nDecision Tree Classifier Metrics:")
print(metrics_dt)


Decision Tree Classifier Metrics:
   Accuracy  Precision    Recall  F1 Score
0  0.978752   0.977077  0.977077  0.977077


GaussianNB

In [13]:
from sklearn.naive_bayes import GaussianNB

nb_classifier = GaussianNB()
nb_classifier.fit(x_train, y_train)
nb_prediction = nb_classifier.predict(x_test)

nb_accuracy = accuracy_score(y_test, nb_prediction)
nb_precision = precision_score(y_test, nb_prediction)
nb_recall = recall_score(y_test, nb_prediction)
nb_f1 = f1_score(y_test, nb_prediction)

metrics_nb = pd.DataFrame({
    'Accuracy': [nb_accuracy],
    'Precision': [nb_precision],
    'Recall': [nb_recall],
    'F1 Score': [nb_f1]
})

print("\nNaive Bayes Classifier Metrics:")
print(metrics_nb)


Naive Bayes Classifier Metrics:
   Accuracy  Precision    Recall  F1 Score
0  0.964143    0.98494  0.936963  0.960352


In [14]:
np.random.seed(11)

# Model training and prediction
models = {
    'Random Forest': RandomForestClassifier(random_state=11, n_estimators=300),
    'Logistic Regression': LogisticRegression(random_state=11, C=10),
    'SVM': SVC(C=15, kernel='rbf', random_state=11),
    'XGBoost': xgb.XGBClassifier(random_state=11, n_estimators=300),
    'KNN': KNeighborsClassifier(),
    'Ridge Classifier': RidgeClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Naive Bayes': GaussianNB()
}

results = []

for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    results.append({
        'Model': name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    })

# Convert metrics to DataFrame
metrics_df = pd.DataFrame(results)

print(metrics_df)

# Export as CSV file
metrics_df.to_csv('model_metrics.csv', index=False)

                 Model  Accuracy  Precision    Recall  F1 Score
0        Random Forest  0.992032   1.000000  0.982808  0.991329
1  Logistic Regression  0.984064   0.994135  0.971347  0.982609
2                  SVM  0.986720   0.994169  0.977077  0.985549
3              XGBoost  0.992032   0.994236  0.988539  0.991379
4                  KNN  0.981408   0.997033  0.962751  0.979592
5     Ridge Classifier  0.964143   1.000000  0.922636  0.959762
6        Decision Tree  0.978752   0.977077  0.977077  0.977077
7          Naive Bayes  0.964143   0.984940  0.936963  0.960352
