In [2]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.kernel_approximation import RBFSampler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [3]:
#Loading the dataset
data = pd.read_csv('HD 1190.csv')

In [4]:
target_column = 'target'
X = data.drop(columns=[target_column])
y = data[target_column]

In [5]:
# Data preprocessing
le = LabelEncoder()
X['sex'] = le.fit_transform(X['sex'])

In [6]:
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Standardizing numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# Model training and evaluation
def train_and_evaluate_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

# Logistic Regression
logistic_regression = LogisticRegression()
train_and_evaluate_model(logistic_regression, X_train_scaled, y_train, X_test_scaled, y_test)

# Random Forest
random_forest = RandomForestClassifier()
train_and_evaluate_model(random_forest, X_train, y_train, X_test, y_test)

# Gradient Boosting
gradient_boosting = GradientBoostingClassifier()
train_and_evaluate_model(gradient_boosting, X_train, y_train, X_test, y_test)

# Support Vector Machine (SVM)
svm_classifier = SVC()
train_and_evaluate_model(svm_classifier, X_train_scaled, y_train, X_test_scaled, y_test)

# Decision Tree
decision_tree = DecisionTreeClassifier()
train_and_evaluate_model(decision_tree, X_train, y_train, X_test, y_test)

# K-Nearest Neighbors (KNN)
knn_classifier = KNeighborsClassifier()
train_and_evaluate_model(knn_classifier, X_train_scaled, y_train, X_test_scaled, y_test)

# Naive Bayes
naive_bayes = GaussianNB()
train_and_evaluate_model(naive_bayes, X_train, y_train, X_test, y_test)

# Multilayer Perceptron (Neural Network)
mlp_classifier = MLPClassifier()
train_and_evaluate_model(mlp_classifier, X_train_scaled, y_train, X_test_scaled, y_test)

# Radial Basis Function (RBF) Kernel SVM
rbf_kernel_svm = SVC(kernel='rbf')
train_and_evaluate_model(rbf_kernel_svm, X_train_scaled, y_train, X_test_scaled, y_test)

# XGBoost
xgboost_classifier = XGBClassifier()
train_and_evaluate_model(xgboost_classifier, X_train, y_train, X_test, y_test)

def model(classifier):
    
    classifier.fit(x_train,y_train)
    prediction = classifier.predict(x_test)
    cv = RepeatedStratifiedKFold(n_splits = 10,n_repeats = 3,random_state = 1)
    print("Accuracy : ",'{0:.2%}'.format(accuracy_score(y_test,prediction)))
    print("Cross Validation Score : ",'{0:.2%}'.format(cross_val_score(classifier,x_train,y_train,cv = cv,scoring = 'roc_auc').mean()))

def model_evaluation(classifier):
    
    # Confusion Matrix
    cm = confusion_matrix(y_test,classifier.predict(x_test))
    names = ['True Neg','False Pos','False Neg','True Pos']
    counts = [value for value in cm.flatten()]
    percentages = ['{0:.2%}'.format(value) for value in cm.flatten()/np.sum(cm)]
    labels = [f'{v1}\n{v2}\n{v3}' for v1, v2, v3 in zip(names,counts,percentages)]
    labels = np.asarray(labels).reshape(2,2)
    sns.heatmap(cm,annot = labels,cmap = colors,fmt ='')
    
    # Classification Report
    print(classification_report(y_test,classifier.predict(x_test)))
    print(f"Accuracy: {accuracy:.2f}")
    model_evaluation(classifier_nb)

Accuracy: 0.8613, Precision: 0.8712, Recall: 0.8779, F1 Score: 0.8745
Accuracy: 0.9454, Precision: 0.9470, Recall: 0.9542, F1 Score: 0.9506
Accuracy: 0.9160, Precision: 0.9173, Recall: 0.9313, F1 Score: 0.9242
Accuracy: 0.8908, Precision: 0.8671, Recall: 0.9466, F1 Score: 0.9051
Accuracy: 0.8866, Precision: 0.9333, Recall: 0.8550, F1 Score: 0.8924
Accuracy: 0.8866, Precision: 0.8714, Recall: 0.9313, F1 Score: 0.9004
Accuracy: 0.8571, Precision: 0.8647, Recall: 0.8779, F1 Score: 0.8712
Accuracy: 0.8908, Precision: 0.8889, Recall: 0.9160, F1 Score: 0.9023
Accuracy: 0.8908, Precision: 0.8671, Recall: 0.9466, F1 Score: 0.9051




NameError: name 'XGBClassifier' is not defined