In [1]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Silence every warning for the rest of the session so the printed reports
# stay compact.
# NOTE(review): this is a blanket filter, so it also hides any warnings the
# estimators in later cells might raise — consider narrowing it.
warnings.filterwarnings("ignore")

# Location of the synthetic dataset. The default is the original local file;
# set the SYNTHETIC_DATA_CSV environment variable to run the notebook on
# another machine without editing this cell.
DATA_PATH = os.environ.get(
    "SYNTHETIC_DATA_CSV",
    "C:/Users/jhumu/Regular Use/JupyNote/synthetic_data_ctgan_improved.csv",
)
TARGET_COL = "diagnosis"

# Load your synthetic dataset
df = pd.read_csv(DATA_PATH)

# Fail early with a readable message if the file lacks the label column.
if TARGET_COL not in df.columns:
    raise KeyError(
        f"Column {TARGET_COL!r} not found in {DATA_PATH!r}; "
        f"available columns: {list(df.columns)}"
    )

# Features and target: every column except the label is used as a feature.
X = df.drop(TARGET_COL, axis=1)
y = df[TARGET_COL]

# Split and scale: 80/20 hold-out with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The scaler is fitted on the training rows only and then applied to the
# test rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [2]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier on the standardized features.
# `probability=True` was dropped: this cell only calls `predict`, and the
# option makes `fit` run an additional internal cross-validation to calibrate
# probabilities that are never read. Predicted labels are unaffected.
model = SVC(kernel='linear')
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# NOTE(review): the header below says "Logistic Regression", but the estimator
# fitted above is an SVC with a linear kernel. The text is left untouched so it
# still matches the recorded output; rename it (or switch the estimator) on the
# next full re-run.
print("🔍 Logistic Regression Performance:")
print(classification_report(y_test, y_pred))


🔍 Logistic Regression Performance:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       232
           1       0.98      0.98      0.98       224

    accuracy                           0.98       456
   macro avg       0.98      0.98      0.98       456
weighted avg       0.98      0.98      0.98       456



In [3]:
from sklearn.tree import DecisionTreeClassifier

# Unconstrained decision tree baseline.
# A fixed `random_state` (same seed as the train/test split) makes the fitted
# tree, and therefore the report, repeatable across kernel restarts.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 Decision Tree Performance:")
print(classification_report(y_test, y_pred))


🔍 Decision Tree Performance:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95       232
           1       0.95      0.95      0.95       224

    accuracy                           0.95       456
   macro avg       0.95      0.95      0.95       456
weighted avg       0.95      0.95      0.95       456



In [4]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default hyper-parameters.
# The forest bootstraps rows and samples features at random, so the seed is
# pinned (same value as the train/test split) to make the report repeatable.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 Random Forest Performance:")
print(classification_report(y_test, y_pred))


🔍 Random Forest Performance:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       232
           1       0.99      1.00      0.99       224

    accuracy                           0.99       456
   macro avg       0.99      0.99      0.99       456
weighted avg       0.99      0.99      0.99       456



In [5]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours baseline with library-default settings, trained on the
# standardized training rows and used to label the standardized test rows.
model = KNeighborsClassifier()
y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Header line followed by the per-class report.
print(
    "🔍 KNN Performance:",
    classification_report(y_test, y_pred),
    sep="\n",
)


🔍 KNN Performance:
              precision    recall  f1-score   support

           0       0.89      1.00      0.94       232
           1       0.99      0.87      0.93       224

    accuracy                           0.93       456
   macro avg       0.94      0.93      0.93       456
weighted avg       0.94      0.93      0.93       456



In [6]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier with default C and gamma.
# `probability=True` was dropped: only `predict` is called here, and the
# option makes `fit` run an additional internal cross-validation to calibrate
# probabilities that are never read. Predicted labels are unaffected.
model = SVC(kernel='rbf')
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 SVM (RBF) Performance:")
print(classification_report(y_test, y_pred))


🔍 SVM (RBF) Performance:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       232
           1       0.99      0.98      0.98       224

    accuracy                           0.98       456
   macro avg       0.98      0.98      0.98       456
weighted avg       0.98      0.98      0.98       456



In [7]:
from sklearn.ensemble import AdaBoostClassifier

# NOTE(review): the report recorded under this cell covers 228 test rows,
# whereas the reports recorded for the earlier cells cover 456 — the
# data-loading cell appears to have been re-run on a different file in
# between. Restart the kernel and run all cells so that every model is scored
# on the same split.

# AdaBoost with library-default hyper-parameters.
# The seed is pinned (same value as the train/test split) so repeated runs
# produce the same ensemble and the same report.
model = AdaBoostClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 AdaBoost Performance:")
print(classification_report(y_test, y_pred))


🔍 AdaBoost Performance:
              precision    recall  f1-score   support

           0       0.81      0.81      0.81       138
           1       0.71      0.70      0.70        90

    accuracy                           0.77       228
   macro avg       0.76      0.76      0.76       228
weighted avg       0.77      0.77      0.77       228



In [8]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees with library-default hyper-parameters.
# The seed is pinned (same value as the train/test split) so repeated runs
# produce the same ensemble and the same report.
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 Gradient Boosting Performance:")
print(classification_report(y_test, y_pred))


🔍 Gradient Boosting Performance:
              precision    recall  f1-score   support

           0       0.83      0.85      0.84       138
           1       0.76      0.73      0.75        90

    accuracy                           0.80       228
   macro avg       0.79      0.79      0.79       228
weighted avg       0.80      0.80      0.80       228



In [9]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes baseline: fit on the standardized training rows, then
# label the standardized test rows.
model = GaussianNB()
y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Header line followed by the per-class report.
print(
    "🔍 Gaussian Naive Bayes Performance:",
    classification_report(y_test, y_pred),
    sep="\n",
)


🔍 Gaussian Naive Bayes Performance:
              precision    recall  f1-score   support

           0       0.83      0.83      0.83       138
           1       0.74      0.73      0.74        90

    accuracy                           0.79       228
   macro avg       0.78      0.78      0.78       228
weighted avg       0.79      0.79      0.79       228



In [11]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier with an explicit C=1.0.
# `probability=True` was dropped: only `predict` is called here, and the
# option makes `fit` run an additional internal cross-validation to calibrate
# probabilities that are never read. Predicted labels are unaffected.
model = SVC(kernel='linear', C=1.0)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# NOTE(review): the header calls this "Logistic Regression (via Linear SVM)",
# but the estimator fitted above is an SVC with a linear kernel, not
# scikit-learn's LogisticRegression. The text is left untouched so it still
# matches the recorded output.
print("🔍 Logistic Regression (via Linear SVM) Performance:")
print(classification_report(y_test, y_pred))


🔍 Logistic Regression (via Linear SVM) Performance:
              precision    recall  f1-score   support

           0       0.81      0.84      0.82       138
           1       0.74      0.69      0.71        90

    accuracy                           0.78       228
   macro avg       0.77      0.76      0.77       228
weighted avg       0.78      0.78      0.78       228



In [12]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree (at most 5 levels, at least 5 samples to split).
# A fixed `random_state` (same seed as the train/test split) makes the fitted
# tree, and therefore the report, repeatable across kernel restarts.
model = DecisionTreeClassifier(max_depth=5, min_samples_split=5, random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 Decision Tree Performance:")
print(classification_report(y_test, y_pred))


🔍 Decision Tree Performance:
              precision    recall  f1-score   support

           0       0.81      0.78      0.79       138
           1       0.68      0.72      0.70        90

    accuracy                           0.75       228
   macro avg       0.74      0.75      0.75       228
weighted avg       0.76      0.75      0.76       228



In [13]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 200 depth-limited trees.
# The forest bootstraps rows and samples features at random, so the seed is
# pinned (same value as the train/test split) to make the report repeatable.
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    min_samples_split=5,
    random_state=42,
)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 Random Forest Performance:")
print(classification_report(y_test, y_pred))


🔍 Random Forest Performance:
              precision    recall  f1-score   support

           0       0.82      0.86      0.84       138
           1       0.76      0.71      0.74        90

    accuracy                           0.80       228
   macro avg       0.79      0.78      0.79       228
weighted avg       0.80      0.80      0.80       228



In [14]:
from sklearn.neighbors import KNeighborsClassifier

# Distance-weighted 5-nearest-neighbours classifier, trained on the
# standardized training rows and used to label the standardized test rows.
model = KNeighborsClassifier(n_neighbors=5, weights='distance')
y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Header line followed by the per-class report.
print(
    "🔍 K-Nearest Neighbors Performance:",
    classification_report(y_test, y_pred),
    sep="\n",
)


🔍 K-Nearest Neighbors Performance:
              precision    recall  f1-score   support

           0       0.80      0.80      0.80       138
           1       0.69      0.69      0.69        90

    accuracy                           0.75       228
   macro avg       0.74      0.74      0.74       228
weighted avg       0.75      0.75      0.75       228



In [15]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier with C=2.0 and gamma=0.1.
# `probability=True` was dropped: only `predict` is called here, and the
# option makes `fit` run an additional internal cross-validation to calibrate
# probabilities that are never read. Predicted labels are unaffected.
model = SVC(kernel='rbf', C=2.0, gamma=0.1)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 SVM (RBF Kernel) Performance:")
print(classification_report(y_test, y_pred))


🔍 SVM (RBF Kernel) Performance:
              precision    recall  f1-score   support

           0       0.79      0.80      0.79       138
           1       0.69      0.68      0.68        90

    accuracy                           0.75       228
   macro avg       0.74      0.74      0.74       228
weighted avg       0.75      0.75      0.75       228



In [16]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with 100 boosting rounds and a learning rate of 0.8.
# The seed is pinned (same value as the train/test split) so repeated runs
# produce the same ensemble and the same report.
model = AdaBoostClassifier(n_estimators=100, learning_rate=0.8, random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 AdaBoost Performance:")
print(classification_report(y_test, y_pred))


🔍 AdaBoost Performance:
              precision    recall  f1-score   support

           0       0.81      0.83      0.82       138
           1       0.73      0.70      0.72        90

    accuracy                           0.78       228
   macro avg       0.77      0.77      0.77       228
weighted avg       0.78      0.78      0.78       228



In [17]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees: 200 rounds, learning rate 0.1, trees up to depth 4.
# The seed is pinned (same value as the train/test split) so repeated runs
# produce the same ensemble and the same report.
model = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=4,
    random_state=42,
)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the held-out rows.
print("🔍 Gradient Boosting Performance:")
print(classification_report(y_test, y_pred))


🔍 Gradient Boosting Performance:
              precision    recall  f1-score   support

           0       0.81      0.84      0.82       138
           1       0.74      0.69      0.71        90

    accuracy                           0.78       228
   macro avg       0.77      0.76      0.77       228
weighted avg       0.78      0.78      0.78       228



In [18]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings (this estimator is constructed
# with no arguments, exactly as in the earlier naive Bayes cell).
model = GaussianNB()
y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Build the report first, then print the header and the report on separate lines.
nb_lines = ["🔍 Gaussian Naive Bayes Performance:", classification_report(y_test, y_pred)]
print("\n".join(nb_lines))


🔍 Gaussian Naive Bayes Performance:
              precision    recall  f1-score   support

           0       0.83      0.83      0.83       138
           1       0.74      0.73      0.74        90

    accuracy                           0.79       228
   macro avg       0.78      0.78      0.78       228
weighted avg       0.79      0.79      0.79       228

