In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc, classification_report
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.metrics import RocCurveDisplay
import matplotlib.pyplot as plt
import seaborn as sns
import time


In [None]:

# This cell assumes the feature matrix `X` (a DataFrame) and the labels `y`
# already exist in the kernel; nothing in this notebook defines them. E.g.:
# X = pd.read_csv('your_data.csv')
# y = X.pop('label')

# One seed for every stochastic estimator below, so that Restart & Run All
# reproduces the same reports and curves.
RANDOM_STATE = 42

# Standardize the features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Project the standardized data onto its first two principal components.
pca = PCA(n_components=2, random_state=RANDOM_STATE)
X_pca = pca.fit_transform(X_scaled)

# Scatter plot of the 2-D projection, colored by label.
# NOTE(review): `c=y` presumably requires numeric labels - confirm for your data.
fig, ax = plt.subplots(figsize=(10, 7))
points = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis', edgecolor='k', s=40)
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.set_title('PCA of Dataset')
fig.colorbar(points, ax=ax)
plt.show()

# Classifiers to compare. Every model must expose `predict_proba`, hence
# `probability=True` for SVC. Estimators with internal randomness are seeded.
classifiers = {
    'KNN': KNeighborsClassifier(),
    'LDA': LinearDiscriminantAnalysis(),
    'SVC': SVC(probability=True, random_state=RANDOM_STATE),
    'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE)
}

def train_and_evaluate(classifier, X_train, X_test, y_train, y_test):
    """Fit a binary classifier, time the fit, and compute its ROC curve.

    The classifier is fitted in place on the training data. A classification
    report for the test set and the fit duration are printed.

    Parameters
    ----------
    classifier : estimator exposing ``fit``, ``predict``, ``predict_proba``
        and, once fitted, ``classes_``.
    X_train, X_test : training and test features.
    y_train, y_test : training and test labels (exactly two classes).

    Returns
    -------
    tuple
        ``(fpr, tpr, roc_auc, fit_seconds)`` where ``fit_seconds`` covers only
        the call to ``fit`` (prediction time is not included).

    Raises
    ------
    ValueError
        If the fitted classifier does not have exactly two classes; the
        single-column score used for the ROC curve is only defined then.
    """
    # perf_counter is monotonic and high resolution, unlike wall-clock time.time().
    start_time = time.perf_counter()
    classifier.fit(X_train, y_train)
    fit_seconds = time.perf_counter() - start_time

    classes = classifier.classes_
    if len(classes) != 2:
        raise ValueError(
            f"train_and_evaluate supports binary classification only, "
            f"got {len(classes)} classes"
        )

    y_pred = classifier.predict(X_test)
    # Column 1 of predict_proba is the probability of classes[1]; tell
    # roc_curve explicitly so labels other than {0, 1} / {-1, 1} also work.
    y_proba = classifier.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_proba, pos_label=classes[1])
    roc_auc = auc(fpr, tpr)

    print(f"Classification Report for {classifier.__class__.__name__}:\n", classification_report(y_test, y_pred))
    print(f"Time taken: {fit_seconds} seconds")
    return fpr, tpr, roc_auc, fit_seconds

# Seed shared by the split and the stochastic estimators in this section.
RANDOM_STATE = 42


def _plot_roc_curves(models, X_tr, X_te, y_tr, y_te, title):
    """Fit each model, draw all ROC curves on one figure, return fit times.

    `models` maps a display name to an estimator. Each estimator is passed to
    `train_and_evaluate`, which fits it and prints its report. The result is a
    dict mapping each display name to the fit duration in seconds.
    """
    fit_times = {}
    fig, ax = plt.subplots(figsize=(15, 10))
    for name, model in models.items():
        fpr, tpr, roc_auc, fit_seconds = train_and_evaluate(model, X_tr, X_te, y_tr, y_te)
        ax.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
        fit_times[name] = fit_seconds

    # Diagonal = chance-level reference.
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(title)
    ax.legend(loc='lower right')
    plt.show()
    return fit_times


# Split the raw data first, then fit the scaler and PCA on the training part
# only. Fitting them on the full dataset would leak test-set statistics into
# the features the models are evaluated on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

split_scaler = StandardScaler().fit(X_train_raw)
X_train_full = split_scaler.transform(X_train_raw)
X_test_full = split_scaler.transform(X_test_raw)

split_pca = PCA(n_components=2, random_state=RANDOM_STATE).fit(X_train_full)
X_train = split_pca.transform(X_train_full)
X_test = split_pca.transform(X_test_full)

# ROC curves on the 2-component PCA features. The fit times are collected in
# the same pass, so the models are not refitted later just to be timed.
times = _plot_roc_curves(
    classifiers, X_train, X_test, y_train, y_test,
    'Receiver Operating Characteristic (ROC) Curves',
)

# RFE (Recursive Feature Elimination, without cross-validation) on the
# standardized features, keeping 5 features ranked by a random forest.
selector = RFE(RandomForestClassifier(random_state=RANDOM_STATE), n_features_to_select=5, step=1)
selector = selector.fit(X_train_full, y_train)
X_train_rfe = selector.transform(X_train_full)
X_test_rfe = selector.transform(X_test_full)

# ROC curves on the RFE-selected features.
_plot_roc_curves(
    classifiers, X_train_rfe, X_test_rfe, y_train, y_test,
    'ROC Curves after RFE',
)

# Metrics for the model assumed to be the best one.
# NOTE(review): Random Forest is assumed to be best, not selected from the
# results above - confirm against the reported AUCs.
best_model = RandomForestClassifier(random_state=RANDOM_STATE)
best_model.fit(X_train_rfe, y_train)
y_pred_best = best_model.predict(X_test_rfe)
print("Best Model Classification Report:\n", classification_report(y_test, y_pred_best))

# Training time of each classifier on the PCA features.
print("Training times for each classifier:")
for name, t in times.items():
    print(f"{name}: {t:.4f} seconds")
