In [20]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import pandas as pd
import numpy as np

def _is_text_dtype(dtype):
    """Return True for object columns and for pandas' dedicated string dtype."""
    return pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)


def load_and_prepare_data(sample_frac=0.5, path='train_data.csv', sep=';'):
    """Load the stay-length CSV and convert it into a fully numeric frame.

    Steps, in order: read the file, draw a reproducible random sample, drop
    the identifier columns, collapse the textual ``Stay`` ranges into five
    ordinal classes, impute missing numeric values with the column mean and
    one-hot encode the remaining text columns.

    Args:
        sample_frac: Fraction of rows to keep (sampled with ``random_state=42``).
        path: Location of the CSV file.
        sep: Field delimiter used in the CSV file.

    Returns:
        A ``pandas.DataFrame``. When the input has a textual ``Stay`` column it
        is replaced by integer classes 0-4, and rows whose ``Stay`` value is
        missing or not a known range are removed (a ``UserWarning`` reports
        how many). Text columns are replaced by their dummy columns.
    """
    import warnings

    data = pd.read_csv(path, sep=sep)
    data = data.sample(frac=sample_frac, random_state=42).reset_index(drop=True)
    data = data.drop(columns=['case_id', 'patientid'], errors='ignore')

    # Map Stay to ordinal classes; every range above 40 days is merged into class 4.
    if 'Stay' in data.columns and _is_text_dtype(data['Stay'].dtype):
        stay_mapping = {
            '0-10': 0,
            '11-20': 1,
            '21-30': 2,
            '31-40': 3,
            '41-50': 4,
            '51-60': 4,
            '61-70': 4,
            '71-80': 4,
            '81-90': 4,
            '91-100': 4,
            'More than 100 Days': 4,
        }
        data['Stay'] = data['Stay'].map(stay_mapping)

        # Unmapped values come back as NaN. Labelling them as class 0 would
        # silently invent "0-10 days" targets, so such rows are dropped instead.
        unusable = data['Stay'].isna()
        if unusable.any():
            warnings.warn(
                f"Dropping {int(unusable.sum())} row(s) whose 'Stay' value is "
                "missing or not a known range.",
                stacklevel=2,
            )
            data = data.loc[~unusable].reset_index(drop=True)
        data['Stay'] = data['Stay'].astype(int)

    # Mean imputation only touches numeric columns; text columns are handled
    # by the one-hot encoding below (a missing category yields all-zero dummies).
    data = data.fillna(data.mean(numeric_only=True))
    text_cols = [name for name, dtype in data.dtypes.items() if _is_text_dtype(dtype)]
    return pd.get_dummies(data, columns=text_cols)

def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit *model* on the fit split and return its accuracy on the eval split."""
    model.fit(X_fit, y_fit)
    return accuracy_score(y_eval, model.predict(X_eval))


def main():
    """Train several classifiers on the prepared data and print test accuracy.

    The data is split 70/30 (stratified) into train and test sets and
    standardised with statistics learned from the training part only. KNN
    hyper-parameters are chosen on a validation split carved out of the
    training data, so the test set is used only for the final reported scores.
    """
    from itertools import product

    data = load_and_prepare_data(sample_frac=0.5)
    X = data.drop('Stay', axis=1).values.astype(np.float32)
    y = data['Stay'].values
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=42)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    print("\nTuning KNN...")
    # Model selection must not look at the test set, otherwise the reported
    # "best" accuracy is optimistically biased. Use a held-out slice of the
    # training data for choosing the hyper-parameters instead.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.2, stratify=y_train, random_state=42)
    # Start below any reachable accuracy so the first candidate is always
    # recorded and best_params can never stay None.
    best_val_acc = float('-inf')
    best_params = None
    for k, metric, weights in product([5, 10, 20, 50],
                                      ['euclidean', 'manhattan'],
                                      ['uniform', 'distance']):
        candidate = KNeighborsClassifier(n_neighbors=k, metric=metric,
                                         weights=weights, n_jobs=-1)
        val_acc = _fit_and_score(candidate, X_fit, y_fit, X_val, y_val)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_params = (k, metric, weights)

    # Refit the winning configuration on the whole training set and score it
    # once on the untouched test set.
    k, metric, weights = best_params
    knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights=weights, n_jobs=-1)
    best_acc = _fit_and_score(knn, X_train, y_train, X_test, y_test)
    print(f"Best KNN params: k={k}, metric={metric}, weights={weights}, accuracy={best_acc:.4f}")

    print("\nTesting Decision Tree with class_weight...")
    dt = DecisionTreeClassifier(max_depth=10, min_samples_leaf=1, min_samples_split=2,
                                class_weight='balanced', random_state=42)
    print("Decision Tree Accuracy:", _fit_and_score(dt, X_train, y_train, X_test, y_test))

    print("\nTesting Random Forest with class_weight...")
    rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
    print("Random Forest Accuracy:", _fit_and_score(rf, X_train, y_train, X_test, y_test))

    print("\nTesting XGBoost...")
    # NOTE: the native XGBoost library needs an OpenMP runtime to load
    # (on macOS: `brew install libomp`).
    # `use_label_encoder` is an obsolete argument and is no longer passed;
    # the Stay classes are already consecutive integers starting at 0.
    xgb = XGBClassifier(eval_metric='mlogloss')
    print("XGBoost Accuracy:", _fit_and_score(xgb, X_train, y_train, X_test, y_test))

    print("\nTesting Naive Bayes...")
    nb = GaussianNB()
    print("Naive Bayes Accuracy:", _fit_and_score(nb, X_train, y_train, X_test, y_test))

# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


XGBoostError: 
XGBoost Library (libxgboost.dylib) could not be loaded.
Likely causes:
  * OpenMP runtime is not installed
    - vcomp140.dll or libgomp-1.dll for Windows
    - libomp.dylib for Mac OSX
    - libgomp.so for Linux and other UNIX-like OSes
    Mac OSX users: Run `brew install libomp` to install OpenMP runtime.

  * You are running 32-bit Python on a 64-bit OS

Error message(s): ["dlopen(/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/xgboost/lib/libxgboost.dylib, 0x0006): Library not loaded: @rpath/libomp.dylib\n  Referenced from: <948FC7F9-7446-3923-BB9F-85890E78C765> /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/xgboost/lib/libxgboost.dylib\n  Reason: tried: '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file)"]
