In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# -----------------------------
# METRIC CALCULATION FUNCTION
# -----------------------------
def calculate_classification_metrics(true, predicted):
    """Score a set of class predictions against the ground-truth labels.

    Args:
        true: Ground-truth labels.
        predicted: Labels produced by a classifier, in the same order as ``true``.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``. Precision, recall and
        F1 use ``average='weighted'``; with ``zero_division=0`` an undefined
        ratio contributes 0 instead of triggering a warning.
    """
    # The three per-class metrics share the same averaging configuration.
    weighted = {"average": "weighted", "zero_division": 0}
    return (
        accuracy_score(true, predicted),
        precision_score(true, predicted, **weighted),
        recall_score(true, predicted, **weighted),
        f1_score(true, predicted, **weighted),
    )


# -----------------------------
# MODEL LIST
# -----------------------------
# Candidate classifiers, keyed by the display name that ends up in the "Model"
# column of the results table. random_state is pinned to 42 on every estimator
# that is given one here (KNN is constructed without it).
# NOTE(review): max_iter=1000 is presumably there to give the logistic
# regression solver room to converge — confirm against the data.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "SVM": SVC(random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42)
}

# -----------------------------
# RESULTS STORAGE LIST
# -----------------------------
# One dict of metrics per fitted model; reset on every run of this cell so
# re-executing it does not accumulate duplicate rows.
results = []

# -----------------------------
# TRAIN AND EVALUATE ALL MODELS
# -----------------------------
# NOTE(review): X_train, y_train, X_test and y_test are not defined in this
# cell; they must already exist in the kernel — confirm which cell creates them.
for name, model in models.items():
    # Fits in place: the estimator objects stored in `models` are the ones trained.
    model.fit(X_train, y_train)

    # Predict on both splits so train and test scores can be compared per model.
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Each call returns (accuracy, precision, recall, f1) for one split.
    train_acc, train_prec, train_rec, train_f1 = calculate_classification_metrics(y_train, y_train_pred)
    test_acc, test_prec, test_rec, test_f1 = calculate_classification_metrics(y_test, y_test_pred)

    # These keys become the column names of the results table.
    results.append({
        "Model": name,
        "Train Accuracy": train_acc,
        "Train Precision": train_prec,
        "Train Recall": train_rec,
        "Train F1": train_f1,
        "Test Accuracy": test_acc,
        "Test Precision": test_prec,
        "Test Recall": test_rec,
        "Test F1": test_f1
    })

# -----------------------------
# DISPLAY RESULTS AS A TABLE
# -----------------------------
# Build the comparison table from the per-model records and rank the models by
# test accuracy, best first; reset_index(drop=True) discards the pre-sort labels
# so the row number doubles as the rank.
results_df = (
    pd.DataFrame(results)
    .sort_values(by="Test Accuracy", ascending=False)
    .reset_index(drop=True)
)

# Rounding applies to the printed copy only; results_df keeps full precision.
print("\n📊 Classification Model Comparison Results:")
print(results_df.round(4))


In [None]:
# Collects the names of the columns whose absolute correlation with an earlier column exceeds the given threshold and returns them as a set. A handy helper.
def corelation_for_dropping(df,threshold):
    """Find columns that are highly correlated with a column to their left.

    For every pair of columns whose absolute pairwise correlation is strictly
    greater than ``threshold``, the later column (in correlation-matrix order)
    is reported, so the first column of each correlated pair is kept.

    Args:
        df: DataFrame to analyse. Only numeric and boolean columns take part;
            other columns are ignored instead of making ``corr`` raise.
        threshold: Absolute correlation above which a column is flagged.

    Returns:
        A set with the names of the columns that can be dropped.
    """
    # Restrict to numeric/bool columns up front: pandas >= 2.0 no longer skips
    # non-numeric columns in DataFrame.corr() by default and raises instead.
    corr = df.select_dtypes(include=["number", "bool"]).corr()

    # NaN correlations compare as False, so they never flag a column.
    exceeds = corr.abs().to_numpy() > threshold

    # Keep only entries strictly below the diagonal: row x is compared with
    # columns j < x, which is what makes the *later* column of a pair the one
    # that gets reported.
    below_diagonal = np.tril(exceeds, k=-1)

    return set(corr.columns[below_diagonal.any(axis=1)])


In [None]:
def calculate_model_metrics(true,predicted):
    """Compute the standard regression error metrics for a set of predictions.

    Args:
        true: Ground-truth target values.
        predicted: Model predictions, in the same order as ``true``.

    Returns:
        A 4-tuple ``(mae, mse, r2, rmse)`` — note the order: R² comes before
        RMSE.
    """
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    r2 = r2_score(true, predicted)
    # RMSE is the square root of the MSE already computed above; there is no
    # need to run mean_squared_error a second time.
    rmse = np.sqrt(mse)
    return mae, mse, r2, rmse

In [None]:
# Outlier detection function (IQR rule)
def find_outliers_iqr(df, threshold=1.5):
    """Summarise IQR-based outliers for each float64/int64 column of ``df``.

    A value is an outlier when it lies strictly below ``Q1 - threshold * IQR``
    or strictly above ``Q3 + threshold * IQR``.

    Args:
        df: DataFrame to inspect. Only columns selected by
            ``select_dtypes(include=["float64", "int64"])`` are analysed.
        threshold: Multiplier applied to the IQR to place the bounds.

    Returns:
        A DataFrame with one column per analysed column and the rows
        ``outlier_count``, ``outlier_percentage``, ``lower_bound`` and
        ``upper_bound``. Despite its name, ``outlier_percentage`` is a
        fraction in ``[0, 1]`` (count divided by the number of rows); it is
        ``0.0`` for an empty frame.
    """
    n_rows = df.shape[0]
    outlier_summary = {}
    numeric_cols = df.select_dtypes(include=["float64", "int64"]).columns

    for col in numeric_cols:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1

        lower_bound = q1 - threshold * iqr
        upper_bound = q3 + threshold * iqr

        # Count directly from the boolean mask; NaN values compare as False
        # on both sides and are therefore never counted as outliers.
        is_outlier = (df[col] < lower_bound) | (df[col] > upper_bound)
        outlier_count = int(is_outlier.sum())

        outlier_summary[col] = {
            "outlier_count": outlier_count,
            # Guard the 0 / 0 case: an empty frame has no outliers.
            "outlier_percentage": outlier_count / n_rows if n_rows else 0.0,
            "lower_bound": lower_bound,
            "upper_bound": upper_bound,
        }
    return pd.DataFrame(outlier_summary)


In [None]:
# Remove the rows whose value in the target column is an outlier
def remove_outliers_from_column(df, target_col, threshold=1.5):
    """Return the rows of ``df`` whose ``target_col`` value is not an IQR outlier.

    Args:
        df: Source DataFrame; it is not modified.
        target_col: Name of the column the IQR rule is applied to.
        threshold: Multiplier applied to the IQR to place the bounds.

    Returns:
        The subset of ``df`` where ``target_col`` lies within
        ``[Q1 - threshold * IQR, Q3 + threshold * IQR]``, bounds included.
        Rows with a missing value in ``target_col`` are excluded as well,
        because the range check is False for NaN.
    """
    values = df[target_col]
    first_quartile, third_quartile = values.quantile(0.25), values.quantile(0.75)
    whisker = threshold * (third_quartile - first_quartile)

    # between() is inclusive on both ends, matching a >= / <= pair.
    in_range = values.between(first_quartile - whisker, third_quartile + whisker)
    return df[in_range]

In [None]:
# Remove the rows that contain an outlier in any numeric column
def remove_outliers_from_all_column(df,threshold=1.5):
    """Drop every row that is an IQR outlier in any float64/int64 column.

    Args:
        df: Source DataFrame; it is not modified.
        threshold: Multiplier applied to the IQR to place the bounds.

    Returns:
        A new DataFrame holding only the rows whose value in every analysed
        column lies within ``[Q1 - threshold * IQR, Q3 + threshold * IQR]``,
        bounds included.
    """
    filtered = df.copy()

    for column in df.select_dtypes(include=["float64", "int64"]).columns:
        # Quartiles come from the unfiltered input, so the bounds of one column
        # do not depend on rows already removed because of another column.
        first_quartile = df[column].quantile(0.25)
        third_quartile = df[column].quantile(0.75)
        whisker = threshold * (third_quartile - first_quartile)

        # between() is inclusive on both ends; NaN fails the check and is dropped.
        within = filtered[column].between(first_quartile - whisker, third_quartile + whisker)
        filtered = filtered[within]

    return filtered.copy()
