In [None]:
import os
import sys

# The notebook is executed from the /notebooks folder, so the project root is
# its parent directory. Put it at the front of sys.path (once) so that
# project packages such as `src` can be imported.
project_root = os.path.abspath(os.pardir)
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

print("Project root:", project_root)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from src.dataset import load_dataset

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Build a small experiment set by taking 50 samples per class
# (limit_per_class=50).
load_options = {
    "data_root": "../data/raw",
    "use_autocontrast": True,
    "normalize": True,
    "limit_per_class": 50,
}
X, y, class_names = load_dataset(**load_options)

# Quick sanity check of what was loaded.
print("X shape:", X.shape)  # expected layout: (N, H, W)
print("y shape:", y.shape)
print("Unique labels:", set(y))

In [None]:
# Flatten every 2D image (H, W) into a 1D feature vector of length H*W.
# H and W are kept because later cells reshape vectors back into images.
N, H, W = X.shape
X_flat = np.reshape(X, (N, H * W))  # (N, H*W)

print("X_flat shape:", X_flat.shape)

# Hold out 20% of the samples for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y, test_size=0.2, random_state=42, stratify=y
)

print("Train size:", len(X_train))
print("Test size:", len(X_test))

In [None]:
# Model 1: logistic regression fitted directly on the flattened pixels, with
# no extra preprocessing step, as a reference point for the later models.
#
# `n_jobs` is deliberately not passed: scikit-learn documents it as
# parallelizing over classes for one-vs-rest fits only, which the default
# solver configuration does not use for multi-class data, and newer
# scikit-learn releases deprecate the parameter.
baseline_clf = LogisticRegression(max_iter=1000)

baseline_clf.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred_base = baseline_clf.predict(X_test)

acc_base = accuracy_score(y_test, y_pred_base)
print("=== Model 1: Baseline Logistic Regression ===")
print("Test accuracy:", acc_base)
print("\nClassification report:")
print(classification_report(y_test, y_pred_base))
print("Confusion matrix:")
print(confusion_matrix(y_test, y_pred_base))

In [None]:
# Model 2: standardize every pixel feature, then fit a logistic regression
# whose class weights are balanced.
#
# `n_jobs` is deliberately not passed: scikit-learn documents it as
# parallelizing over classes for one-vs-rest fits only, which the default
# solver configuration does not use for multi-class data, and newer
# scikit-learn releases deprecate the parameter.
balanced_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("logreg", LogisticRegression(
        max_iter=1000,
        class_weight="balanced",  # give under-represented classes more weight
    )),
])

balanced_clf.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred_bal = balanced_clf.predict(X_test)

acc_bal = accuracy_score(y_test, y_pred_bal)
print("=== Model 2: Scaler + Balanced Logistic Regression ===")
print("Test accuracy:", acc_bal)
print("\nClassification report:")
print(classification_report(y_test, y_pred_bal))
print("Confusion matrix:")
print(confusion_matrix(y_test, y_pred_bal))

In [None]:
# Model 3: standardize, reduce dimensionality with PCA, then fit a
# class-weighted logistic regression on the principal components.
#
# Aim for 100 components, but PCA cannot extract more components than
# min(n_samples, n_features), so clamp to the training matrix to avoid a
# ValueError on very small experiment sets.
n_components = min(100, *X_train.shape)

pca_clf = Pipeline([
    ("scaler", StandardScaler()),
    # PCA's automatic solver selection may use a randomized SVD on wide
    # data like this, so pin the seed (same value as the train/test split)
    # to keep the results reproducible across runs.
    ("pca", PCA(n_components=n_components, random_state=42)),
    # `n_jobs` is not passed: it only parallelizes one-vs-rest fits over
    # classes and is deprecated in newer scikit-learn releases.
    ("logreg", LogisticRegression(
        max_iter=1000,
        class_weight="balanced",
    )),
])

pca_clf.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred_pca = pca_clf.predict(X_test)

acc_pca = accuracy_score(y_test, y_pred_pca)
print("=== Model 3: Scaler + PCA + Balanced Logistic Regression ===")
print("Test accuracy:", acc_pca)
print("\nClassification report:")
print(classification_report(y_test, y_pred_pca))
print("Confusion matrix:")
print(confusion_matrix(y_test, y_pred_pca))

# Check how much of the (standardized) training variance the kept components retain.
pca_step = pca_clf.named_steps["pca"]
explained_var = pca_step.explained_variance_ratio_.sum()
print(f"\nTotal explained variance with {n_components} components: {explained_var:.2f}")

In [None]:
# Show the first few test images with their true (T) and predicted (P) labels.
# Never request more images than the test split actually contains, otherwise
# indexing X_test below would raise an IndexError.
n_show = min(8, len(X_test))
n_cols = 4
n_rows = max(1, -(-n_show // n_cols))  # ceiling division, at least one row
# squeeze=False keeps `axes` two-dimensional even for a single row.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 3 * n_rows), squeeze=False)

for i, ax in enumerate(axes.ravel()):
    ax.axis("off")  # also blanks grid cells that receive no image
    if i >= n_show:
        continue

    # Undo the flattening so the feature vector can be drawn as an image.
    img = X_test[i].reshape(H, W)
    true_label = y_test[i]
    pred_label = y_pred_pca[i]  # swap in y_pred_bal or y_pred_base to inspect another model

    ax.imshow(img, cmap="gray")
    ax.set_title(f"T:{true_label} P:{pred_label}")

plt.tight_layout()
plt.show()