In [1]:
import numpy as np

# Load the pre-split feature matrices and label vectors from the *_higgs.npy
# files in the current working directory (train first, then test).
X_train, y_train, X_test, y_test = map(
    np.load,
    (
        "X_train_higgs.npy",
        "y_train_higgs.npy",
        "X_test_higgs.npy",
        "y_test_higgs.npy",
    ),
)
class MLP:
    """Fully connected feed-forward network with a sigmoid output layer.

    Hidden layers use the activation selected at construction time; the last
    layer always applies a sigmoid. ``backward`` uses ``y_pred - y_true`` as the
    output-layer error term (the gradient of binary cross-entropy with respect
    to the pre-sigmoid output) and immediately applies one parameter update
    with the chosen optimizer.

    Attributes:
        layer_sizes: Sequence of layer widths, input first, output last.
        activation_name: Name of the hidden activation ("relu", "sigmoid",
            "tanh"); any other value behaves as the identity.
        weights, biases: Per-layer parameters; ``weights[i]`` has shape
            ``(layer_sizes[i], layer_sizes[i + 1])`` and ``biases[i]`` has
            shape ``(1, layer_sizes[i + 1])``.
        cache: List of ``(activation, pre_activation)`` tuples written by the
            most recent ``forward`` call and read by ``backward``.
        t: Number of Adam steps taken so far (used for bias correction).
    """

    def __init__(self, layer_sizes, activation="relu"):
        """Create the parameters and zero-initialized optimizer state.

        Args:
            layer_sizes: Layer widths, e.g. ``[n_features, hidden, ..., n_out]``.
            activation: Hidden-layer activation name.
        """
        self.layer_sizes = layer_sizes
        self.activation_name = activation
        self.weights = []
        self.biases = []

        self._init_parameters()

        # Momentum: running average of the gradients.
        self.v_w = [np.zeros_like(w) for w in self.weights]
        self.v_b = [np.zeros_like(b) for b in self.biases]

        # RMSProp: running average of the squared gradients.
        self.s_w = [np.zeros_like(w) for w in self.weights]
        self.s_b = [np.zeros_like(b) for b in self.biases]

        # Adam: step counter plus first (m_*) and second (v_*2) moment estimates.
        self.t = 0
        self.m_w = [np.zeros_like(w) for w in self.weights]
        self.m_b = [np.zeros_like(b) for b in self.biases]
        self.v_w2 = [np.zeros_like(w) for w in self.weights]
        self.v_b2 = [np.zeros_like(b) for b in self.biases]

    def _init_parameters(self):
        """Append randomly initialized weights and zero biases for every layer.

        Weights are drawn from a normal distribution scaled by
        ``sqrt(2 / n_in)`` when the activation is "relu" (He initialization)
        and by ``sqrt(1 / n_in)`` otherwise. Uses NumPy's global RNG.
        """
        for i in range(len(self.layer_sizes) - 1):
            n_in = self.layer_sizes[i]
            n_out = self.layer_sizes[i + 1]
            if self.activation_name == "relu":
                std = np.sqrt(2.0 / n_in)
            else:
                std = np.sqrt(1.0 / n_in)

            self.weights.append(np.random.randn(n_in, n_out) * std)
            self.biases.append(np.zeros((1, n_out)))

    @staticmethod
    def _sigmoid(z):
        """Return the logistic sigmoid of ``z`` without overflowing ``exp``.

        ``exp`` is only ever evaluated on non-positive arguments: for
        ``z >= 0`` the form ``1 / (1 + exp(-z))`` is used, and for ``z < 0``
        the algebraically equal ``exp(z) / (1 + exp(z))``.
        """
        z = np.asarray(z, dtype=float)
        out = np.empty_like(z)
        non_negative = z >= 0
        out[non_negative] = 1.0 / (1.0 + np.exp(-z[non_negative]))
        exp_z = np.exp(z[~non_negative])
        out[~non_negative] = exp_z / (1.0 + exp_z)
        return out

    def _activation(self, z):
        """Apply the hidden-layer activation to ``z`` (identity if unknown)."""
        if self.activation_name == "relu":
            return np.maximum(0, z)
        elif self.activation_name == "sigmoid":
            return self._sigmoid(z)
        elif self.activation_name == "tanh":
            return np.tanh(z)
        return z

    def _activation_derivative(self, z):
        """Return the derivative of the hidden activation evaluated at ``z``."""
        if self.activation_name == "relu":
            return (z > 0).astype(float)
        elif self.activation_name == "sigmoid":
            s = self._sigmoid(z)
            return s * (1 - s)
        elif self.activation_name == "tanh":
            return 1 - np.tanh(z) ** 2
        return 1

    def forward(self, X):
        """Run a forward pass and cache every layer's output for ``backward``.

        Args:
            X: Input array of shape ``(n_samples, layer_sizes[0])``.

        Returns:
            The sigmoid output of the last layer, shape
            ``(n_samples, layer_sizes[-1])``.
        """
        self.cache = []
        a = X
        last = len(self.weights) - 1
        for i in range(len(self.weights)):
            z = np.dot(a, self.weights[i]) + self.biases[i]
            # The output layer is always a sigmoid, regardless of the hidden activation.
            a = self._sigmoid(z) if i == last else self._activation(z)
            self.cache.append((a, z))
        return a

    def backward(self, X, y_true, y_pred, lr=0.01, optimizer="sgd", beta1=0.9, beta2=0.999, eps=1e-8):
        """Back-propagate the error and apply one optimizer step in place.

        Must be called right after ``forward`` on the same ``X`` because it
        reads ``self.cache``.

        Args:
            X: The batch passed to ``forward``.
            y_true: Targets; any shape with as many elements as ``y_pred``
                (for example ``(m,)`` or ``(m, 1)``).
            y_pred: The value returned by ``forward`` for this batch.
            lr: Learning rate.
            optimizer: "sgd", "momentum", "rmsprop" or "adam"; any other value
                falls back to plain SGD.
            beta1: Decay rate for momentum, for RMSProp's squared-gradient
                average, and for Adam's first moment.
            beta2: Decay rate for Adam's second moment.
            eps: Small constant added to denominators.

        Raises:
            ValueError: If ``y_true`` cannot be reshaped to ``y_pred.shape``.
        """
        m = X.shape[0]
        # Align the targets with the predictions. Without this, 1-D labels
        # would broadcast against an (m, 1) prediction into an (m, m) matrix.
        y_true = np.asarray(y_true).reshape(y_pred.shape)
        dz = y_pred - y_true
        if optimizer == "adam":
            self.t += 1
        for i in reversed(range(len(self.weights))):
            a_prev = X if i == 0 else self.cache[i - 1][0]
            dw = (1 / m) * np.dot(a_prev.T, dz)
            db = (1 / m) * np.sum(dz, axis=0, keepdims=True)
            if i > 0:
                # Propagate through the *pre-update* weights of this layer.
                z_prev = self.cache[i - 1][1]
                dz_next = np.dot(dz, self.weights[i].T) * self._activation_derivative(z_prev)
            if optimizer == "momentum":
                self.v_w[i] = beta1 * self.v_w[i] + (1 - beta1) * dw
                self.v_b[i] = beta1 * self.v_b[i] + (1 - beta1) * db
                self.weights[i] -= lr * self.v_w[i]
                self.biases[i] -= lr * self.v_b[i]
            elif optimizer == "rmsprop":
                self.s_w[i] = beta1 * self.s_w[i] + (1 - beta1) * (dw ** 2)
                self.s_b[i] = beta1 * self.s_b[i] + (1 - beta1) * (db ** 2)
                self.weights[i] -= lr * dw / (np.sqrt(self.s_w[i]) + eps)
                self.biases[i] -= lr * db / (np.sqrt(self.s_b[i]) + eps)
            elif optimizer == "adam":
                self.m_w[i] = beta1 * self.m_w[i] + (1 - beta1) * dw
                self.m_b[i] = beta1 * self.m_b[i] + (1 - beta1) * db
                self.v_w2[i] = beta2 * self.v_w2[i] + (1 - beta2) * (dw ** 2)
                self.v_b2[i] = beta2 * self.v_b2[i] + (1 - beta2) * (db ** 2)
                # Bias-corrected moment estimates.
                mw_h = self.m_w[i] / (1 - beta1 ** self.t)
                mb_h = self.m_b[i] / (1 - beta1 ** self.t)
                vw_h = self.v_w2[i] / (1 - beta2 ** self.t)
                vb_h = self.v_b2[i] / (1 - beta2 ** self.t)
                self.weights[i] -= lr * mw_h / (np.sqrt(vw_h) + eps)
                self.biases[i] -= lr * mb_h / (np.sqrt(vb_h) + eps)
            else:  # SGD
                self.weights[i] -= lr * dw
                self.biases[i] -= lr * db

            if i > 0:
                dz = dz_next

In [2]:
def train_mlp(model, X, y, epochs=20, batch_size=64, lr=0.01, optimizer="sgd"):
    """Train ``model`` with shuffled mini-batches and report the loss per epoch.

    The module-level ``binary_cross_entropy`` function is looked up when this
    function runs, so it must be defined before the first call.

    Args:
        model: Object exposing ``forward(X_batch)`` and
            ``backward(X_batch, y_batch, y_pred, lr=..., optimizer=...)``.
        X: Feature array indexed by sample along axis 0.
        y: Target array with the same number of rows as ``X``.
        epochs: Number of full passes over the data.
        batch_size: Maximum number of samples per mini-batch.
        lr: Learning rate forwarded to ``model.backward``.
        optimizer: Optimizer name forwarded to ``model.backward``.

    Returns:
        List with one entry per epoch: the mean per-sample loss, measured on
        each batch before that batch's parameter update.

    Raises:
        ValueError: If ``X`` contains no samples.
    """
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError("train_mlp requires at least one training sample")

    losses = []
    for epoch in range(epochs):
        # Reshuffle every epoch so the batches differ between passes.
        perm = np.random.permutation(n_samples)
        X_shuffled = X[perm]
        y_shuffled = y[perm]

        epoch_loss = 0.0
        for start in range(0, n_samples, batch_size):
            X_batch = X_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]
            y_pred = model.forward(X_batch)
            # Weight each batch mean by its size: the final batch may be
            # shorter, and dividing by n_samples // batch_size miscounted the
            # batches (and divided by zero when n_samples < batch_size).
            epoch_loss += binary_cross_entropy(y_batch, y_pred) * X_batch.shape[0]
            model.backward(
                X_batch,
                y_batch,
                y_pred,
                lr=lr,
                optimizer=optimizer,
            )

        avg_loss = epoch_loss / n_samples
        losses.append(avg_loss)
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")
    return losses

In [3]:
import numpy as np
from scipy import sparse
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
class PegasosSVM:
    """Linear SVM without a bias term, trained with single-sample Pegasos steps.

    Each iteration draws one training row at random, shrinks the weights by
    ``1 - eta * lambda_reg`` with ``eta = 1 / (lambda_reg * t)``, adds a
    hinge-loss correction when the drawn row violates the margin, and projects
    the weights back onto the ball of radius ``1 / sqrt(lambda_reg)``.

    Attributes:
        lambda_reg: Regularization strength.
        iterations: Number of stochastic update steps performed by ``fit``.
        random_state: ``None`` to draw from NumPy's global RNG, otherwise a
            seed for a private ``np.random.RandomState``.
        w: Learned weight vector of shape ``(n_features,)``; ``None`` until
            ``fit`` has been called.
    """

    def __init__(self, lambda_reg=0.01, iterations=10000, random_state=None):
        self.lambda_reg = lambda_reg
        self.iterations = iterations
        self.random_state = random_state
        self.w = None

    def fit(self, X, y):
        """Learn ``self.w`` from ``X`` and ``y``.

        Args:
            X: Dense array or SciPy sparse matrix of shape
                ``(n_samples, n_features)``.
            y: Labels; values ``<= 0`` are treated as the negative class and
                everything else as the positive class.
        """
        if sparse.issparse(X):
            # The sparse update below reads xi.indices as column indices, which
            # is only true for CSR rows. Converting up front keeps CSC/COO
            # inputs from silently corrupting the update.
            X = X.tocsr()

        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        y_encoded = np.where(np.asarray(y) <= 0, -1, 1).ravel()

        # Fall back to the global RNG so np.random.seed(...) keeps working.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Radius of the projection ball; constant across iterations.
        bound = 1.0 / np.sqrt(self.lambda_reg)

        for t in range(1, self.iterations + 1):
            i = rng.randint(0, n_samples)
            xi = X[i]
            yi = y_encoded[i]
            row_is_sparse = sparse.issparse(xi)

            eta = 1.0 / (self.lambda_reg * t)

            # The margin is evaluated with the weights from before this step.
            if row_is_sparse:
                margin = float(xi.dot(self.w).item())
            else:
                margin = np.dot(xi, self.w)

            self.w *= (1 - eta * self.lambda_reg)

            if yi * margin < 1:
                if row_is_sparse:
                    self.w[xi.indices] += eta * yi * xi.data
                else:
                    self.w += eta * yi * xi

            norm = np.linalg.norm(self.w)
            if norm > bound:
                self.w *= (bound / norm)

    def predict(self, X):
        """Return 1 where the decision score is ``>= 0`` and 0 elsewhere."""
        return np.where(self.predict_scores(X) >= 0, 1, 0)

    def predict_scores(self, X):
        """Return the raw decision scores ``X @ w`` for dense or sparse ``X``."""
        if sparse.issparse(X):
            return X.dot(self.w)
        return np.dot(X, self.w)

In [4]:
def binary_cross_entropy(y_true, y_pred, eps=1e-8):
    """Return the mean binary cross-entropy between targets and probabilities.

    Args:
        y_true: Targets in ``[0, 1]``. If the shape differs from ``y_pred``
            but the element count matches (for example ``(n,)`` against
            ``(n, 1)``), it is reshaped to ``y_pred``'s shape.
        y_pred: Predicted probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the logs.

    Returns:
        The loss averaged over all elements, as a float64 scalar.
    """
    # Work in float64: in float32 the bound 1 - 1e-8 rounds to exactly 1.0, so
    # the clip would not keep log(1 - y_pred) finite.
    y_pred = np.asarray(y_pred, dtype=np.float64)
    y_true = np.asarray(y_true, dtype=np.float64)
    # Pair labels and predictions element-wise. Without this, (n,) against
    # (n, 1) broadcasts to an (n, n) matrix and the mean is meaningless.
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true = y_true.reshape(y_pred.shape)

    y_pred = np.clip(y_pred, eps, 1 - eps)
    loss = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return np.mean(loss)

# Seed NumPy's global RNG so the weight initialization, the batch shuffling
# and the Pegasos sample draws are repeatable across "Restart & Run All".
np.random.seed(42)

# MLP model (Task 1)
print("MLP Modeli eğitiliyor...")
mlp_model = MLP([28, 64, 32, 1], activation="relu")
# Keep the per-epoch loss history so it can be inspected or plotted later.
mlp_losses = train_mlp(mlp_model, X_train, y_train, epochs=20, optimizer="adam", lr=0.01)

# SVM model (Task 2)
print("SVM (Pegasos) Modeli eğitiliyor...")
svm_model = PegasosSVM(lambda_reg=0.01, iterations=20000)
svm_model.fit(X_train, y_train)

# --- STEP 2: both base models are trained; Task 3 can start now ---
# (The LogisticRegression meta-model code belongs after this point.)

MLP Modeli eğitiliyor...
Epoch 1/20 - Loss: 0.6500
Epoch 2/20 - Loss: 0.6151
Epoch 3/20 - Loss: 0.5968
Epoch 4/20 - Loss: 0.5861
Epoch 5/20 - Loss: 0.5761
Epoch 6/20 - Loss: 0.5685
Epoch 7/20 - Loss: 0.5624
Epoch 8/20 - Loss: 0.5538
Epoch 9/20 - Loss: 0.5462
Epoch 10/20 - Loss: 0.5391
Epoch 11/20 - Loss: 0.5360
Epoch 12/20 - Loss: 0.5264
Epoch 13/20 - Loss: 0.5248
Epoch 14/20 - Loss: 0.5163
Epoch 15/20 - Loss: 0.5144
Epoch 16/20 - Loss: 0.5072
Epoch 17/20 - Loss: 0.5032
Epoch 18/20 - Loss: 0.4985
Epoch 19/20 - Loss: 0.4951
Epoch 20/20 - Loss: 0.4892
SVM (Pegasos) Modeli eğitiliyor...


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Collect the base-model scores on the training set.
mlp_train_scores = mlp_model.forward(X_train)
svm_train_scores = svm_model.predict_scores(X_train).reshape(-1, 1)

# Build the meta-feature matrix: one column per base model.
X_meta_train = np.hstack((mlp_train_scores, svm_train_scores))

# Train the meta-model (logistic regression) on the stacked scores.
# NOTE(review): these scores come from the same rows the base models were
# trained on, so the meta-model sees optimistic inputs. Consider fitting it on
# held-out or out-of-fold predictions instead.
meta_model = LogisticRegression()
meta_model.fit(X_meta_train, y_train.ravel())

# Measure the hybrid model on the test set: repeat the same feature
# construction for X_test, then predict with the meta-model.
mlp_test_scores = mlp_model.forward(X_test)
svm_test_scores = svm_model.predict_scores(X_test).reshape(-1, 1)
X_meta_test = np.hstack((mlp_test_scores, svm_test_scores))

y_test_flat = y_test.ravel()
y_meta_pred = meta_model.predict(X_meta_test)

print(f"Hybrid accuracy: {accuracy_score(y_test_flat, y_meta_pred):.4f}")
print(f"Hybrid F1 score: {f1_score(y_test_flat, y_meta_pred):.4f}")
# Last expression of the cell: the confusion matrix is displayed as its output.
confusion_matrix(y_test_flat, y_meta_pred)