In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score


In [25]:
data = pd.read_csv("BankNote_Authentication.csv")

In [23]:
data = data.sample(frac = 1, random_state = 42).reset_index(drop= True)

In [29]:
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [31]:
print("X type is " + str(type(X)))
print("X shape is " + str(X.shape))
print("y type is " + str(type(y)))
print("y shape is " + str(y.shape))

X type is <class 'pandas.core.frame.DataFrame'>
X shape is (1372, 4)
y type is <class 'pandas.core.series.Series'>
y shape is (1372,)


In [33]:
X = X.to_numpy()

In [35]:
y = y.to_numpy().reshape(-1, 1)

In [37]:
print("X type is " + str(type(X)))
print("X shape is " + str(X.shape))
print("y type is " + str(type(y)))
print("y shape is " + str(y.shape))

X type is <class 'numpy.ndarray'>
X shape is (1372, 4)
y type is <class 'numpy.ndarray'>
y shape is (1372, 1)


In [39]:
from sklearn.model_selection import train_test_split

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42, stratify = y)

In [43]:
print("X train shape is " + str(X_train.shape))
print("X test shape is " + str(X_test.shape))
print("y train shape is " + str(y_train.shape))
print("y test shape is " + str(y_test.shape))

X train shape is (1097, 4)
X test shape is (275, 4)
y train shape is (1097, 1)
y test shape is (275, 1)


In [45]:
def tanh(z):
    return np.tanh(z)

def tanh_derivative(z):
    # tanh türevi = 1 - tanh(z)^2
    return 1 - np.tanh(z)**2

def relu(z):
    return np.maximum(0, z)

def relu_derivative(z):
    return (z > 0).astype(float)


In [47]:
def get_activation(activation_name):
    if activation_name == 'tanh':
        return tanh, tanh_derivative
    elif activation_name == 'relu':
        return relu, relu_derivative
    else:
        raise ValueError("Geçersiz aktivasyon fonksiyonu ismi!")


In [49]:
class MLP:
    def __init__(self, layer_sizes, 
                 activation_hidden='tanh', 
                 activation_output='sigmoid', 
                 learning_rate=0.01, 
                 seed=42):
        """
        layer_sizes: Örneğin [4, 5, 1] => 4 giriş, 5 gizli nöron, 1 çıkış nöronu
        activation_hidden: 'tanh' veya 'relu'
        activation_output: genelde 'sigmoid'
        learning_rate: Öğrenme oranı
        seed: Rastgele ağırlık başlatma için seed
        """
        np.random.seed(seed)
        self.layer_sizes = layer_sizes
        self.learning_rate = learning_rate
        
        # Gizli katman aktivasyonu
        self.hidden_activation, self.hidden_activation_deriv = get_activation(activation_hidden)
        
        # Çıkış katmanı için aktivasyon fonksiyonu (genelde sigmoid)
        if activation_output == 'sigmoid':
            self.output_activation = lambda z: 1 / (1 + np.exp(-z))
            self.output_activation_deriv = lambda z: self.output_activation(z) * (1 - self.output_activation(z))
        else:
            raise ValueError("Çıkış katmanı için bu örnekte sadece sigmoid destekleniyor.")
        
        # Ağırlık ve bias başlatma
        self.parameters = {}
        self._init_params()
    
    def _init_params(self):
        # layer_sizes örn: [4, 5, 1]
        for i in range(len(self.layer_sizes) - 1):
            in_dim = self.layer_sizes[i]
            out_dim = self.layer_sizes[i+1]
            # Xavier veya He init isterseniz ekleyebilirsiniz. Burada basit random normal kullandık.
            self.parameters[f"W{i+1}"] = np.random.randn(in_dim, out_dim) * 0.01
            self.parameters[f"b{i+1}"] = np.zeros((1, out_dim))
    
    def forward(self, X):
        """
        İleri yayılım.
        returns:
          - activations: her katmandaki 'a' (aktivasyon)
          - zs: her katmandaki 'z' (lineer kısım)
        """
        activations = [X]
        zs = []
        
        # Kaç katman var (örneğin [4,5,5,1] => 3 adet W ve b seti)
        L = len(self.layer_sizes) - 1
        
        for i in range(1, L+1):
            W = self.parameters[f"W{i}"]
            b = self.parameters[f"b{i}"]
            
            # Önceki katmanın aktivasyonu
            A_prev = activations[-1]
            
            z = A_prev.dot(W) + b
            zs.append(z)
            
            if i < L:
                # Gizli katman aktivasyonu
                a = self.hidden_activation(z)
            else:
                # Çıkış katmanı aktivasyonu (sigmoid)
                a = self.output_activation(z)
            
            activations.append(a)
        
        return activations, zs
    
    def backward(self, activations, zs, y):
        """
        Geri yayılım (backprop).
        y shape = (batch_size, )
        """
        grads = {}
        m = y.shape[0]
        L = len(self.layer_sizes) - 1
        
        # Çıkış katmanı aktivasyonu (sigmoid) için hata hesabı
        # a[L] => output
        aL = activations[-1]
        
        # Binary cross-entropy için dA = -(y/a - (1-y)/(1-a))
        # pratikte a[L] - y olarak da sıkça kullanılıyor (sigmoid cross-entropy yaklaşımı)
        dA = -(np.divide(y, aL) - np.divide(1 - y, 1 - aL))
        
        for i in reversed(range(1, L+1)):
            z = zs[i-1]
            W = self.parameters[f"W{i}"]
            
            if i == L:
                # Çıkış katmanı türevi
                dZ = dA * self.output_activation_deriv(z)
            else:
                # Gizli katman türevi
                dZ = dA * self.hidden_activation_deriv(z)
            
            A_prev = activations[i-1]
            
            # dW = (1/m) * A_prev.T.dot(dZ)
            dW = (1/m) * A_prev.T.dot(dZ)
            
            # db = (1/m) * sum(dZ)
            db = (1/m) * np.sum(dZ, axis=0, keepdims=True)
            
            grads[f"dW{i}"] = dW
            grads[f"db{i}"] = db
            
            # Bir önceki katmana ait dA hesabı
            dA = dZ.dot(W.T)
        
        return grads
    
    def update_params(self, grads):
        """
        Stokastik gradyan inişi (SGD) ile parametre güncellemesi
        """
        L = len(self.layer_sizes) - 1
        for i in range(1, L+1):
            self.parameters[f"W{i}"] -= self.learning_rate * grads[f"dW{i}"]
            self.parameters[f"b{i}"] -= self.learning_rate * grads[f"db{i}"]
    
    def compute_loss(self, y_pred, y_true):
        """
        Binary Cross-Entropy Loss
        y_pred ve y_true (0 veya 1) boyutları (m,)
        """
        m = y_true.shape[0]
        # epsilon ile sayısal kararlılık sağlanabilir
        epsilon = 1e-9
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        loss = - (1/m) * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss
    
    def fit(self, X, y, n_steps=1000):
        """
        Modeli n_steps boyunca eğitir.
        """
        loss_history = []
        for step in range(n_steps):
            # forward
            activations, zs = self.forward(X)
            y_pred = activations[-1].reshape(-1)  # (m,)
            
            # loss
            loss = self.compute_loss(y_pred, y)
            loss_history.append(loss)
            
            # backward
            grads = self.backward(activations, zs, y)
            
            # update
            self.update_params(grads)
            
            # İsterseniz her 100 adımda bir ekrana yazdırabilirsiniz
            if (step+1) % 100 == 0:
                print(f"Step {step+1}/{n_steps}, Loss: {loss:.4f}")
        
        return loss_history
    
    def predict(self, X):
        """
        İleri yayılım yaparak çıktı tahmini (0 veya 1)
        """
        activations, _ = self.forward(X)
        y_prob = activations[-1]
        # 0.5 eşik
        return (y_prob >= 0.5).astype(int).reshape(-1)


In [51]:
n_hidden_candidates = [5, 10]  # örnek
n_steps = 1000

results = []  # (model_name, activation, n_hidden, final_accuracy, final_loss, n_steps)

for activation_func in ['tanh', 'relu']:
    for n_hidden in n_hidden_candidates:
        # 2-Layer
        mlp_2layer = MLP(layer_sizes=[X_train.shape[1], n_hidden, 1],
                 activation_hidden=activation_func,
                 activation_output='sigmoid',
                 learning_rate=0.01,
                 seed=42)

        
        loss_hist_2layer = mlp_2layer.fit(X_train, y_train, n_steps=n_steps)
        
        y_pred_2layer = mlp_2layer.predict(X_test)
        acc_2layer = accuracy_score(y_test, y_pred_2layer)
        final_loss_2layer = loss_hist_2layer[-1]
        
        results.append(("2-Layer", activation_func, n_hidden, acc_2layer, final_loss_2layer, n_steps))
        
        # 3-Layer
        mlp_3layer = MLP(layer_sizes=[X_train.shape[1], n_hidden, n_hidden, 1],
                         activation_hidden=activation_func,
                         activation_output='sigmoid',
                         learning_rate=0.01,
                         seed=42)
        
        loss_hist_3layer = mlp_3layer.fit(X_train, y_train, n_steps=n_steps)
        
        y_pred_3layer = mlp_3layer.predict(X_test)
        acc_3layer = accuracy_score(y_test, y_pred_3layer)
        final_loss_3layer = loss_hist_3layer[-1]
        
        results.append(("3-Layer", activation_func, n_hidden, acc_3layer, final_loss_3layer, n_steps))

# Sonuçları inceleyelim
df_results = pd.DataFrame(results, columns=["Model", "Activation", "n_hidden", "Accuracy", "FinalLoss", "n_steps"])
print(df_results)


Step 100/1000, Loss: 757.5739
Step 200/1000, Loss: 764.7127
Step 300/1000, Loss: 817.5907
Step 400/1000, Loss: 937.4890
Step 500/1000, Loss: 1079.5902
Step 600/1000, Loss: 1211.9188
Step 700/1000, Loss: 1328.4566
Step 800/1000, Loss: 1430.6122
Step 900/1000, Loss: 1520.9984
Step 1000/1000, Loss: 1601.9375
Step 100/1000, Loss: 757.7736
Step 200/1000, Loss: 756.1716
Step 300/1000, Loss: 755.1999
Step 400/1000, Loss: 754.6101
Step 500/1000, Loss: 754.2519
Step 600/1000, Loss: 754.0341
Step 700/1000, Loss: 753.9017
Step 800/1000, Loss: 753.8211
Step 900/1000, Loss: 753.7720
Step 1000/1000, Loss: 753.7421
Step 100/1000, Loss: 757.6439
Step 200/1000, Loss: 762.4838
Step 300/1000, Loss: 820.7771
Step 400/1000, Loss: 959.9543
Step 500/1000, Loss: 1129.6962
Step 600/1000, Loss: 1285.9215
Step 700/1000, Loss: 1421.1396
Step 800/1000, Loss: 1537.9491
Step 900/1000, Loss: 1640.1264
Step 1000/1000, Loss: 1730.8255
Step 100/1000, Loss: 757.7727
Step 200/1000, Loss: 756.1705
Step 300/1000, Loss: 755.

In [52]:
# Örnek olarak 2-Layer + ReLU, n_hidden=5 modelinin seçildiğini varsayalım:
best_model = MLP(layer_sizes=[X_train.shape[1], 5, 1],
                 activation_hidden='relu',
                 activation_output='sigmoid',
                 learning_rate=0.01,
                 seed=42)

best_model.fit(X_train, y_train, n_steps=1000)
y_pred_best = best_model.predict(X_test)

cm = confusion_matrix(y_test, y_pred_best)
acc = accuracy_score(y_test, y_pred_best)
prec = precision_score(y_test, y_pred_best)
rec = recall_score(y_test, y_pred_best)
f1 = f1_score(y_test, y_pred_best)

print("Confusion Matrix:\n", cm)
print(f"Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall: {rec:.4f}")
print(f"F1-Score: {f1:.4f}")


Step 100/1000, Loss: 757.5048
Step 200/1000, Loss: 755.1830
Step 300/1000, Loss: 767.9355
Step 400/1000, Loss: 847.3971
Step 500/1000, Loss: 999.7981
Step 600/1000, Loss: 1160.4058
Step 700/1000, Loss: 1308.9173
Step 800/1000, Loss: 1456.1752
Step 900/1000, Loss: 1596.4643
Step 1000/1000, Loss: 1729.9344
Confusion Matrix:
 [[149   4]
 [  0 122]]
Accuracy: 0.9855
Precision: 0.9683
Recall: 1.0000
F1-Score: 0.9839


In [55]:
from sklearn.neural_network import MLPClassifier

# Örneğin 2-Layer, tanh aktivasyon, 5 nöron
mlp_sklearn_2layer = MLPClassifier(hidden_layer_sizes=(5,),
                                   activation='tanh',
                                   solver='sgd',
                                   learning_rate_init=0.01,
                                   max_iter=1000,
                                   random_state=42)

mlp_sklearn_2layer.fit(X_train, y_train)
y_pred_sklearn_2layer = mlp_sklearn_2layer.predict(X_test)

print("Scikit-learn 2-Layer MLP Accuracy:", accuracy_score(y_test, y_pred_sklearn_2layer))

# Örneğin 3-Layer, ReLU aktivasyon, 5 + 5 nöron
mlp_sklearn_3layer = MLPClassifier(hidden_layer_sizes=(5, 5),
                                   activation='relu',
                                   solver='sgd',
                                   learning_rate_init=0.01,
                                   max_iter=1000,
                                   random_state=42)

mlp_sklearn_3layer.fit(X_train, y_train)
y_pred_sklearn_3layer = mlp_sklearn_3layer.predict(X_test)

print("Scikit-learn 3-Layer MLP Accuracy:", accuracy_score(y_test, y_pred_sklearn_3layer))


  y = column_or_1d(y, warn=True)


Scikit-learn 2-Layer MLP Accuracy: 0.9963636363636363
Scikit-learn 3-Layer MLP Accuracy: 1.0


  y = column_or_1d(y, warn=True)


In [57]:
import torch
import torch.nn as nn
import torch.optim as optim

# Torch tensörlerine dönüştürme
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# 3-Layer MLP (2 gizli katman)
class MLP_Torch(nn.Module):
    def __init__(self, input_dim, hidden_dim, activation='relu'):
        super(MLP_Torch, self).__init__()
        if activation == 'relu':
            self.act = nn.ReLU()
        elif activation == 'tanh':
            self.act = nn.Tanh()
        else:
            raise ValueError("Aktivasyon fonksiyonu bilinmiyor!")
        
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.act(self.layer1(x))
        x = self.act(self.layer2(x))
        x = self.sigmoid(self.output(x))
        return x

model_torch = MLP_Torch(input_dim=X_train.shape[1], hidden_dim=5, activation='relu')
criterion = nn.BCELoss()   # Binary Cross Entropy
optimizer = optim.SGD(model_torch.parameters(), lr=0.01)

# Eğitim döngüsü
n_steps = 1000
for step in range(n_steps):
    # ileri yayılım
    y_pred = model_torch(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    
    # geri yayılım
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (step+1) % 100 == 0:
        print(f"Step {step+1}/{n_steps}, Loss: {loss.item():.4f}")

# Test aşaması
model_torch.eval()
with torch.no_grad():
    y_test_pred = model_torch(X_test_torch)
    # 0.5 eşiği
    y_test_pred_class = (y_test_pred >= 0.5).float()
    
acc_torch = accuracy_score(y_test, y_test_pred_class.numpy())
print("PyTorch 3-Layer MLP Accuracy:", acc_torch)


Step 100/1000, Loss: 0.5150
Step 200/1000, Loss: 0.3229
Step 300/1000, Loss: 0.1905
Step 400/1000, Loss: 0.1251
Step 500/1000, Loss: 0.0910
Step 600/1000, Loss: 0.0712
Step 700/1000, Loss: 0.0583
Step 800/1000, Loss: 0.0492
Step 900/1000, Loss: 0.0424
Step 1000/1000, Loss: 0.0372
PyTorch 3-Layer MLP Accuracy: 0.9818181818181818


In [59]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

# Torch tensörlerine dönüştürme
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# 2-Layer MLP (1 gizli katman)
class MLP_Torch_2Layer(nn.Module):
    def __init__(self, input_dim, hidden_dim, activation='relu'):
        super(MLP_Torch_2Layer, self).__init__()
        # Aktivasyon fonksiyonunun seçimi
        if activation == 'relu':
            self.act = nn.ReLU()
        elif activation == 'tanh':
            self.act = nn.Tanh()
        else:
            raise ValueError("Aktivasyon fonksiyonu bilinmiyor!")
        
        # 1 gizli katman ve 1 çıkış katmanı
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.act(self.layer1(x))   # Gizli katman aktivasyonu
        x = self.sigmoid(self.output(x)) # Çıkış katmanı (sigmoid)
        return x

# Modeli oluşturma
model_torch_2layer = MLP_Torch_2Layer(input_dim=X_train.shape[1], hidden_dim=5, activation='relu')

# Kayıp fonksiyonu ve optimizasyon (SGD)
criterion = nn.BCELoss()   # Binary Cross-Entropy Loss
optimizer = optim.SGD(model_torch_2layer.parameters(), lr=0.01)

# Eğitim döngüsü
n_steps = 1000
for step in range(n_steps):
    # İleri yayılım
    y_pred = model_torch_2layer(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    
    # Geri yayılım ve parametre güncelleme
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (step+1) % 100 == 0:
        print(f"Step {step+1}/{n_steps}, Loss: {loss.item():.4f}")

# Test aşaması
model_torch_2layer.eval()
with torch.no_grad():
    y_test_pred = model_torch_2layer(X_test_torch)
    # 0.5 eşiği ile sınıflandırma
    y_test_pred_class = (y_test_pred >= 0.5).float()

# Test accuracy hesaplama
acc_torch_2layer = accuracy_score(y_test, y_test_pred_class.numpy())
print("PyTorch 2-Layer MLP Accuracy:", acc_torch_2layer)


Step 100/1000, Loss: 0.5336
Step 200/1000, Loss: 0.3930
Step 300/1000, Loss: 0.2779
Step 400/1000, Loss: 0.2061
Step 500/1000, Loss: 0.1595
Step 600/1000, Loss: 0.1296
Step 700/1000, Loss: 0.1095
Step 800/1000, Loss: 0.0949
Step 900/1000, Loss: 0.0839
Step 1000/1000, Loss: 0.0752
PyTorch 2-Layer MLP Accuracy: 0.9854545454545455
