In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [None]:
# Synthetic two-class dataset: 100 samples with 3 features, none of them
# redundant or repeated. flip_y=0.2 injects label noise; random_state fixes
# the draw so re-running the cell yields the same data.
X, y = make_classification(
    n_samples=100,
    n_features=3,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    flip_y=0.2,
    random_state=1,
)

In [None]:
# Report the dimensions of the feature matrix X and the label vector y.
print(f"Shape of X: {X.shape}, shape of y: {y.shape}")

In [None]:
# Preview the data: first five feature rows, first five labels, then the
# overall maximum and minimum of X. Each group is followed by a " " spacer line.
print("X setinin ilk 5 elemanı:", X[:5], " ", sep="\n")
print("y setinin ilk 5 elemanı:", y[:5], " ", sep="\n")
print("X setinin en büyük ve en küçük değerleri:")
print(X.max(), X.min())

In [None]:
# Scatter the first two feature columns against each other, coloured by class
# label. Axis labels and a title are set so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(X[:, 0], X[:, 1], marker="o", c=y, s=25)
ax.set_xlabel("X[:, 0]", fontsize=18)
ax.set_ylabel("X[:, 1]", fontsize=18)
ax.set_title("First two features, coloured by class label")
plt.show()

In [None]:
# Expand the 3 raw features into 8 columns: the originals followed by five
# hand-picked polynomial / interaction terms.
x0, x1, x2 = X[:, 0], X[:, 1], X[:, 2]
X_new = np.column_stack(
    [
        x0,
        x1,
        x2,
        x0 * x0,       # square of feature 0
        x1 * x1 * x1,  # cube of feature 1
        x2 * x2,       # square of feature 2
        x0 * x1,       # pairwise interaction
        x0 * x1 * x2,  # three-way interaction
    ]
)

print(X_new[:5])

In [None]:
# Split the expanded features into train and test sets with a fixed seed.
# NOTE(review): test_size=0.95 holds out 95% of the 100 rows, so only about 5
# samples are left for fitting 8 features. Presumably this is deliberate, to
# provoke overfitting for the regularization comparison -- confirm.
X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.95, random_state=42)

In [None]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here ``exp`` is only ever
    evaluated at non-positive arguments, so it cannot overflow.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        The element-wise sigmoid of ``x``: a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # For x >= 0 this is the textbook formula; for x < 0 the algebraically
    # equivalent exp(x) / (1 + exp(x)) is used.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays as is.
    return result[()]

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Relies on the module-level ``sigmoid`` function.

    Attributes:
        lr: Gradient-descent step size.
        n_iters: Number of gradient-descent iterations run by ``fit``.
        log_every: Losses are recorded every ``log_every`` iterations.
        weights: Learned weight vector (``None`` until ``fit`` is called).
        bias: Learned intercept (``None`` until ``fit`` is called).
        num_samples: Number of training rows seen by the last ``fit``.
        num_features: Number of feature columns seen by the last ``fit``.
        losses: Training loss history of the last ``fit``.
        validation_losses: Validation loss history of the last ``fit``.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so that
    # saturated predictions (exactly 0 or 1) give a large finite loss, not NaN.
    _EPS = 1e-15

    def __init__(self, lr=0.001, n_iters=1000, log_every=20):
        self.lr = lr
        self.n_iters = n_iters
        self.log_every = log_every
        self.weights = None
        self.num_samples = 0
        self.num_features = 0
        self.bias = None
        self.losses = []
        self.validation_losses = []

    def loss(self, y_true, y_predicted):
        """Return the mean binary cross-entropy of ``y_predicted`` against ``y_true``.

        The mean is taken over the samples actually passed in, so training and
        validation losses are comparable even when the two sets differ in size.

        Args:
            y_true: Array-like of 0/1 labels.
            y_predicted: Array-like of predicted probabilities, same length.

        Returns:
            The average negative log-likelihood as a float.
        """
        y_true = np.asarray(y_true)
        probs = np.clip(y_predicted, self._EPS, 1.0 - self._EPS)
        log_probs = y_true * np.log(probs) + (1 - y_true) * np.log(1 - probs)
        return -np.mean(log_probs)

    def fit(self, X, y, X_test, y_test):
        """Fit weights and bias on ``(X, y)`` and track the loss on both sets.

        Args:
            X: Training features, shape ``(num_samples, num_features)``.
            y: Training labels (0/1), length ``num_samples``.
            X_test: Validation features with the same number of columns as ``X``.
            y_test: Validation labels (0/1).

        Returns:
            ``(losses, validation_losses)``: two lists with one entry per
            logged iteration (iterations 0, log_every, 2 * log_every, ...).
        """
        self.num_samples, self.num_features = X.shape
        self.weights = np.zeros(self.num_features)
        self.bias = 0
        # Parameters restart from zero, so the histories restart as well;
        # otherwise a second fit() would append to the first run's curves.
        self.losses = []
        self.validation_losses = []

        # gradient descent
        for i in range(self.n_iters):
            y_predicted = sigmoid(np.dot(X, self.weights) + self.bias)

            # Log both losses before the update so the two curves describe
            # the same parameters at every checkpoint.
            if i % self.log_every == 0:
                self.losses.append(self.loss(y, y_predicted))
                y_predicted_validation = sigmoid(np.dot(X_test, self.weights) + self.bias)
                self.validation_losses.append(self.loss(y_test, y_predicted_validation))

            error = y_predicted - y
            dw = (1 / self.num_samples) * np.dot(X.T, error)
            db = (1 / self.num_samples) * np.sum(error)

            self.weights = self.weights - (self.lr * dw)
            self.bias = self.bias - (self.lr * db)

        return self.losses, self.validation_losses

    def predict(self, X):
        """Return hard 0/1 predictions for ``X`` (1 where the probability exceeds 0.5)."""
        y_predicted = sigmoid(np.dot(X, self.weights) + self.bias)
        return np.where(y_predicted > 0.5, 1, 0)

    def accuracy(self, y_true, y_predicted):
        """Return the fraction of entries where ``y_predicted`` equals ``y_true``."""
        return np.mean(np.asarray(y_true) == np.asarray(y_predicted))


In [None]:
# Training hyperparameters: gradient-descent step size and iteration count.
LEARNING_RATE = 1e-2
NUM_ITERS = 200_000

In [None]:
# Train the plain model; fit() hands back the train and validation loss histories.
regressor = LogisticRegression(lr=LEARNING_RATE, n_iters=NUM_ITERS)
loss, validation_set_loss = regressor.fit(X_train, y_train, X_test, y_test)

# Plot both curves against the index of the logged checkpoint.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(range(len(loss)), loss, label="Train loss")
ax.plot(range(len(loss)), validation_set_loss, label="Validation loss")
ax.set_xlabel("Iterations (x20)", fontsize=18)
ax.set_ylabel(r"$J(\theta)$", fontsize=18)
ax.legend()
plt.show()

In [None]:
# Predict on both splits first, then report the fraction of correct labels for each.
y_train_pred = regressor.predict(X_train)
y_test_pred = regressor.predict(X_test)

print(f"Train accuracy: {regressor.accuracy(y_train, y_train_pred)}")
print(f"Test accuracy: {regressor.accuracy(y_test, y_test_pred)}")

In [None]:
class LogisticRegressionRegularized:
    """Binary logistic regression with an L2 penalty, trained by gradient descent.

    Relies on the module-level ``sigmoid`` function. The bias is not penalized.

    Attributes:
        lr: Gradient-descent step size.
        lambd: L2 regularization strength.
        n_iters: Number of gradient-descent iterations run by ``fit``.
        log_every: Losses are recorded every ``log_every`` iterations.
        weights: Learned weight vector (``None`` until ``fit`` is called).
        bias: Learned intercept (``None`` until ``fit`` is called).
        num_samples: Number of training rows seen by the last ``fit``.
        num_features: Number of feature columns seen by the last ``fit``.
        losses: Training loss history of the last ``fit``.
        validation_losses: Validation loss history of the last ``fit``.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so that
    # saturated predictions (exactly 0 or 1) give a large finite loss, not NaN.
    _EPS = 1e-15

    def __init__(self, lr=0.001, lambd=10, n_iters=400, log_every=20):
        self.lr = lr
        self.lambd = lambd
        self.n_iters = n_iters
        self.log_every = log_every
        self.weights = None
        self.num_samples = 0
        self.num_features = 0
        self.bias = None
        self.losses = []
        self.validation_losses = []

    def loss(self, y_true, y_predicted):
        """Return mean binary cross-entropy plus the L2 penalty on the current weights.

        The cross-entropy is averaged over the samples actually passed in, so
        training and validation values are comparable even when the two sets
        differ in size. The penalty is scaled by the training-set size
        (``self.num_samples``), matching the gradient used in ``fit``, so the
        same penalty is added regardless of which data set is being scored.

        Args:
            y_true: Array-like of 0/1 labels.
            y_predicted: Array-like of predicted probabilities, same length.

        Returns:
            The regularized cost as a float. Requires ``fit`` to have set
            ``weights`` and ``num_samples``.
        """
        y_true = np.asarray(y_true)
        probs = np.clip(y_predicted, self._EPS, 1.0 - self._EPS)
        log_probs = y_true * np.log(probs) + (1 - y_true) * np.log(1 - probs)
        cross_entropy_cost = -np.mean(log_probs)
        L2_regularization_cost = (self.lambd / (2 * self.num_samples)) * np.sum(np.square(self.weights))
        return cross_entropy_cost + L2_regularization_cost

    def fit(self, X, y, X_test, y_test):
        """Fit weights and bias on ``(X, y)`` and track the loss on both sets.

        Args:
            X: Training features, shape ``(num_samples, num_features)``.
            y: Training labels (0/1), length ``num_samples``.
            X_test: Validation features with the same number of columns as ``X``.
            y_test: Validation labels (0/1).

        Returns:
            ``(losses, validation_losses)``: two lists with one entry per
            logged iteration (iterations 0, log_every, 2 * log_every, ...).
        """
        self.num_samples, self.num_features = X.shape
        self.weights = np.zeros(self.num_features)
        self.bias = 0
        # Parameters restart from zero, so the histories restart as well;
        # otherwise a second fit() would append to the first run's curves.
        self.losses = []
        self.validation_losses = []

        # gradient descent
        for i in range(self.n_iters):
            y_predicted = sigmoid(np.dot(X, self.weights) + self.bias)

            # Log before the update: the predictions and the L2 term inside
            # loss() then refer to the same weights, for both data sets.
            if i % self.log_every == 0:
                self.losses.append(self.loss(y, y_predicted))
                y_predicted_validation = sigmoid(np.dot(X_test, self.weights) + self.bias)
                self.validation_losses.append(self.loss(y_test, y_predicted_validation))

            error = y_predicted - y
            db = (1 / self.num_samples) * np.sum(error)
            # Data gradient plus the derivative of the L2 penalty.
            dw = (1 / self.num_samples) * np.dot(X.T, error) + (self.lambd / self.num_samples) * self.weights

            self.bias = self.bias - (self.lr * db)
            self.weights = self.weights - (self.lr * dw)

        return self.losses, self.validation_losses

    def predict(self, X):
        """Return hard 0/1 predictions for ``X`` (1 where the probability exceeds 0.5)."""
        y_predicted = sigmoid(np.dot(X, self.weights) + self.bias)
        return np.where(y_predicted > 0.5, 1, 0)

    def accuracy(self, y_true, y_predicted):
        """Return the fraction of entries where ``y_predicted`` equals ``y_true``."""
        return np.mean(np.asarray(y_true) == np.asarray(y_predicted))

In [None]:
# Train the L2-regularized model with the same learning rate and iteration count.
regressor_regularized = LogisticRegressionRegularized(lr=LEARNING_RATE, lambd=10, n_iters=NUM_ITERS)
losses_regularized, validation_loss_regularized = regressor_regularized.fit(X_train, y_train, X_test, y_test)

# Plot both curves against the index of the logged checkpoint.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(range(len(losses_regularized)), losses_regularized, label="Train loss r.")
ax.plot(range(len(losses_regularized)), validation_loss_regularized, label="Validation loss r.")
ax.set_xlabel("Iterations (x20)", fontsize=18)
ax.set_ylabel(r"$J(\theta)$ Regularized", fontsize=18)
ax.legend()
plt.show()

# Predict on both splits, then report the fraction of correct labels for each.
y_train_pred_regularized = regressor_regularized.predict(X_train)
y_test_pred_regularized = regressor_regularized.predict(X_test)

print(f"Train accuracy: {regressor_regularized.accuracy(y_train, y_train_pred_regularized)}")
print(f"Test accuracy: {regressor_regularized.accuracy(y_test, y_test_pred_regularized)}")

In [None]:
# Overlay the four loss histories (train/validation, with and without the
# L2 penalty) on a single set of axes for direct comparison.
fig, ax = plt.subplots(figsize=(10, 8))

ax.plot(range(len(losses_regularized)), losses_regularized, label="Regularized", color="coral")
ax.plot(range(len(validation_loss_regularized)), validation_loss_regularized, label="Val Regularized", color="red")

ax.plot(range(len(loss)), loss, label="Not regularized", color="blue")
ax.plot(range(len(validation_set_loss)), validation_set_loss, label="Val Not Regularized", color="teal")

ax.set_xlabel("Iterations (x20)", fontsize=18)
ax.set_ylabel(r"$J(\theta)$", fontsize=18)

ax.legend()
plt.show()