In [9]:
import numpy as np
def confusion_matrix_metrics(y_true, y_pred):
    """Count the four confusion-matrix cells for binary labels.

    Args:
        y_true: Array-like of ground-truth labels. Entries equal to 1 count
            as positives and entries equal to 0 as negatives; any other
            value is ignored by all four counts.
        y_pred: Array-like of predicted labels with the same shape as
            ``y_true``. Boolean predictions are accepted because NumPy
            compares ``True == 1`` and ``False == 0`` as equal.

    Returns:
        Tuple ``(TP, TN, FP, FN)`` of NumPy integer counts.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different shapes.
    """
    # Coerce to arrays first: with plain Python lists, `y_true == 1` is a
    # single False and every count would silently come out as 0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Refuse mismatched shapes instead of letting NumPy broadcast them
    # (e.g. (n, 1) against (n,) would compare every pair and inflate counts).
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == 0) & (y_pred == 0))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    return TP, TN, FP, FN

def accuracy(TP, TN, FP, FN):
    """Return the fraction of predictions that were correct.

    Args:
        TP: Number of true positives.
        TN: Number of true negatives.
        FP: Number of false positives.
        FN: Number of false negatives.

    Returns:
        ``(TP + TN) / (TP + TN + FP + FN)``, or 0 when all four counts
        are zero.
    """
    total = TP + TN + FP + FN
    # Same zero-denominator convention as precision, recall and f1_score:
    # return 0 rather than raising ZeroDivisionError (Python ints) or
    # producing NaN with a warning (NumPy ints).
    return (TP + TN) / total if total != 0 else 0

def precision(TP, FP):
    """Return TP / (TP + FP), or 0 when nothing was predicted positive."""
    predicted_positive = TP + FP
    if predicted_positive == 0:
        return 0
    return TP / predicted_positive

def recall(TP, FN):
    """Return TP / (TP + FN), or 0 when there are no actual positives."""
    actual_positive = TP + FN
    if actual_positive == 0:
        return 0
    return TP / actual_positive

def f1_score(precision, recall):
    """Return the harmonic mean of precision and recall.

    The result is 0 when precision and recall sum to zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0
    product = precision * recall
    return 2 * product / denominator




In [11]:
import numpy as np

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    A column of ones is prepended to the features internally, so after
    ``fit`` the first entry of ``theta`` is the intercept and the remaining
    entries are the per-feature weights.
    """

    # Logits are clipped to this magnitude before exponentiation so that
    # np.exp cannot overflow float64; values inside the range are untouched.
    _MAX_LOGIT = 500.0
    # Probabilities are kept this far away from 0 and 1 inside the log-loss
    # so np.log never receives an exact 0.
    _EPS = 1e-15

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        """Store the hyperparameters; ``theta`` stays None until ``fit``.

        Args:
            learning_rate: Step size applied to each gradient update.
            num_iterations: Number of gradient-descent updates run by ``fit``.
        """
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.theta = None

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        The input is clipped to ``[-_MAX_LOGIT, _MAX_LOGIT]`` first so very
        negative logits do not overflow inside ``np.exp``.
        """
        z = np.clip(z, -self._MAX_LOGIT, self._MAX_LOGIT)
        return 1 / (1 + np.exp(-z))

    def cost_function(self, h, y):
        """Return the mean binary cross-entropy of probabilities ``h``.

        Args:
            h: Predicted probabilities, same length as ``y``.
            y: Binary targets (0 or 1).

        Returns:
            The average negative log-likelihood as a float.
        """
        y = np.asarray(y)
        # Saturated predictions (exactly 0 or 1) would otherwise yield
        # log(0) = -inf and turn the whole cost into NaN/inf.
        h = np.clip(h, self._EPS, 1 - self._EPS)
        m = len(y)
        return (-1 / m) * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))

    def get_grad(self, X, y, h):
        """Return the gradient of the mean cross-entropy w.r.t. ``theta``.

        Args:
            X: Design matrix including the intercept column, shape (n, d + 1).
            y: Binary targets, shape (n,).
            h: Current predicted probabilities, shape (n,).

        Returns:
            Gradient vector of shape (d + 1,).
        """
        n = X.shape[0]
        grad = (X.T @ (h - y)) / n

        return grad

    def fit(self, X, y, verbose=True):
        """Fit ``theta`` by batch gradient descent, starting from zeros.

        Args:
            X: Array-like of features, shape (n, d).
            y: Array-like of binary targets with n entries; it is flattened
                to shape (n,) so a column vector does not broadcast against
                the (n,) predictions.
            verbose: When True (the default), print the cost every 100
                iterations. The printed cost is evaluated with the
                parameters as they were before that iteration's update.

        Raises:
            ValueError: If ``X`` is not 2-D or ``y`` does not have one entry
                per row of ``X``.

        The learned parameters are stored in ``self.theta`` with shape
        (d + 1,); nothing is returned.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-D with shape (n, d), got shape {X.shape}")
        n, d = X.shape
        if y.shape[0] != n:
            raise ValueError(
                f"X and y disagree on the number of samples: {n} vs {y.shape[0]}"
            )

        self.theta = np.zeros(d + 1)

        # Prepend the intercept column.
        X = np.hstack((np.ones((n, 1)), X))

        for iteration in range(self.num_iterations):
            z = X @ self.theta
            h = self.sigmoid(z)
            gradient = self.get_grad(X, y, h)
            self.theta -= self.learning_rate * gradient

            # Optional: print cost to monitor training progress
            if verbose and iteration % 100 == 0:
                print(f'Cost at iteration {iteration}: {self.cost_function(h, y)}')

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for each row of ``X``.

        Args:
            X: Array-like of features, shape (n, d), without the intercept
                column.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.theta is None:
            raise RuntimeError("LogisticRegression must be fitted before predicting")
        X = np.asarray(X, dtype=float)
        n = X.shape[0]
        X = np.hstack((np.ones((n, 1)), X))
        return self.sigmoid(X @ self.theta)

    def predict(self, X, threshold=0.5):
        """Return a boolean array: True where the probability >= ``threshold``."""
        return self.predict_prob(X) >= threshold


# Example usage: train on a tiny toy dataset and evaluate on that same data,
# so the metrics below describe the fit to the training set only.
# Sample dataset: 8 samples with 2 features each; the first four rows are
# labeled 0 and the last four are labeled 1.
X = np.array([[0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [4.0, 5.0]])
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# Initialize and train the model (fit prints the cost every 100 iterations)
model = LogisticRegression(learning_rate=0.1, num_iterations=1000)
model.fit(X, y)

# Predict on the training inputs; with the default threshold of 0.5 this
# yields a boolean array (True = class 1).
predictions = model.predict(X)
print(f'Predictions: {predictions}')

# Compute the confusion-matrix counts from the true labels and the predictions
TP, TN, FP, FN = confusion_matrix_metrics(y, predictions)

# Calculate each metric from the confusion-matrix counts
accuracy_value = accuracy(TP, TN, FP, FN)
precision_value = precision(TP, FP)
recall_value = recall(TP, FN)
f1_score_value = f1_score(precision_value, recall_value)

# Print the results (metrics rounded to two decimals)
print(f'Confusion Matrix:\nTP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}')
print(f'Accuracy: {accuracy_value:.2f}')
print(f'Precision: {precision_value:.2f}')
print(f'Recall: {recall_value:.2f}')
print(f'F1 Score: {f1_score_value:.2f}')



Cost at iteration 0: 0.6931471805599453
Cost at iteration 100: 0.417733470663037
Cost at iteration 200: 0.31957830938342935
Cost at iteration 300: 0.2667029335288293
Cost at iteration 400: 0.2335126167481712
Cost at iteration 500: 0.21049444572851583
Cost at iteration 600: 0.1934164669963105
Cost at iteration 700: 0.1801235441879058
Cost at iteration 800: 0.16940277142923932
Cost at iteration 900: 0.16051786274317126
Predictions: [False False False False  True  True  True  True]
Confusion Matrix:
TP: 4, TN: 4, FP: 0, FN: 0
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
