Logistic Regression: The Mathematics Implemented in Code

In [None]:
#  libraries
import numpy as np

#  sigmoid function
def sigmoid(z):
    """
    Compute the sigmoid of z in a numerically stable way.

    z: Scalar or array-like of real values
    Returns 1 / (1 + exp(-z)) element-wise; a scalar input yields a scalar,
    an array input yields an array of the same shape.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) is always in (0, 1], so it can never overflow, unlike
    # exp(-z) for large negative z.
    e = np.exp(-np.abs(z))
    # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically equal
    # e^z / (1 + e^z). Both branches are safe to evaluate for every element.
    result = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves n-d arrays as arrays.
    return result[()]

# cost function (Log Loss)
def compute_cost(y, y_pred, eps=1e-15):
    """
    Compute the logistic regression cost (mean binary cross-entropy).

    y: Actual labels (0 or 1)
    y_pred: Predicted probabilities (between 0 and 1)
    eps: Predictions are clipped to [eps, 1 - eps] before taking logs
    Returns the mean log loss over all samples.
    """
    y = np.asarray(y, dtype=float)
    # Without clipping, a prediction of exactly 0 or 1 produces log(0) = -inf,
    # and 0 * -inf evaluates to NaN, which would poison the mean.
    p = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

#   gradient descent
def gradient_descent(X, y, w, b, learning_rate, num_iterations):
    """
    Train logistic regression model using batch gradient descent.

    X: Feature matrix (one row per sample)
    y: Labels
    w: Initial weights (coefficients); not modified by this function
    b: Initial bias term
    learning_rate: Learning rate for gradient descent
    num_iterations: Number of iterations
    Returns the tuple (w, b) with the trained weights and bias.
    Prints the cost every 100 iterations.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    # Work on a float copy: the in-place update below would otherwise modify
    # the caller's array and would fail for integer-dtype weights.
    w = np.array(w, dtype=float)

    m = X.shape[0]
    for i in range(num_iterations):
        # Compute the predictions
        z = np.dot(X, w) + b
        y_pred = sigmoid(z)

        # Compute the gradients of the mean log loss
        error = y_pred - y
        dw = (1 / m) * np.dot(X.T, error)
        db = (1 / m) * np.sum(error)

        # Update weights and bias
        w -= learning_rate * dw
        b -= learning_rate * db

        # Report the cost of the predictions made before this update
        if i % 100 == 0:
            cost = compute_cost(y, y_pred)
            print(f"Iteration {i}: Cost = {cost}")

    return w, b

# Prediction
def predict(X, w, b):
    """
    Classify each row of X as 0 or 1.

    X: Feature matrix
    w: Weights
    b: Bias
    Returns an integer array: 1 where the sigmoid of the linear score is at
    least 0.5, otherwise 0.
    """
    threshold = 0.5
    probabilities = sigmoid(b + np.dot(X, w))
    return np.greater_equal(probabilities, threshold).astype(int)

#  dataset
# Example dataset with 2 features
X = np.array([[0.5, 1.5],
              [1.0, 2.0],
              [1.5, 0.5],
              [2.0, 1.0],
              [3.0, 3.5]])
y = np.array([0, 0, 1, 1, 1])  # Labels (0 or 1)

# Normalize the data (optional but recommended for better performance).
# The training statistics are saved first: X is overwritten with its
# standardized version, so recomputing mean/std from X afterwards would give
# roughly 0 and 1 and leave the test data unscaled.
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
X = (X - feature_mean) / feature_std

# Initialize weights and bias
num_features = X.shape[1]
w = np.zeros(num_features)  # Initialize weights as zeros
b = 0  # Initialize bias as zero

# Train the model
learning_rate = 0.1
num_iterations = 1000
w, b = gradient_descent(X, y, w, b, learning_rate, num_iterations)

# Make predictions
X_test = np.array([[1.5, 2.0], [2.5, 2.5]])  # Example test data
# Scale the test data with the statistics of the original training features
X_test = (X_test - feature_mean) / feature_std
predictions = predict(X_test, w, b)

print("Predictions:", predictions)


Iteration 0: Cost = 0.6931471805599453
Iteration 100: Cost = 0.19610142026457208
Iteration 200: Cost = 0.10984298685297385
Iteration 300: Cost = 0.0755870487241721
Iteration 400: Cost = 0.05746885820861871
Iteration 500: Cost = 0.04631784069699418
Iteration 600: Cost = 0.03878060970584242
Iteration 700: Cost = 0.03335145848859831
Iteration 800: Cost = 0.029256857148436077
Iteration 900: Cost = 0.026059591713895623
Predictions: [1 1]


Logistic Regression Experiments

Experiment 1

In [None]:
import numpy as np

# Dataset
X = np.array([[15, 9.8, 1, 21],  # Habitable
              [400, 3.7, 0, 0],   # Not habitable
              [-100, 1.6, 0, 0],  # Not habitable
              [20, 9.8, 1, 23],   # Habitable
              [30, 24.8, 1, 10]]) # Not habitable
y = np.array([1, 0, 0, 1, 0])  # Labels: 1 = Habitable, 0 = Not Habitable

# Normalizing features.
# Save the training mean/std before overwriting X: once X is standardized its
# own mean and std are roughly 0 and 1, which would leave X_test unscaled.
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
X = (X - feature_mean) / feature_std

# Initialize weights and bias
w = np.zeros(X.shape[1])
b = 0

# Training the model
learning_rate = 0.1
num_iterations = 1000
w, b = gradient_descent(X, y, w, b, learning_rate, num_iterations)

# Test data
X_test = np.array([[25, 9.8, 1, 22],  # Similar to Earth
                   [300, 3.7, 0, 0]]) # Venus-like
# Apply the same scaling that was applied to the training features
X_test = (X_test - feature_mean) / feature_std

# Predictions
predictions = predict(X_test, w, b)
print("Space Exploration Predictions:", predictions)


Iteration 0: Cost = 0.6931471805599453
Iteration 100: Cost = 0.11825943792799957
Iteration 200: Cost = 0.0609186965449627
Iteration 300: Cost = 0.040532652968367965
Iteration 400: Cost = 0.030256491694355868
Iteration 500: Cost = 0.02409894353472342
Iteration 600: Cost = 0.020007911921016218
Iteration 700: Cost = 0.01709662374334022
Iteration 800: Cost = 0.014920876426395033
Iteration 900: Cost = 0.013234072134995986
Space Exploration Predictions: [1 0]


Experiment 2

In [None]:
# Dataset
X = np.array([[23, 5, 1, 1],  # Suspicious
              [10, 0, 0, 0],  # Not suspicious
              [2, 3, 1, 1],   # Suspicious
              [14, 1, 0, 0],  # Not suspicious
              [21, 4, 1, 0]]) # Suspicious
y = np.array([1, 0, 1, 0, 1])  # Labels: 1 = Suspicious, 0 = Not Suspicious

# Normalize and train.
# Save the training mean/std before overwriting X: once X is standardized its
# own mean and std are roughly 0 and 1, which would leave X_test unscaled.
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
X = (X - feature_mean) / feature_std
w = np.zeros(X.shape[1])
b = 0
# learning_rate and num_iterations are not set in this cell; they must
# already be defined by an earlier cell.
w, b = gradient_descent(X, y, w, b, learning_rate, num_iterations)

# Test data
X_test = np.array([[20, 2, 1, 1],  # Likely suspicious
                   [8, 0, 0, 0]])  # Normal
# Apply the same scaling that was applied to the training features
X_test = (X_test - feature_mean) / feature_std

# Predictions
predictions = predict(X_test, w, b)
print("Hacking Predictions:", predictions)


Iteration 0: Cost = 0.6931471805599453
Iteration 100: Cost = 0.060210955948932686
Iteration 200: Cost = 0.031105581072305856
Iteration 300: Cost = 0.020941240708517644
Iteration 400: Cost = 0.01577931928319315
Iteration 500: Cost = 0.012658644313562914
Iteration 600: Cost = 0.01056896790881666
Iteration 700: Cost = 0.009071998743141348
Iteration 800: Cost = 0.007946948176002873
Iteration 900: Cost = 0.007070541772786891
Hacking Predictions: [1 1]


Experiment 3

In [None]:
# Dataset
X = np.array([[12, 1, 1],  # Adaptive traits present
              [3, 0, 1],   # No adaptive traits
              [15, 1, 0],  # Adaptive traits present
              [7, 0, 1],   # No adaptive traits
              [10, 1, 1]]) # Adaptive traits present
y = np.array([1, 0, 1, 0, 1])  # Labels: 1 = Adaptive traits, 0 = None

# Normalize and train.
# Save the training mean/std before overwriting X: once X is standardized its
# own mean and std are roughly 0 and 1, which would leave X_test unscaled.
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
X = (X - feature_mean) / feature_std
w = np.zeros(X.shape[1])
b = 0
# learning_rate and num_iterations are not set in this cell; they must
# already be defined by an earlier cell.
w, b = gradient_descent(X, y, w, b, learning_rate, num_iterations)

# Test data
X_test = np.array([[8, 1, 1],  # Likely adaptive
                   [4, 0, 0]]) # Non-adaptive
# Apply the same scaling that was applied to the training features
X_test = (X_test - feature_mean) / feature_std

# Predictions
predictions = predict(X_test, w, b)
print("Evolutionary Biology Predictions:", predictions)


Iteration 0: Cost = 0.6931471805599453
Iteration 100: Cost = 0.07613171443229241
Iteration 200: Cost = 0.0396043618220428
Iteration 300: Cost = 0.026726444864907734
Iteration 400: Cost = 0.020169554451149135
Iteration 500: Cost = 0.016200096360405546
Iteration 600: Cost = 0.013539417512517726
Iteration 700: Cost = 0.011631843657751497
Iteration 800: Cost = 0.010197177170078862
Iteration 900: Cost = 0.009078863263776802
Evolutionary Biology Predictions: [1 1]


Experiment 4

In [None]:
# Dataset
X = np.array([[130, 240, 1],  # Disease present
              [120, 200, 0],  # No disease
              [140, 250, 1],  # Disease present
              [115, 180, 0],  # No disease
              [135, 230, 1]]) # Disease present
y = np.array([1, 0, 1, 0, 1])  # Labels: 1 = Disease, 0 = No Disease

# Normalize and train.
# Save the training mean/std before overwriting X: once X is standardized its
# own mean and std are roughly 0 and 1, which would leave X_test unscaled.
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
X = (X - feature_mean) / feature_std
w = np.zeros(X.shape[1])
b = 0
# learning_rate and num_iterations are not set in this cell; they must
# already be defined by an earlier cell.
w, b = gradient_descent(X, y, w, b, learning_rate, num_iterations)

# Test data
X_test = np.array([[125, 220, 1],  # Likely disease
                   [110, 190, 0]]) # Healthy
# Apply the same scaling that was applied to the training features
X_test = (X_test - feature_mean) / feature_std

# Predictions
predictions = predict(X_test, w, b)
print("Simple Biology Predictions:", predictions)


Iteration 0: Cost = 0.6931471805599453
Iteration 100: Cost = 0.048966948497879234
Iteration 200: Cost = 0.025217515812226497
Iteration 300: Cost = 0.017019653912466244
Iteration 400: Cost = 0.012860418638859516
Iteration 500: Cost = 0.01034284039454739
Iteration 600: Cost = 0.008654057048360515
Iteration 700: Cost = 0.007442075706216874
Iteration 800: Cost = 0.006529626887330923
Iteration 900: Cost = 0.0058176886871188485
Simple Biology Predictions: [1 1]


Experiment 5

In [None]:
# Dataset
X = np.array([[200, 21, 1],  # Alien life present
              [50, 0, 0],    # No alien life
              [180, 19, 1],  # Alien life present
              [30, 0, 0],    # No alien life
              [210, 22, 1]]) # Alien life present
y = np.array([1, 0, 1, 0, 1])  # Labels: 1 = Alien life, 0 = None

# Normalize and train.
# Save the training mean/std before overwriting X: once X is standardized its
# own mean and std are roughly 0 and 1, which would leave X_test unscaled.
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
X = (X - feature_mean) / feature_std
w = np.zeros(X.shape[1])
b = 0
# learning_rate and num_iterations are not set in this cell; they must
# already be defined by an earlier cell.
w, b = gradient_descent(X, y, w, b, learning_rate, num_iterations)

# Test data
X_test = np.array([[190, 20, 1],  # Signs of life
                   [20, 0, 0]])   # Barren
# Apply the same scaling that was applied to the training features
X_test = (X_test - feature_mean) / feature_std

# Predictions
predictions = predict(X_test, w, b)
print("Alien Life Predictions:", predictions)


Iteration 0: Cost = 0.6931471805599453
Iteration 100: Cost = 0.03920355609795441
Iteration 200: Cost = 0.01961389277894668
Iteration 300: Cost = 0.013072921595764378
Iteration 400: Cost = 0.009806451691185285
Iteration 500: Cost = 0.007848433960815153
Iteration 600: Cost = 0.006543855332144376
Iteration 700: Cost = 0.005612312617777542
Iteration 800: Cost = 0.004913751797574125
Iteration 900: Cost = 0.004370432967097405
Alien Life Predictions: [1 1]
