# Logistic Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt


In [None]:
def initialize_parameters(n):
    """Build the starting parameters for a model with ``n`` input features.

    Returns:
        A ``(w, b)`` pair where ``w`` is a length-``n`` array of zeros and
        ``b`` is the integer ``0``.
    """
    weights = np.zeros(n)
    bias = 0
    return weights, bias


In [None]:
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    Maps any real value (scalar or array) into the range [0, 1] without
    overflowing for inputs of large magnitude.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of ``z``; a scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in [0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: the algebraically equal e^z / (1 + e^z).
    result = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # np.where always yields an ndarray; indexing with () turns a 0-d result
    # back into a scalar and leaves n-d arrays untouched.
    return result[()]


In [None]:
def compute_cost_and_gradients(X, y, w, b):
    """Evaluate the logistic (cross-entropy) cost and its gradients.

    Args:
        X: Input array; its first dimension is taken as the number of
            training examples and it is multiplied with ``w`` via ``np.dot``.
        y: Target labels, broadcast against ``np.dot(X, w) + b``
            (presumably 0/1 values with one entry per example).
        w: Weight vector.
        b: Bias term.

    Returns:
        ``(cost, dw, db)``: the mean cross-entropy cost, its gradient with
        respect to ``w`` and its gradient with respect to ``b``.
    """
    # Number of training examples
    m = X.shape[0]
    # Linear scores (logits) and the corresponding probabilities
    z = np.dot(X, w) + b
    a = sigmoid(z)

    # -[y*log(a) + (1-y)*log(1-a)] equals log(1 + e^z) - y*z when a = sigmoid(z).
    # Working from the logits avoids log(0) = -inf (and 0 * -inf = nan) when
    # the sigmoid saturates at exactly 0 or 1.
    cost = (1 / m) * np.sum(np.logaddexp(0, z) - y * z)

    # Gradients of the mean cost with respect to the weights and the bias
    error = a - y
    dw = (1 / m) * np.dot(X.T, error)
    db = (1 / m) * np.sum(error)

    return cost, dw, db

def gradient_descent(X, y, w, b, learning_rate, num_iterations):
    """Optimise ``w`` and ``b`` with batch gradient descent.

    The arrays passed in as ``w`` and ``b`` are never modified; updated
    values are returned instead.

    Args:
        X: Training inputs, forwarded to ``compute_cost_and_gradients``.
        y: Training labels, forwarded to ``compute_cost_and_gradients``.
        w: Initial weights.
        b: Initial bias.
        learning_rate: Step size applied to each gradient.
        num_iterations: Number of update steps to perform.

    Returns:
        ``(w, b, costs)`` where ``costs`` holds the cost observed at every
        100th iteration (starting with iteration 0).
    """
    # Costs sampled during training, for later visualisation
    costs = []

    for i in range(num_iterations):
        # Cost and gradients at the current parameters
        cost, dw, db = compute_cost_and_gradients(X, y, w, b)

        # Rebind instead of using "-=": an in-place update would silently
        # overwrite the caller's array and fails for integer-typed weights.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Record and report progress every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            print(f"Iteration {i}: Cost {cost}")

    return w, b, costs


In [None]:
def predict(X, w, b):
    """Classify inputs with a trained logistic-regression model.

    Returns a boolean result: ``True`` where the predicted probability
    ``sigmoid(np.dot(X, w) + b)`` is at least 0.5, ``False`` otherwise.
    """
    probabilities = sigmoid(np.dot(X, w) + b)
    # Threshold at 0.5 (inclusive) to obtain the class decision
    return probabilities >= 0.5


In [None]:
def plot_decision_boundary(X, y, w, b):
    """Draw the model's decision regions together with the data points.

    Uses the first two columns of ``X`` as the plot axes, classifies a dense
    grid covering the data (padded by 1 on every side, spacing 0.01) with
    ``predict`` and shows the result as a filled contour plot with the
    samples, coloured by ``y``, on top.
    """
    first_feature, second_feature = X[:, 0], X[:, 1]
    grid_step = 0.01

    # Grid coordinates spanning the padded range of each feature
    horizontal = np.arange(first_feature.min() - 1, first_feature.max() + 1, grid_step)
    vertical = np.arange(second_feature.min() - 1, second_feature.max() + 1, grid_step)
    xx, yy = np.meshgrid(horizontal, vertical)

    # Classify every grid point, then fold the flat result back into grid shape
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = predict(grid_points, w, b).reshape(xx.shape)

    # Decision regions first, training samples on top
    plt.contourf(xx, yy, Z, alpha=0.8)
    plt.scatter(first_feature, second_feature, c=y, edgecolors='k', marker='o')

    # Axis labels, title, and display
    plt.xlabel('Input 1')
    plt.ylabel('Input 2')
    plt.title('Decision Boundary')
    plt.show()


# Task 1: AND gate

In [None]:
# AND gate dataset: every combination of two binary inputs; the label is 1
# only when both inputs are 1.
X = np.array([[first, second] for first in (0, 1) for second in (0, 1)])
y = X.all(axis=1).astype(int)

In [None]:
# Start from all-zero parameters, one weight per input column
n = X.shape[1]
w, b = initialize_parameters(n)

# Fit the model with batch gradient descent
learning_rate, num_iterations = 0.1, 1000
w, b, costs = gradient_descent(
    X, y, w, b, learning_rate=learning_rate, num_iterations=num_iterations
)


In [None]:
# Classify the training inputs and show the result next to the true labels
predictions = predict(X, w, b)
print("Predictions:", predictions)
print("Actual:", y)

# Percentage of predictions that match the labels
accuracy = 100 * np.mean(np.equal(predictions, y))
print(f"Accuracy: {accuracy}%")

In [None]:
# Show the current values of the weight vector w and the bias b
print("Model Weights:", w)
print("Model Bias:", b)


In [None]:
# Plot the decision regions of the current parameters over the data points
plot_decision_boundary(X=X, y=y, w=w, b=b)

# Task 2: OR gate

In [None]:

# OR gate dataset: every combination of two binary inputs; the label is 1
# when at least one input is 1.
X = np.array([[first, second] for first in (0, 1) for second in (0, 1)])
y = X.any(axis=1).astype(int)


In [None]:
# Start from all-zero parameters, one weight per input column
n = X.shape[1]
w, b = initialize_parameters(n)

# Fit the model with batch gradient descent
learning_rate, num_iterations = 0.1, 1000
w, b, costs = gradient_descent(
    X, y, w, b, learning_rate=learning_rate, num_iterations=num_iterations
)


In [None]:
# Classify the training inputs with the trained weights and bias
predictions = predict(X, w, b)

# Show the predictions next to the true labels
print("Predictions:", predictions)
print("Actual:", y)

# Percentage of predictions that match the labels
accuracy = 100 * np.mean(np.equal(predictions, y))

print(f"Accuracy: {accuracy}%")


In [None]:
# Show the current values of the weight vector w and the bias b
print("Model Weights:", w)
print("Model Bias:", b)


In [None]:
# Plot the decision regions of the current parameters over the data points
plot_decision_boundary(X=X, y=y, w=w, b=b)


# Task 3: Logistic regression class trained on a dataset loaded from text files

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:

# Sigmoid function
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of ``z``; a scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in [0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: the algebraically equal e^z / (1 + e^z).
    result = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # np.where always yields an ndarray; indexing with () turns a 0-d result
    # back into a scalar and leaves n-d arrays untouched.
    return result[()]

In [None]:
# Logistic Regression Class
class LogisticRegression:
    """Binary logistic-regression classifier trained with batch gradient descent.

    The model keeps a column vector of weights and a scalar bias, converts the
    linear score ``np.dot(X, weights) + bias`` into a probability with the
    module-level ``sigmoid`` function, and predicts class 1 when that
    probability is strictly greater than 0.5.

    Attributes set by ``fit``:
        m, n: Number of training examples and number of features.
        weights: Array of shape ``(n, 1)``.
        bias: Bias term.
        losses, accuracies, iterations: Training loss, training accuracy and
            iteration index, recorded every 1000 iterations and at the final
            iteration.
        final_loss, final_accuracy: Values recorded at the final iteration,
            or ``None`` when ``num_iterations`` is 0.
    """

    # Added inside the logarithms so that log(0) is never evaluated
    _LOG_EPSILON = 1e-15

    def __init__(self, learning_rate=0.01, num_iterations=10000):
        """Store the gradient-descent step size and the number of updates."""
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    @staticmethod
    def _as_column(y):
        """Return ``y`` as an ``(N, 1)`` array.

        A flat ``(N,)`` label vector would otherwise broadcast against the
        ``(N, 1)`` predictions into an ``(N, N)`` matrix and silently corrupt
        the gradients, the loss and the accuracy.
        """
        return np.asarray(y).reshape(-1, 1)

    def _probabilities(self, X):
        """Return the predicted probability of class 1 for every row of ``X``."""
        return sigmoid(np.dot(X, self.weights) + self.bias)

    def fit(self, X, y):
        """Train the model on ``X`` (shape ``(m, n)``) and labels ``y``.

        ``y`` may be a flat vector or a column vector with one entry per row
        of ``X``. Progress is recorded in ``losses``, ``accuracies`` and
        ``iterations``.
        """
        y = self._as_column(y)

        # Number of training examples and features
        self.m, self.n = X.shape

        # Start from all-zero parameters
        self.weights = np.zeros((self.n, 1))
        self.bias = 0

        # Training history used for plotting
        self.losses = []
        self.accuracies = []
        self.iterations = []

        # Defined up front so the attributes exist even when no iteration runs
        self.final_loss = None
        self.final_accuracy = None

        last_iteration = self.num_iterations - 1
        for i in range(self.num_iterations):
            self._update_weights(X, y)

            # Record loss and accuracy every 1000 iterations and at the end
            if i % 1000 == 0 or i == last_iteration:
                loss = self._compute_loss(X, y)
                accuracy = self._calculate_accuracy(X, y)
                self.losses.append(loss)
                self.accuracies.append(accuracy)
                self.iterations.append(i)
                if i == last_iteration:
                    self.final_loss = loss
                    self.final_accuracy = accuracy

    def _update_weights(self, X, y):
        """Apply one gradient-descent step computed on ``(X, y)``."""
        error = self._probabilities(X) - self._as_column(y)

        # Average over the rows actually passed in rather than the stored
        # training-set size, so the step is correct for any batch.
        m = X.shape[0]
        dw = (1 / m) * np.dot(X.T, error)
        db = (1 / m) * np.sum(error)

        self.weights -= self.learning_rate * dw
        self.bias -= self.learning_rate * db

    def _calculate_accuracy(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == self._as_column(y))

    def predict(self, X):
        """Return predicted class labels (0 or 1) as an ``(N, 1)`` integer array."""
        # Vectorised threshold: strictly greater than 0.5 means class 1
        return (self._probabilities(X) > 0.5).astype(int).reshape(-1, 1)

    def _compute_loss(self, X, y):
        """Return the mean cross-entropy loss of the model on ``(X, y)``."""
        y = self._as_column(y)
        y_predicted = self._probabilities(X)
        eps = self._LOG_EPSILON

        # Normalise by the number of rows in X, not the stored training size
        m = X.shape[0]
        loss = - (1 / m) * np.sum(
            y * np.log(y_predicted + eps) + (1 - y) * np.log(1 - y_predicted + eps)
        )
        return loss


In [None]:
# Load the training and test inputs/labels from text files in the working directory
trainX, trainY, testX, testY = map(
    np.loadtxt, ('trainX.txt', 'trainY.txt', 'testX.txt', 'testY.txt')
)


In [None]:
# Labels must be column vectors of shape (N, 1), not flat (N,) arrays
trainY = np.reshape(trainY, (-1, 1))
testY = np.reshape(testY, (-1, 1))

In [None]:
# Convert labels to binary: label 2 becomes 0, every other label becomes 1
trainY = (trainY != 2).astype(int)
testY = (testY != 2).astype(int)

In [None]:
# Normalize the data by dividing every feature value by 255
trainX = np.true_divide(trainX, 255.0)
testX = np.true_divide(testX, 255.0)

In [None]:
# Initial predictions check: train for a single iteration, then predict on
# the training inputs
model = LogisticRegression(num_iterations=1, learning_rate=0.1)
model.fit(trainX, trainY)
initial_predictions = model.predict(trainX)

In [None]:
# Learning rates to compare, and the iteration budget shared by every run
num_iterations = 10_000
etas = [0.1, 0.01, 0.001]

In [None]:
# For every learning rate in `etas`: train a fresh model on the training data,
# report its final training accuracy, and plot the recorded loss and accuracy
# history side by side.
for eta in etas:
    # Announce which learning rate is being trained
    print(f'Training with learning rate: {eta}')

    # New model for this learning rate, using the shared iteration budget
    model = LogisticRegression(learning_rate=eta, num_iterations=num_iterations)

    # Fit the model to the training data
    model.fit(trainX, trainY)

    # Training accuracy that fit() recorded at its last iteration (a fraction)
    train_accuracy = model.final_accuracy

    # Report it as a percentage with two decimals
    print(f'Learning Rate: {eta}, Training Accuracy: {train_accuracy * 100:.2f}%')

    # One wide figure holding two side-by-side panels.
    # The pyplot calls below act on the "current" figure/axes, so their order matters.
    plt.figure(figsize=(14, 5))

    # Left panel: recorded loss against the iteration at which it was recorded
    plt.subplot(1, 2, 1)
    plt.plot(model.iterations, model.losses, marker='o')  # one marker per recorded point
    plt.title(f'Loss Curve (Learning Rate: {eta})')  # panel title
    plt.xlabel('Iterations')  # x-axis label
    plt.ylabel('Loss')  # y-axis label

    # Right panel: recorded accuracy, converted from fraction to percent
    plt.subplot(1, 2, 2)
    plt.plot(model.iterations, [acc * 100 for acc in model.accuracies], marker='o')  # one marker per recorded point
    plt.title(f'Accuracy Curve (Learning Rate: {eta})')  # panel title
    plt.xlabel('Iterations')  # x-axis label
    plt.ylabel('Accuracy (%)')  # y-axis label

    # Adjust subplot layout to prevent overlapping
    plt.tight_layout()

    # Show the figure for this learning rate
    plt.show()


In [None]:
# Evaluate each learning rate on the test data.
# NOTE: every model is trained again from scratch here before it is scored.
for eta in etas:
    # Fresh model for this learning rate, trained on the training data
    model = LogisticRegression(num_iterations=num_iterations, learning_rate=eta)
    model.fit(trainX, trainY)

    # Fraction of test examples classified correctly
    test_accuracy = model._calculate_accuracy(testX, testY)

    # Report the test accuracy as a percentage
    print(f'Learning Rate: {eta}, Test Accuracy: {test_accuracy * 100:.2f}%')
