In [None]:
import numpy as np

class LinearSVM:
    """Linear soft-margin SVM trained with per-sample sub-gradient descent.

    The decision function is ``f(x) = w . x - b``. For every sample the
    training loop takes one gradient step on
    ``lambda_param * ||w||^2 + max(0, 1 - y * (w . x - b))``.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every update.
    lambda_param : float
        Weight of the L2 regularization term.
    n_iters : int
        Number of full passes over the training samples.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector; ``None`` until ``fit`` has been called.
    b : float or None
        Offset subtracted in the decision function; ``None`` until ``fit``.
    """

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples; lists are accepted and converted to a float array.
        y : array-like with n_samples entries
            Class labels. Values ``<= 0`` are treated as class -1 and all
            other values as class +1, so both {0, 1} and {-1, 1} encodings work.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``y`` does not hold exactly one
            label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features).")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples.")

        # Start from the zero hyperplane.
        self.w = np.zeros(n_features)
        self.b = 0.0

        # Map labels onto {-1, +1}: anything <= 0 becomes -1, the rest +1.
        y_ = np.where(y <= 0, -1, 1)

        for _ in range(self.n_iters):
            # Samples are visited in order and the parameters are updated after
            # each one, so every step sees the result of the previous step.
            for x_i, y_i in zip(X, y_):
                outside_margin = y_i * (np.dot(x_i, self.w) - self.b) >= 1

                if outside_margin:
                    # Hinge loss is zero here; only the regularizer contributes.
                    # d/dw (lambda * ||w||^2) = 2 * lambda * w
                    self.w -= self.learning_rate * (2 * self.lambda_param * self.w)
                else:
                    # Misclassified or inside the margin: the hinge term
                    # 1 - y * (w . x - b) is active.
                    # d/dw = 2 * lambda * w - y * x
                    dw = 2 * self.lambda_param * self.w - y_i * x_i
                    self.w -= self.learning_rate * dw
                    # d/db = +y (b enters the decision function with a minus sign)
                    self.b -= self.learning_rate * y_i

    def predict(self, X):
        """Classify samples by the side of the hyperplane they fall on.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            ``+1.0`` where ``w . x - b >= 0`` and ``-1.0`` where it is negative.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.w is None:
            raise RuntimeError("LinearSVM.predict() called before fit().")

        linear_output = np.dot(X, self.w) - self.b

        signs = np.sign(linear_output)
        # np.sign yields 0 for points exactly on the hyperplane, which is not a
        # class label; adding the boolean mask turns those zeros into +1 and
        # leaves every other entry untouched.
        return signs + (signs == 0)

In [None]:
import numpy as np

# 1. Define Sigmoid Activation Function and its Derivative
def sigmoid(x):
    """Logistic function ``1 / (1 + e^(-x))``; NumPy arrays are handled element-wise."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the slope of the sigmoid expressed through its *output*:
    if ``s = sigmoid(z)`` then ``ds/dz = s * (1 - s)``. The argument must
    therefore already be a sigmoid activation; no sigmoid is applied here.
    """
    one_minus_x = 1 - x
    return x * one_minus_x

# 2. XOR truth table: each row of X is one input pair, the matching row of y its label
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([0, 1, 1, 0]).reshape(-1, 1)  # column vector, one label per row of X

# 3. Random starting parameters, drawn uniformly from [0, 1)
np.random.seed(42)  # fixed seed so every run starts from the same values

# The four draws below consume the seeded stream in this order;
# reordering them would change every initial value.

# Input -> hidden layer (2 inputs feeding 2 hidden neurons)
weights_input_hidden = np.random.uniform(low=0.0, high=1.0, size=(2, 2))
bias_hidden = np.random.uniform(low=0.0, high=1.0, size=(1, 2))

# Hidden -> output layer (2 hidden neurons feeding 1 output neuron)
weights_hidden_output = np.random.uniform(low=0.0, high=1.0, size=(2, 1))
bias_output = np.random.uniform(low=0.0, high=1.0, size=(1, 1))

# Hyperparameters of the training loop
learning_rate, epochs = 0.5, 10_000

# 4. Training: one full-batch forward pass plus backpropagation per epoch
for i in range(epochs):

    # ---- Forward pass ----
    # Input -> hidden activations
    hidden_layer_input = X.dot(weights_input_hidden) + bias_hidden
    hidden_layer_output = sigmoid(hidden_layer_input)

    # Hidden -> network output
    output_layer_input = hidden_layer_output.dot(weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_layer_input)

    # ---- Backward pass ----
    # Residual between the labels and the current predictions
    error = y - predicted_output

    # Output layer delta: residual scaled by the sigmoid slope at the output
    d_predicted_output = sigmoid_derivative(predicted_output) * error

    # Push the output delta back through the hidden->output weights,
    # then scale by the sigmoid slope at the hidden activations
    error_hidden_layer = np.dot(d_predicted_output, weights_hidden_output.T)
    d_hidden_layer = sigmoid_derivative(hidden_layer_output) * error_hidden_layer

    # ---- Parameter updates ----
    # `error` is (label - prediction), so adding the deltas moves the
    # predictions towards the labels. The arrays are updated in place.
    weights_hidden_output += learning_rate * np.dot(hidden_layer_output.T, d_predicted_output)
    bias_output += learning_rate * d_predicted_output.sum(axis=0, keepdims=True)

    weights_input_hidden += learning_rate * np.dot(X.T, d_hidden_layer)
    bias_hidden += learning_rate * d_hidden_layer.sum(axis=0, keepdims=True)

# 5. Report the learned weights and the network's answers for the four XOR inputs
for title, matrix in (
    ("Final Hidden Weights:\n", weights_input_hidden),
    ("Final Output Weights:\n", weights_hidden_output),
):
    print(title, matrix)

# `predicted_output` comes from the last epoch's forward pass, which runs before
# that epoch's weight update, so it lags the weights printed above by one step.
print("\nPredictions after training:", predicted_output, sep="\n")

Final Hidden Weights:
 [[4.59244504 6.47246975]
 [4.5971031  6.49153682]]
Final Output Weights:
 [[-10.32676834]
 [  9.62121009]]

Predictions after training:
[[0.01890475]
 [0.98371361]
 [0.98369334]
 [0.01686123]]
