In [3]:
#The Perceptron
import numpy as np

class Perceptron:
    """Single-layer perceptron with a step activation for 0/1 targets.

    The model computes ``z = x . weights + bias`` and predicts 1 when
    ``z >= 0`` and 0 otherwise. Training uses the classic perceptron rule:
    parameters change only on misclassified samples.

    Attributes:
        lr: Step size applied to every weight/bias correction.
        epochs: Maximum number of full passes over the training data.
        weights: 1-D array of learned weights (``None`` until ``fit`` runs).
        bias: Learned bias term (``None`` until ``fit`` runs).
    """

    def __init__(self, learning_rate=0.1, epochs=10):
        """Store the hyperparameters; weights and bias are created by ``fit``."""
        self.lr = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def activation(self, z):
        """Step function: return 1 if the scalar ``z`` is >= 0, else 0."""
        return 1 if z >= 0 else 0

    def fit(self, X, y):
        """Train the perceptron on the given dataset.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like of 0/1 targets, one per row of ``X``.

        Training stops early as soon as one full epoch produces no
        misclassification; otherwise it runs for ``self.epochs`` epochs.
        """
        # Accept plain lists/tuples as well as ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples, n_features = X.shape

        # Start from all-zero parameters.
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.epochs):
            errors = 0
            for i, sample in enumerate(X):
                # 1. Weighted sum of the inputs plus bias.
                linear_output = np.dot(sample, self.weights) + self.bias

                # 2. Threshold the sum to obtain the 0/1 prediction.
                y_predicted = self.activation(linear_output)

                # 3. Error signal is -1, 0 or +1 for 0/1 targets.
                error = y[i] - y_predicted

                # 4. Move the boundary only when the sample is misclassified.
                if error != 0:
                    self.weights += self.lr * error * sample
                    self.bias += self.lr * error
                    errors += 1

            # Convergence check: a clean pass means nothing further will
            # change, so stop early.
            if errors == 0:
                break

    def predict(self, X):
        """Predict 0/1 outputs for new inputs.

        Args:
            X: 2-D array-like of shape (n_samples, n_features), or a single
                1-D sample of length n_features.

        Returns:
            1-D array of predictions; a single 1-D sample yields an array
            of length 1.
        """
        X = np.asarray(X)
        # np.dot returns a scalar for a single 1-D sample; atleast_1d keeps
        # the result iterable so both input shapes share one code path.
        linear_output = np.atleast_1d(np.dot(X, self.weights) + self.bias)
        return np.array([self.activation(z) for z in linear_output])

# --- Dataset Definitions ---
# Inputs (x1, x2)
# All four (x1, x2) input combinations, in truth-table order:
# (0, 0), (0, 1), (1, 0), (1, 1).
X = np.array([[x1, x2] for x1 in (0, 1) for x2 in (0, 1)])

# Target outputs for each logic gate, derived column-wise from the inputs.
gates = {
    "AND": X[:, 0] & X[:, 1],          # [0, 0, 0, 1]
    "OR": X[:, 0] | X[:, 1],           # [0, 1, 1, 1]
    "NAND": 1 - (X[:, 0] & X[:, 1]),   # [1, 1, 1, 0]
    "NOR": 1 - (X[:, 0] | X[:, 1]),    # [1, 0, 0, 0]
    "XOR": X[:, 0] ^ X[:, 1],          # [0, 1, 1, 0]
}

#  Training and Testing
for gate_name, y in gates.items():
    # Train a fresh model per gate so no state carries over between runs.
    print(f"\n--- Training {gate_name} Gate ---")
    p = Perceptron(learning_rate=0.1, epochs=20)
    p.fit(X, y)

    # Report the learned parameters and the outputs on the training inputs.
    predictions = p.predict(X)
    print(f"Final Weights: {p.weights}")
    print(f"Final Bias: {p.bias}")
    print(f"Predictions: {predictions}")

    # Verification: the gate counts as learned only if every output matches.
    if not np.array_equal(predictions, y):
        print(f" {gate_name} Gate failed to converge (linearly inseparable).")
        continue
    print(f" {gate_name} Gate learned successfully.")


--- Training AND Gate ---
Final Weights: [0.2 0.1]
Final Bias: -0.20000000000000004
Predictions: [0 0 0 1]
 AND Gate learned successfully.

--- Training OR Gate ---
Final Weights: [0.1 0.1]
Final Bias: -0.1
Predictions: [0 1 1 1]
 OR Gate learned successfully.

--- Training NAND Gate ---
Final Weights: [-0.2 -0.1]
Final Bias: 0.2
Predictions: [1 1 1 0]
 NAND Gate learned successfully.

--- Training NOR Gate ---
Final Weights: [-0.1 -0.1]
Final Bias: 0.0
Predictions: [1 0 0 0]
 NOR Gate learned successfully.

--- Training XOR Gate ---
Final Weights: [-0.1  0. ]
Final Bias: 0.0
Predictions: [1 1 0 0]
 XOR Gate failed to converge (linearly inseparable).


### Q1. Why did the XOR gate fail to converge?
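**Answer:** A single-layer perceptron can only learn functions that are **linearly separable**. This means the decision boundary must be a straight line (in general, a hyperplane). If you plot the XOR inputs, the points with output 1, (0, 1) and (1, 0), lie on one diagonal, while the points with output 0, (0, 0) and (1, 1), lie on the other, so no single straight line can separate the two classes. The weights therefore keep being corrected and never reach an error-free epoch.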

### Q2. What is the effect of the learning rate?
**Answer:**
* **Too high:** The model oscillates and fails to settle on the correct weights, causing instability.
* **Too low:** The model converges very slowly, requiring too many epochs.
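* A moderate value (e.g., `0.1` or `0.01`) provides a good balance between speed and stability for this linear problem. Note that in this particular implementation (weights and bias initialised to zero, step activation) the learning rate only rescales the weights and bias, so it does not change which samples are misclassified or how many epochs are needed; the effects above matter for other initialisations and for gradient-based models.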

### Q3. Why did the same code learn different gates?
**Answer:** The underlying algorithm (the learning rule) remains unchanged because it defines *how* to update weights based on error. The different gates are learned because the **input-output data (the dataset)** defines the decision boundary. The weights adjust based specifically on the data presented to them.

---