# Feed Forward Neural Network

## Step 1: Load Data
We start by downloading and loading the dataset `data_ffnn.txt`. The file consists of three columns: `x1`, `x2`, and `y`. This is a multi-class problem.


In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the whitespace-delimited dataset into a DataFrame with columns x1, x2, y
file_path = "data_ffnn.txt"  # Ensure this file is in your working directory
# The separator is a regex, so it is written as a raw string: '\s' is not a
# valid Python string escape and the non-raw form triggers an invalid-escape
# warning on recent Python versions.
# NOTE(review): header=0 combined with names= treats the first line of the
# file as a header row and replaces it with these names - confirm the file
# really starts with a header line, otherwise the first sample is dropped.
data = pd.read_csv(file_path, sep=r'\s+', header=0, names=['x1', 'x2', 'y'])

# Display the first few rows
print(data.head())


## Step 2: Plot the Data
Visualize the data in 2D, coloring each point according to its class.


In [26]:
# Scatter plot of the samples: one scatter series (and legend entry) per class
plt.figure(figsize=(8, 6))

class_column = data['y']
for cls in class_column.unique():
    in_class = class_column == cls
    plt.scatter(data.loc[in_class, 'x1'], data.loc[in_class, 'x2'],
                label=f'Class {cls}')

plt.title('Data Visualization 2D')
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend()
plt.show()


## Step 3: Forward Propagation
We implement forward propagation for a feedforward neural network with three layers. The hidden layer will have `K` neurons.


In [27]:
def forward_propagation(X, V, W):
    """
    Perform forward propagation through the neural network.

    A column of ones (bias) is prepended to the input and to the hidden
    activations, and a sigmoid is applied at both the hidden and the
    output layer.

    Parameters:
    -----------
    X : array-like
        Input data of shape (n_samples, n_features)
    V : array-like
        Weight matrix for hidden layer, shape (n_features + 1, K);
        the first row multiplies the bias column
    W : array-like
        Weight matrix for output layer, shape (K + 1, J);
        the first row multiplies the bias column

    Returns:
    --------
    dict
        Dictionary containing:
        - X_bar: Input data with bias column prepended
        - X_bar_bar: Hidden layer pre-activation (X_bar . V)
        - F: Hidden layer activation, sigmoid(X_bar_bar)
        - F_bar: Hidden layer activation with bias column prepended
        - F_bar_bar: Output layer pre-activation (F_bar . W)
        - G: Network output, sigmoid(F_bar_bar)
    """
    # Accept any 2-D array-like (e.g. nested lists), as documented above
    X = np.asarray(X, dtype=float)

    # Add bias term to input
    X_bar = np.hstack((np.ones((X.shape[0], 1)), X))

    # Hidden layer. The sigmoid is written as exp(-log(1 + exp(-z))), which is
    # mathematically 1 / (1 + exp(-z)) but does not overflow for large
    # negative z.
    X_bar_bar = np.dot(X_bar, V)
    F = np.exp(-np.logaddexp(0, -X_bar_bar))
    F_bar = np.hstack((np.ones((F.shape[0], 1)), F))

    # Output layer (same overflow-safe sigmoid)
    F_bar_bar = np.dot(F_bar, W)
    G = np.exp(-np.logaddexp(0, -F_bar_bar))

    # Store all intermediate values
    results = {
        'X_bar': X_bar,
        'X_bar_bar': X_bar_bar,
        'F': F,
        'F_bar': F_bar,
        'F_bar_bar': F_bar_bar,
        'G': G,
    }

    return results


In [28]:
# Network dimensions
N = 2                          # input features
K = 4                          # hidden neurons
J = data['y'].unique().size    # output classes (number of distinct labels)

# Random initial weights; each matrix has one extra row for the bias column
# that forward_propagation prepends
V = np.random.randn(N + 1, K)
W = np.random.randn(K + 1, J)

# Inputs as a plain array, targets as one-hot rows (row k of the identity
# matrix is the encoding of label k)
feature_frame = data[['x1', 'x2']]
X = feature_frame.values
class_index = data['y'].astype(int)
y_true_onehot = np.eye(J)[class_index]

# First forward pass with the random weights; keep the pieces the training
# cells reuse under their own names
results = forward_propagation(X, V, W)
X_bar, F, F_bar, G = (results[key] for key in ('X_bar', 'F', 'F_bar', 'G'))

# Half the sum of squared differences between outputs and one-hot targets
E = 0.5 * np.sum(np.square(G - y_true_onehot))
print(f"Error: {E:.4f}")

# Predicted class = index of the largest output
y_pred = G.argmax(axis=1)
print("\nFirst few predictions vs actual:")
for row in range(5):
    print(f"Predicted: {y_pred[row]}, Actual: {data['y'].iloc[row]}")

## Step 4: Backpropagation


In [29]:
# Learning rates: alpha_1 is applied to the output weights W and alpha_2 to
# the hidden weights V inside backpropagation
alpha_1 = 10**-3
alpha_2 = 10**-3

# Iteration counter, incremented once per pass of the training loop
iterations = 0

# Minimum change in error between two consecutive iterations that still
# counts as "not converged"
threshold = 10**-4

# Error history. The first entry is a placeholder so that the training loop
# can compare the last two entries from the very first iteration; the second
# entry is the error of the initial random weights.
errors = [10**-5, E]



In [30]:
def backpropagation(V_BP, W_BP, X_bar_BP, F_BP, F_bar_BP, G_BP, y_one_hot_BP, alpha_1_BP, alpha_2_BP):
    """
    Perform one gradient-descent update of both weight matrices.

    Parameters:
    -----------
    V_BP : ndarray
        Current hidden layer weights
    W_BP : ndarray
        Current output layer weights (first row belongs to the bias unit)
    X_bar_BP : ndarray
        Input data with bias column
    F_BP : ndarray
        Hidden layer activation (without bias column)
    F_bar_BP : ndarray
        Hidden layer activation with bias column
    G_BP : ndarray
        Network output
    y_one_hot_BP : ndarray
        Target values, same shape as G_BP
    alpha_1_BP : float
        Learning rate applied to W
    alpha_2_BP : float
        Learning rate applied to V

    Returns:
    --------
    tuple
        (W_new, V_new) - note the order: output weights first, then hidden
        weights. The input matrices are not modified.
    """
    # Output layer delta: sigmoid slope G(1 - G) times the error (G - y)
    output_slope = G_BP * (1 - G_BP)
    output_delta = output_slope * (G_BP - y_one_hot_BP)

    # Propagate the output delta back through W; column 0 corresponds to the
    # bias unit, which has no incoming weights, so it is dropped
    back_signal = output_delta @ W_BP.T
    hidden_slope = F_BP * (1 - F_BP)
    hidden_delta = hidden_slope * back_signal[:, 1:]

    # Gradients of the error with respect to each weight matrix
    grad_W = F_bar_BP.T @ output_delta
    grad_V = X_bar_BP.T @ hidden_delta

    # Gradient-descent step; new arrays are returned
    return W_BP - alpha_1_BP * grad_W, V_BP - alpha_2_BP * grad_V


In [31]:
# Batch gradient descent. The loop keeps running while EITHER the error still
# changes by more than `threshold` between consecutive iterations OR fewer
# than 10000 iterations have been done, i.e. it always performs at least
# 10000 updates and then continues until the error change drops below the
# threshold.
# NOTE(review): because of the `or`, there is no upper bound on the number of
# iterations; if a hard cap was intended the condition should use `and`.
while abs(errors[-1] - errors[-2]) > threshold or iterations < 10000:

    iterations += 1

    # Backpropagation and weight updates, using the activations that were
    # computed with the current weights
    W, V = backpropagation(V, W, X_bar, F, F_bar, G, y_true_onehot, alpha_1, alpha_2)

    # Forward propagation with the updated weights (X_bar depends only on X,
    # so it does not need to be refreshed)
    results = forward_propagation(X, V, W)
    F = results['F']
    F_bar = results['F_bar']
    G = results['G']

    # Calculate error
    E = 0.5 * np.sum((G - y_true_onehot) ** 2)
    errors.append(E)

    if iterations % 1000 == 0:
        print(f"Iteration {iterations}, Error: {E}")

# Final results
print("\nFinal Error:", errors[-1])
# Report the loop counter itself: `errors` also contains the two entries it
# was initialised with, so its length overstates the iteration count by 2.
print("Number of iterations:", iterations)
    


## Step 5: Plotting the errors

In [32]:
# Plot the recorded error values against their position in the history
plt.figure(figsize=(8, 5))
plt.plot(errors, label="Erreur SSE")
plt.title("Réduction de l'erreur au fil des itérations")
plt.xlabel("Itérations")
plt.ylabel("Erreur (SSE)")
plt.grid()
plt.legend()
plt.show()

## Step 6: Optimal parameters

In [33]:
def display_weights(V, W):
    """
    Print the weight matrices V and W, followed by their shapes.
    """
    # Each matrix is printed under its own heading, V first
    sections = (
        ("\nMatrice V (couche cachée):", V),
        ("\nMatrice W (couche de sortie):", W),
    )
    for heading, matrix in sections:
        print(heading)
        print(matrix)

    # Shape summary
    print("\nTailles des matrices:")
    print(f"V: {V.shape} - entrée vers couche cachée")
    print(f"W: {W.shape} - couche cachée vers sortie")

# Display the weights
display_weights(V, W)

## Step 7: Plotting training output values vs. predicted values

In [34]:
# Forward pass over the training inputs with the current weights
results = forward_propagation(X, V, W)
predicted_outputs = results['G']
predicted_classes = predicted_outputs.argmax(axis=1)
actual_classes = data['y'].values

# Accuracy = fraction of samples whose predicted index equals the label
accuracy = (predicted_classes == actual_classes).mean()
print(f"Classification Accuracy: {accuracy * 100:.2f}%")

# Side-by-side scatter plots: actual classes on the left, predicted on the right
plt.figure(figsize=(12, 6))

panels = (
    (121, 'Actual Classes', actual_classes),
    (122, 'Predicted Classes', predicted_classes),
)
for position, heading, classes in panels:
    plt.subplot(position)
    for label in np.unique(classes):
        mask = classes == label
        plt.scatter(X[mask, 0], X[mask, 1], label=f'Class {label}')
    plt.title(heading)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()

plt.tight_layout()
plt.show()

# Tab-separated table of the first ten samples
print("\nDetailed Comparison (first 10 samples):")
print("Sample\tActual\tPredicted")
print("-" * 30)
for row in range(10):
    print(f"{row}\t{actual_classes[row]}\t{predicted_classes[row]}")

## Step 8: Testing the model with values

In [35]:
# Hand-picked points to classify
X_test = np.array([
    [0, 0],      # Test point 1
    [2, 2],      # Test point 2
    [4, 4],      # Test point 3
    [4.5, 1.5],  # Test point 4
])

# Forward pass on the test points; the predicted class is the index of the
# largest network output
test_output = forward_propagation(X_test, V, W)
predicted_classes = test_output['G'].argmax(axis=1)

# Training data in the background, test points as red stars on top
plt.figure(figsize=(10, 6))

for label in np.unique(data['y']):
    mask = data['y'] == label
    plt.scatter(data.loc[mask, 'x1'], data.loc[mask, 'x2'],
                alpha=0.5, label=f'Training Class {label}')

plt.scatter(X_test[:, 0], X_test[:, 1],
            color='red', marker='*', s=200,
            label='Test Points')

# Tag every test point with its number and predicted class
for number, (point, cls) in enumerate(zip(X_test, predicted_classes), start=1):
    plt.annotate(f'T{number}\nClass {cls}',
                 tuple(point), xytext=(10, 10),
                 textcoords='offset points')

plt.title('Test Points and Their Predicted Classes')
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend()
plt.grid(True)
plt.show()

# "Confidence" here is simply the largest output value for each test point
confidences = test_output['G'].max(axis=1)
print("\nPrediction Confidences:")
print("-" * 40)
for number, conf in enumerate(confidences, start=1):
    print(f"Test Point {number}: {conf:.3f}")

## Step 9: Plot classification results

In [36]:
# Predictions for the training inputs and for the test points
train_output = forward_propagation(X, V, W)
train_pred = train_output['G'].argmax(axis=1)
test_output = forward_propagation(X_test, V, W)
test_pred = test_output['G'].argmax(axis=1)

plt.figure(figsize=(12, 5))

# Left panel: the data coloured by its given classes
plt.subplot(121)
for label in np.unique(data['y']):
    mask = data['y'] == label
    plt.scatter(data.loc[mask, 'x1'], data.loc[mask, 'x2'],
                label=f'Class {label}')

plt.title('Original Data with Given Classes')
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend()
plt.grid(True)

# Right panel: training points coloured by predicted class, plus test points
plt.subplot(122)
for label in np.unique(train_pred):
    mask = train_pred == label
    plt.scatter(X[mask, 0], X[mask, 1],
                label=f'Predicted Class {label}')

plt.scatter(X_test[:, 0], X_test[:, 1],
            color='red', marker='*', s=200,
            label='Test Points')

# Tag every test point with its number and predicted class
for number, (point, cls) in enumerate(zip(X_test, test_pred), start=1):
    plt.annotate(f'T{number}\nClass {cls}',
                 tuple(point), xytext=(10, 5),
                 textcoords='offset points')

plt.title('Model Predictions\n(Training + Test Points)')
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

# Text table of the test point predictions
rule = "-" * 50
print("\nTest Point Classifications:")
print(rule)
print("Point (x1, x2)      | Predicted Class")
print(rule)
for number, (point, pred) in enumerate(zip(X_test, test_pred), start=1):
    print(f"T{number}: ({point[0]:.1f}, {point[1]:.1f})  | Class {pred}")