In [3]:
import numpy as np  # Import NumPy for numerical computations and array operations
import pandas as pd  # Import Pandas for data manipulation and analysis with DataFrames
import matplotlib.pyplot as plt  # Import Matplotlib for creating static, interactive visualizations
import seaborn as sns  # Import Seaborn for statistical data visualization built on Matplotlib
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split  # Import function to split dataset into training and testing subsets
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix, 
                             ConfusionMatrixDisplay, 
                             f1_score)  # Import function to calculate various metric

In [4]:
# --- Reproducibility ---------------------------------------------------------
RANDOM_STATE = 24
np.random.seed(RANDOM_STATE)                    # legacy global NumPy seed
rng = np.random.default_rng(seed=RANDOM_STATE)  # Generator used for weight initialisation

# --- Data and training hyper-parameters --------------------------------------
TEST_SIZE = 0.2    # fraction of samples held out by train_test_split
NOISE = 0.2        # noise level passed to make_moons
EPOCHS = 20000     # number of gradient-descent iterations
ALPHA = 0.1        # learning rate
N_SAMPLE = 1000    # number of samples generated by make_moons

# Matplotlib rcParams overrides. The original literal was a one-element *set*
# holding the truncated key "legend.fontsi"; it is now a proper dict with the
# full rcParams key. "medium" is Matplotlib's own default, so applying it with
# plt.rcParams.update(params) does not change the look of any figure.
# NOTE(review): extend with the remaining intended entries - the original was cut off.
params = {"legend.fontsize": "medium"}

In [27]:
# Generate the two-moons toy dataset: X holds the features, y the class labels.
X, y = make_moons(
    n_samples=N_SAMPLE,
    shuffle=True,
    noise=NOISE,
    random_state=RANDOM_STATE,
)

np.float64(0.36493292704099645)

In [None]:
# One-hot encode the labels so they line up with a softmax output layer.
# The ndim guard makes the cell idempotent: re-running it on an already encoded
# (2-D) array would otherwise fail inside get_dummies.
# dtype=float avoids the boolean columns newer pandas versions produce, which
# are awkward in the arithmetic of the loss and the gradients.
if y.ndim == 1:
    y = pd.get_dummies(y, dtype=float).to_numpy()

# Preview a few rows only instead of dumping the whole feature array.
X[:5]

(np.float64(0.36493292704099645), np.float64(1.4654943925052067e-16))

In [13]:
# Hold out TEST_SIZE of the samples for evaluation; seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Shapes of the four resulting arrays, in the order they were unpacked.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((800, 2), (200, 2), (800, 2), (200, 2))

In [31]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
s = StandardScaler().fit(X_train)
X_train = s.transform(X_train)
X_test = s.transform(X_test)



In [40]:
# Container that built_model fills with the recorded epochs and loss values.
loss_hist = {}

# Width of every layer, in order: input features, the hidden layers (5, 5, 4, 3),
# and finally one output unit per one-hot class column.
param = {
    "h_dim": [X_train.shape[1], 5, 5, 4, 3, y_train.shape[1]],
}

y_train.shape

(800, 2)

In [33]:
# Overflow demo: exp(710) does not fit in a float64, so NumPy returns inf
# (see the output below). This is why logits are shifted before exponentiating
# in a softmax.
np.exp(710)

  np.exp(710)


np.float64(inf)

In [None]:
def fn_softmax(z):
    """Numerically stable softmax over the last axis of ``z``.

    Args:
        z: array of raw scores (logits). For the usual 2-D case each row is
            one sample and each column one class; a 1-D vector is treated as
            a single sample.

    Returns:
        Array of the same shape as ``z`` whose entries are positive and sum to
        1 along the last axis.
    """
    z = np.asarray(z, dtype=float)

    # Shift every row by its OWN maximum. Shifting by the global maximum (as
    # before) is mathematically equivalent, but a row lying far below that
    # global maximum underflows to all zeros and then yields 0/0 = NaN.
    shifted = z - np.max(z, axis=-1, keepdims=True)

    exp_score = np.exp(shifted)
    return exp_score / np.sum(exp_score, axis=-1, keepdims=True)

In [39]:
# Sanity check: each row of a softmax output should sum to 1.
logits = np.asarray([[-1, 0, 1.0], [-10, 0, 8]])
sm = fn_softmax(logits)
np.sum(sm, axis=1)

array([1., 1.])

In [None]:
def fn_activ(z):
    """Hidden-layer activation: element-wise hyperbolic tangent of ``z``."""
    activated = np.tanh(z)
    return activated
def fn_active_prime(z):
    """Derivative of tanh evaluated at ``z``: ``1 - tanh(z)**2`` (element-wise)."""
    tanh_z = np.tanh(z)
    return 1.0 - np.square(tanh_z)

In [41]:
def predict(model,X):
    """Predict one class index per row of ``X``.

    Runs a forward pass through every layer stored in ``model`` (tanh on the
    hidden layers, softmax on the last one) and returns the most probable
    class. The previous version returned ``a2 >= 0.5``, an (n_samples,
    n_classes) boolean mask; that cannot be reshaped onto a plotting grid and
    can be all-False for a row once there are more than two classes.

    Args:
        model: dict holding weight matrices under ``"W1"``, ``"W2"``, ... and
            the matching biases under ``"b1"``, ``"b2"``, ... A two-layer
            model with only W1/W2/b1/b2 works as before.
        X: feature array of shape (n_samples, n_features). It must be
            pre-processed the same way as the data the model was trained on.

    Returns:
        1-D integer array of length n_samples with the predicted class index.

    Raises:
        KeyError: if ``model`` contains no weight matrices, or a bias is missing.
    """
    n_layers = sum(1 for key in model if str(key).startswith("W"))
    if n_layers == 0:
        raise KeyError("model contains no 'W<k>' weight matrices")

    # Forward propagation
    a = X
    for layer in range(1, n_layers + 1):
        z = a.dot(model[f"W{layer}"]) + model[f"b{layer}"]     # Aggregation
        a = fn_softmax(z) if layer == n_layers else fn_activ(z)  # Activation

    return np.argmax(a, axis=1)

In [None]:
def calculate_loss(model, X , y):
    """Mean categorical cross-entropy of ``model`` on ``(X, y)``.

    The previous version called ``fn_sigmoid``, which is not defined anywhere
    in this notebook, and used the binary cross-entropy formula although the
    output layer is a softmax over one-hot targets.

    Args:
        model: dict holding weight matrices under ``"W1"``, ``"W2"``, ... and
            the matching biases under ``"b1"``, ``"b2"``, ...
        X: feature array of shape (n_samples, n_features).
        y: one-hot encoded targets of shape (n_samples, n_classes).

    Returns:
        The cross-entropy summed over all samples and classes, divided by the
        number of samples.

    Raises:
        KeyError: if ``model`` contains no weight matrices, or a bias is missing.
    """
    n_layers = sum(1 for key in model if str(key).startswith("W"))
    if n_layers == 0:
        raise KeyError("model contains no 'W<k>' weight matrices")

    # Forward propagation: tanh on hidden layers, softmax on the output layer.
    a = X
    for layer in range(1, n_layers + 1):
        z = a.dot(model[f"W{layer}"]) + model[f"b{layer}"]     # Aggregation
        a = fn_softmax(z) if layer == n_layers else fn_activ(z)  # Activation

    # Clip so that a probability that underflowed to exactly 0 gives a large
    # finite penalty instead of log(0) = -inf (and 0 * -inf = NaN).
    eps = 1e-12
    data_loss = -(y * np.log(np.clip(a, eps, 1.0))).sum()

    return data_loss / X.shape[0]

In [42]:
def _forward_pass(weights, biases, X):
    """Run X through every layer; return (pre-activations, activations) per layer."""
    activations = [X]
    pre_activations = []
    last = len(weights) - 1
    for idx, (W, b) in enumerate(zip(weights, biases)):
        z = activations[-1].dot(W) + b                                    # Aggregation
        pre_activations.append(z)
        activations.append(fn_softmax(z) if idx == last else fn_activ(z))  # Activation
    return pre_activations, activations


def _cross_entropy(probs, y_true):
    """Mean categorical cross-entropy of predicted probabilities vs one-hot targets."""
    eps = 1e-12  # keeps log() finite when a probability underflows to 0
    return -(y_true * np.log(np.clip(probs, eps, 1.0))).sum() / y_true.shape[0]


def built_model(param,X_tr,y_tr,X_ts,y_ts,alpha,n_epoch):
    """Train a fully connected network with full-batch gradient descent.

    The architecture is taken from ``param["h_dim"]``: the first entry is the
    input width, the last the number of output classes, everything in between
    a tanh hidden layer. The output layer is a softmax trained with
    categorical cross-entropy.

    The previous version created weights for all five layers but only ran two
    of them, applied softmax to a hidden layer, and computed gradients and the
    loss against the notebook-level ``X`` / ``y`` instead of the arrays passed
    in, which do not even have the same number of rows as ``X_tr``.

    Args:
        param: dict with key ``"h_dim"`` - list of layer widths (length >= 2).
        X_tr: training features, shape (m, h_dim[0]).
        y_tr: one-hot training targets, shape (m, h_dim[-1]).
        X_ts: test features, or None to skip test-loss tracking.
        y_ts: one-hot test targets, or None to skip test-loss tracking.
        alpha: learning rate.
        n_epoch: number of gradient-descent iterations.

    Returns:
        dict with the trained parameters under ``"W1".."WL"`` and ``"b1".."bL"``.
        With ``n_epoch == 0`` the freshly initialised parameters are returned.

    Raises:
        ValueError: if ``h_dim`` has fewer than two entries or its first/last
            entry does not match the width of ``X_tr`` / ``y_tr``.

    Side effects:
        Every 100 epochs the epoch number and loss are printed and appended to
        the notebook-level ``loss_hist`` dict (keys ``"epoch"``, ``"loss"`` and,
        when test data is given, ``"test_loss"``).
    """
    layer_dims = list(param["h_dim"])
    n_layers = len(layer_dims) - 1
    if n_layers < 1:
        raise ValueError("param['h_dim'] needs at least an input and an output width")

    y_tr = np.asarray(y_tr, dtype=float)  # bool one-hot columns -> float
    if layer_dims[0] != X_tr.shape[1] or layer_dims[-1] != y_tr.shape[1]:
        raise ValueError(
            f"h_dim {layer_dims} does not match X_tr {X_tr.shape} / y_tr {y_tr.shape}"
        )

    track_test = X_ts is not None and y_ts is not None
    if track_test:
        y_ts = np.asarray(y_ts, dtype=float)

    m = X_tr.shape[0]

    # Uniform [0, 1) weights scaled by 1/sqrt(fan_in), drawn layer by layer in
    # the same order as before so a seeded `rng` yields the same initial values.
    weights = [
        rng.random((layer_dims[k], layer_dims[k + 1])) / np.sqrt(layer_dims[k])
        for k in range(n_layers)
    ]
    biases = [np.zeros((1, layer_dims[k + 1])) for k in range(n_layers)]

    loss, test_loss, epoch = [], [], []
    # The lists are mutated in place, so registering them once is enough.
    loss_hist["epoch"] = epoch
    loss_hist["loss"] = loss
    if track_test:
        loss_hist["test_loss"] = test_loss

    for i in range(n_epoch):

        # Forward propagation
        zs, acts = _forward_pass(weights, biases, X_tr)

        # Back propagation. For softmax + cross-entropy the output-layer
        # error is simply (probabilities - targets).
        dz = acts[-1] - y_tr
        for k in reversed(range(n_layers)):
            dW = acts[k].T.dot(dz)
            assert (weights[k].shape == dW.shape), f"Shape W{k + 1}: {weights[k].shape},{dW.shape}"

            db = np.sum(dz, axis=0, keepdims=True)
            assert (biases[k].shape == db.shape), f"Shape b{k + 1}: {biases[k].shape},{db.shape}"

            if k > 0:
                # Propagate the error with the weights from BEFORE this
                # iteration's update; zs[k - 1] produced the activation acts[k].
                dz = dz.dot(weights[k].T) * fn_active_prime(zs[k - 1])

            weights[k] = weights[k] - alpha * dW / m
            biases[k] = biases[k] - alpha * db / m

        if(i%100 == 0):
            curr_loss = _cross_entropy(_forward_pass(weights, biases, X_tr)[1][-1], y_tr)
            epoch.append(i)
            loss.append(curr_loss)
            if track_test:
                test_loss.append(
                    _cross_entropy(_forward_pass(weights, biases, X_ts)[1][-1], y_ts)
                )
            print(f"epoch - {i} : Loss - {curr_loss}")

    model = {}
    for k in range(n_layers):
        model[f"W{k + 1}"] = weights[k]
        model[f"b{k + 1}"] = biases[k]
    return model


In [None]:
# Train the network on the training split; the test split is passed alongside.
model = built_model(
    param,
    X_train,
    y_train,
    X_test,
    y_test,
    alpha=ALPHA,
    n_epoch=EPOCHS,
)
model

In [None]:
# Tabulate the loss history recorded during training (one row per logged epoch).
loss_df = pd.DataFrame(data=loss_hist)
loss_df

In [None]:
# Loss curve: recorded loss against the epoch at which it was logged.
loss_df.plot(
    x="epoch",
    y="loss",
)


In [None]:
def fn_plot_decision_boundary(X: np.ndarray, y :np.ndarray, model, pred_func):
    """
    Plots the decision boundary of a classification model over 2-D features.

    Args:
        X: The input features (numpy array, expected shape (m, 2)). They must
           live in the same feature space the model was trained on (e.g.
           already standardized if the training data was).
        y: The true labels, either a 1-D array of class indices or a one-hot
           encoded array of shape (m, n_classes).
        model: The trained model object handed through to ``pred_func``
           (in this notebook a dict of weights and biases: W1, b1, W2, b2, ...).
        pred_func: A function called as ``pred_func(model, XX)`` that returns
           one prediction per row of XX, either as a 1-D array of class
           indices or as a 2-D per-class score / one-hot array.
    """

    def _as_class_labels(values):
        """Collapse one-hot / per-class arrays to a 1-D vector of class indices."""
        arr = np.asarray(values)
        if arr.ndim == 2 and arr.shape[1] > 1:
            return np.argmax(arr, axis=1)
        return arr.ravel()

    fig, ax = plt.subplots(figsize=(8, 5))

    # Small increment value to create a fine grid for a smooth decision boundary
    dm = 0.05
    padding = 0.5  # margin around the data so points do not sit on the border

    # Range of each feature, widened by the padding
    x_min, x_max = X[:, 0].min() - padding, X[:, 0].max() + padding
    y_min, y_max = X[:, 1].min() - padding, X[:, 1].max() + padding

    # Mesh grid covering the padded feature space
    xx, yy = np.meshgrid(np.arange(x_min, x_max, dm),
                         np.arange(y_min, y_max, dm))

    # Flatten the grid into coordinate pairs, shape (n_points, 2)
    XX = np.c_[xx.ravel(), yy.ravel()]

    # Predict every grid point. The result is collapsed to one label per point
    # first: reshaping a 2-D (n_points, n_classes) prediction straight onto the
    # grid would fail because it holds n_classes times too many values.
    y_p = _as_class_labels(pred_func(model, XX))
    Z = y_p.reshape(xx.shape)

    # Filled contour plot showing the decision regions
    ax.contourf(xx, yy, Z, alpha=0.6, cmap=plt.cm.coolwarm)

    # Scatter the actual data points, coloured by their true class. One-hot
    # labels are collapsed first; flattening them would give n_classes colour
    # values per point and make scatter() reject the input.
    point_labels = _as_class_labels(y)
    ax.scatter(X[:, 0], X[:, 1], c=point_labels, s=40, edgecolor='k', cmap=plt.cm.coolwarm)

    # Set plot title and axis labels
    ax.set_title('Decision Boundary')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')

    # Display the final plot
    plt.show()


In [None]:
# The network was trained on the standardized training split, so the boundary
# has to be drawn in that same feature space. Passing the raw, unscaled X
# would evaluate the model on inputs it never saw during training.
fn_plot_decision_boundary(X_train, y_train, model, predict)