In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from app import MLP
import matplotlib
# Select Matplotlib's TkAgg backend; MLP.train() below redraws its figure live
# through plt.ion() / canvas.flush_events().
matplotlib.use('TkAgg')

class MLP:
    """Fully connected feed-forward network trained with full-batch gradient descent.

    Data is laid out column-wise: inputs are ``(features, samples)`` and
    targets are ``(outputs, samples)``.

    Args:
        layer_sizes: Unit counts per layer, input layer first.
        activations: One activation name per non-input layer. Recognised
            names are 'relu', 'sigmoid', 'tanh', 'leaky_relu' and 'softmax';
            any other value is treated as the identity (linear) activation.
        cost_fn: 'bce' or 'mse'.
        optimizer: 'sgd' or 'adam'. Any other value makes
            ``update_parameters`` a no-op.
        init_method: 'xavier', 'he', or anything else for unscaled
            standard-normal weights.
        beta1: Adam decay rate for the first-moment estimate.
        beta2: Adam decay rate for the second-moment estimate.
        epsilon: Adam denominator offset.
    """

    def __init__(self, layer_sizes, activations, cost_fn, optimizer, init_method, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.layer_sizes = layer_sizes
        self.activations = activations
        self.cost_fn = cost_fn
        self.optimizer = optimizer
        self.init_method = init_method
        self.beta1 = beta1  # Adam
        self.beta2 = beta2  # Adam
        self.epsilon = epsilon  # Adam
        self.weights, self.biases = self.initialize_weights()
        self.m = 0  # number of training samples, set by train()

        if optimizer == 'adam':
            self._reset_adam_state()

    def _reset_adam_state(self):
        """Create zeroed Adam moment buffers matching the current parameters."""
        self.m_weights = [np.zeros_like(w) for w in self.weights]
        self.v_weights = [np.zeros_like(w) for w in self.weights]
        self.m_biases = [np.zeros_like(b) for b in self.biases]
        self.v_biases = [np.zeros_like(b) for b in self.biases]

    def initialize_weights(self):
        """Draw one weight matrix and one zero bias column per layer.

        Returns:
            ``(weights, biases)`` where ``weights[i]`` has shape
            ``(layer_sizes[i + 1], layer_sizes[i])`` and ``biases[i]`` has
            shape ``(layer_sizes[i + 1], 1)``.
        """
        weights = []
        biases = []
        for fan_in, fan_out in zip(self.layer_sizes[:-1], self.layer_sizes[1:]):
            w = np.random.randn(fan_out, fan_in)
            if self.init_method == 'xavier':
                w = w * np.sqrt(1 / fan_in)
            elif self.init_method == 'he':
                w = w * np.sqrt(2 / fan_in)
            # Any other init_method keeps the unscaled standard-normal draw.
            weights.append(w)
            biases.append(np.zeros((fan_out, 1)))
        return weights, biases

    def _activate(self, z, name):
        """Apply the activation called ``name`` to ``z`` (identity if unknown)."""
        if name == 'relu':
            return self.relu(z)
        if name == 'sigmoid':
            return self.sigmoid(z)
        if name == 'tanh':
            return self.tanh(z)
        if name == 'leaky_relu':
            return self.leaky_relu(z)
        if name == 'softmax':
            # Previously missing: 'softmax' silently fell through to linear.
            return self.softmax(z)
        return z  # Linear activation

    def forward(self, X):
        """Run a forward pass.

        Args:
            X: Input of shape ``(features, samples)``.

        Returns:
            ``(activations, zs)``: ``activations[0]`` is ``X`` and
            ``activations[i + 1]`` is the output of layer ``i``; ``zs[i]`` is
            the pre-activation of layer ``i``.
        """
        activations = [X]
        zs = []
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            # b has shape (units, 1) and broadcasts across the sample axis.
            z = np.dot(w, activations[-1]) + b
            zs.append(z)
            activations.append(self._activate(z, self.activations[i]))
        return activations, zs

    def _mse_output_delta(self, output, z_out, y):
        """Gradient of the squared error with respect to the output pre-activation."""
        grad_a = self.deriv_mse(output, y)
        name = self.activations[len(self.weights) - 1]
        if name == 'softmax':
            # Softmax couples the units of a sample, so apply its full
            # Jacobian: dz_j = s_j * (g_j - sum_i g_i * s_i).
            return output * (grad_a - np.sum(grad_a * output, axis=0, keepdims=True))
        return grad_a * self.deriv_activation(z_out, name)

    def backward(self, activations, zs, y):
        """Back-propagate the cost and return per-layer gradients.

        Gradients are averaged over the samples actually passed in (the
        column count of the output), not over ``self.m``.

        For 'bce' the output delta is ``a - y``, which is the exact gradient
        when the output layer is sigmoid (binary cross-entropy) or softmax
        (categorical cross-entropy). For 'mse' the delta is
        ``2 * (a - y)`` chained through the output activation, i.e. the
        gradient of the per-sample summed squared error.

        Args:
            activations: First element returned by ``forward``.
            zs: Second element returned by ``forward``.
            y: Targets of shape ``(outputs, samples)``.

        Returns:
            ``(weight_grads, bias_grads)``, lists aligned with
            ``self.weights`` and ``self.biases``.

        Raises:
            ValueError: If ``cost_fn`` is neither 'bce' nor 'mse'.
        """
        n_layers = len(self.weights)
        weight_grads = [None] * n_layers
        bias_grads = [None] * n_layers

        output = activations[-1]
        n_samples = output.shape[1]

        if self.cost_fn == 'bce':
            dz = output - y
        elif self.cost_fn == 'mse':
            dz = self._mse_output_delta(output, zs[-1], y)
        else:
            raise ValueError(f"Unsupported cost function: {self.cost_fn!r}")

        for i in reversed(range(n_layers)):
            # dz is (units in layer i, samples); activations[i] is the input
            # to layer i with shape (units in previous layer, samples).
            weight_grads[i] = np.dot(dz, activations[i].T) / n_samples
            bias_grads[i] = np.sum(dz, axis=1, keepdims=True) / n_samples

            if i > 0:
                dz = np.dot(self.weights[i].T, dz) * self.deriv_activation(zs[i-1], self.activations[i-1])

        return weight_grads, bias_grads

    def train(self, X, y, epochs, batch_size, learning_rate, X_val, y_val):
        """Train on the whole training set once per epoch with live plots.

        Args:
            X: Training inputs, ``(features, samples)``.
            y: Training targets, ``(outputs, samples)``.
            epochs: Number of full-batch updates.
            batch_size: Accepted for interface compatibility but unused;
                every update uses all samples.
            learning_rate: Step size passed to ``update_parameters``.
            X_val: Validation inputs used for the accuracy curve.
            y_val: Validation targets (one-hot, ``(classes, samples)``).
        """
        self.m = X.shape[1]  # number of samples

        loss_history = []
        acc_history = []

        plt.ion()  # interactive mode for live updating plots
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(21, 6))

        epoch = 0  # keeps the final plot call valid when epochs == 0
        for epoch in range(epochs):
            activations, zs = self.forward(X)
            loss_history.append(self.compute_cost(activations[-1], y))
            weight_grads, bias_grads = self.backward(activations, zs, y)
            # Validation accuracy is measured before this epoch's update.
            acc_history.append(self.calculate_acc(X_val, y_val))

            self.update_parameters(weight_grads, bias_grads, learning_rate, epoch)
            if epoch % 10 == 0:
                self.update_plots(ax1, ax2, ax3, epoch, loss_history, acc_history, epochs)

        # Update plots one final time after training
        self.update_plots(ax1, ax2, ax3, epoch, loss_history, acc_history, epochs)

        plt.ioff()  # Turn off interactive mode
        plt.show()  # Show the final plot

    def update_plots(self, ax1, ax2, ax3, epoch, loss_history, acc_history, total_epochs):
        """Redraw the weight histogram (ax1), loss curve (ax2) and accuracy curve (ax3)."""
        for ax in (ax1, ax2, ax3):
            ax.clear()

        # --- Weight histogram on ax1: all layers pooled into one array ---
        all_weights = np.concatenate([w.flatten() for w in self.weights])
        ax1.hist(all_weights, bins=100, color='blue', alpha=0.7)
        ax1.set_title(f"Weight Distribution at Epoch {epoch}")
        ax1.set_xlabel("Weight values")
        ax1.set_ylabel("Frequency")

        # --- Loss over epochs on ax2 ---
        ax2.plot(loss_history, color='red')
        ax2.set_title("Loss over Epochs")
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Loss")
        ax2.set_xlim(0, total_epochs)

        # --- Validation accuracy over epochs on ax3 ---
        ax3.plot(acc_history, color='red')
        ax3.set_title("Accuracy over Epochs in the Validation Data")
        ax3.set_xlabel("Epoch")
        ax3.set_ylabel("Accuracy")
        ax3.set_xlim(0, total_epochs)

        # Lay out and redraw the figure that owns these axes (not whichever
        # figure happens to be current).
        fig = ax1.figure
        fig.tight_layout()
        fig.canvas.draw()
        fig.canvas.flush_events()

        # Pause to let the GUI event loop refresh the window
        plt.pause(0.1)

    def calculate_acc(self, X_val, y_val):
        """Return the fraction of columns whose arg-max prediction matches ``y_val``."""
        activations, _ = self.forward(X_val)
        y_pred_labels = np.argmax(activations[-1], axis=0)
        y_true_labels = np.argmax(y_val, axis=0)
        return np.mean(y_pred_labels == y_true_labels)

    def update_parameters(self, weight_grads, bias_grads, learning_rate, t):
        """Apply one optimiser step in place.

        Args:
            weight_grads: Per-layer weight gradients.
            bias_grads: Per-layer bias gradients.
            learning_rate: Step size.
            t: Zero-based index of this update; Adam's bias correction uses
                ``t + 1``. Ignored by SGD.
        """
        if self.optimizer == 'sgd':
            for i in range(len(self.weights)):
                self.weights[i] -= learning_rate * weight_grads[i]
                self.biases[i] -= learning_rate * bias_grads[i]
        elif self.optimizer == 'adam':
            correction1 = 1 - self.beta1 ** (t + 1)
            correction2 = 1 - self.beta2 ** (t + 1)
            for i in range(len(self.weights)):
                self.m_weights[i] = self.beta1 * self.m_weights[i] + (1 - self.beta1) * weight_grads[i]
                self.v_weights[i] = self.beta2 * self.v_weights[i] + (1 - self.beta2) * (weight_grads[i] ** 2)
                m_hat_w = self.m_weights[i] / correction1
                v_hat_w = self.v_weights[i] / correction2
                self.weights[i] -= learning_rate * m_hat_w / (np.sqrt(v_hat_w) + self.epsilon)

                self.m_biases[i] = self.beta1 * self.m_biases[i] + (1 - self.beta1) * bias_grads[i]
                self.v_biases[i] = self.beta2 * self.v_biases[i] + (1 - self.beta2) * (bias_grads[i] ** 2)
                m_hat_b = self.m_biases[i] / correction1
                v_hat_b = self.v_biases[i] / correction2
                self.biases[i] -= learning_rate * m_hat_b / (np.sqrt(v_hat_b) + self.epsilon)

    # Activation functions and their derivatives
    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def sigmoid(self, z):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-z))

    def tanh(self, z):
        """Element-wise hyperbolic tangent."""
        return np.tanh(z)

    def leaky_relu(self, z, alpha=0.01):
        """Element-wise leaky ReLU with negative-side slope ``alpha``."""
        return np.where(z > 0, z, alpha * z)

    def deriv_activation(self, z, activation_fn):
        """Element-wise derivative of ``activation_fn`` evaluated at ``z``.

        Names without an element-wise rule here (including 'softmax' and
        linear) return 1. The leaky-ReLU slope 0.01 mirrors the default
        ``alpha`` of ``leaky_relu``.
        """
        if activation_fn == 'relu':
            return np.where(z > 0, 1, 0)
        elif activation_fn == 'sigmoid':
            a = self.sigmoid(z)
            return a * (1 - a)
        elif activation_fn == 'tanh':
            return 1 - np.tanh(z)**2
        elif activation_fn == 'leaky_relu':
            return np.where(z > 0, 1, 0.01)
        else:
            return 1

    def softmax(self, z):
        """Column-wise softmax; the column max is subtracted for numerical stability."""
        exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    # Cost functions
    def binary_cross_entropy(self, y_pred, y_true):
        """Binary cross-entropy summed over outputs and averaged over samples."""
        epsilon = 1e-12  # clip bound that keeps log() away from 0
        y_pred = np.clip(y_pred, epsilon, 1. - epsilon)
        m = y_true.shape[1]
        cost = -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)) / m
        return np.squeeze(cost)

    def mean_squared_error(self, y_pred, y_true):
        """Mean of the squared differences over every element."""
        # np.asarray also turns np.matrix inputs into plain arrays.
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)
        return np.mean((y_pred - y_true) ** 2)

    def compute_cost(self, y_pred, y_true):
        """Dispatch to the configured cost; returns None for an unknown ``cost_fn``."""
        if self.cost_fn == 'bce':
            return self.binary_cross_entropy(y_pred, y_true)
        elif self.cost_fn == 'mse':
            return self.mean_squared_error(y_pred, y_true)

    def deriv_mse(self, y_pred, y_true):
        """Derivative of ``(y_pred - y_true) ** 2`` with respect to ``y_pred``."""
        return 2 * (y_pred - y_true)

    # Model export and import
    @staticmethod
    def _as_object_array(arrays):
        """Pack per-layer arrays of differing shapes into a 1-D object array."""
        packed = np.empty(len(arrays), dtype=object)
        for idx, arr in enumerate(arrays):
            packed[idx] = np.asarray(arr)
        return packed

    def export_model(self, filename):
        """Save parameters and configuration with ``np.savez``.

        ``np.savez`` appends '.npz' to path names that lack it.
        """
        model_data = {
            # Layers have different shapes, so they are stored as object
            # arrays; handing np.savez the ragged list directly raises on
            # recent NumPy versions.
            'weights': self._as_object_array(self.weights),
            'biases': self._as_object_array(self.biases),
            'layer_sizes': self.layer_sizes,
            'activations': self.activations,
            'cost_fn': self.cost_fn,
            'optimizer': self.optimizer,
            'init_method': self.init_method
        }
        np.savez(filename, **model_data)

    def import_model(self, filename):
        """Load a model written by ``export_model``, replacing this instance's state.

        Parameters come back as lists of float arrays and the configuration
        as plain Python values. Adam moment buffers are rebuilt when the
        loaded optimizer is 'adam'.
        """
        import os  # local import: only needed for the suffix fallback

        path = filename
        if not hasattr(filename, 'read'):
            path = os.fspath(filename)
            # Mirror np.savez, which appends '.npz' when saving.
            if isinstance(path, str) and not path.endswith('.npz') and not os.path.exists(path):
                path = path + '.npz'

        # NOTE(security): allow_pickle=True is required to read the object
        # arrays but unpickling can execute arbitrary code; only load files
        # from a trusted source.
        with np.load(path, allow_pickle=True) as model_data:
            self.weights = [np.array(w, dtype=float) for w in model_data['weights']]
            self.biases = [np.array(b, dtype=float) for b in model_data['biases']]
            self.layer_sizes = model_data['layer_sizes'].tolist()
            self.activations = model_data['activations'].tolist()
            self.cost_fn = model_data['cost_fn'].item()
            self.optimizer = model_data['optimizer'].item()
            self.init_method = model_data['init_method'].item()

        if self.optimizer == 'adam':
            self._reset_adam_state()

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from app import MLP
import matplotlib
from mnist1d.data import make_dataset, get_dataset_args

# Select Matplotlib's TkAgg backend; MLP.train() below redraws its figure live
# through plt.ion() / canvas.flush_events().
matplotlib.use('TkAgg')

class MLP:
    """Fully connected feed-forward network trained with mini-batch gradient descent.

    Data is laid out column-wise: inputs are ``(features, samples)`` and
    targets are ``(outputs, samples)``.

    Args:
        layer_sizes: Unit counts per layer, input layer first.
        activations: One activation name per non-input layer. Recognised
            names are 'relu', 'sigmoid', 'tanh', 'leaky_relu' and 'softmax';
            any other value is treated as the identity (linear) activation.
        cost_fn: 'bce' or 'mse'.
        optimizer: 'sgd' or 'adam'. Any other value makes
            ``update_parameters`` a no-op.
        init_method: 'xavier', 'he', or anything else for unscaled
            standard-normal weights.
        beta1: Adam decay rate for the first-moment estimate.
        beta2: Adam decay rate for the second-moment estimate.
        epsilon: Adam denominator offset.
    """

    def __init__(self, layer_sizes, activations, cost_fn, optimizer, init_method, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.layer_sizes = layer_sizes
        self.activations = activations
        self.cost_fn = cost_fn
        self.optimizer = optimizer
        self.init_method = init_method
        self.beta1 = beta1  # Adam
        self.beta2 = beta2  # Adam
        self.epsilon = epsilon  # Adam
        self.weights, self.biases = self.initialize_weights()
        self.m = 0  # number of training samples, set by train()

        if optimizer == 'adam':
            self._reset_adam_state()

    def _reset_adam_state(self):
        """Create zeroed Adam moment buffers matching the current parameters."""
        self.m_weights = [np.zeros_like(w) for w in self.weights]
        self.v_weights = [np.zeros_like(w) for w in self.weights]
        self.m_biases = [np.zeros_like(b) for b in self.biases]
        self.v_biases = [np.zeros_like(b) for b in self.biases]

    def initialize_weights(self):
        """Draw one weight matrix and one zero bias column per layer.

        Returns:
            ``(weights, biases)`` where ``weights[i]`` has shape
            ``(layer_sizes[i + 1], layer_sizes[i])`` and ``biases[i]`` has
            shape ``(layer_sizes[i + 1], 1)``.
        """
        weights = []
        biases = []
        for fan_in, fan_out in zip(self.layer_sizes[:-1], self.layer_sizes[1:]):
            w = np.random.randn(fan_out, fan_in)
            if self.init_method == 'xavier':
                w = w * np.sqrt(1 / fan_in)
            elif self.init_method == 'he':
                w = w * np.sqrt(2 / fan_in)
            # Any other init_method keeps the unscaled standard-normal draw.
            weights.append(w)
            biases.append(np.zeros((fan_out, 1)))
        return weights, biases

    def _activate(self, z, name):
        """Apply the activation called ``name`` to ``z`` (identity if unknown)."""
        if name == 'relu':
            return self.relu(z)
        if name == 'sigmoid':
            return self.sigmoid(z)
        if name == 'tanh':
            return self.tanh(z)
        if name == 'leaky_relu':
            return self.leaky_relu(z)
        if name == 'softmax':
            # Previously missing: 'softmax' silently fell through to linear.
            return self.softmax(z)
        return z  # Linear activation

    def forward(self, X):
        """Run a forward pass.

        Args:
            X: Input of shape ``(features, samples)``.

        Returns:
            ``(activations, zs)``: ``activations[0]`` is ``X`` and
            ``activations[i + 1]`` is the output of layer ``i``; ``zs[i]`` is
            the pre-activation of layer ``i``.
        """
        activations = [X]
        zs = []
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            # b has shape (units, 1) and broadcasts across the sample axis.
            z = np.dot(w, activations[-1]) + b
            zs.append(z)
            activations.append(self._activate(z, self.activations[i]))
        return activations, zs

    def _mse_output_delta(self, output, z_out, y):
        """Gradient of the squared error with respect to the output pre-activation."""
        grad_a = self.deriv_mse(output, y)
        name = self.activations[len(self.weights) - 1]
        if name == 'softmax':
            # Softmax couples the units of a sample, so apply its full
            # Jacobian: dz_j = s_j * (g_j - sum_i g_i * s_i).
            return output * (grad_a - np.sum(grad_a * output, axis=0, keepdims=True))
        return grad_a * self.deriv_activation(z_out, name)

    def backward(self, activations, zs, y):
        """Back-propagate the cost and return per-layer gradients.

        Gradients are averaged over the samples of the batch passed in (the
        column count of the output), not over the full training-set size
        ``self.m``; dividing by ``self.m`` would shrink every mini-batch
        gradient by ``batch / m``.

        For 'bce' the output delta is ``a - y``, which is the exact gradient
        when the output layer is sigmoid (binary cross-entropy) or softmax
        (categorical cross-entropy). For 'mse' the delta is
        ``2 * (a - y)`` chained through the output activation, i.e. the
        gradient of the per-sample summed squared error.

        Args:
            activations: First element returned by ``forward``.
            zs: Second element returned by ``forward``.
            y: Targets of shape ``(outputs, samples)``.

        Returns:
            ``(weight_grads, bias_grads)``, lists aligned with
            ``self.weights`` and ``self.biases``.

        Raises:
            ValueError: If ``cost_fn`` is neither 'bce' nor 'mse'.
        """
        n_layers = len(self.weights)
        weight_grads = [None] * n_layers
        bias_grads = [None] * n_layers

        output = activations[-1]
        n_samples = output.shape[1]

        if self.cost_fn == 'bce':
            dz = output - y
        elif self.cost_fn == 'mse':
            dz = self._mse_output_delta(output, zs[-1], y)
        else:
            raise ValueError(f"Unsupported cost function: {self.cost_fn!r}")

        for i in reversed(range(n_layers)):
            # dz is (units in layer i, samples); activations[i] is the input
            # to layer i with shape (units in previous layer, samples).
            weight_grads[i] = np.dot(dz, activations[i].T) / n_samples
            bias_grads[i] = np.sum(dz, axis=1, keepdims=True) / n_samples

            if i > 0:
                dz = np.dot(self.weights[i].T, dz) * self.deriv_activation(zs[i-1], self.activations[i-1])

        return weight_grads, bias_grads

    def train(self, X, y, epochs, batch_size, learning_rate, X_val, y_val):
        """Train with shuffled mini-batches and live diagnostic plots.

        Args:
            X: Training inputs, ``(features, samples)``.
            y: Training targets, ``(outputs, samples)``.
            epochs: Number of passes over the training set.
            batch_size: Samples per mini-batch; the last batch of an epoch
                may be smaller.
            learning_rate: Step size passed to ``update_parameters``.
            X_val: Validation inputs used for the accuracy curve.
            y_val: Validation targets (one-hot, ``(classes, samples)``).

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        self.m = X.shape[1]  # number of samples

        loss_history = []
        acc_history = []

        plt.ion()  # interactive mode for live updating plots
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(21, 6))

        total_batches = int(np.ceil(self.m / batch_size))

        step = 0   # optimiser updates performed so far (Adam timestep - 1)
        epoch = 0  # keeps the final plot call valid when epochs == 0
        for epoch in range(epochs):
            # Reshuffle the sample order at the start of every epoch
            permutation = np.random.permutation(self.m)
            X_shuffled = X[:, permutation]
            y_shuffled = y[:, permutation]

            epoch_loss = 0  # sample-weighted sum of batch losses

            for batch in range(total_batches):
                start = batch * batch_size
                end = min(start + batch_size, self.m)

                X_batch = X_shuffled[:, start:end]
                y_batch = y_shuffled[:, start:end]

                activations, zs = self.forward(X_batch)
                loss = self.compute_cost(activations[-1], y_batch)
                # Weight by batch size so a short final batch does not skew
                # the epoch average.
                epoch_loss += loss * (end - start)

                weight_grads, bias_grads = self.backward(activations, zs, y_batch)

                # Adam's bias correction must count parameter updates, not
                # epochs, because there are several updates per epoch.
                self.update_parameters(weight_grads, bias_grads, learning_rate, step)
                step += 1

                # Refresh the plots after every mini-batch (the histories
                # only grow once per epoch, so this mainly updates the
                # weight histogram).
                self.update_plots(ax1, ax2, ax3, epoch, loss_history, acc_history, epochs)

            # Mean per-sample loss over the epoch
            avg_epoch_loss = epoch_loss / self.m
            loss_history.append(avg_epoch_loss)

            # Validation accuracy at the end of the epoch
            acc = self.calculate_acc(X_val, y_val)
            acc_history.append(acc)

        # Update plots one final time after training
        self.update_plots(ax1, ax2, ax3, epoch, loss_history, acc_history, epochs)

        plt.ioff()  # Turn off interactive mode
        plt.show()  # Show the final plot

    def update_plots(self, ax1, ax2, ax3, epoch, loss_history, acc_history, total_epochs):
        """Redraw the weight histogram (ax1), loss curve (ax2) and accuracy curve (ax3)."""
        for ax in (ax1, ax2, ax3):
            ax.clear()

        # --- Weight histogram on ax1: all layers pooled into one array ---
        all_weights = np.concatenate([w.flatten() for w in self.weights])
        ax1.hist(all_weights, bins=25, color='blue', alpha=0.7)
        ax1.set_title(f"Weight Distribution at Epoch {epoch}")
        ax1.set_xlabel("Weight values")
        ax1.set_ylabel("Frequency")

        # --- Loss over epochs on ax2 ---
        ax2.plot(loss_history, color='red')
        ax2.set_title("Loss over Epochs")
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Loss")
        ax2.set_xlim(0, total_epochs)

        # --- Validation accuracy over epochs on ax3 ---
        ax3.plot(acc_history, color='red')
        ax3.set_title("Accuracy over Epochs in the Validation Data")
        ax3.set_xlabel("Epoch")
        ax3.set_ylabel("Accuracy")
        ax3.set_xlim(0, total_epochs)
        ax3.set_ylim(0, 1)  # accuracy is a fraction

        # Lay out and redraw the figure that owns these axes (not whichever
        # figure happens to be current).
        fig = ax1.figure
        fig.tight_layout()
        fig.canvas.draw()
        fig.canvas.flush_events()

    def calculate_acc(self, X_val, y_val):
        """Return the fraction of columns whose arg-max prediction matches ``y_val``."""
        activations, _ = self.forward(X_val)
        y_pred_labels = np.argmax(activations[-1], axis=0)
        y_true_labels = np.argmax(y_val, axis=0)
        return np.mean(y_pred_labels == y_true_labels)

    def update_parameters(self, weight_grads, bias_grads, learning_rate, t):
        """Apply one optimiser step in place.

        Args:
            weight_grads: Per-layer weight gradients.
            bias_grads: Per-layer bias gradients.
            learning_rate: Step size.
            t: Zero-based index of this update; Adam's bias correction uses
                ``t + 1``. Ignored by SGD.
        """
        if self.optimizer == 'sgd':
            for i in range(len(self.weights)):
                self.weights[i] -= learning_rate * weight_grads[i]
                self.biases[i] -= learning_rate * bias_grads[i]
        elif self.optimizer == 'adam':
            correction1 = 1 - self.beta1 ** (t + 1)
            correction2 = 1 - self.beta2 ** (t + 1)
            for i in range(len(self.weights)):
                self.m_weights[i] = self.beta1 * self.m_weights[i] + (1 - self.beta1) * weight_grads[i]
                self.v_weights[i] = self.beta2 * self.v_weights[i] + (1 - self.beta2) * (weight_grads[i] ** 2)
                m_hat_w = self.m_weights[i] / correction1
                v_hat_w = self.v_weights[i] / correction2
                self.weights[i] -= learning_rate * m_hat_w / (np.sqrt(v_hat_w) + self.epsilon)

                self.m_biases[i] = self.beta1 * self.m_biases[i] + (1 - self.beta1) * bias_grads[i]
                self.v_biases[i] = self.beta2 * self.v_biases[i] + (1 - self.beta2) * (bias_grads[i] ** 2)
                m_hat_b = self.m_biases[i] / correction1
                v_hat_b = self.v_biases[i] / correction2
                self.biases[i] -= learning_rate * m_hat_b / (np.sqrt(v_hat_b) + self.epsilon)

    # Activation functions and their derivatives
    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def sigmoid(self, z):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-z))

    def tanh(self, z):
        """Element-wise hyperbolic tangent."""
        return np.tanh(z)

    def leaky_relu(self, z, alpha=0.01):
        """Element-wise leaky ReLU with negative-side slope ``alpha``."""
        return np.where(z > 0, z, alpha * z)

    def deriv_activation(self, z, activation_fn):
        """Element-wise derivative of ``activation_fn`` evaluated at ``z``.

        Names without an element-wise rule here (including 'softmax' and
        linear) return 1. The leaky-ReLU slope 0.01 mirrors the default
        ``alpha`` of ``leaky_relu``.
        """
        if activation_fn == 'relu':
            return np.where(z > 0, 1, 0)
        elif activation_fn == 'sigmoid':
            a = self.sigmoid(z)
            return a * (1 - a)
        elif activation_fn == 'tanh':
            return 1 - np.tanh(z)**2
        elif activation_fn == 'leaky_relu':
            return np.where(z > 0, 1, 0.01)
        else:
            return 1

    def softmax(self, z):
        """Column-wise softmax; the column max is subtracted for numerical stability."""
        exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    # Cost functions
    def binary_cross_entropy(self, y_pred, y_true):
        """Binary cross-entropy summed over outputs and averaged over samples."""
        epsilon = 1e-12  # clip bound that keeps log() away from 0
        y_pred = np.clip(y_pred, epsilon, 1. - epsilon)
        m = y_true.shape[1]
        cost = -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)) / m
        return np.squeeze(cost)

    def mean_squared_error(self, y_pred, y_true):
        """Mean of the squared differences over every element."""
        # np.asarray also turns np.matrix inputs into plain arrays.
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)
        return np.mean((y_pred - y_true) ** 2)

    def compute_cost(self, y_pred, y_true):
        """Dispatch to the configured cost; returns None for an unknown ``cost_fn``."""
        if self.cost_fn == 'bce':
            return self.binary_cross_entropy(y_pred, y_true)
        elif self.cost_fn == 'mse':
            return self.mean_squared_error(y_pred, y_true)

    def deriv_mse(self, y_pred, y_true):
        """Derivative of ``(y_pred - y_true) ** 2`` with respect to ``y_pred``."""
        return 2 * (y_pred - y_true)

    # Model export and import
    @staticmethod
    def _as_object_array(arrays):
        """Pack per-layer arrays of differing shapes into a 1-D object array."""
        packed = np.empty(len(arrays), dtype=object)
        for idx, arr in enumerate(arrays):
            packed[idx] = np.asarray(arr)
        return packed

    def export_model(self, filename):
        """Save parameters and configuration with ``np.savez``.

        ``np.savez`` appends '.npz' to path names that lack it.
        """
        model_data = {
            # Layers have different shapes, so they are stored as object
            # arrays; handing np.savez the ragged list directly raises on
            # recent NumPy versions.
            'weights': self._as_object_array(self.weights),
            'biases': self._as_object_array(self.biases),
            'layer_sizes': self.layer_sizes,
            'activations': self.activations,
            'cost_fn': self.cost_fn,
            'optimizer': self.optimizer,
            'init_method': self.init_method
        }
        np.savez(filename, **model_data)

    def import_model(self, filename):
        """Load a model written by ``export_model``, replacing this instance's state.

        Parameters come back as lists of float arrays and the configuration
        as plain Python values. Adam moment buffers are rebuilt when the
        loaded optimizer is 'adam'.
        """
        import os  # local import: only needed for the suffix fallback

        path = filename
        if not hasattr(filename, 'read'):
            path = os.fspath(filename)
            # Mirror np.savez, which appends '.npz' when saving.
            if isinstance(path, str) and not path.endswith('.npz') and not os.path.exists(path):
                path = path + '.npz'

        # NOTE(security): allow_pickle=True is required to read the object
        # arrays but unpickling can execute arbitrary code; only load files
        # from a trusted source.
        with np.load(path, allow_pickle=True) as model_data:
            self.weights = [np.array(w, dtype=float) for w in model_data['weights']]
            self.biases = [np.array(b, dtype=float) for b in model_data['biases']]
            self.layer_sizes = model_data['layer_sizes'].tolist()
            self.activations = model_data['activations'].tolist()
            self.cost_fn = model_data['cost_fn'].item()
            self.optimizer = model_data['optimizer'].item()
            self.init_method = model_data['init_method'].item()

        if self.optimizer == 'adam':
            self._reset_adam_state()

In [None]:
# Load Iris dataset
iris = load_iris()
X = iris.data  # Shape: (150 samples, 4 features)
y = iris.target.reshape(-1, 1)  # Shape: (150 samples, 1)

# One-hot encode the labels
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y)  # Shape: (150 samples, 3 classes)

# Standardise each feature to zero mean and unit variance
scaler = StandardScaler()
X = scaler.fit_transform(X)  # Shape: (150 samples, 4 features)

# Split into training and validation sets (still samples-first here)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Transpose for the MLP, which expects (features, samples) / (classes, samples)
X_train, X_val = X_train.T, X_val.T
y_train, y_val = y_train.T, y_val.T

# Fix NumPy's global RNG so weight initialisation and batch shuffling are
# reproducible across Restart & Run All.
np.random.seed(42)

# Initialize the MLP model
mlp = MLP(
    layer_sizes=[X_train.shape[0], 10, 3],  # 4 input features, 10 hidden units, 3 output classes
    activations=['relu', 'softmax'],        # Hidden layer: ReLU; output layer requests softmax
    cost_fn='bce',                          # Cross-entropy loss
    optimizer='sgd',                        # Plain gradient descent ('adam' is the alternative)
    init_method='xavier'
)

# Train the model
mlp.train(X_train, y_train, epochs=100, batch_size=25, learning_rate=0.01, X_val=X_val, y_val=y_val)

# Evaluate on the validation set (forward pass only)
activations, _ = mlp.forward(X_val)
y_pred = activations[-1]

# Convert predictions and one-hot targets to class labels
y_pred_labels = np.argmax(y_pred, axis=0)
y_true_labels = np.argmax(y_val, axis=0)

# Report accuracy (previously computed but never shown)
accuracy = np.mean(y_pred_labels == y_true_labels)
print(f"Validation accuracy: {accuracy:.3f}")

# Rows are true classes, columns are predicted classes
conf_matrix = confusion_matrix(y_true_labels, y_pred_labels)
print("Confusion Matrix:")
print(conf_matrix)

In [5]:
from mnist1d.data import make_dataset, get_dataset_args

# Build the MNIST-1D dataset from the library's default arguments.
defaults = get_dataset_args()
data = make_dataset(defaults)

# Pull out the three entries this notebook looks at.
x = data['x']
y = data['y']
t = data['t']

# Show the documentation of make_dataset.
help(make_dataset)

Help on function make_dataset in module mnist1d.data:

make_dataset(args=None, template=None)
    # make a dataset

