In [3]:
import mlx.core as mx
import mlx.nn as nn

In [1]:
import mlx.core as mx
import mlx.nn as nn
# Build the scalar inputs, weights and bias as single-element MLX arrays.
# mx.array() takes the place of torch.Tensor(), and no requires_grad flag is
# set: gradients are requested later through mx.value_and_grad.
# The arrays here are float32. The PyTorch version of this example used
# .double() (float64); this port stays in float32.
# NOTE(review): float64 availability in MLX may depend on version/device - confirm.
x1, x2 = mx.array([2.0]), mx.array([0.0])
w1, w2 = mx.array([-3.0]), mx.array([1.0])
b = mx.array([6.8813735870195432])

# Forward pass: weighted sum of the inputs plus bias, squashed by tanh.
# Each operation produces a new array rather than modifying one in place.
n = (x1 * w1) + (x2 * w2) + b
o = mx.tanh(n)

# .item() turns the single-element result into a plain Python scalar
# (there is no separate .data attribute to go through, unlike PyTorch).
print(o.item())

# Backward pass and gradient computation
# MLX uses a functional approach with value_and_grad for combined forward and backward passes
# This is different from PyTorch's o.backward() method
def forward(x1, x2, w1, w2, b):
    """Two-input tanh neuron reduced to a scalar.

    Computes ``tanh(x1*w1 + x2*w2 + b)`` and sums the result so that the
    return value is a scalar, which is what ``mx.value_and_grad`` needs in
    order to differentiate the function.
    """
    pre_activation = x1 * w1 + x2 * w2 + b
    activated = mx.tanh(pre_activation)
    # Collapse the (single-element) activation to a scalar.
    return activated.sum()

# Wrap `forward` so that a single call yields both its value and its gradients.
# argnums lists the positional inputs to differentiate with respect to
# (all five here); this takes the place of PyTorch's .backward().
grad_func = mx.value_and_grad(forward, argnums=list(range(5)))

# One call performs the forward evaluation and the gradient computation.
# `value` is the output of `forward`; `grads` holds one gradient per argnum,
# in the order (x1, x2, w1, w2, b).
value, grads = grad_func(x1, x2, w1, w2, b)

print('---')
# Gradients come back from value_and_grad instead of living in .grad
# attributes; .item() converts each single-element array to a Python scalar.
labelled = (('x2', 1), ('w2', 3), ('x1', 0), ('w1', 2))
for label, position in labelled:
    print(label, grads[position].item())

0.7071067094802856
---
x2 0.5000001192092896
w2 0.0
x1 -1.5000003576278687
w1 1.000000238418579


In [7]:
import mlx.core as mx
import mlx.nn as nn

class Neuron(nn.Module):
    """A single tanh unit computing ``tanh(sum(w * x) + b)``.

    ``w`` has shape ``(nin,)`` and ``b`` has shape ``(1,)``; both are sampled
    with ``mx.random.uniform`` between -1 and 1 when the neuron is created.
    """

    def __init__(self, nin):
        super().__init__()
        # One weight per input; the weights are drawn before the bias.
        self.w = mx.random.uniform(low=-1, high=1, shape=(nin,))
        self.b = mx.random.uniform(low=-1, high=1, shape=(1,))

    def __call__(self, x):
        """Return the activation for ``x`` (expected to match ``w``'s shape)."""
        weighted_sum = (self.w * x).sum()
        return mx.tanh(weighted_sum + self.b)

# Loss function
def mse_loss(pred, target):
    """Mean of the squared element-wise difference between ``pred`` and ``target``."""
    residual = pred - target
    return mx.mean(residual ** 2)

# Forward pass and loss computation
def forward_and_loss(params, x, target):
    """Loss of a tanh neuron evaluated from explicit parameters.

    ``params`` unpacks into ``(w, b)``. The prediction is
    ``tanh(sum(w * x) + b)`` and the returned value is its ``mse_loss``
    against ``target``.
    """
    weights, bias = params
    prediction = mx.tanh(mx.sum(weights * x) + bias)
    return mse_loss(prediction, target)

# Test the Neuron
if __name__ == "__main__":
    n = Neuron(3)
    x = mx.array([2.0, 3.0, -1.0])
    output = n(x)
    print(f"Neuron output: {output}")

    target = mx.array([1.0])

    # value_and_grad differentiates with respect to the first argument of
    # forward_and_loss, i.e. the [w, b] list, so `grads` mirrors that list.
    loss_and_grad_fn = mx.value_and_grad(forward_and_loss)

    # Print detailed information about the inputs
    print(f"Neuron weights shape: {n.w.shape}")
    print(f"Neuron bias shape: {n.b.shape}")
    print(f"Input x shape: {x.shape}")
    print(f"Target shape: {target.shape}")

    # No try/except here: a failure should surface with its full traceback
    # rather than being caught and printed.
    loss, grads = loss_and_grad_fn([n.w, n.b], x, target)
    print(f"Loss: {loss}")
    print(f"Gradients of w: {grads[0]}")
    print(f"Gradient of b: {grads[1]}")

Neuron output: array([0.999975], dtype=float32)
Neuron weights shape: (3,)
Neuron bias shape: (1,)
Input x shape: (3,)
Target shape: (1,)
Loss: array(6.03052e-10, dtype=float32)
Gradients of w: array([-4.82211e-09, -7.23316e-09, 2.41105e-09], dtype=float32)
Gradient of b: array([-2.41105e-09], dtype=float32)


In [9]:
import mlx.core as mx
import mlx.nn as nn

class Neuron(nn.Module):
    """Single neuron with a tanh activation.

    Holds a weight vector ``w`` of shape ``(nin,)`` and a bias ``b`` of shape
    ``(1,)``, each initialised from ``mx.random.uniform`` between -1 and 1.
    """

    def __init__(self, nin):
        super().__init__()
        weight_shape = (nin,)
        bias_shape = (1,)
        self.w = mx.random.uniform(low=-1, high=1, shape=weight_shape)
        self.b = mx.random.uniform(low=-1, high=1, shape=bias_shape)

    def __call__(self, x):
        """Compute ``tanh(sum(w * x) + b)`` for the input ``x``."""
        pre_activation = mx.sum(self.w * x) + self.b
        return mx.tanh(pre_activation)

def mse_loss(pred, target):
    """Return the mean squared error between ``pred`` and ``target``."""
    squared_error = (pred - target) ** 2
    return mx.mean(squared_error)

def forward_and_loss(params, x, target):
    """Evaluate a tanh neuron from ``params = (w, b)`` and score it.

    Returns ``mse_loss(tanh(sum(w * x) + b), target)``; written as a pure
    function of ``params`` so it can be passed to ``mx.value_and_grad``.
    """
    weights, bias = params
    pre_activation = mx.sum(weights * x) + bias
    return mse_loss(mx.tanh(pre_activation), target)

if __name__ == "__main__":
    n = Neuron(3)
    x = mx.array([2.0, 3.0, -1.0])
    target = mx.array([1.0])

    loss_and_grad_fn = mx.value_and_grad(forward_and_loss)

    # Step size for plain gradient descent; the same for every epoch.
    learning_rate = 0.1

    # Training loop: 20 epochs for demonstration
    for epoch in range(20):
        output = n(x)
        loss, grads = loss_and_grad_fn([n.w, n.b], x, target)
        # grads mirrors the [w, b] list handed to loss_and_grad_fn.
        grad_w, grad_b = grads

        print(f"Epoch {epoch}:")
        print(f"  Neuron output: {output.item():.6f}")
        print(f"  Loss: {loss.item():.6f}")
        print(f"  Gradients of w: {grad_w}")
        print(f"  Gradient of b: {grad_b.item():.6f}")

        # Gradient-descent step: move each parameter against its gradient.
        n.w = n.w - learning_rate * grad_w
        n.b = n.b - learning_rate * grad_b

        print(f"  Updated weights: {n.w}")
        print(f"  Updated bias: {n.b.item():.6f}")
        print()

    print("Final parameters:")
    print(f"Weights: {n.w}")
    print(f"Bias: {n.b.item():.6f}")

Epoch 0:
  Neuron output: -0.999236
  Loss: 3.996946
  Gradients of w: array([-0.0122099, -0.0183149, 0.00610496], dtype=float32)
  Gradient of b: -0.006105
  Updated weights: array([-0.447477, -0.800427, 0.977206], dtype=float32)
  Updated bias: 0.347549

Epoch 1:
  Neuron output: -0.999222
  Loss: 3.996889
  Gradients of w: array([-0.0124353, -0.018653, 0.00621767], dtype=float32)
  Gradient of b: -0.006218
  Updated weights: array([-0.446233, -0.798561, 0.976584], dtype=float32)
  Updated bias: 0.348170

Epoch 2:
  Neuron output: -0.999207
  Loss: 3.996830
  Gradients of w: array([-0.0126692, -0.0190038, 0.0063346], dtype=float32)
  Gradient of b: -0.006335
  Updated weights: array([-0.444966, -0.796661, 0.975951], dtype=float32)
  Updated bias: 0.348804

Epoch 3:
  Neuron output: -0.999192
  Loss: 3.996769
  Gradients of w: array([-0.012912, -0.019368, 0.00645598], dtype=float32)
  Gradient of b: -0.006456
  Updated weights: array([-0.443675, -0.794724, 0.975305], dtype=float32)
  

In [10]:
import mlx.core as mx
import mlx.nn as nn

class Neuron(nn.Module):
    """Single tanh neuron with ``nin`` weights and one bias.

    ``w`` (shape ``(nin,)``) and ``b`` (shape ``(1,)``) are both sampled from
    ``mx.random.uniform`` between -1 and 1.
    """

    def __init__(self, nin):
        super().__init__()
        self.w = mx.random.uniform(low=-1, high=1, shape=(nin,))
        self.b = mx.random.uniform(low=-1, high=1, shape=(1,))

    def __call__(self, x):
        """Return ``tanh(sum(w * x) + b)``."""
        weighted_sum = (self.w * x).sum()
        return mx.tanh(weighted_sum + self.b)

    def parameters(self):
        """Return the parameters as the flat list ``[w, b]``.

        NOTE(review): this replaces ``nn.Module.parameters`` with a list-returning
        version; the rest of this cell relies on the ``[w, b]`` ordering.
        """
        return [self.w, self.b]

class Layer(nn.Module):
    """A layer of ``nout`` independent neurons that all read the same input."""

    def __init__(self, nin, nout):
        super().__init__()
        # Creating 'nout' neurons, each with 'nin' inputs
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the lone neuron's output unchanged when the layer has a single
        neuron, otherwise a 1-D array with one entry per neuron.
        """
        outs = [n(x) for n in self.neurons]
        if len(outs) == 1:
            # Single output: hand it back directly (useful for a final layer).
            return outs[0]
        # Each neuron returns a shape-(1,) array, so a bare stack is (nout, 1).
        # Fed to another neuron, w of shape (nout,) times x of shape (nout, 1)
        # broadcasts to (nout, nout), and summing that gives sum(w) * sum(x)
        # instead of a dot product. Flatten to (nout,) so the result can be
        # used as the next layer's input vector.
        return mx.reshape(mx.stack(outs), (-1,))

    def parameters(self):
        """Return every neuron's parameters as a flat list ``[w0, b0, w1, b1, ...]``."""
        return [p for neuron in self.neurons for p in neuron.parameters()]

def mse_loss(pred, target):
    """Average squared difference between prediction and target."""
    error = pred - target
    return mx.mean(error ** 2)

def forward_and_loss(params, x, target):
    """Loss of a ``Layer`` whose neurons use the given parameters.

    ``params`` is a flat sequence ``[w0, b0, w1, b1, ...]`` (the layout
    produced by ``Layer.parameters()``). A two-item ``[w, b]`` describes a
    single-neuron layer; longer sequences describe one neuron per pair.
    Returns ``mse_loss(layer(x), target)``.
    """
    neuron_count = len(params) // 2
    # Constructing the Layer draws random weights that are overwritten
    # immediately below; only the supplied params influence the result.
    layer = Layer(x.shape[0], neuron_count)
    for index, neuron in enumerate(layer.neurons):
        neuron.w, neuron.b = params[2 * index:2 * index + 2]
    pred = layer(x)
    return mse_loss(pred, target)

if __name__ == "__main__":
    # Layer with 3 inputs and 1 output (single neuron for simplicity)
    layer = Layer(3, 1)
    x = mx.array([2.0, 3.0, -1.0])
    target = mx.array([1.0])

    loss_and_grad_fn = mx.value_and_grad(forward_and_loss)

    # Step size for gradient descent, and a handle on the layer's only neuron.
    learning_rate = 0.1
    neuron = layer.neurons[0]

    # Training loop: 20 epochs for demonstration
    for epoch in range(20):
        output = layer(x)
        params = layer.parameters()
        loss, grads = loss_and_grad_fn(params, x, target)
        # grads follows the [w, b] order of `params`.
        grad_w, grad_b = grads[0], grads[1]

        print(f"Epoch {epoch}:")
        print(f"  Layer output: {output.item():.6f}")
        print(f"  Loss: {loss.item():.6f}")
        print(f"  Gradients of w: {grad_w}")
        print(f"  Gradient of b: {grad_b.item():.6f}")

        # Gradient-descent step on the single neuron.
        neuron.w = neuron.w - learning_rate * grad_w
        neuron.b = neuron.b - learning_rate * grad_b

        print(f"  Updated weights: {neuron.w}")
        print(f"  Updated bias: {neuron.b.item():.6f}")
        print()

    print("Final parameters:")
    print(f"Weights: {neuron.w}")
    print(f"Bias: {neuron.b.item():.6f}")

Epoch 0:
  Layer output: 0.958799
  Loss: 0.001698
  Gradients of w: array([-0.0133003, -0.0199504, 0.00665013], dtype=float32)
  Gradient of b: -0.006650
  Updated weights: array([-0.222269, 0.795279, 0.454978], dtype=float32)
  Updated bias: 0.454469

Epoch 1:
  Layer output: 0.959597
  Loss: 0.001632
  Gradients of w: array([-0.0127957, -0.0191935, 0.00639783], dtype=float32)
  Gradient of b: -0.006398
  Updated weights: array([-0.22099, 0.797199, 0.454338], dtype=float32)
  Updated bias: 0.455109

Epoch 2:
  Layer output: 0.960350
  Loss: 0.001572
  Gradients of w: array([-0.012328, -0.0184919, 0.00616398], dtype=float32)
  Gradient of b: -0.006164
  Updated weights: array([-0.219757, 0.799048, 0.453722], dtype=float32)
  Updated bias: 0.455725

Epoch 3:
  Layer output: 0.961062
  Loss: 0.001516
  Gradients of w: array([-0.0118933, -0.01784, 0.00594666], dtype=float32)
  Gradient of b: -0.005947
  Updated weights: array([-0.218568, 0.800832, 0.453127], dtype=float32)
  Updated bias

In [11]:
import mlx.core as mx
import mlx.nn as nn

class Neuron(nn.Module):
    """One tanh unit: ``tanh(sum(w * x) + b)``.

    ``w`` has shape ``(nin,)`` and ``b`` has shape ``(1,)``; both come from
    ``mx.random.uniform`` between -1 and 1.
    """

    def __init__(self, nin):
        super().__init__()
        # Weights first, then bias.
        self.w = mx.random.uniform(low=-1, high=1, shape=(nin,))
        self.b = mx.random.uniform(low=-1, high=1, shape=(1,))

    def __call__(self, x):
        """Return the neuron's activation for input ``x``."""
        pre_activation = mx.sum(self.w * x) + self.b
        activation = mx.tanh(pre_activation)
        return activation

    def parameters(self):
        """Return ``[w, b]`` as a flat list.

        NOTE(review): this replaces ``nn.Module.parameters`` with a list-returning
        version; the training code in this cell depends on the ``[w, b]`` order.
        """
        return [self.w, self.b]

class Layer(nn.Module):
    """A layer of ``nout`` neurons, each taking the same ``nin``-element input."""

    def __init__(self, nin, nout):
        super().__init__()
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the single neuron's output unchanged for a one-neuron layer,
        otherwise a 1-D array holding one activation per neuron.
        """
        outs = [n(x) for n in self.neurons]
        if len(outs) == 1:
            return outs[0]
        # Each neuron returns a shape-(1,) array, so a bare stack is (nout, 1).
        # In the next layer, w of shape (nout,) times x of shape (nout, 1)
        # broadcasts to (nout, nout), and summing that gives sum(w) * sum(x)
        # instead of a dot product. Flatten to (nout,) so layers chain properly.
        return mx.reshape(mx.stack(outs), (-1,))

    def parameters(self):
        """Return every neuron's parameters as a flat list ``[w0, b0, w1, b1, ...]``."""
        return [p for neuron in self.neurons for p in neuron.parameters()]

class MLP(nn.Module):
    """Multi-layer perceptron built from consecutive ``Layer`` objects.

    ``nin`` is the input size and ``nouts`` gives the size of each successive
    layer; any sequence of ints (list, tuple, ...) is accepted.
    """

    def __init__(self, nin, nouts):
        super().__init__()
        # Unpacking (rather than list concatenation) lets nouts be a tuple too.
        sizes = [nin, *nouts]
        # Pair each layer's input size with its output size.
        self.layers = [
            Layer(n_in, n_out) for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return all layers' parameters as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

def mse_loss(pred, target):
    """Mean squared error of ``pred`` with respect to ``target``."""
    delta = pred - target
    return mx.mean(delta ** 2)

def forward_and_loss(params, mlp, x, target):
    """Load ``params`` into ``mlp`` and return its loss on ``(x, target)``.

    ``params`` is a flat sequence ``[w0, b0, w1, b1, ...]`` covering the
    neurons layer by layer. The neurons of ``mlp`` are overwritten in place
    with these values before the forward pass.
    """
    # Walk the neurons in the same layer-then-neuron order used for `params`.
    neurons = [neuron for layer in mlp.layers for neuron in layer.neurons]
    for index, neuron in enumerate(neurons):
        start = 2 * index
        neuron.w, neuron.b = params[start:start + 2]

    return mse_loss(mlp(x), target)

if __name__ == "__main__":
    # Create an MLP with 3 inputs, two hidden layers of 4 neurons each, and 1 output
    mlp = MLP(3, [4, 4, 1])
    x = mx.array([2.0, 3.0, -1.0])
    target = mx.array([1.0])

    loss_and_grad_fn = mx.value_and_grad(forward_and_loss)

    # Step size for gradient descent; constant for the whole run.
    learning_rate = 0.01

    # Training loop: 100 epochs for demonstration
    for epoch in range(100):
        params = mlp.parameters()
        loss, grads = loss_and_grad_fn(params, mlp, x, target)

        # Report progress every 10th epoch.
        if epoch % 10 == 0:
            output = mlp(x)
            print(f"Epoch {epoch}:")
            print(f"  MLP output: {output.item():.6f}")
            print(f"  Loss: {loss.item():.6f}")

        # Parameter update (gradient descent). grads is laid out like
        # mlp.parameters(): [w, b] per neuron, layer by layer.
        param_idx = 0
        for layer in mlp.layers:
            for neuron in layer.neurons:
                neuron.w = neuron.w - learning_rate * grads[param_idx]
                neuron.b = neuron.b - learning_rate * grads[param_idx + 1]
                param_idx += 2

        # MLX evaluates lazily. Materialise the new parameters every step so
        # the pending computation does not accumulate across the epochs in
        # which nothing is printed.
        mx.eval(mlp.parameters())

    print("\nFinal prediction:")
    print(f"MLP output: {mlp(x).item():.6f}")
    print(f"Target: {target.item():.6f}")

Epoch 0:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 10:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 20:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 30:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 40:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 50:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 60:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 70:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 80:
  MLP output: 0.999581
  Loss: 0.000000
Epoch 90:
  MLP output: 0.999581
  Loss: 0.000000

Final prediction:
MLP output: 0.999581
Target: 1.000000


In [12]:
import mlx.core as mx
import mlx.nn as nn

class Neuron(nn.Module):
    """Single neuron applying tanh to a weighted sum of its inputs plus a bias.

    ``w`` has shape ``(nin,)`` and ``b`` has shape ``(1,)``; both are sampled
    from ``mx.random.uniform`` between -1 and 1.
    """

    def __init__(self, nin):
        super().__init__()
        self.w = mx.random.uniform(low=-1, high=1, shape=(nin,))
        self.b = mx.random.uniform(low=-1, high=1, shape=(1,))

    def __call__(self, x):
        """Return ``tanh(sum(w * x) + b)`` for the input ``x``."""
        weighted_sum = (self.w * x).sum()
        pre_activation = weighted_sum + self.b
        return mx.tanh(pre_activation)

class Layer(nn.Module):
    """A layer of ``nout`` neurons, each taking the same ``nin``-element input."""

    def __init__(self, nin, nout):
        super().__init__()
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the single neuron's output unchanged for a one-neuron layer,
        otherwise a 1-D array holding one activation per neuron.
        """
        outs = [n(x) for n in self.neurons]
        if len(outs) == 1:
            return outs[0]
        # Each neuron returns a shape-(1,) array, so a bare stack is (nout, 1).
        # In the next layer, w of shape (nout,) times x of shape (nout, 1)
        # broadcasts to (nout, nout), and summing that gives sum(w) * sum(x)
        # instead of a dot product. Flatten to (nout,) so layers chain properly.
        return mx.reshape(mx.stack(outs), (-1,))

class MLP(nn.Module):
    """Multi-layer perceptron built from consecutive ``Layer`` objects.

    ``nin`` is the input size and ``nouts`` gives the size of each successive
    layer; any sequence of ints (list, tuple, ...) is accepted.
    """

    def __init__(self, nin, nouts):
        super().__init__()
        # Unpacking (rather than list concatenation) lets nouts be a tuple too.
        sizes = [nin, *nouts]
        # Pair each layer's input size with its output size.
        self.layers = [
            Layer(n_in, n_out) for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

# Example usage of the MLP
if __name__ == "__main__":
    # Network with 3 inputs, two hidden layers of 4 neurons each, and 1 output
    x = mx.array([2.0, 3.0, -1.0])
    n = MLP(3, [4, 4, 1])

    # Forward pass
    output = n(x)

    # Describe the architecture, then show the input and its output.
    structure_lines = (
        "MLP Structure:",
        "Input: 3 neurons",
        "Hidden layer 1: 4 neurons",
        "Hidden layer 2: 4 neurons",
        "Output layer: 1 neuron",
    )
    for line in structure_lines:
        print(line)
    print(f"\nInput: {x}")
    print(f"Output: {output.item():.6f}")

    # Repeat the forward pass on the same input several times.
    print("\nMultiple forward passes:")
    for pass_number in range(1, 6):
        output = n(x)
        print(f"Pass {pass_number} output: {output.item():.6f}")

    # Run the same network on a few different inputs.
    print("\nDifferent inputs:")
    inputs = [
        mx.array([1.0, 2.0, 3.0]),
        mx.array([-1.0, 0.5, 2.0]),
        mx.array([0.0, 0.0, 0.0]),
    ]
    for input_number, inp in enumerate(inputs, start=1):
        output = n(inp)
        print(f"Input {input_number} {inp}: Output: {output.item():.6f}")

MLP Structure:
Input: 3 neurons
Hidden layer 1: 4 neurons
Hidden layer 2: 4 neurons
Output layer: 1 neuron

Input: array([2, 3, -1], dtype=float32)
Output: -0.740931

Multiple forward passes:
Pass 1 output: -0.740931
Pass 2 output: -0.740931
Pass 3 output: -0.740931
Pass 4 output: -0.740931
Pass 5 output: -0.740931

Different inputs:
Input 1 array([1, 2, 3], dtype=float32): Output: -0.328161
Input 2 array([-1, 0.5, 2], dtype=float32): Output: -0.531142
Input 3 array([0, 0, 0], dtype=float32): Output: -0.748314
