In [None]:
import sys
import os
import math
import random
import graphviz  # For visualization (optional, but very helpful)
# Add the src directory to the Python path.
# `__file__` only exists when this code runs as a script/module; inside a
# Jupyter kernel it is undefined and referencing it raises NameError. Fall back
# to the current working directory there (assumes the kernel was started in the
# notebook's own directory -- TODO confirm for your setup).
_base_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
_src_dir = os.path.abspath(os.path.join(_base_dir, '..', 'src'))
if _src_dir not in sys.path:  # re-running the cell must not pile up duplicate entries
    sys.path.append(_src_dir)
import engine
from engine import Value
import nn
from nn import Module, Neuron, Layer, MLP

In [None]:
# --- Visualization Function (Optional, but Recommended) ---
def trace(root):
    """
    Collects every node and edge of the graph that ends at `root`.

    The graph is walked backwards through each node's `_prev` collection.
    The walk uses an explicit stack instead of recursion so that very deep
    graphs (long chains of operations) cannot exceed Python's recursion limit.

    Args:
        root: The node to start from. It and every node reachable from it
            must be hashable and expose an iterable `_prev` of child nodes.

    Returns:
        A `(nodes, edges)` tuple: `nodes` is the set of all reachable nodes
        (including `root`), `edges` is a set of `(child, parent)` pairs.
    """
    nodes, edges = set(), set()
    pending = [root]
    while pending:
        v = pending.pop()
        if v in nodes:
            # Already expanded via another path (shared sub-expression).
            continue
        nodes.add(v)
        for child in v._prev:
            edges.add((child, v))
            pending.append(child)
    return nodes, edges


def draw_dot(root, format='svg', rankdir='LR'):
    """
    Builds a Graphviz Digraph picturing the computational graph ending at `root`.

    Every node returned by `trace(root)` becomes a record-shaped box showing its
    label, data and grad. A node whose `_op` is non-empty additionally gets a
    small operation node that sits between it and its children.

    Args:
        root: Final node of the graph to draw.
        format: Output format passed to `graphviz.Digraph`.
        rankdir: Layout direction, either 'LR' or 'TB'.

    Returns:
        The populated `graphviz.Digraph`; nothing is rendered to disk here.
    """
    assert rankdir in ['LR', 'TB']
    value_nodes, links = trace(root)
    graph = graphviz.Digraph(format=format, graph_attr={'rankdir': rankdir})

    def value_id(v):
        # Graphviz name of the box that shows the value itself.
        return str(id(v))

    def op_id(v):
        # Graphviz name of the operation node that produced `v`.
        return str(id(v)) + v._op

    for v in value_nodes:
        box_text = "{ %s | data %.4f | grad %.4f }" % (v.label, v.data, v.grad)
        graph.node(name=value_id(v), label=box_text, shape='record')
        if not v._op:
            # No operation recorded for this value, so no op node is needed.
            continue
        graph.node(name=op_id(v), label=v._op)
        graph.edge(op_id(v), value_id(v))

    for child, parent in links:
        # Children feed into the parent's operation node, not the parent box.
        graph.edge(value_id(child), op_id(parent))

    return graph

In [None]:
# --- Example 1: Simple Expression ---
print("Example 1: Simple Expression")

# Build L = (a * b + c) * f one node at a time, labelling every node so it
# can be recognised in the rendered graph.
a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')

e = a * b
e.label = 'e'

d = e + c
d.label = 'd'

f = Value(-2.0, label='f')

L = d * f
L.label = 'L'

print(f"L: {L}")  # expected data: (2.0 * -3.0 + 10.0) * -2.0 = -8.0

# Backward pass starting at L (computes the gradients)
L.backward()

# Optional visualization: uncomment the two lines below (graphviz must be
# working). render() saves the graph to a file and opens it.
# dot = draw_dot(L)
# dot.render(directory='notebooks', view=True, filename='example1_graph')

In [None]:
# --- Example 2: Manual Neuron ---
print("\nExample 2: Manual Neuron")

# Inputs of the neuron
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')

# One weight per input
w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')

# Bias: with the inputs/weights above the pre-activation is about 0.8814,
# whose tanh is about 0.7071.
b = Value(6.8813735870195432, label='b')

# Pre-activation n = x1*w1 + x2*w2 + b, with each intermediate labelled
x1w1 = x1 * w1
x1w1.label = 'x1*w1'

x2w2 = x2 * w2
x2w2.label = 'x2*w2'

x1w1x2w2 = x1w1 + x2w2
x1w1x2w2.label = 'x1*w1 + x2*w2'

n = x1w1x2w2 + b
n.label = 'n'

# Activation
o = n.tanh()
o.label = 'o'

print(f"o: {o}")  # expected data: 0.7071

# Backward pass starting at the neuron output (computes the gradients)
o.backward()

# Optional visualization (uncomment to render the graph):
# dot = draw_dot(o)
# dot.render(directory='notebooks', view=True, filename='example2_graph')

In [None]:
# --- Example 3: Neuron with exp and division ---
print("\nExample 3: Neuron with exp and division")

# Inputs of the neuron
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')

# One weight per input
w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')

# Bias of the neuron
b = Value(6.8813735870195432, label='b')

# Pre-activation n = x1*w1 + x2*w2 + b, with each intermediate labelled
x1w1 = x1 * w1
x1w1.label = 'x1*w1'

x2w2 = x2 * w2
x2w2.label = 'x2*w2'

x1w1x2w2 = x1w1 + x2w2
x1w1x2w2.label = 'x1*w1 + x2*w2'

n = x1w1x2w2 + b
n.label = 'n'

# tanh written out by hand: tanh(n) = (exp(2n) - 1) / (exp(2n) + 1)
e = (2*n).exp()
e.label = 'e'
o = (e-1)/(e+1)
o.label = 'o'

# Backward pass through the exp/division form of the activation
o.backward()
print(f"o: {o}")  # expected data: 0.7071

In [None]:
# --- Example 4: Using the nn module ---
print("\nExample 4: Using the nn module")

# Binary classification toy set.
# Each inner list of `xs` is one input sample with three features;
# `ys[i]` is the desired target for `xs[i]`.
xs = [[2.0, 3.0, -1.0], [3.0, -1.0, 0.5], [0.5, 1.0, 1.0], [1.0, 1.0, -1.0]]
ys = [1.0, -1.0, -1.0, 1.0]

In [None]:
# Create an MLP
# Seed the stdlib RNG first so that Restart & Run All reproduces the same
# starting point for training.
# NOTE(review): assumes nn draws its initial parameters from the stdlib
# `random` module -- confirm against src/nn.py; if it does not, this seed is
# harmless but has no effect on the model.
random.seed(1337)
model = MLP(3, [4, 4, 1])  # 3 inputs, two hidden layers with 4 neurons each, 1 output
print(model)
print(f"Number of parameters: {len(model.parameters())}")

In [None]:
# Hyperparameters consumed by the training loop
learning_rate = 5e-2  # step size of each parameter update; a reasonable starting value
n_epochs = 20  # number of passes of the training loop

In [None]:
# Full-batch gradient descent on a sum-of-squared-errors loss.

# Report roughly five times over the run. `n_epochs // 5` is 0 whenever
# n_epochs < 5, which would make the modulo below raise ZeroDivisionError,
# so clamp the interval to at least 1.
log_every = max(1, n_epochs // 5)

for k in range(n_epochs):
    # Forward pass
    ypred = [model(x) for x in xs]
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))

    # Backward pass: clear the gradients left by the previous epoch first
    model.zero_grad()
    loss.backward()

    # Update parameters (step against the gradient)
    for p in model.parameters():
        p.data -= learning_rate * p.grad

    if (k + 1) % log_every == 0 or k == 0:
        print(f"Epoch {k + 1}, Loss: {loss.data}")

# Inside the loop `ypred` is computed *before* the epoch's parameter update, so
# it lags one step behind the trained model (and does not exist at all when
# n_epochs == 0). Run one more forward pass so the report matches the final
# parameters.
ypred = [model(x) for x in xs]
print(f"Predictions: {[yp.data for yp in ypred]}")

In [None]:
# --- Example 5: Using the nn module, Cross Entropy Loss ---
print("\nExample 5: Cross Entropy")

# Example data: 2 samples, 3 classes. Each row holds the model output
# (logits) for one input.
logits = [
    [2.0, -1.0, 3.0],   # first input
    [0.5, 2.0, -1.5],   # second input
]

# Correct class per input: class 0 for the first, class 1 for the second
targets = [0, 1]

# Wrap every logit in a Value object. The targets are only copied: they stay
# plain integers.
logits_v = [[Value(score) for score in row] for row in logits]
targets_v = list(targets)

In [None]:
def cross_entropy_loss(logits, targets):
    """
    Computes the mean cross-entropy loss over a batch.

    For each sample the negative log-likelihood of the correct class is
    computed in log-sum-exp form, -log softmax(z)[t] = log(sum_j exp(z_j - m)) - (z_t - m),
    where m is the largest logit of the row taken as a plain number. Shifting
    by m leaves the loss and its gradients unchanged but keeps exp() from
    overflowing on large logits and avoids taking log(0) when a probability
    underflows.

    Args:
      logits: A list of lists of Value objects, where each inner list
        represents the output logits from the model for a single input.
      targets: A list of integers representing the correct class indices,
        one per row of `logits`.

    Returns:
      A Value holding the mean loss over all samples.

    Raises:
      ValueError: If `logits` is empty or its length differs from `targets`.
    """
    if len(logits) != len(targets):
        raise ValueError(
            f"logits and targets must have the same length, got {len(logits)} and {len(targets)}"
        )
    if not logits:
        raise ValueError("cross_entropy_loss needs at least one sample")

    losses = []
    for logit_row, correct_class_index in zip(logits, targets):
        # 1. Shift by the row maximum (a constant, so no gradient flows through it)
        max_logit = max(l.data for l in logit_row)
        shifted = [l - max_logit for l in logit_row]

        # 2. Log of the softmax normaliser; start the sum with a Value so the
        #    result is a Value even for a single-element row
        sum_exp_shifted = sum((s.exp() for s in shifted), Value(0.0))

        # 3. Negative log likelihood of the correct class.
        # NOTE(review): this relies on Value.log(); the original comment here
        # suggested the engine may not implement log yet -- confirm it exists.
        nll = sum_exp_shifted.log() - shifted[correct_class_index]
        losses.append(nll)
    return sum(losses, Value(0.0)) / Value(float(len(losses)))  # mean loss

# Evaluate the loss on the example batch and show its scalar value
loss = cross_entropy_loss(logits_v, targets_v)
loss_value = loss.data
print(f"Cross-entropy loss: {loss_value}")

# Backward pass starting at the loss
loss.backward()

# The gradients can now be read from the logits, e.g.:
# print(logits_v[0][0].grad)
# print(logits_v[0][1].grad)