In [8]:
import tinygrad.nn
import numpy as np
from tinygrad import nn , Tensor



def layer_init(layer: nn.Linear, std=np.sqrt(2), bias_const=0.0):
    """CleanRL's default layer initialization"""
    layer.weight = tiny_orthogonal_(layer.weight, std)
    layer.bias = tiny_constant_(layer.bias, bias_const)
    return layer

from tinygrad import nn 
def tiny_orthogonal_(tensor: Tensor, gain=1, generator=None):
    """
    NOTE: Since initialization occurs only once, we are being lazy and using numpy linear algebra to perform certain operations.
    """
    if tensor.ndim < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    if tensor.numel() == 0:
        return tensor # no-op for empty tensors

    rows, cols = tensor.shape[0], tensor.numel() // tensor.shape[0]
    flattened = Tensor.randn(rows, cols) # figure out if it has the same device configs as the input tensor

    if rows < cols:
        flattened = flattened.transpose()

    # for now, we use numpy to compute the qr factorization
    q, r = np.linalg.qr(flattened.numpy())

    d = np.diag(r, 0)
    ph = np.sign(d)
    q *= ph

    if rows < cols:
        q.transpose()

    return Tensor(q).mul(gain)

def tiny_constant_(tensor: Tensor, val: float):
    """
    """
    return Tensor.ones(tensor.shape) * val
    

from tinygrad import nn 


class TinyPolicy:
    def __init__(self, policy):
        self.policy = policy
        self.is_continuous = hasattr(policy, 'is_continuous') and policy.is_continuous
    
    def __call__(self, x, action=None):
        return self.get_action_and_value(x, action)

    def get_value(self, x, state=None):
        _, value = self.policy(x)
        return value

    def get_action_and_value(self, x, action=None):
        logits, value = self.policy(x)
        action, logprob, entropy = sample_logits(logits, action, self.is_continuous)
        return action, logprob, entropy, value
    
class Critic:
    def __init__(self, obs_size, hidden_size):
        self.l1 = layer_init(tinygrad.nn.Linear(obs_size, hidden_size))
        self.l2 = layer_init(tinygrad.nn.Linear(hidden_size, hidden_size))
        self.l3 = layer_init(tinygrad.nn.Linear(hidden_size, 1))

    def __call__(self, x: Tensor):
        x = self.l1(x).tanh()
        x = self.l2(x).tanh()
        return self.l3(x)

class ActorEncoder:
    def __init__(self, obs_size, hidden_size):
        self.l1 = layer_init(tinygrad.nn.Linear(obs_size, hidden_size))
        self.l2 = layer_init(tinygrad.nn.Linear(hidden_size, hidden_size))

    def __call__(self, x: Tensor):
        x = self.l1(x).tanh()
        return self.l2(x).tanh()

class TinyCleanRLPolicy(TinyPolicy):
    def __init__(self, envs, hidden_size=64):
        super().__init__(policy=None)  # Just to get the right init
        self.is_continuous = True

        # self.obs_size = np.array(envs.single_observation_space.shape).prod()
        # action_size = np.prod(envs.single_action_space.shape)

        ## figuring out how to normalize observations will be an important step, but leaving it out for now
        action_size = 1
        self.obs_size = 1
        self.critic = Critic(self.obs_size, hidden_size)
        self.actor_encoder = ActorEncoder(self.obs_size, hidden_size)
        self.actor_decoder_mean = layer_init(tinygrad.nn.Linear(hidden_size, action_size), std=0.01)
        self.actor_decoder_logstd = Tensor.zeros(1, action_size)



policy = TinyCleanRLPolicy("hypothetical env", hidden_size=64)



In [20]:
from torch.distributions import Normal
import torch
a = Normal(torch.tensor([1.0, 2.0]), torch.tensor([1.0, 1.0]))



In [23]:
a.log_prob(torch.tensor([1.0, 2.0])).sum(1)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [26]:
from tinygrad import TinyJit, Tensor
weight = Tensor.randn(10, 3)
@TinyJit
def forward(x: Tensor, indices: list[int]):
  c = (x[indices] * weight).contiguous()
  print(f"shape of c {c.shape}")
  c.sum(0).realize()

x = Tensor.randn(10)
forward(x, [0, 1, 2])

shape of c (10, 3)


In [None]:
from tinygrad impor

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time
import numpy as np

# Simple CNN
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.fc = nn.Linear(1600, 10)
        
    def forward(self, x):
        return self.fc(self.conv(x).reshape(-1, 1600))

# Data
transform = transforms.ToTensor()
train_data = datasets.MNIST('./', train=True, download=True, transform=transform)
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)

def train(device):
    model = Net().to(device)
    opt = torch.optim.Adam(model.parameters())
    start = time.time()
    
    for epoch in range(1):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            loss = nn.functional.cross_entropy(model(data), target)
            loss.backward()
            opt.step()
            opt.zero_grad()
            
    return time.time() - start

# Test CPU 20 times
cpu_times = []
for _ in range(20):
    cpu_times.append(train('cpu'))
avg_cpu_time = np.mean(cpu_times)
print(f"Average CPU time over 20 runs: {avg_cpu_time:.2f}s")

# Test MPS if available
if torch.backends.mps.is_available():
    mps_times = []
    for _ in range(20):
        mps_times.append(train('mps'))
    avg_mps_time = np.mean(mps_times)
    print(f"Average MPS time over 20 runs: {avg_mps_time:.2f}s")


Average CPU time over 20 runs: 11.24s
Average MPS time over 20 runs: 2.25s


In [None]:
# TinyGrad implementation
from tinygrad import Tensor, nn, Device
from tinygrad.nn.datasets import mnist
import time
import numpy as np

print("Starting TinyGrad implementation...")

Device.DEFAULT = "METAL"
print(f"Using device: {Device.DEFAULT}")

class TinyNet:
    def __init__(self):
        print("Initializing TinyNet model...")
        self.l1 = nn.Conv2d(1, 32, kernel_size=(3,3))
        self.l2 = nn.Conv2d(32, 64, kernel_size=(3,3))
        self.l3 = nn.Linear(1600, 10)

    def __call__(self, x:Tensor) -> Tensor:
        x = self.l1(x).relu().max_pool2d((2,2))
        x = self.l2(x).relu().max_pool2d((2,2))
        return self.l3(x.flatten(1))

# Get MNIST data
print("Loading MNIST dataset...")
X_train, Y_train, X_test, Y_test = mnist()
print(f"Training data shape: {X_train.shape}")

@Tensor.train()
def train_tiny(device=None):  # device param kept for consistency but not used since tinygrad handles it
    print("\nStarting training...")
    model = TinyNet()
    optim = nn.optim.Adam(nn.state.get_parameters(model))
    start = time.time()
    
    for epoch in range(1):  # Match PyTorch's 1 epoch
        print(f"\nEpoch {epoch+1}")
        for i in range(0, len(X_train), 128):  # Match PyTorch's batch size
            samp = slice(i, i+128)
            X, Y = X_train[samp], Y_train[samp]
            optim.zero_grad()
            out = model(X)
            loss = out.sparse_categorical_crossentropy(Y)
            loss.backward()
            optim.step()
            

    training_time = time.time() - start
    print(f"Training completed in {training_time:.2f}s")
    return training_time

# Test CPU 20 times
tiny_cpu_times = []

print("\nStarting timing runs...")
num_runs = 1
for run in range(num_runs):
    print(f"\nRun {run+1}/{num_runs}")
    tiny_cpu_times.append(train_tiny())
avg_tiny_cpu_time = np.mean(tiny_cpu_times)
print(f"\nAverage TinyGrad CPU time over {num_runs} runs: {avg_tiny_cpu_time:.2f}s")

KeyboardInterrupt: 