In [None]:
%pip install torch

In [1]:
import torch

# 1. Simple gradient calculation with a scalar
print("=== Simple Scalar Gradient ===")
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2  # Simple quadratic function
y.backward()  # Calculate gradient
print(f"dy/dx at x=2: {x.grad}")  # Should be 4 (derivative of x^2 is 2x)

# 2. Gradient with vector input
print("\n=== Vector Gradient ===")
x = torch.tensor([2.0, 3.0], requires_grad=True)
y = x.sum() ** 2
y.backward()
print(f"Gradient with respect to x: {x.grad}")

# 3. More complex function example
print("\n=== Complex Function Gradient ===")
x = torch.tensor([1.0], requires_grad=True)
y = torch.sin(x) * torch.exp(-x)
y.backward()
print(f"Gradient of sin(x)*e^(-x) at x=1: {x.grad}")

# 4. Gradient with neural network-like operations
print("\n=== Neural Network-like Operations ===")
weights = torch.tensor([1.0, 2.0], requires_grad=True)
inputs = torch.tensor([1.0, 1.0])
output = torch.sum(weights * inputs)
output.backward()
print(f"Gradient with respect to weights: {weights.grad}")

  device: torch.device = torch.device(torch._C._get_default_device()),  # torch.device('cpu'),


=== Simple Scalar Gradient ===
dy/dx at x=2: tensor([4.])

=== Vector Gradient ===
Gradient with respect to x: tensor([10., 10.])

=== Complex Function Gradient ===
Gradient of sin(x)*e^(-x) at x=1: tensor([-0.1108])

=== Neural Network-like Operations ===
Gradient with respect to weights: tensor([1., 1.])


In [9]:
# Define the network parameters
# First layer weights and biases
W1 = torch.tensor([[0.1, 0.2], 
                   [0.3, 0.4]], requires_grad=True)
b1 = torch.tensor([0.1, 0.1], requires_grad=True)

# Second layer weights and biases
W2 = torch.tensor([[0.5, 0.6],
                   [0.7, 0.8]], requires_grad=True)
b2 = torch.tensor([0.2, 0.2], requires_grad=True)

# Third layer weights and biases
W3 = torch.tensor([[0.9, 1.0],
                   [1.1, 1.2]], requires_grad=True)
b3 = torch.tensor([0.3, 0.3], requires_grad=True)

# Input data
x = torch.tensor([-1.0, -2.0])

# Forward pass
# First layer with ReLU activation
h1 = torch.relu(torch.matmul(W1, x) + b1)
print(f"h1: {h1}"   )

# Second layer with ReLU activation
h2 = torch.relu(torch.matmul(W2, h1) + b2)
print(f"h2: {h2}")
# Output layer (no activation for simplicity)
output = torch.matmul(W3, h2) + b3
print(f"output: {output}")

print(f"Input shape: {x.shape}")
print(f"Output shape: {output.shape}")
print(f"Output values: {output}")

# Calculate gradients (example with sum of outputs as loss)
loss = output.sum()
print(f"W1 grad: {W1.grad}")
loss.backward()
print(f"h1: {h1}, {h1.grad}")

print("\nGradients:")
print(f"W1 grad: {W1.grad}")
print(f"W2 grad: {W2.grad}")
print(f"W3 grad: {W3.grad}")

print(f"b1 grad: {b1.grad}")
print(f"b2 grad: {b2.grad}")
print(f"b3 grad: {b3.grad}")

h1: tensor([0., 0.], grad_fn=<ReluBackward0>)
h2: tensor([0.2000, 0.2000], grad_fn=<ReluBackward0>)
output: tensor([0.6800, 0.7600], grad_fn=<AddBackward0>)
Input shape: torch.Size([2])
Output shape: torch.Size([2])
Output values: tensor([0.6800, 0.7600], grad_fn=<AddBackward0>)
W1 grad: None
h1: tensor([0., 0.], grad_fn=<ReluBackward0>), None

Gradients:
W1 grad: tensor([[-0., -0.],
        [-0., -0.]])
W2 grad: tensor([[0., 0.],
        [0., 0.]])
W3 grad: tensor([[0.2000, 0.2000],
        [0.2000, 0.2000]])
b1 grad: tensor([0., 0.])
b2 grad: tensor([2.0000, 2.2000])
b3 grad: tensor([1., 1.])


  print(f"h1: {h1}, {h1.grad}")


In [13]:
hello = "a"
try:
    assert isinstance(hello, int), f"Expected int but got {type(hello)}"
except Exception as e:
    print(e)



Expected int but got <class 'str'>


In [14]:
hello = "a"
try:
    assert isinstance(hello, int)
except AssertionError as e:  # More specific exception
    print(f"AssertionError: {e}")  # Add more descriptive message
    print(f"Expected int but got {type(hello)}")

AssertionError: 
Expected int but got <class 'str'>


In [15]:

class Value:
    """ stores a single scalar value and its gradient """

    def __init__(self, data, _children=(), _op=''):
        self.data = data
        self.grad = 0
        # internal variables used for autograd graph construction
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op # the op that produced this node, for graphviz / debugging / etc

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data**other, (self,), f'**{other}')

        def _backward():
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward

        return out

    def relu(self):
        out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')

        def _backward():
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward

        return out

    def backward(self):

        # topological order all of the children in the graph
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        # go one variable at a time and apply the chain rule to get its gradient
        self.grad = 1
        for v in reversed(topo):
            v._backward()

    def __neg__(self): # -self
        return self * -1

    def __radd__(self, other): # other + self
        return self + other

    def __sub__(self, other): # self - other
        return self + (-other)

    def __rsub__(self, other): # other - self
        return other + (-self)

    def __rmul__(self, other): # other * self
        return self * other

    def __truediv__(self, other): # self / other
        return self * other**-1

    def __rtruediv__(self, other): # other / self
        return other * self**-1

    def __repr__(self):
        return f"Value(data={self.data}, grad={self.grad})"

In [35]:
w1 = Value(1.0)
w2 = Value(2.0)
b1 = Value(0.1)
x1 = Value(-1.0)
x2 = Value(-2.0)

h1 = w1 * x1 + b1
h2 = w2 * x2

# o = h1 * h2
o = h1 * h2**2



In [36]:
o.backward()
print(f"o: {o}")
print(f"w1 {w1}")
print(f"w2 {w2}")
print(f"b1 {b1}")
print(f"x1 {x1}")
print(f"x2 {x2}")
print(f"h1 {h1}")
print(f"h2 {h2}")


o: Value(data=-14.4, grad=1)
w1 Value(data=1.0, grad=-16.0)
w2 Value(data=2.0, grad=-14.4)
b1 Value(data=0.1, grad=16.0)
x1 Value(data=-1.0, grad=16.0)
x2 Value(data=-2.0, grad=14.4)
h1 Value(data=-0.9, grad=16.0)
h2 Value(data=-4.0, grad=7.2)


In [None]:
import torch.nn as nn

# Basic neural network
class SimpleNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

In [None]:
from torch.utils.data import Dataset, DataLoader

# Custom dataset
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [None]:
import torch.optim as optim

# Common optimizers
optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 1. Define the model
class SimpleNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.layers = nn.Sequential(
            nn.Linear(28 * 28, 128),  # MNIST images are 28x28
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 10)  # 10 classes (digits 0-9)
        )
    
    def forward(self, x):
        x = self.flatten(x)
        return self.layers(x)

# 2. Prepare the data
def get_data_loaders(batch_size=64):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))  # MNIST mean and std
    ])
    
    # Download and load training data
    train_dataset = datasets.MNIST('data', train=True, download=True, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    # Download and load test data
    test_dataset = datasets.MNIST('data', train=False, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    
    return train_loader, test_loader

# 3. Training function
def train_model(model, train_loader, test_loader, epochs=5):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for batch_idx, (data, targets) in enumerate(train_loader):
            data, targets = data.to(device), targets.to(device)
            
            # Zero the gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(data)
            loss = criterion(outputs, targets)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            # Statistics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            
            if batch_idx % 100 == 99:
                print(f'Epoch: {epoch+1}, Batch: {batch_idx+1}, Loss: {running_loss/100:.3f}, '
                      f'Accuracy: {100.*correct/total:.2f}%')
                running_loss = 0.0
        
        # Test the model after each epoch
        test_model(model, test_loader, device)

# 4. Testing function
def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    accuracy = 100. * correct / total
    print(f'\nTest Accuracy: {accuracy:.2f}%\n')

# 5. Main execution
def main():
    # Hyperparameters
    BATCH_SIZE = 64
    EPOCHS = 5
    
    # Get data loaders
    train_loader, test_loader = get_data_loaders(BATCH_SIZE)
    
    # Create model
    model = SimpleNN()
    
    # Train the model
    train_model(model, train_loader, test_loader, EPOCHS)
    
    # Save the model
    torch.save(model.state_dict(), 'mnist_model.pth')

if __name__ == '__main__':
    main()

  device: torch.device = torch.device(torch._C._get_default_device()),  # torch.device('cpu'),


ModuleNotFoundError: No module named 'torchvision'