In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

# Embedding table: one learnable 2-D vector for each of the 3 known sites.
num_sites = 3
embedding_dim = 2
embeddings = nn.Embedding(num_sites, embedding_dim)

# Regression head: its input is the site embedding plus one scalar feature,
# hence `embedding_dim + 1` input columns and a single output.
linear = nn.Linear(embedding_dim + 1, 1)

# One SGD optimizer steps the embedding table and the linear head together.
trainable_params = [*embeddings.parameters(), *linear.parameters()]
optimizer = optim.SGD(trainable_params, lr=0.1)

# Toy batch of two samples. Only sites 0 and 1 appear; site 2 is not in the batch.
site_ids = torch.tensor([0, 1])
extra_feature = torch.tensor([[1.0], [2.0]])  # one scalar per sample
target = torch.tensor([[0.5], [1.0]])  # regression targets

# Forward pass: look up embeddings, then prepend the extra feature column.
site_emb = embeddings(site_ids)  # shape: (2, 2)
x = torch.cat((extra_feature, site_emb), dim=1)  # shape: (2, 3), feature column first
y_pred = linear(x)

# Mean-squared-error between predictions and targets.
criterion = nn.MSELoss()
loss = criterion(y_pred, target)
print("Loss before backward:", loss.item())

# Backpropagate to populate `.grad` on every parameter.
loss.backward()

# Inspect gradients: the embedding row of a site that is absent from the batch
# (site 2 here) stays at zero, because only looked-up rows receive gradient.
print("Embedding gradient:\n", embeddings.weight.grad)
print("Linear weight gradient:\n", linear.weight.grad)

# Apply one SGD step and show the resulting embedding table.
optimizer.step()
print("Updated embedding:\n", embeddings.weight)

Loss before backward: 3.037322998046875
Embedding gradient:
 tensor([[-0.7321,  0.6070],
        [-1.2052,  0.9992],
        [ 0.0000,  0.0000]])
Linear weight gradient:
 tensor([[-5.4926, -0.1791, -5.0770]])
Updated embedding:
 Parameter containing:
tensor([[-0.7200,  0.0258],
        [ 0.6874,  2.2577],
        [-0.6475, -0.7962]], requires_grad=True)


In [6]:
import torch
from torch import nn
import torchexplorer


class AttachModule(nn.Module):
	"""Two stacked 10->10 linear layers whose intermediate activation is handed to torchexplorer."""

	def __init__(self):
		"""Create the two linear layers, `fc1` and `fc2`."""
		super().__init__()
		self.fc1 = nn.Linear(10, 10)
		self.fc2 = nn.Linear(10, 10)

	def forward(self, x):
		"""Apply `fc1`, route its output through `torchexplorer.attach`, then apply `fc2`.

		The tensor returned by `torchexplorer.attach` (registered against this
		module under the name 'intermediate') is what gets fed to `fc2`.
		"""
		hidden = self.fc1(x)
		# presumably this makes the fc1 activation visible in the explorer UI;
		# confirm against the torchexplorer documentation
		tapped = torchexplorer.attach(hidden, self, 'intermediate')
		return self.fc2(tapped)


# Build the demo module and a random batch of 5 samples with 10 features each.
model = AttachModule()
dummy_X = torch.randn(5, 10)

# Register the model with torchexplorer's standalone backend, logging on every step.
torchexplorer.watch(model, backend='standalone', log_freq=1)

# One forward pass reduced to a scalar, followed by one backward pass.
scalar_out = model(dummy_X).sum()
scalar_out.backward()
# Your model will be available at http://localhost:8080

Starting TorchExplorer at http://localhost:8080


In [8]:
import torch
import torchvision
import torchexplorer

# Randomly initialised ResNet-18. `weights=None` is the documented way to request
# no pretrained weights; passing a bool (`weights=False`) goes through torchvision's
# deprecated legacy path and only emits a warning before being treated as `None`.
model = torchvision.models.resnet18(weights=None)
# Histograms won't populate if the model isn't in training mode.
model.train()
dummy_X = torch.randn(5, 3, 32, 32)

# Only log input/output and parameter histograms. If you don't want these either,
# set log=[].
# NOTE(review): if an earlier cell in this kernel has already started the standalone
# backend, this call reports "Address already in use" for port 8080. Restart the
# kernel first, or pass a different port (check torchexplorer.watch's documentation
# for the exact option name).
torchexplorer.watch(model, log_freq=1, log=['io', 'params'], backend='standalone')

# To also log gradients, set log=['io', 'io_grad', 'params', 'params_grad'] (default).
# This doesn't work with in-place operations (see "Common errors #1" in README.md),
# so we must disable in-place activations and ignore modules with residual connections.
# Here we're using random data on an untrained model, so gradients aren't very useful.
# residual_class = torchvision.models.resnet.BasicBlock
# torchexplorer.watch(
#     model, log_freq=1, disable_inplace=True,
#     log=['io', 'io_grad', 'params', 'params_grad'],
#     ignore_io_grad_classes=[residual_class], backend='standalone'
# )

# Do one forward and one backward pass so there is something to log.
model(dummy_X).sum().backward()

# Your model will be available at http://localhost:8080

Address already in use
Port 8080 is in use by another program. Either identify and stop that program, or start the server with a different port.


Starting TorchExplorer at http://localhost:8080
