In [22]:
import torch
import heat as ht
import heat.nn.functional as F
import time

In [23]:
import heat.optim as optim
from heat.utils.data.mnist import MNISTDataset
from heat.utils import vision_transforms

from heat.optim.lr_scheduler import StepLR

In [21]:
class Net(ht.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = ht.nn.Conv2d(1, 32, 3, 1)
        self.conv2 = ht.nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = ht.nn.Dropout2d(0.25)
        self.dropout2 = ht.nn.Dropout2d(0.5)
        self.fc1 = ht.nn.Linear(9216, 128)
        self.fc2 = ht.nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


In [38]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    t_list = []
    for batch_idx, (data, target) in enumerate(train_loader):
        t = time.perf_counter()
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 1000 == 0:
            print(
                f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} "
                f"({100.0 * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}"
            )

        t_list.append(time.perf_counter() - t)
    print("average time", sum(t_list) / len(t_list))

In [34]:
transform = ht.utils.vision_transforms.Compose(
        [vision_transforms.ToTensor(), vision_transforms.Normalize((0.1307,), (0.3081,))]
    )

dataset1 = MNISTDataset("./datasets", train=True, transform=transform, ishuffle=False)

In [31]:
train_loader = ht.utils.data.datatools.DataLoader(dataset=dataset1)

In [17]:
device = torch.device("cpu")

In [28]:
model = Net().to(device)

In [19]:
optimizer = optim.Adadelta(model.parameters(), lr=1.0)

dp_optim = ht.optim.DataParallelOptimizer(optimizer, blocking=False)

AttributeError: 'collections.OrderedDict' object has no attribute 'parameters'

In [34]:
scheduler = StepLR(optimizer, step_size=1, gamma=.7)

In [35]:
dp_model = ht.nn.DataParallel(
        model, comm=dataset1.comm, optimizer=dp_optim, blocking_parameter_updates=False
    )

In [42]:
train(model=dp_model, device=device, train_loader=train_loader, optimizer=dp_optim, epoch=1)











































































average time 0.021322667195955727


In [43]:
torch.save(model.state_dict(), "mnist_cnn.pt")

In [44]:
! ls 

datasets  eddl.ipynb   heat.ipynb  mnist_cnn.pt  resnet18.onnx	torch.model
docs	  eddlShit.py  mlruns	   model.pickle  tmp		vgg16.onnx


In [8]:
with open('mnist_cnn.pt', 'rb') as f:
    model = torch.load(f)

In [10]:
#mlflow.pytorch.autolog()

In [11]:
## simpler case

In [60]:
class SimpleNet(ht.nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(28*28, 10)
        
    def forward(self, x):
        return torch.relu(self.l1(x.view(x.size(0), -1)))
    
    def trainig_step(self, batch, batch_nb):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        pred = logits.argmax(dim=1)
        
        self.log('train_loss', loss, on_epoch=True)
        
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=.01)
    
    

In [24]:
dataset1 = MNISTDataset("./datasets", train=True, ishuffle=False)

In [47]:
simple_model = SimpleNet().to(device)

In [48]:

dp_optim = ht.optim.DataParallelOptimizer(simple_model.configure_optimizers(), blocking=False)

In [49]:
dp_model = ht.nn.DataParallel(
        simple_model, comm=dataset1.comm, optimizer=dp_optim, 
    blocking_parameter_updates=False
    )

In [35]:
train_loader = ht.utils.data.datatools.DataLoader(dataset=dataset1)

In [64]:
def small_train(model, device, train_loader, optimizer, epoch):
    model.train()
    t_list = []
    for batch_idx, (data, target) in enumerate(train_loader):
        t = time.perf_counter()
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        
        mlflow.log_metric('train_loss', value=loss)
        
       

In [31]:
import mlflow
import mlflow.pytorch

In [None]:
# only works with lightining
#mlflow.pytorch.autolog()

In [66]:
with mlflow.start_run() as run:
    for i, p in enumerate(simple_model.parameters()):
        mlflow.log_param(key=f"param{i}", value=p.size())
        
    small_train(model=dp_model, device=device, train_loader=train_loader, optimizer=dp_optim, epoch=1)
    
    mlflow.pytorch.log_model(pytorch_model=simple_model, artifact_path='path')

In [51]:
mlflow.end_run()



<mlflow.models.model.ModelInfo at 0x7f3b8cb811b0>

In [41]:
# mlflow.pytorch.load_model