In [None]:
import torch 
from DianaModules.utils.BaseModules import DianaModule
from DianaModules.models.cifar10.LargeResnet import resnet20
import torchvision
import torchvision.datasets as ds 
from DianaModules.utils.BaseModules import DianaModule
from DianaModules.utils.serialization.Loader import ModulesLoader
from DianaModules.utils.serialization.Serializer import ModulesSerializer
from DianaModules.core.Operations import DIANAReLU
from pathlib import Path
import torch.utils.data as ut
# Directory holding the ResNet-20 checkpoints that the later cells load.
output_weights_path = str(Path("zoo/cifar10/workshop/resnet20").absolute())

# Per-channel normalization statistics shared by the training and validation pipelines.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random flip + padded random crop, then tensor conversion and normalization.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomCrop(32, 4),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])
# Validation pipeline: no augmentation, only tensor conversion and normalization.
test_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# NOTE(review): the two splits use different root folders, so the CIFAR-10 archive is
# fetched once per folder; the roots are kept unchanged to match existing local data.
train_dataset = ds.CIFAR10('./data/cifar10/train', train=True, download=True, transform=train_transform)
test_dataset = ds.CIFAR10('./data/cifar10/validation', train=False, download=True, transform=test_transform)

data_loader = {
    'train': ut.DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True, num_workers=8),
    # Evaluation gains nothing from shuffling; a fixed order keeps validation runs reproducible.
    'validate': ut.DataLoader(test_dataset, batch_size=128, shuffle=False, pin_memory=True, num_workers=8),
}

# Input scale found by passing train_dataset through an 8-bit quantizer (see the datasetscale file).
train_scale = torch.Tensor([0.03125])

# Floating Point Model 
The first step in using the training framework is to define your own PyTorch model, as in the example below. You can train the model or load its weights as you usually would.

In [None]:
from DianaModules.models.cifar10.LargeResnet import resnet20 

# Floating-point ResNet-20 that the rest of the notebook converts.
custom_model = resnet20()

# Checkpoint holding previously trained floating-point weights.
FP_weights = f"{output_weights_path}/FP_weights.pth"

# Read the checkpoint on the CPU, pass its 'state_dict' entry through
# DianaModule.remove_data_parallel, and load the result into the model.
# NOTE(review): remove_data_parallel presumably strips nn.DataParallel key prefixes - confirm.
fp_checkpoint = torch.load(FP_weights, map_location='cpu')
fp_state_dict = DianaModule.remove_data_parallel(fp_checkpoint['state_dict'])
custom_model.load_state_dict(fp_state_dict)

# Conversion Process 
## Fake-Quantization 
Now that you have your floating-point PyTorch model defined, we can start the conversion process. The first step is to fake-quantize the original floating-point model. Each layer has certain characteristics that describe it, e.g. the core choice (analog core or digital core). You can set some of these characteristics by editing the YAML file generated when the fake-quantized model is serialized. If you don't have a serialized file for the model yet, you can either run the model serialization step first or use the conversion with its default behaviour. Note that there are some constraints, which will be clarified in the serialization process in later updates to the training framework.
#### Model Description Loader

In [None]:
# Absolute path of the YAML file with the per-layer descriptions for ResNet-20.
# NOTE(review): this file is read here but written by the serialization cell further
# down, so on a first run it may not exist yet - verify before executing.
module_descriptions_pth = str(Path.cwd() / "serialized_models/resnet20.yaml")

# Parse the descriptions so they can be handed to the conversion step.
loader = ModulesLoader()
module_descriptions = loader.load(module_descriptions_pth)

#### Model Conversion


In [None]:
# Build the fake-quantized model from the trained floating-point model,
# using the layer descriptions loaded from the YAML file.
fake_quantized_model = DianaModule.from_trainedfp_model(
    model=custom_model,
    modules_descriptors=module_descriptions,
)

After the conversion, we need to run some functions on the fake-quantized model. For this, we wrap the model in the DianaModule class, which implements several useful helper functions.

In [None]:
# Wrap the fake-quantized model in a DianaModule; later cells access the wrapped
# model through Mixed_model.gmodule.
Mixed_model = DianaModule(fake_quantized_model) # presumably equivalent to setting Mixed_model.gmodule = fake_quantized_model - confirm
# Attach the training and validation datasets, both with the same input scale (train_scale).
Mixed_model.attach_train_dataset(train_dataset, train_scale)
Mixed_model.attach_validation_dataset(test_dataset,train_scale)

#### Model Serialization

In [None]:
# Serialize the description of the wrapped fake-quantized model.
serializer = ModulesSerializer(Mixed_model.gmodule)  
# Dump to the same path the descriptions were loaded from earlier in the notebook.
# NOTE(review): presumably this overwrites any manual edits in that YAML file - confirm.
serializer.dump(module_descriptions_pth) 

#### Quantization Initialization
By running forward passes and observing the weights and activations, we can initialize the quantization parameters. You can quantize only the linear layers, only the activations (ReLU & Identity), or both. However, if you quantize both activations and layers and then retrain the model, be careful: the learning rate is usually too high for training the ReLU clipping parameters (we use PACT). It is best to freeze the ReLU layers first and unfreeze them once some of the accuracy has been recouped.

In [None]:
# Initialize the quantization parameters of both the linear layers and the activations.
# NOTE(review): `count` presumably sets how many forward passes are observed - confirm.
Mixed_model.initialize_quantization(count=1)# for initialization of both linear layers and activations
# Alternatives (use one of these instead of the call above):
#Mixed_model.initialize_quantization_no_activation(count=1)# for initialization of the linear layers only
#Mixed_model.initialize_quantization_activations(count=1)# for initialization of the activations only

#### Training Example
You can skip this example and just load some of the pretrained weights instead.

In [None]:
def _iter_diana_relus(model):
    """Yield every DIANAReLU encountered while walking ``model.named_modules()``."""
    for _name, submodule in model.named_modules():
        if isinstance(submodule, DIANAReLU):
            yield submodule


# Stage 1: linear layers and activations were quantized together, so freeze the
# DIANAReLU layers while the rest of the network is retrained.
for relu in _iter_diana_relus(Mixed_model):
    relu.freeze()

FQ_weights = ''  # path where the trained weights should be saved; fill this in before training

optimizer = torch.optim.SGD(Mixed_model.gmodule.parameters(), lr=0.1, momentum=0.4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    mode='max',
    factor=0.1,
    patience=4,
)
# Original author's note: training with the scale of the layer before each ReLU clipped to 2.
params = DianaModule.train(
    Mixed_model.gmodule,
    optimizer,
    data_loader,
    epochs=120,
    model_save_path=FQ_weights,
    scheduler=scheduler,
)

# Stage 2: thaw the DIANAReLU layers again.
for relu in _iter_diana_relus(Mixed_model):
    relu.thaw()

# Retrain at this point to use the activation-quantization training algorithms such as PACT.

Or load the pretrained weights:

In [None]:
# Checkpoint with pretrained fake-quantized weights.
FQ_weights_act = f"{output_weights_path}/FQ_weights_act.pth"

# Read the checkpoint on the CPU, pass its 'state_dict' entry through
# DianaModule.remove_data_parallel, and load the result into the wrapped model.
fq_checkpoint = torch.load(FQ_weights_act, map_location='cpu')
fq_state_dict = DianaModule.remove_data_parallel(fq_checkpoint['state_dict'])
Mixed_model.gmodule.load_state_dict(fq_state_dict)

## Hardware Mapping 
Note: While training, you might have moved the model to a different device; be sure to move it back to the CPU before the conversion step.
#### Model Conversion 

In [None]:
# Run the hardware-mapping conversion on the wrapped model.
Mixed_model.map_to_hw()

# Checkpoint with weights for the hardware-mapped model.
HWmapped_weights = f"{output_weights_path}/HWmapped_weights.pth"

# Read the checkpoint on the CPU, pass its 'state_dict' entry through
# DianaModule.remove_data_parallel, and load the result into the mapped model.
hw_checkpoint = torch.load(HWmapped_weights, map_location='cpu')
hw_state_dict = DianaModule.remove_data_parallel(hw_checkpoint['state_dict'])
Mixed_model.gmodule.load_state_dict(hw_state_dict)

#### Re-Training 
Same as before, you can train the model as you usually would train a standard PyTorch model

## Layer Integrization
Same as before, be sure to return the model to the cpu for the conversion step.

In [None]:
# Run the layer-integrization step on the hardware-mapped model.
# The surrounding notes ask for the model to be on the CPU before this call.
Mixed_model.integrize_layers()

# ONNX Export
For the final step, we export our integrized model as an ONNX file. Note: you will get an error related to the DORY Annotator; you can safely ignore it. It will be fixed later.

In [None]:
# Destination folder for the export, passed to export_model as an absolute path string.
data_folder = Path("backend/cifar10/resnet20")
export_dir = str(data_folder.absolute())

# Move the wrapped model to the CPU, then export it.
Mixed_model.gmodule.to('cpu')
Mixed_model.export_model(export_dir)