In [1]:
import sys
import logging
import os
from pathlib import Path
from pprint import pprint as pp

# Locate the in-repo `machop` package: it is expected two directory levels
# above the current working directory (the notebook's folder when run in place).
machop_path = Path(".").resolve().parent.parent / "machop"
assert machop_path.exists(), "Failed to find machop at: {}".format(machop_path)
# Only extend sys.path once, so re-running this cell in the same kernel does
# not accumulate duplicate entries.
if str(machop_path) not in sys.path:
    sys.path.append(str(machop_path))

from chop.tools.checkpoint_load import load_model
from chop.dataset import MaseDataModule, get_dataset_info
from chop.tools.logger import set_logging_verbosity

from chop.passes.graph.analysis import (
    report_node_meta_param_analysis_pass,
    profile_statistics_analysis_pass,
)
from chop.passes.graph import (
    add_common_metadata_analysis_pass,
    init_metadata_analysis_pass,
    add_software_metadata_analysis_pass,
)
from chop.tools.get_input import InputGenerator
from chop.ir.graph.mase_graph import MaseGraph

from chop.models import get_model_info, get_model

# Switch the chop logger (chop.tools.logger) to "info" verbosity; the call
# itself logs a confirmation line when it runs.
set_logging_verbosity("info")


INFO     Set logging level to info


In [2]:
import torch

# Experiment configuration: which model/dataset pair to build and how large
# each batch is. SEED fixes torch's global RNG so that a fresh
# "Restart Kernel -> Run All" starts from the same random state.
SEED = 0
batch_size = 512
model_name = "jsc-tiny"
dataset_name = "jsc"

# Seed before the model is constructed: weight initialisation (and anything
# later that draws from torch's global RNG) otherwise differs on every run.
torch.manual_seed(SEED)

# Build the data module and materialise its datasets. num_workers=0 keeps the
# dataloaders worker-free (Lightning warns about this when training).
data_module = MaseDataModule(
    name=dataset_name,
    batch_size=batch_size,
    model_name=model_name,
    num_workers=0,
    # custom_dataset_cache_path="../../chop/dataset"
)
data_module.prepare_data()
data_module.setup()

dataset_info = get_dataset_info(dataset_name)

# Instantiate an untrained classification model (no pretrained weights, no
# checkpoint) for the chosen dataset.
model_info = get_model_info(model_name)
model = get_model(
    model_name,
    task="cls",
    dataset_info=dataset_info,
    pretrained=False,
    checkpoint=None,
)

# Optional warm start from a previously trained Lightning checkpoint:
#   model = load_model(load_name="<path/to/best.ckpt>", load_type="pl", model=model)

In [3]:
from chop.actions import test, train
import torch

# Task identifier passed through to the train/test actions.
task = "cls"

# Keyword arguments shared by `train` and `test`; kept in one mapping so that
# both actions are invoked with exactly the same configuration.
train_params = dict(
    model=model,
    model_info=model_info,
    data_module=data_module,
    dataset_info=dataset_info,
    task=task,
    optimizer="adam",
    learning_rate=1e-3,
    weight_decay=0,
    plt_trainer_args=dict(max_epochs=5),
    auto_requeue=False,
    save_path=None,
    visualizer=None,
    load_name=None,
    load_type=None,
)

# Fit for the configured number of epochs, then evaluate on the test split.
train(**train_params)

test(**train_params)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | JSC_Tiny           | 127   
1 | loss_fn   | CrossEntropyLoss   | 0     
2 | acc_train | MulticlassAccuracy | 0     
3 | acc_val   | MulticlassAccuracy | 0     
4 | acc_test  | MulticlassAccuracy | 0     
5 | loss_val  | MeanMetric         | 0     
6 | loss_test | MeanMetric         | 0     
-------------------------------------------------
127       Trainable params
0         Non-trainable params
127       Total params
0.001     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.
/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_acc_epoch         0.7024604678153992
     test_loss_epoch        0.8855997920036316
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [4]:
# Quantization is selected by name: everything falls back to the "default"
# entry (config name None), while "linear" gets an "integer" config in which
# every width / frac_width of data_in, weight and bias is set to 2.
quantize_args = {
    "by": "name",
    "default": {"config": {"name": None}},
    "linear": {
        "config": {
            "name": "integer",
            **{
                f"{operand}_{field}": 2
                for operand in ("data_in", "weight", "bias")
                for field in ("width", "frac_width")
            },
        }
    },
}

# Settings for the training stage that is bundled with the quantization config.
train_args = dict(
    name="accuracy",
    data_loader="train_dataloader",
    num_samples=100000,
    max_epochs=5,
    lr_scheduler="linear",
    optimizer="adam",
    learning_rate=1e-3,
    num_warmup_steps=0,
)

In [5]:
from chop.actions import quantize_model

# Bundle the quantization settings and the accompanying training settings
# under the single "quantization" key handed to quantize_model.
config = dict(
    quantization=dict(
        quantization_config=quantize_args,
        train=train_args,
    )
)

# `model` is both the input and the rebound output here, so re-running this
# cell passes the previously returned model back in.
model, results = quantize_model(
    model, model_info, "cls", dataset_info, data_module, config
)

print(results)

# Point the shared train/test arguments at the returned model, then evaluate it.
train_params.update(model=model)

test(**train_params)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


{'loss': 0.8984857797622681, 'accuracy': 0.695439338684082}


/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_acc_epoch         0.7018828392028809
     test_loss_epoch        0.8859482407569885
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [15]:
from chop.passes.graph.utils import get_mase_op


mg = MaseGraph(model)

# Draw a single batch from the training dataloader to serve as the dummy
# input for the metadata passes.
input_generator = InputGenerator(
    data_module=data_module,
    model_info=model_info,
    task="cls",
    which_dataloader="train",
    max_batches=1,
)
dummy_in = next(iter(input_generator))

# Run the metadata passes in order; each returns (graph, info) and only the
# graph is kept.
for analysis_pass, pass_args in (
    (init_metadata_analysis_pass, None),
    (add_common_metadata_analysis_pass, {"dummy_in": dummy_in}),
    (add_software_metadata_analysis_pass, None),
):
    mg, _ = analysis_pass(mg, pass_args)

# Show the recorded weight metadata of every node whose mase op is "linear".
for node in mg.fx_graph.nodes:
    if get_mase_op(node) != "linear":
        continue
    print(node.meta["mase"].parameters["common"]["args"]["weight"])

{'type': 'float', 'precision': [32], 'shape': [5, 16], 'from': None, 'value': Parameter containing:
tensor([[ 0.0829,  0.1509, -0.2380,  0.0673, -0.2939,  0.0683,  0.0586, -0.0245,
         -0.0167,  0.1401, -0.0761,  0.0796,  0.0603, -0.0731,  0.3072,  0.1545],
        [ 0.1668,  0.1587, -0.2411,  0.3641, -0.1361, -0.1130,  0.0558, -0.1061,
         -0.0147,  0.2215, -0.0951,  0.0275,  0.0612, -0.0682, -0.1065, -0.0513],
        [-0.0161,  0.0423, -0.1503,  0.2443,  0.2151,  0.0151,  0.0615, -0.1499,
         -0.1429,  0.0594, -0.0060, -0.0306, -0.0233,  0.0678, -0.4285, -0.2221],
        [-0.0341, -0.0056,  0.2740, -0.5499, -0.0401,  0.1539, -0.0400, -0.0583,
          0.1088, -0.0397, -0.0177,  0.0126,  0.0581,  0.0063,  0.2324, -0.1571],
        [-0.1369, -0.0669,  0.4017, -0.1437,  0.0970,  0.0752, -0.1080,  0.2031,
         -0.0098, -0.3225, -0.0134,  0.1232, -0.2251,  0.0833, -0.0542, -0.0496]],
       requires_grad=True)}


In [14]:
from chop.passes.graph.transforms.quantize.quantized_modules.linear import _LinearBase
from chop.passes.graph.utils import get_node_actual_target


# For every graph node that resolves to a quantized linear module, print the
# stored weight followed by the same weight passed through its w_quantizer.
for node in mg.fx_graph.nodes:
    module = get_node_actual_target(node)
    if not isinstance(module, _LinearBase):
        continue
    weight = module.weight
    print(weight)
    print(module.w_quantizer(weight))