In [1]:
import sys
import logging
import os
from pathlib import Path
from pprint import pprint as pp

# Locate the machop package relative to the notebook's working directory
# (two directory levels up, then "machop") and make it importable.
machop_path = Path(".").resolve().parent.parent / "machop"
assert machop_path.exists(), "Failed to find machop at: {}".format(machop_path)
# Guard the append so re-running this cell does not stack duplicate
# entries on sys.path.
if str(machop_path) not in sys.path:
    sys.path.append(str(machop_path))

from chop.tools.checkpoint_load import load_model
from chop.dataset import MaseDataModule, get_dataset_info
from chop.tools.logger import set_logging_verbosity

from chop.passes.graph.analysis import (
    report_node_meta_param_analysis_pass,
    profile_statistics_analysis_pass,
)
from chop.passes.graph import (
    add_common_metadata_analysis_pass,
    init_metadata_analysis_pass,
    add_software_metadata_analysis_pass,
)
from chop.tools.get_input import InputGenerator
from chop.ir.graph.mase_graph import MaseGraph

from chop.models import get_model_info, get_model

# Set the chop logger to "info" level (the call logs a confirmation message).
set_logging_verbosity("info")


[32mINFO    [0m [34mSet logging level to info[0m


In [2]:
# Experiment configuration reused by the cells below.
batch_size = 512
model_name = "jsc-tiny"
dataset_name = "jsc"

# Build the data module and run its prepare/setup steps.
data_module = MaseDataModule(
    name=dataset_name,
    batch_size=batch_size,
    model_name=model_name,
    num_workers=0,
)
data_module.prepare_data()
data_module.setup()

# Instantiate an untrained classification model for the chosen dataset.
model_info = get_model_info(model_name)
model = get_model(
    model_name,
    task="cls",
    dataset_info=data_module.dataset_info,
    pretrained=False,
    checkpoint=None,
)

# Optional: to start from trained weights instead, load a checkpoint with
#   model = load_model(load_name=<path to .ckpt>, load_type="pl", model=model)

# Take a single batch from the training dataloader as the example input.
input_generator = InputGenerator(
    data_module=data_module,
    model_info=model_info,
    task="cls",
    which_dataloader="train",
    max_batches=1,
)
dummy_in = next(iter(input_generator))

# Run one forward pass; dummy_in is unpacked as keyword arguments.
_ = model(**dummy_in)

# generate the mase graph and initialize node metadata
mg = MaseGraph(model=model)

In [3]:
from chop.actions import train
import torch

# Train the model held by the MaseGraph for one epoch.
# The model/dataset info is looked up via `model_name` / `dataset_name` from
# the configuration cell rather than repeating the literal names, so the two
# cells cannot drift apart when the configuration is changed.
model = mg.model
model_info = get_model_info(model_name)
dataset_info = get_dataset_info(dataset_name)
task = "cls"

train_params = {
    "model": model,
    "model_info": model_info,
    "data_module": data_module,
    "dataset_info": dataset_info,
    "task": task,
    "optimizer": "adam",
    "learning_rate": 1e-3,
    "weight_decay": 0,
    # Forwarded to the Lightning trainer; training stops after one epoch.
    "plt_trainer_args": {
        "max_epochs": 1,
    },
    "auto_requeue": False,
    # No checkpoint is saved or loaded and no visualizer is attached.
    "save_path": None,
    "visualizer": None,
    "load_name": None,
    "load_type": None,
}

train(**train_params)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | GraphModule        | 127   
1 | loss_fn   | CrossEntropyLoss   | 0     
2 | acc_train | MulticlassAccuracy | 0     
3 | acc_val   | MulticlassAccuracy | 0     
4 | acc_test  | MulticlassAccuracy | 0     
5 | loss_val  | MeanMetric         | 0     
6 | loss_test | MeanMetric         | 0     
-------------------------------------------------
127       Trainable params
0         Non-trainable params
127       Total params
0.001     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.
/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


In [4]:
from pprint import pprint

from chop.passes.graph.utils import get_mase_op

# Run the metadata passes in order: init, common (needs the example input),
# then software. Each pass returns the graph plus a second value that is
# discarded here.
mg, _ = init_metadata_analysis_pass(mg, None)
mg, _ = add_common_metadata_analysis_pass(mg, {"dummy_in": dummy_in})
mg, _ = add_software_metadata_analysis_pass(mg, None)

# Print the weight of every node whose mase op is 'linear'.
rule = 50*'-'
for node in mg.fx_graph.nodes:
    if get_mase_op(node) != 'linear':
        continue
    print(node.name)
    print(rule)
    pprint(mg.modules[node.target].weight)
    print(rule)

seq_blocks_2
--------------------------------------------------
Parameter containing:
tensor([[ 2.1387e-03,  7.5056e-02, -2.8624e-01,  1.7213e-02, -1.9997e-01,
          1.7816e-03, -4.2284e-03,  5.1670e-02, -2.9266e-02,  1.1370e-02,
         -3.6431e-02,  3.0126e-02,  1.4913e-01, -8.9499e-02,  3.1914e-01,
          1.9919e-01],
        [ 1.2857e-01,  1.0549e-01, -4.1898e-01,  4.0291e-01, -5.2149e-02,
         -7.8776e-02, -7.8595e-02, -2.7975e-02,  8.6640e-02, -1.2171e-01,
          2.0111e-01, -9.9952e-02,  1.1763e-01,  3.0851e-02,  2.6543e-02,
         -1.3726e-01],
        [-1.2600e-02,  1.4879e-02,  4.6097e-03,  1.7954e-01,  2.7007e-01,
         -5.1302e-02,  1.7064e-01, -1.4796e-01, -1.7629e-01,  9.6660e-02,
          1.0504e-02, -8.1087e-02, -6.5705e-02,  1.2889e-01, -4.7654e-01,
         -2.1522e-01],
        [ 5.4981e-05, -2.4064e-02,  2.3299e-01, -4.7142e-01,  7.0458e-02,
         -8.1796e-03,  8.9741e-03, -7.6902e-02,  3.6819e-02,  3.9025e-02,
          3.2290e-02, -3.0554e-

In [7]:
from chop.passes.graph.transforms import (
    prune_transform_pass,
)

# Weights and activations are pruned with the same settings; each target
# gets its own freshly built config dict.
pass_args = {
    target: {
        "scope": "global",
        "granularity": "elementwise",
        "method": "l1-norm",
        "sparsity": 0.5,
    }
    for target in ("weight", "activation")
}

# NOTE(review): this rebinds `mg`, so running the cell again applies the
# pass to an already-pruned graph; confirm that is intended before re-running.
mg, _ = prune_transform_pass(mg, pass_args)


In [8]:
from pprint import pprint

from chop.passes.graph.utils import get_mase_op

# For every linear layer: show the pruning mask(s) attached to its weight and
# report the fraction of weight entries that are exactly zero.
for node in mg.fx_graph.nodes:
    if get_mase_op(node) != 'linear':
        continue

    layer = mg.modules[node.target]
    print(f"Layer: {node.name}")

    # A weight may carry one or several parametrizations. Index 1 only exists
    # when more than one has been registered (e.g. the prune cell was executed
    # twice), so iterate over whatever is there instead of hard-coding [0]
    # and [1], which raises IndexError on a single-parametrization weight.
    masks = [param.mask for param in layer.parametrizations['weight']]
    for mask in masks:
        pprint(mask)
    if len(masks) > 1:
        # Element-wise comparison of the first two masks.
        print(masks[0] == masks[1])

    # Count zero entries of the effective weight in one vectorised step
    # (replaces the per-row loop).
    w = layer.weight
    total_w = w.numel()
    pruned_w = int((w == 0).sum())

    pruned_percent = pruned_w / total_w
    print(f"Pruned percent: {pruned_percent}")

    print(50*'-')


Layer: seq_blocks_2
tensor([[False, False,  True, False,  True, False, False, False, False, False,
         False, False,  True,  True,  True,  True],
        [ True,  True,  True,  True, False,  True,  True, False,  True,  True,
          True,  True,  True, False, False,  True],
        [False, False, False,  True,  True, False,  True,  True,  True,  True,
         False,  True, False,  True,  True,  True],
        [False, False,  True,  True, False, False, False, False, False, False,
         False, False,  True,  True,  True,  True],
        [False, False,  True, False, False, False,  True,  True, False, False,
          True,  True,  True, False, False, False]])
tensor([[False, False,  True, False,  True, False, False, False, False, False,
         False, False,  True, False,  True,  True],
        [False, False,  True,  True, False, False, False, False, False, False,
          True, False, False, False, False, False],
        [False, False, False,  True,  True, False,  True, Fals

In [27]:
from pprint import pprint

from chop.passes.graph.utils import get_mase_op

# For every linear layer: print the weight, then report the share of entries
# in the first weight mask that are zero/False (these are counted as pruned).
for node in mg.fx_graph.nodes:
    if get_mase_op(node) != 'linear':
        continue

    layer = mg.modules[node.target]
    print(f"Layer: {node.name}")
    pprint(layer.weight)

    # Iterate over the mask along its first dimension, as the counts are
    # accumulated row by row.
    mask_rows = list(layer.parametrizations['weight'][0].mask)
    total_w = sum(row.numel() for row in mask_rows)
    kept_w = sum(row.nonzero().numel() for row in mask_rows)
    pruned_w = total_w - kept_w

    pruned_percent = pruned_w / total_w
    print(f"Pruned percent: {pruned_percent}")

    print(50*'-')


Layer: seq_blocks_2
tensor([[-0.0000,  0.1373, -0.4103,  0.1213, -0.0665, -0.0000,  0.0000,  0.0000,
         -0.0698,  0.0000, -0.0000,  0.0000,  0.1044, -0.0617,  0.3110,  0.2112],
        [ 0.0896,  0.1528, -0.3329,  0.3900,  0.0669, -0.2552,  0.0713, -0.0000,
         -0.0000,  0.1347,  0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.1270],
        [-0.0000,  0.0927,  0.1679,  0.1217, -0.0000,  0.0858,  0.0000, -0.0716,
          0.0000, -0.0000,  0.0000, -0.0000,  0.0000,  0.0687, -0.4497, -0.2138],
        [-0.0000,  0.0000,  0.1436, -0.4103, -0.0000,  0.0689, -0.0000, -0.0000,
          0.0000,  0.0000, -0.0000,  0.0000,  0.0000, -0.0000,  0.2211, -0.1743],
        [-0.0000, -0.0000,  0.3617,  0.0000,  0.0000, -0.0000, -0.2324,  0.1435,
          0.0803, -0.1727, -0.1531,  0.1713, -0.1773,  0.0000, -0.0675,  0.0000]],
       grad_fn=<MulBackward0>)
Pruned percent: 0.5
--------------------------------------------------


In [19]:
from chop.actions import train
import torch

# Train the model currently held by the MaseGraph for one more epoch.
# The model/dataset info is looked up via `model_name` / `dataset_name` from
# the configuration cell rather than repeating the literal names, so the two
# cells cannot drift apart when the configuration is changed.
model = mg.model
model_info = get_model_info(model_name)
dataset_info = get_dataset_info(dataset_name)
task = "cls"

train_params = {
    "model": model,
    "model_info": model_info,
    "data_module": data_module,
    "dataset_info": dataset_info,
    "task": task,
    "optimizer": "adam",
    "learning_rate": 1e-3,
    "weight_decay": 0,
    # Forwarded to the Lightning trainer; training stops after one epoch.
    "plt_trainer_args": {
        "max_epochs": 1,
    },
    "auto_requeue": False,
    # No checkpoint is saved or loaded and no visualizer is attached.
    "save_path": None,
    "visualizer": None,
    "load_name": None,
    "load_type": None,
}

train(**train_params)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



  | Name      | Type               | Params
-------------------------------------------------
0 | model     | GraphModule        | 127   
1 | loss_fn   | CrossEntropyLoss   | 0     
2 | acc_train | MulticlassAccuracy | 0     
3 | acc_val   | MulticlassAccuracy | 0     
4 | acc_test  | MulticlassAccuracy | 0     
5 | loss_val  | MeanMetric         | 0     
6 | loss_test | MeanMetric         | 0     
-------------------------------------------------
127       Trainable params
0         Non-trainable params
127       Total params
0.001     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.
/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

/home/bkt123/anaconda3/envs/mase/lib/python3.11/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [33]:
from pprint import pprint

from chop.passes.graph.utils import get_mase_op


# Dump the weight and the first weight mask of every linear layer, framed by
# separator lines.
rule = 50*'-'
for node in mg.fx_graph.nodes:
    if get_mase_op(node) != 'linear':
        continue

    layer = mg.modules[node.target]
    print(node.name)
    print(rule)
    pprint(layer.weight)
    pprint(layer.parametrizations['weight'][0].mask)

    print(rule)

seq_blocks_0
--------------------------------------------------
tensor([[-0.0000, -0.0000, -1.6369,  0.5552, -0.7419, -0.0000,  0.0000,  0.0000,
         -0.0000,  0.0000,  0.7397, -0.0000,  0.0000,  0.0000,  0.7932,  1.3671],
        [ 0.8332,  0.0000, -0.4090, -1.0126, -0.0000, -0.0000, -0.0000, -0.0000,
          0.0000,  0.0000,  0.6031, -0.1658,  0.4106, -0.2976, -0.7187, -1.2277],
        [ 0.0000,  0.0189,  0.0000, -0.1738,  0.2096, -1.7474, -0.0000, -0.3942,
          0.0000, -0.0000,  0.0000, -0.0905,  0.3919, -0.0000,  0.0000, -1.5387],
        [-0.0000, -0.0000,  0.2736, -0.5186,  0.0000, -1.3009, -0.0000, -0.0000,
          0.0000,  0.0000, -0.0000,  0.0000,  0.0000, -0.0000,  0.2842, -1.3053],
        [-0.0000,  0.1588,  1.3117, -0.0839,  0.2535,  0.0833, -0.0000,  0.2678,
         -0.4250, -0.1340, -0.2888,  0.2900, -0.3746,  0.2911, -0.0000,  0.0000]],
       grad_fn=<MulBackward0>)
tensor([[False, False,  True,  True,  True, False, False, False, False, False,
          