# Examples of different fusion strategies

In [1]:
import torch

N = 100
T = 12

D1 = 5
D2 = 3
D3 = 8

V1 = torch.rand(N, T, D1) #view 1
V2 = torch.rand(N, T, D2) #view 2
V3 = torch.rand(N, D3) #view 3 with different shape

n_labels = 5
labels = torch.randint(n_labels, size=(N,1))

## Input Fusion

In [2]:
from mvlearning.single.models import create_model

TOTAL_DIMS_STACKED = D1+D2
DIM_EMBEDDING = 32

encoder_model = create_model(TOTAL_DIMS_STACKED, DIM_EMBEDDING, model_type="lstm")
prediction_head = create_model(DIM_EMBEDDING, n_labels, model_type="mlp", encoder=False)
full_model = torch.nn.Sequential(encoder_model, prediction_head)
full_model

Sequential(
  (0): Generic_Encoder(
    (pre_encoder): RNNet(
      (rnn): LSTM(8, 128, num_layers=2, batch_first=True)
      (fc): Sequential(
        (0): Identity()
      )
    )
    (linear_layer): Linear(in_features=128, out_features=32, bias=True)
    (normalization_layer): Identity()
  )
  (1): Generic_Decoder(
    (pre_decoder): MLP(
      (layers): Sequential(
        (0): Sequential(
          (0): Linear(in_features=32, out_features=128, bias=True)
          (1): ReLU()
          (2): Identity()
          (3): Identity()
        )
        (1): Sequential(
          (0): Linear(in_features=128, out_features=128, bias=True)
          (1): ReLU()
          (2): Identity()
          (3): Identity()
        )
      )
    )
    (linear_layer): Linear(in_features=128, out_features=5, bias=True)
  )
)

In [3]:
from mvlearning.fusion import InputFusion

mv_model = InputFusion(full_model, view_names=["view 1", "view 2"])
mv_model

InputFusion(
  (views_encoder): ModuleDict(
    (view 1): Identity()
    (view 2): Identity()
  )
  (merge_module): MergeModule(
    (concater_function): Concatenate_()
  )
  (prediction_head): Sequential(
    (0): Generic_Encoder(
      (pre_encoder): RNNet(
        (rnn): LSTM(8, 128, num_layers=2, batch_first=True)
        (fc): Sequential(
          (0): Identity()
        )
      )
      (linear_layer): Linear(in_features=128, out_features=32, bias=True)
      (normalization_layer): Identity()
    )
    (1): Generic_Decoder(
      (pre_decoder): MLP(
        (layers): Sequential(
          (0): Sequential(
            (0): Linear(in_features=32, out_features=128, bias=True)
            (1): ReLU()
            (2): Identity()
            (3): Identity()
          )
          (1): Sequential(
            (0): Linear(in_features=128, out_features=128, bias=True)
            (1): ReLU()
            (2): Identity()
            (3): Identity()
          )
        )
      )
      (linear

In [4]:
output_ = mv_model( {"view 1":V1, "view 2": V2})

for k,v in output_.items():
    if type(v) == dict:
        print(k, "----")
        for k_, v_ in v.items():
            print({k_: v_.size()})
    else:
        print({k: v.size()})

{'prediction': torch.Size([100, 5])}
views:rep ----
{'view 1': torch.Size([100, 12, 5])}
{'view 2': torch.Size([100, 12, 3])}
{'joint_rep': torch.Size([100, 12, 8])}


In [5]:
mv_model = InputFusion(full_model, view_names=["view 1", "view 2"], loss_function=torch.nn.CrossEntropyLoss())
mv_model.loss_batch({
    "views": {"view 1":V1, "view 2": V2},
    "target": labels
})

{'objective': tensor(1.6172, grad_fn=<NllLossBackward0>)}

## Decision Fusion

In [6]:
from mvlearning.single.models import create_model

DIM_EMBEDDING = 32


prediction_models= {}
for name, inp_dim in {"view 1": D1, "view 2":D2}.items():
    encoder_model = create_model(inp_dim, DIM_EMBEDDING, model_type="lstm")
    prediction_head = create_model(DIM_EMBEDDING, n_labels, model_type="mlp", encoder=False)
    prediction_models[name] = torch.nn.Sequential(encoder_model, prediction_head)

encoder_model = create_model(D3, DIM_EMBEDDING, model_type="mlp") #different model architecture
prediction_head = create_model(DIM_EMBEDDING, n_labels, model_type="mlp", encoder=False)
prediction_models["view 3"] = torch.nn.Sequential(encoder_model, prediction_head)

prediction_models.keys()

dict_keys(['view 1', 'view 2', 'view 3'])

In [7]:
from mvlearning.fusion import DecisionFusion

mv_model = DecisionFusion(prediction_models) #just take the avg
mv_model

DecisionFusion(
  (views_encoder): ModuleDict(
    (view 1): Sequential(
      (0): Generic_Encoder(
        (pre_encoder): RNNet(
          (rnn): LSTM(5, 128, num_layers=2, batch_first=True)
          (fc): Sequential(
            (0): Identity()
          )
        )
        (linear_layer): Linear(in_features=128, out_features=32, bias=True)
        (normalization_layer): Identity()
      )
      (1): Generic_Decoder(
        (pre_decoder): MLP(
          (layers): Sequential(
            (0): Sequential(
              (0): Linear(in_features=32, out_features=128, bias=True)
              (1): ReLU()
              (2): Identity()
              (3): Identity()
            )
            (1): Sequential(
              (0): Linear(in_features=128, out_features=128, bias=True)
              (1): ReLU()
              (2): Identity()
              (3): Identity()
            )
          )
        )
        (linear_layer): Linear(in_features=128, out_features=5, bias=True)
      )
    )
   

In [8]:
output_ = mv_model( {"view 1":V1, "view 2": V2, "view 3": V3})
for k,v in output_.items():
    if type(v) == dict:
        print(k, "----")
        for k_, v_ in v.items():
            print({k_: v_.size()})
    else:
        print({k: v.size()})

{'prediction': torch.Size([100, 5])}
views:rep ----
{'view 1': torch.Size([100, 5])}
{'view 2': torch.Size([100, 5])}
{'view 3': torch.Size([100, 5])}
{'joint_rep': torch.Size([100, 5])}


In [9]:
mv_model = DecisionFusion(prediction_models, loss_function=torch.nn.CrossEntropyLoss())
mv_model.loss_batch({
    "views": {"view 1":V1, "view 2": V2, "view 3": V3},
    "target": labels
})

{'objective': tensor(1.6142, grad_fn=<NllLossBackward0>)}

### Including Multi-loss

In [10]:
from mvlearning.fusion import DecisionFusionMultiLoss
mv_model = DecisionFusionMultiLoss(prediction_models,loss_function=torch.nn.CrossEntropyLoss(), multiloss_weights=3)
mv_model

DecisionFusionMultiLoss(
  (views_encoder): ModuleDict(
    (view 1): Sequential(
      (0): Generic_Encoder(
        (pre_encoder): RNNet(
          (rnn): LSTM(5, 128, num_layers=2, batch_first=True)
          (fc): Sequential(
            (0): Identity()
          )
        )
        (linear_layer): Linear(in_features=128, out_features=32, bias=True)
        (normalization_layer): Identity()
      )
      (1): Generic_Decoder(
        (pre_decoder): MLP(
          (layers): Sequential(
            (0): Sequential(
              (0): Linear(in_features=32, out_features=128, bias=True)
              (1): ReLU()
              (2): Identity()
              (3): Identity()
            )
            (1): Sequential(
              (0): Linear(in_features=128, out_features=128, bias=True)
              (1): ReLU()
              (2): Identity()
              (3): Identity()
            )
          )
        )
        (linear_layer): Linear(in_features=128, out_features=5, bias=True)
      )


In [11]:
mv_model.loss_batch({
    "views": {"view 1":V1, "view 2": V2, "view 3": V3},
    "target": labels
})

{'objective': tensor(6.4609, grad_fn=<AddBackward0>),
 'lossmain': tensor(1.6142, grad_fn=<NllLossBackward0>),
 'lossaux': tensor(14.5401, grad_fn=<AddBackward0>),
 'lossview 1': tensor(4.8643, grad_fn=<MulBackward0>),
 'lossview 2': tensor(4.8399, grad_fn=<MulBackward0>),
 'lossview 3': tensor(4.8358, grad_fn=<MulBackward0>)}

## Feature Fusion

In [12]:
from mvlearning.single.models import create_model
from mvlearning.merge_module import MergeModule
from mvlearning.utils import get_dic_emb_dims

DIM_EMBEDDING = 32

encoder_models = {}
for name, inp_dim in {"view 1": D1, "view 2":D2}.items():
    encoder_models[name] = create_model(inp_dim, DIM_EMBEDDING, model_type="lstm")
encoder_models["view 3"] = create_model(D3, DIM_EMBEDDING, model_type="mlp") #different model architecture
print(encoder_models.keys())

EMBEDDING_DIC= get_dic_emb_dims(encoder_models)
print(EMBEDDING_DIC)
merge_function = MergeModule(EMBEDDING_DIC, mode="concat")

prediction_head = create_model(sum(EMBEDDING_DIC.values()), n_labels, model_type="mlp", encoder=False)

merge_function

dict_keys(['view 1', 'view 2', 'view 3'])
{'view 1': 32, 'view 2': 32, 'view 3': 32}


MergeModule(
  (concater_function): Concatenate_()
)

In [13]:
from mvlearning.fusion import FeatureFusion

mv_model = FeatureFusion(encoder_models, merge_function, prediction_head, loss_function=torch.nn.CrossEntropyLoss())
mv_model

FeatureFusion(
  (views_encoder): ModuleDict(
    (view 1): Generic_Encoder(
      (pre_encoder): RNNet(
        (rnn): LSTM(5, 128, num_layers=2, batch_first=True)
        (fc): Sequential(
          (0): Identity()
        )
      )
      (linear_layer): Linear(in_features=128, out_features=32, bias=True)
      (normalization_layer): Identity()
    )
    (view 2): Generic_Encoder(
      (pre_encoder): RNNet(
        (rnn): LSTM(3, 128, num_layers=2, batch_first=True)
        (fc): Sequential(
          (0): Identity()
        )
      )
      (linear_layer): Linear(in_features=128, out_features=32, bias=True)
      (normalization_layer): Identity()
    )
    (view 3): Generic_Encoder(
      (pre_encoder): MLP(
        (layers): Sequential(
          (0): Sequential(
            (0): Linear(in_features=8, out_features=128, bias=True)
            (1): ReLU()
            (2): Identity()
            (3): Identity()
          )
          (1): Sequential(
            (0): Linear(in_features

In [14]:
output_ = mv_model( {"view 1":V1, "view 2": V2, "view 3": V3})
for k,v in output_.items():
    if type(v) == dict:
        print(k, "----")
        for k_, v_ in v.items():
            print({k_: v_.size()})
    else:
        print({k: v.size()})

{'prediction': torch.Size([100, 5])}
views:rep ----
{'view 1': torch.Size([100, 32])}
{'view 2': torch.Size([100, 32])}
{'view 3': torch.Size([100, 32])}
{'joint_rep': torch.Size([100, 96])}


In [15]:
mv_model.loss_batch({
    "views": {"view 1":V1, "view 2": V2, "view 3": V3},
    "target": labels
})

{'objective': tensor(1.6117, grad_fn=<NllLossBackward0>)}

### Pooling fusion modules

In [16]:
prediction_head = create_model(DIM_EMBEDDING, n_labels, model_type="mlp", encoder=False)

In [17]:
merge_function = MergeModule(EMBEDDING_DIC, mode="avg")
mv_model = FeatureFusion(encoder_models, merge_function, prediction_head, loss_function=torch.nn.CrossEntropyLoss())

for k,v in output_.items():
    if type(v) == dict:
        print(k, "----")
        for k_, v_ in v.items():
            print({k_: v_.size()})
    else:
        print({k: v.size()})

{'prediction': torch.Size([100, 5])}
views:rep ----
{'view 1': torch.Size([100, 32])}
{'view 2': torch.Size([100, 32])}
{'view 3': torch.Size([100, 32])}
{'joint_rep': torch.Size([100, 96])}


In [18]:
merge_function = MergeModule(EMBEDDING_DIC, mode="concat", adaptive=True, features=False)

mv_model = FeatureFusion(encoder_models, merge_function, prediction_head)

output_ = mv_model( {"view 1":V1, "view 2": V2, "view 3": V3})
for k,v in output_.items():
    if type(v) == dict:
        print(k, "----")
        for k_, v_ in v.items():
            print({k_: v_.size()})
    else:
        print({k: v.size()})

{'prediction': torch.Size([100, 5])}
views:rep ----
{'view 1': torch.Size([100, 32])}
{'view 2': torch.Size([100, 32])}
{'view 3': torch.Size([100, 32])}
{'joint_rep': torch.Size([100, 32])}
{'att_views': torch.Size([100, 3, 1])}


### Including Multi-loss

In [19]:
from mvlearning.fusion import FeatureFusionMultiLoss

mv_model = FeatureFusionMultiLoss(encoder_models, merge_function, prediction_head, loss_function=torch.nn.CrossEntropyLoss(),
                                  multiloss_weights={"view 1": 1, "view 2": 0 , "view 3": 0.1})
mv_model

FeatureFusionMultiLoss(
  (views_encoder): ModuleDict(
    (view 1): Generic_Encoder(
      (pre_encoder): RNNet(
        (rnn): LSTM(5, 128, num_layers=2, batch_first=True)
        (fc): Sequential(
          (0): Identity()
        )
      )
      (linear_layer): Linear(in_features=128, out_features=32, bias=True)
      (normalization_layer): Identity()
    )
    (view 2): Generic_Encoder(
      (pre_encoder): RNNet(
        (rnn): LSTM(3, 128, num_layers=2, batch_first=True)
        (fc): Sequential(
          (0): Identity()
        )
      )
      (linear_layer): Linear(in_features=128, out_features=32, bias=True)
      (normalization_layer): Identity()
    )
    (view 3): Generic_Encoder(
      (pre_encoder): MLP(
        (layers): Sequential(
          (0): Sequential(
            (0): Linear(in_features=8, out_features=128, bias=True)
            (1): ReLU()
            (2): Identity()
            (3): Identity()
          )
          (1): Sequential(
            (0): Linear(in

In [20]:
mv_model.loss_batch({
    "views": {"view 1":V1, "view 2": V2, "view 3": V3},
    "target": labels
})

{'objective': tensor(2.2039, grad_fn=<AddBackward0>),
 'lossmain': tensor(1.6128, grad_fn=<NllLossBackward0>),
 'lossaux': tensor(1.7733, grad_fn=<AddBackward0>),
 'lossview 1': tensor(1.6120, grad_fn=<MulBackward0>),
 'lossview 3': tensor(0.1612, grad_fn=<MulBackward0>)}

## Hybrid Fusion

In [22]:
from mvlearning.single.models import create_model
from mvlearning.merge_module import MergeModule
from mvlearning.utils import get_dic_emb_dims

DIM_EMBEDDING = 32

encoder_models = {}
for name, inp_dim in {"view 1": D1, "view 2":D2}.items():
    encoder_models[name] = create_model(inp_dim, DIM_EMBEDDING, model_type="lstm")
encoder_models["view 3"] = create_model(D3, DIM_EMBEDDING, model_type="mlp") #different model architecture
print(encoder_models.keys())

EMBEDDING_DIC= get_dic_emb_dims(encoder_models)
print(EMBEDDING_DIC)


merge_function = MergeModule(EMBEDDING_DIC, mode="concat", adaptive=True, features=False)
prediction_head = create_model(DIM_EMBEDDING, n_labels, model_type="mlp", encoder=False)

#fusion_module = MergeModule(EMBEDDING_DIC, mode="concat")
#prediction_head = create_model(sum(EMBEDDING_DIC.values()), n_labels, model_type="mlp", encoder=False)

merge_function

dict_keys(['view 1', 'view 2', 'view 3'])
{'view 1': 32, 'view 2': 32, 'view 3': 32}


MergeModule(
  (stacker_function): Stacking_()
  (concater_function): Concatenate_()
  (attention_function): Linear(in_features=96, out_features=3, bias=True)
)

In [23]:
from mvlearning.fusion import HybridFusion_FD

mv_model = HybridFusion_FD(encoder_models, merge_function, prediction_head, loss_function=torch.nn.CrossEntropyLoss())
mv_model

HybridFusion_FD(
  (views_encoder): ModuleDict(
    (view 1): Generic_Encoder(
      (pre_encoder): RNNet(
        (rnn): LSTM(5, 128, num_layers=2, batch_first=True)
        (fc): Sequential(
          (0): Identity()
        )
      )
      (linear_layer): Linear(in_features=128, out_features=32, bias=True)
      (normalization_layer): Identity()
    )
    (view 2): Generic_Encoder(
      (pre_encoder): RNNet(
        (rnn): LSTM(3, 128, num_layers=2, batch_first=True)
        (fc): Sequential(
          (0): Identity()
        )
      )
      (linear_layer): Linear(in_features=128, out_features=32, bias=True)
      (normalization_layer): Identity()
    )
    (view 3): Generic_Encoder(
      (pre_encoder): MLP(
        (layers): Sequential(
          (0): Sequential(
            (0): Linear(in_features=8, out_features=128, bias=True)
            (1): ReLU()
            (2): Identity()
            (3): Identity()
          )
          (1): Sequential(
            (0): Linear(in_featur

In [24]:
output_ = mv_model( {"view 1":V1, "view 2": V2, "view 3": V3})
for k,v in output_.items():
    if type(v) == dict:
        print(k, "----")
        for k_, v_ in v.items():
            print({k_: v_.size()})
    else:
        print({k: v.size()})
        
mv_model.loss_batch({
    "views": {"view 1":V1, "view 2": V2, "view 3": V3},
    "target": labels
})

views:rep ----
{'view 1': torch.Size([100, 32])}
{'view 2': torch.Size([100, 32])}
{'view 3': torch.Size([100, 32])}
{'joint_rep': torch.Size([100, 32])}
{'att_views': torch.Size([100, 3, 1])}
views:prediction ----
{'view 1': torch.Size([100, 5])}
{'view 2': torch.Size([100, 5])}
{'view 3': torch.Size([100, 5])}
fusion:prediction ----
{'feat': torch.Size([100, 5])}
{'dec': torch.Size([100, 5])}
{'prediction': torch.Size([100, 5])}


{'objective': tensor(1.6100, grad_fn=<AddBackward0>),
 'lossmain': tensor(1.6100, grad_fn=<NllLossBackward0>),
 'lossaux': 0}

## Ensemble aggregation
> or pooling of single-view models

In [25]:
from mvlearning.single.models import create_model

DIM_EMBEDDING = 32

prediction_models= {}
for name, inp_dim in {"view 1": D1, "view 2":D2}.items():
    encoder_model = create_model(inp_dim, DIM_EMBEDDING, model_type="lstm")
    prediction_head = create_model(DIM_EMBEDDING, n_labels, model_type="mlp", encoder=False)
    prediction_models[name] = torch.nn.Sequential(encoder_model, prediction_head)

encoder_model = create_model(D3, DIM_EMBEDDING, model_type="mlp") #different model architecture
prediction_head = create_model(DIM_EMBEDDING, n_labels, model_type="mlp", encoder=False)
prediction_models["view 3"] = torch.nn.Sequential(encoder_model, prediction_head)

prediction_models.keys()

dict_keys(['view 1', 'view 2', 'view 3'])

In [26]:
from mvlearning.fusion import SingleViewPool

mv_model = SingleViewPool(prediction_models, loss_function=torch.nn.CrossEntropyLoss())
mv_model

SingleViewPool(
  (views_encoder): ModuleDict(
    (view 1): Sequential(
      (0): Generic_Encoder(
        (pre_encoder): RNNet(
          (rnn): LSTM(5, 128, num_layers=2, batch_first=True)
          (fc): Sequential(
            (0): Identity()
          )
        )
        (linear_layer): Linear(in_features=128, out_features=32, bias=True)
        (normalization_layer): Identity()
      )
      (1): Generic_Decoder(
        (pre_decoder): MLP(
          (layers): Sequential(
            (0): Sequential(
              (0): Linear(in_features=32, out_features=128, bias=True)
              (1): ReLU()
              (2): Identity()
              (3): Identity()
            )
            (1): Sequential(
              (0): Linear(in_features=128, out_features=128, bias=True)
              (1): ReLU()
              (2): Identity()
              (3): Identity()
            )
          )
        )
        (linear_layer): Linear(in_features=128, out_features=5, bias=True)
      )
    )
   

In [27]:
output_ = mv_model( {"view 1":V1, "view 2": V2, "view 3": V3})
for k,v in output_.items():
    if type(v) == dict:
        print(k, "----")
        for k_, v_ in v.items():
            print({k_: v_.size()})
    else:
        print({k: v.size()})
        
mv_model.loss_batch({
    "views": {"view 1":V1, "view 2": V2, "view 3": V3},
    "target": labels
})

views:prediction ----
{'view 1': torch.Size([100, 5])}
{'view 2': torch.Size([100, 5])}
{'view 3': torch.Size([100, 5])}


{'objective': tensor(4.8503, grad_fn=<AddBackward0>),
 'lossview 1': tensor(1.6117, grad_fn=<NllLossBackward0>),
 'lossview 2': tensor(1.6145, grad_fn=<NllLossBackward0>),
 'lossview 3': tensor(1.6241, grad_fn=<NllLossBackward0>)}