In [1]:
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch
from fusion_models import TFN, LMF, AdaptiveRankFusion
from datasets import get_cmu_mosi_dataset

In [2]:
# Seed torch's RNG so weight initialisation and DataLoader shuffling are
# repeatable across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Prefer the GPU, but fall back to CPU so the notebook still runs on machines
# without CUDA instead of failing at the first tensor allocation.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
DTYPE = torch.float32

# Binary-label CMU-MOSI splits; device and dtype are forwarded to the loader.
train_set, valid_set, test_set = get_cmu_mosi_dataset(binary=True, device=device, dtype=DTYPE)

In [3]:
# Mini-batches for training; validation and test are each served as one
# full-size batch so a single forward pass covers the whole split.
batch_size = 32
train_dataloader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
valid_dataloader = DataLoader(dataset=valid_set, batch_size=len(valid_set))
test_dataloader = DataLoader(dataset=test_set, batch_size=len(test_set))

In [4]:
# --- AdaptiveRankFusion configuration ---------------------------------------
# NOTE: `epochs` is re-assigned to 300 in the training cell for this model, and
# `text_out` / `rank` are not passed to AdaptiveRankFusion below.
epochs = 50
text_out = 64
rank = 4
output_dim = 1

# Per-modality sizes; presumably ordered (audio, video, text) — TODO confirm
# against fusion_models.
input_dims = (5, 20, 300)
hidden_dims = (4, 16, 128)
dropouts = (0.3, 0.3, 0.3, 0.3)

model = AdaptiveRankFusion(
    input_dims,
    hidden_dims,
    dropouts,
    output_size=output_dim,
    max_rank=10,
    prior_type='half_cauchy',
    eta=0.01,
    device=device,
    dtype=DTYPE,
)



In [5]:
# Sanity check: evaluate the log-probability of the first fusion factor under
# its own prior distribution (the cell output is that value).
model.fusion_layer.weight_tensor.factor_prior_distributions[0].log_prob(model.fusion_layer.weight_tensor.factors[0])

tensor(-6.3729, device='cuda:0', grad_fn=<SumBackward1>)

In [6]:
# Train AdaptiveRankFusion on the binary task.
# Objective: BCE-with-logits minus a scaled log-prior of the fusion layer, so
# minimising it trades data fit against the prior on the factors.
criterion = nn.BCEWithLogitsLoss()
factor_lr = 0.0005
lr = 0.001
prior_weight = 0.0001  # scale applied to the fusion layer's log-prior term

# Two parameter groups: modality sub-networks at `lr`, fusion layer at `factor_lr`.
subnet_params = list(model.audio_subnet.parameters()) + list(model.video_subnet.parameters()) + list(model.text_subnet.parameters())
optimizer = optim.Adam([{"params": subnet_params, "lr": lr},
                        {"params": list(model.fusion_layer.parameters()), "lr": factor_lr}])
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch releases —
# confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, verbose=True)
epochs = 300
for e in range(epochs):
    print('Epoch {}'.format(e))
    train_loss = 0.0  # sum of per-batch losses (prior term included), not a mean
    model.train()
    # Show the current rank parameter at the start of every epoch.
    print(model.fusion_layer.weight_tensor.rank_parameter)
    for text, audio, vision, label in train_dataloader:
        model.zero_grad()
        output = model(audio, vision, text)
        loss = criterion(output, label) - prior_weight * model.fusion_layer.get_log_prior()
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print(train_loss)

    # Validation: no autograd graph is needed, so disable gradient tracking to
    # avoid holding activations for the full-set batch. The loss is averaged
    # over samples so it stays correct if the loader yields several batches.
    model.eval()
    valid_loss_sum = 0.0
    n_valid = 0
    with torch.no_grad():
        for text, audio, vision, label in valid_dataloader:
            output = model(audio, vision, text)
            n_batch = len(label)
            valid_loss_sum += criterion(output, label).item() * n_batch
            n_valid += n_batch
    assert n_valid > 0, "validation loader yielded no samples"
    valid_loss = valid_loss_sum / n_valid
    print(valid_loss)
    # The plateau scheduler watches the (prior-free) validation loss.
    scheduler.step(valid_loss)

Epoch 0
Parameter containing:
tensor([[0.2760, 0.2565, 0.2493, 0.2274, 0.2766, 0.2914, 0.2627, 0.2551, 0.2370,
         0.2497]], device='cuda:0', requires_grad=True)
28.095858335494995
0.6251713633537292
Epoch 1
Parameter containing:
tensor([[0.2535, 0.2343, 0.2265, 0.2048, 0.2537, 0.2689, 0.2399, 0.2323, 0.2143,
         0.2269]], device='cuda:0', requires_grad=True)
24.905487626791
0.5518817901611328
Epoch 2
Parameter containing:
tensor([[0.2262, 0.2183, 0.1971, 0.1753, 0.2242, 0.2411, 0.2106, 0.2039, 0.1852,
         0.1974]], device='cuda:0', requires_grad=True)
23.234469801187515
0.5189859867095947
Epoch 3
Parameter containing:
tensor([[0.1983, 0.2013, 0.1662, 0.1442, 0.1930, 0.2126, 0.1800, 0.1743, 0.1553,
         0.1664]], device='cuda:0', requires_grad=True)
21.854266434907913
0.48302486538887024
Epoch 4
Parameter containing:
tensor([[0.1712, 0.1845, 0.1351, 0.1127, 0.1614, 0.1847, 0.1488, 0.1445, 0.1266,
         0.1349]], device='cuda:0', requires_grad=True)
20.378897994756

In [4]:
# Debug check: report which device the first fusion factor lives on.
# NOTE(review): this cell carries execution count In [4], out of sequence with
# the cells around it, and relies on `model` already existing in the kernel.
model.fusion_layer.weight_tensor.factors[0].device

device(type='cuda', index=0)

In [6]:
# --- TFN baseline configuration ---------------------------------------------
# settings from https://github.com/Justin1904/TensorFusionNetworks/blob/master/train.py
# NOTE: re-assigning `batch_size` here does not rebuild the DataLoaders created
# earlier, and `epochs` is overridden in the TFN training cell.
batch_size, epochs = 32, 50
input_dims = (5, 20, 300)
hidden_dims = (4, 16, 128)
text_out, post_fusion_dim = 64, 32
dropouts = (0.3, 0.3, 0.3, 0.3)

# Rebinds the notebook-wide `model` name to a fresh TFN instance.
model = TFN(
    input_dims,
    hidden_dims,
    text_out,
    dropouts,
    post_fusion_dim,
    device=device,
    dtype=DTYPE,
)

In [7]:
# Train the TFN baseline with plain BCE-with-logits.
epochs = 30
criterion = nn.BCEWithLogitsLoss()
# The first two entries of model.parameters() are deliberately left out of the
# optimizer; presumably they are fixed constants in TFN — TODO confirm against
# fusion_models.
optimizer = optim.Adam(list(model.parameters())[2:])
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch releases —
# confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, verbose=True)
for e in range(epochs):
    print('Epoch {}'.format(e))
    train_loss = 0.0  # sum of per-batch losses, not a mean
    model.train()
    for text, audio, vision, label in train_dataloader:
        model.zero_grad()
        output = model(audio, vision, text)
        loss = criterion(output, label)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print(train_loss)

    # Validation: disable gradient tracking so no autograd graph is kept for
    # the full-set batch, and average over samples so the value stays correct
    # if the loader ever yields more than one batch.
    model.eval()
    valid_loss_sum = 0.0
    n_valid = 0
    with torch.no_grad():
        for text, audio, vision, label in valid_dataloader:
            output = model(audio, vision, text)
            n_batch = len(label)
            valid_loss_sum += criterion(output, label).item() * n_batch
            n_valid += n_batch
    assert n_valid > 0, "validation loader yielded no samples"
    valid_loss = valid_loss_sum / n_valid
    # Step the scheduler first so its LR-reduction message precedes the loss.
    scheduler.step(valid_loss)
    print(valid_loss)

Epoch 0
27.746791183948517
0.5983000993728638
Epoch 1
22.599841356277466
0.5741891860961914
Epoch 2
20.9776431620121
0.5182520747184753
Epoch 3
18.447107419371605
0.5316178798675537
Epoch 4
16.16508023440838
0.5781940817832947
Epoch 5
13.750149548053741
0.5866302847862244
Epoch 6
12.41133339703083
Epoch     7: reducing learning rate of group 0 to 1.0000e-04.
0.5868079662322998
Epoch 7
9.790406368672848
0.6064910292625427
Epoch 8
9.155158430337906
0.6138084530830383
Epoch 9
8.670896269381046
0.6321585178375244
Epoch 10
8.77745607495308
Epoch    11: reducing learning rate of group 0 to 1.0000e-05.
0.6368790864944458
Epoch 11
8.624453119933605
0.6399129629135132
Epoch 12
8.29000923037529
0.6426395177841187
Epoch 13
8.115225300192833
0.6466211080551147
Epoch 14
7.943665310740471
Epoch    15: reducing learning rate of group 0 to 1.0000e-06.
0.64751797914505
Epoch 15
7.860414206981659
0.6442150473594666
Epoch 16
8.334999352693558
0.6477405428886414
Epoch 17
7.960166782140732
0.64896446466445

In [7]:
# --- LMF baseline configuration ---------------------------------------------
# settings from https://github.com/Justin1904/TensorFusionNetworks/blob/master/train.py
# NOTE: re-assigning `batch_size` here does not rebuild the DataLoaders created
# earlier; `epochs` set here is the value the LMF training cell iterates over.
batch_size, epochs = 32, 50
input_dims = (5, 20, 300)
hidden_dims = (4, 16, 128)
text_out = 64
dropouts = (0.3, 0.3, 0.3, 0.3)
rank, output_dim = 4, 1

# Rebinds the notebook-wide `model` name to a fresh LMF instance.
model = LMF(
    input_dims,
    hidden_dims,
    text_out,
    dropouts,
    output_dim,
    rank,
    use_softmax=False,
    device=device,
    dtype=DTYPE,
)

In [8]:
# Train the LMF baseline with plain BCE-with-logits.
# `epochs` is taken from the LMF configuration cell.
criterion = nn.BCEWithLogitsLoss()
# Split parameters into two optimizer groups by position: the first three get
# `factor_lr`, everything else `lr`. Presumably the first three are the
# per-modality low-rank factors — TODO confirm against fusion_models.
factors = list(model.parameters())[:3]
other = list(model.parameters())[3:]
factor_lr = 0.0005
lr = 0.001
optimizer = optim.Adam([{"params": factors, "lr": factor_lr}, {"params": other, "lr": lr}])
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch releases —
# confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, verbose=True)
for e in range(epochs):
    print('Epoch {}'.format(e))
    train_loss = 0.0  # sum of per-batch losses, not a mean
    model.train()
    for text, audio, vision, label in train_dataloader:
        model.zero_grad()
        output = model(audio, vision, text)
        loss = criterion(output, label)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print(train_loss)

    # Validation: disable gradient tracking so no autograd graph is kept for
    # the full-set batch, and average over samples so the value stays correct
    # if the loader ever yields more than one batch.
    model.eval()
    valid_loss_sum = 0.0
    n_valid = 0
    with torch.no_grad():
        for text, audio, vision, label in valid_dataloader:
            output = model(audio, vision, text)
            n_batch = len(label)
            valid_loss_sum += criterion(output, label).item() * n_batch
            n_valid += n_batch
    assert n_valid > 0, "validation loader yielded no samples"
    valid_loss = valid_loss_sum / n_valid
    # Step the scheduler first so its LR-reduction message precedes the loss.
    scheduler.step(valid_loss)
    print(valid_loss)

Epoch 0
27.707884550094604
0.6103724837303162
Epoch 1
25.751307010650635
0.5993456244468689
Epoch 2
21.946386218070984
0.506530225276947
Epoch 3
19.738655865192413
0.5119958519935608
Epoch 4
17.642503082752228
0.4785168170928955
Epoch 5
16.091687828302383
0.5398954153060913
Epoch 6
14.24790771305561
0.5091980695724487
Epoch 7
11.752081230282784
0.49786096811294556
Epoch 8
10.936970323324203
0.6509965658187866
Epoch 9
8.470754906535149
0.7462157011032104
Epoch 10
6.700329817831516
Epoch    11: reducing learning rate of group 0 to 5.0000e-05.
Epoch    11: reducing learning rate of group 1 to 1.0000e-04.
0.6205970048904419
Epoch 11
4.487218387424946
0.656859278678894
Epoch 12
4.011319886893034
0.7138518691062927
Epoch 13
2.7622850690968335
0.7538882493972778
Epoch 14
2.608662152197212
0.8387965559959412
Epoch 15
2.2656643863301724
0.8688085675239563
Epoch 16
1.9815785735845566
Epoch    17: reducing learning rate of group 0 to 5.0000e-06.
Epoch    17: reducing learning rate of group 1 to 1

In [11]:
# Debug check: report the dtype of the first fusion factor.
# NOTE(review): in file order `model` was last bound to the LMF instance; whether
# LMF exposes `fusion_layer.weight_tensor` is not visible here, so this cell
# likely depends on an earlier kernel state (execution count In [11]) — confirm.
model.fusion_layer.weight_tensor.factors[0].dtype

torch.float32

In [27]:
# Debug check: unpack the fusion layer's rank parameter into per-component scalars.
# NOTE(review): the training cell reads `weight_tensor.rank_parameter`, whereas
# this cell reads `fusion_layer.rank_param`, and it carries execution count
# In [27]; it appears to come from a different session/model version — confirm
# before relying on it under Restart & Run All.
list(model.fusion_layer.rank_param)

[tensor(0.0361, grad_fn=<UnbindBackward>),
 tensor(0.4688, grad_fn=<UnbindBackward>),
 tensor(0.1688, grad_fn=<UnbindBackward>),
 tensor(0.7413, grad_fn=<UnbindBackward>),
 tensor(0.5441, grad_fn=<UnbindBackward>),
 tensor(0.8754, grad_fn=<UnbindBackward>),
 tensor(0.3675, grad_fn=<UnbindBackward>),
 tensor(0.9385, grad_fn=<UnbindBackward>),
 tensor(0.8079, grad_fn=<UnbindBackward>),
 tensor(0.3892, grad_fn=<UnbindBackward>)]

In [16]:
# Debug check: display the fusion layer's rank parameter.
# NOTE(review): executed out of order (In [16]) and reads `rank_param` rather
# than the `weight_tensor.rank_parameter` attribute used in the training cell;
# presumably a leftover from an earlier model version — confirm.
model.fusion_layer.rank_param

Parameter containing:
tensor([0.0560, 0.3947, 0.4498, 0.4022, 0.3426, 0.5294, 0.5075, 0.6149, 0.6144,
        0.5896], requires_grad=True)