In [1]:
import sys
import os

# 获取当前 Notebook 的目录（根目录）
notebook_dir = os.getcwd()

# 将 ALMT/models 目录添加到 sys.path
sys.path.append(os.path.join(notebook_dir, "ALMT", "models"))
sys.path.append(os.path.join(notebook_dir, "ALMT", "core"))
sys.path.append(os.path.join(notebook_dir, "ALMT"))

In [10]:
del sys.modules['almt_layer']
del sys.modules['dataset']

In [2]:
import torch
from torch import nn
from almt_layer import Transformer,CrossTransformerEncoder, HhyperLearningEncoder, CrossTransformer, Transformer
from bert import BertTextEncoder
from einops import repeat
from dataset import MMDataset
from core.scheduler import GradualWarmupScheduler
from metric import MetricsTop


import numpy as np
import torch.optim as optim
from torchvision import datasets, models, transforms

from torch.utils.data import DataLoader, Dataset
import torch.optim.lr_scheduler as lr_scheduler
from PIL import Image

import time
from tqdm import tqdm
from torch.distributions import Bernoulli
import csv



2025-04-24 19:01:33.714981: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-24 19:01:33.935352: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


PRO TIP 💡 Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.



In [74]:
import gc

def clear_memory():
    torch.cuda.empty_cache()  # 释放 GPU 缓存
    gc.collect()  # 释放 CPU 内存

clear_memory()

In [4]:
train_dataset = MMDataset(dataset='chsims',mode='train', image_num=5, generate_num = 4)
valid_dataset = MMDataset(dataset='chsims',mode='test', image_num=5, generate_num = 1)

100%|██████████| 1368/1368 [28:40<00:00,  1.26s/it]
100%|██████████| 457/457 [02:30<00:00,  3.04it/s]


In [75]:
valid_dataset = MMDataset(dataset='chsims',mode='valid', image_num=5, generate_num = 1)

100%|██████████| 456/456 [02:35<00:00,  2.93it/s]


In [9]:
torch.save(train_dataset, '/scratch/song.xinwe/chsims/train_dataset.pt')
torch.save(valid_dataset, '/scratch/song.xinwe/chsims/valid_dataset.pt')

In [4]:
train_dataset = torch.load('/scratch/song.xinwe/chsims/train_dataset.pt')
valid_dataset = torch.load('/scratch/song.xinwe/chsims/valid_dataset.pt')

  train_dataset = torch.load('/scratch/song.xinwe/chsims/train_dataset.pt')
  valid_dataset = torch.load('/scratch/song.xinwe/chsims/valid_dataset.pt')


In [76]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
val_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False, num_workers=8)

In [77]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [78]:
class CrossAttentionTA(nn.Module):
    def __init__(self, dim=128, heads=8, mlp_dim=128):
        super().__init__()
        self.cross_attn = CrossTransformer(source_num_frames=8, tgt_num_frames=8, dim=dim, depth=1, heads=heads, mlp_dim=mlp_dim)

    def forward(self, h_t, h_a):
        out = self.cross_attn(h_t, h_a)  # [B, 9, D]
        return out  # keep [CLS] and body for downstream split

class GateController(nn.Module):
    def __init__(self, dim=128):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(dim, dim),
            nn.ReLU(),
            nn.Linear(dim, 1),
            #nn.Sigmoid()
        )

    def forward(self, state):
        # GateController.forward 中添加
        temperature = 1.
        p = torch.sigmoid(self.fc(state) / temperature)  
        #p = self.fc(state)
        dist = Bernoulli(probs=p)
        action = dist.sample()  # [B, 1]
        log_prob = dist.log_prob(action)  # [B, 1]
        return action, log_prob, p
        

In [79]:
class EMALMTBlock(nn.Module):
    def __init__(self, dim=128, heads=8, mlp_dim=128, dropout=0.):
        super(EMALMTBlock, self).__init__()

        # 融合模块（ALMT核心）
        self.fusion = HhyperLearningEncoder(dim=dim, depth=1, heads=heads, dim_head=16, dropout=dropout)

        # 视觉生成模块（E步）
        self.visual_predictor = Transformer(
            num_frames=16,
            save_hidden=False,
            token_len=None,  # 直接输出 token_len 长度的序列
            dim=dim,
            depth=1,
            heads=heads,
            mlp_dim=mlp_dim,
            dropout=dropout
        )

        # 视觉反馈映射模块（M步）
        self.feedback_proj = nn.Sequential(
            nn.Linear(dim, dim),
            nn.GELU(),
            nn.Linear(dim, dim)
        )
        
        self.cross_ta = CrossAttentionTA(dim=dim, heads=heads, mlp_dim=mlp_dim)
        self.gate_controller = GateController(dim=2*dim)  # one per layer


    def forward(self, h_v_list, h_l, h_a, h_hyper_v, layer_idx):
        """
        输入：
        - h_v_list: List of vision encoder layer outputs
        - h_l, h_a: 融合输入
        - h_hyper_v: 上一层融合结果
        - layer_idx: 当前层 index，用于选择要更新的视觉层
        输出：
        - h_hyper_v: 当前层融合输出
        - h_v_list: 修正后的视觉层列表
        """
        # 获取 TA 融合的 [CLS] token 表示


        h_ta_full = self.cross_ta(h_l, h_a)  # [B, 9, D] with [CLS] + 8 tokens
        gate_input = torch.cat([h_hyper_v[:,0],h_ta_full[:, 0]] ,dim=-1) 
        h_ta = h_ta_full[:, 1:]  # [B, 8, D] - remaining tokens for gated fusion
        gate, log_prob, prob = self.gate_controller(gate_input)  # [B, 1]
        gate = gate.unsqueeze(2)  # [B, 1, 1]

        # Apply gate to h_ta
        h_ta_gated = gate * h_ta

        h_hyper_v = self.fusion([h_v_list[layer_idx]], h_l, h_a, h_hyper_v)
        
        h_hyper_v = h_hyper_v + h_ta_gated

        # === E步：基于融合结果预测视觉表示
        h_v_feature = self.visual_predictor(h_hyper_v)  # (B, token_len, dim)
        
        if layer_idx+1<len(h_v_list):
            # === M步：将 h_v_feature 映射并反馈更新 h_v_list[layer_idx]
            h_v_feedback = self.feedback_proj(h_v_feature)  # (B, token_len, dim)

            h_v_list[layer_idx+1] = h_v_list[layer_idx+1] + h_v_feedback
            

        #return h_hyper_v, h_v_list, h_v_feature, log_prob, gate
        return h_hyper_v, h_v_list, h_v_feature, log_prob, gate, prob





class VisionFeatureAggregator(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=dim, nhead=8),
            num_layers=1
        )

    def forward(self, x):  
        return self.transformer(x)
    
    
class ResNetWithDropout(nn.Module):
    def __init__(self, dropout_rate=0.3):
        super().__init__()
        resnet18 = models.resnet18(pretrained=True)
        self.features = nn.Sequential(*list(resnet18.children())[:-1])  # 去掉最后一层 FC
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x = self.features(x)
        x = x.squeeze(-1).squeeze(-1)
        x = self.dropout(x)
        return x


class Model(nn.Module):
    def __init__(self, AHL_depth=3, fusion_layer_depth=4, bert_pretrained='bert-base-chinese'):
        super(Model, self).__init__()

        self.h_hyper = nn.Parameter(torch.ones(1, 8, 128))

        # 基础编码器
        self.bertmodel = BertTextEncoder(use_finetune=True, transformers='bert', pretrained=bert_pretrained)
        self.img_extractor = ResNetWithDropout(dropout_rate=0.1)
        self.vf_aggregator = VisionFeatureAggregator(dim=512)

        # 投影层
        self.proj_l0 = nn.Sequential(nn.Linear(768, 128),nn.Dropout(0.1))
        self.proj_a0 = nn.Sequential(nn.Linear(33, 128),nn.Dropout(0.1))
        self.proj_v0 = nn.Sequential(nn.Linear(512, 128),nn.Dropout(0.1))

        # 序列处理
        self.proj_l = Transformer(num_frames=39, save_hidden=False, token_len=8, dim=128, depth=1, heads=8, mlp_dim=128)
        self.proj_a = Transformer(num_frames=400, save_hidden=False, token_len=8, dim=128, depth=1, heads=8, mlp_dim=128)
        self.proj_v = Transformer(num_frames=5, save_hidden=False, token_len=8, dim=128, depth=1, heads=8, mlp_dim=128)

        

        # 视觉主干编码器（输出多个中间层）
        self.vision_encoder = Transformer(num_frames=8, save_hidden=True, token_len=None, dim=128, depth=AHL_depth-1, heads=8, mlp_dim=128)

        # 新增：多层 EMALMTBlock 替代 h_hyper_layer_v
        self.em_almt_blocks = nn.ModuleList([
            EMALMTBlock(dim=128, heads=8, mlp_dim=128, dropout=0.)
            for _ in range(AHL_depth)
        ])

        # 跨模态融合 & 情感预测
        self.fusion_layer = CrossTransformer(source_num_frames=8, tgt_num_frames=8, dim=128, depth=fusion_layer_depth, heads=8, mlp_dim=128)
        self.cls_head = nn.Linear(128, 1)
        
        self.vision_feature_extractor = nn.Sequential(
            Transformer(num_frames=8, save_hidden=False, token_len=1, dim=128, depth=1, heads=8, mlp_dim=64, dropout=0.),   
            #nn.Dropout(0.3)
        )

        
    def forward(self, x_visual, x_audio, x_text):
        
        log_probs, gates, probs = [], [], []

        b = x_visual.size(0)
        h_hyper_v = repeat(self.h_hyper, '1 n d -> b n d', b=b)

        # 视觉特征提取
        x_visual = self.img_extractor(x_visual.view(-1, 3, 224, 224))
        x_visual = x_visual.view(b, 5, 512)
        x_visual = self.proj_v0(self.vf_aggregator(x_visual))



        x_audio = self.proj_a0(x_audio)
        x_text = self.bertmodel(x_text)
        x_text = self.proj_l0(x_text)

        h_v = self.proj_v(x_visual)[:, :8]
        h_a = self.proj_a(x_audio)[:, :8]
        h_l = self.proj_l(x_text)[:, :8]
        
        
        
        h_v_list = list(self.vision_encoder(h_v))  # 多层视觉表示
        

        for i, block in enumerate(self.em_almt_blocks):
            h_hyper_v, h_v_list, h_v_feature, log_prob, gate, prob = block(h_v_list, h_l, h_a, h_hyper_v, layer_idx=i)
            log_probs.append(log_prob)
            gates.append(gate)
            probs.append(prob)

        # 情感预测
        feat = self.fusion_layer(h_hyper_v, h_v_list[-1])[:, 0]
        output = self.cls_head(feat)

        return output, self.vision_feature_extractor(h_v_feature)[:,0], log_probs, gates, probs


In [80]:
model = Model().to(device)

emo_loss_fn = torch.nn.MSELoss()

def SupervisedContrastiveLoss(h_v_cls, labels, sigma=0.1):
    """
    :param h_v_cls: (B, D) - visual embeddings after projection (final representation)
    :param labels: (B,) - continuous sentiment labels
    :param sigma: float - Gaussian kernel width (smaller = stricter similarity)
    :return: scalar contrastive loss
    """
    batch_size = h_v_cls.shape[0]

    # Normalize embeddings
    h_v_cls = torch.nn.functional.normalize(h_v_cls, dim=1)  # (B, D)

    # Cosine similarity matrix: (B, B)
    similarity_matrix = torch.cosine_similarity(h_v_cls.unsqueeze(1), h_v_cls.unsqueeze(0), dim=-1)

    # Compute label distance matrix
    labels = labels.contiguous().view(-1, 1)  # (B, 1)
    label_diff = labels - labels.T  # (B, B)

    # Gaussian weight based on label similarity
    weight_matrix = torch.exp(- (label_diff ** 2) / (2 * sigma ** 2)).to(h_v_cls.device)  # (B, B)

    # Exponential of cosine similarity
    exp_sim = torch.exp(similarity_matrix)  # (B, B)

    # Avoid self-comparison by masking diagonal to 0
    identity_mask = torch.eye(batch_size, device=h_v_cls.device)
    weight_matrix = weight_matrix * (1 - identity_mask)
    exp_sim = exp_sim * (1 - identity_mask)

    # Compute weighted contrastive loss
    numerator = torch.sum(exp_sim * weight_matrix, dim=1)  # (B,)
    denominator = torch.sum(exp_sim, dim=1) + 1e-8          # (B,)

    loss = -torch.log(numerator / denominator + 1e-8)       # (B,)
    return loss.mean()




In [40]:
def count_parameters(model):
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total parameters: {total:,}")
    print(f"Trainable parameters: {trainable:,}")

# 示例用法
count_parameters(model)


Total parameters: 122,167,556
Trainable parameters: 122,167,556


In [81]:
def save_validation_metrics(epoch, metrics, save_path="metrics_log_chsims.csv"):
    """
    保存每轮验证的指标到 CSV 文件。
    
    Args:
        epoch (int): 当前 epoch
        metrics (dict): 包含各类验证指标的字典
        save_path (str): 保存路径
    """
    file_exists = os.path.exists(save_path)
    header = ["epoch"] + list(metrics.keys())

    with open(save_path, "a", newline="") as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(header)
        writer.writerow([epoch] + [metrics[k] for k in header[1:]])


In [82]:
best_val_mae = float("inf")

def save_best_model(model, mae, save_path='best_model_chsims.pt'):
    global best_val_mae
    if mae < best_val_mae:
        best_val_mae = mae
        torch.save(model.state_dict(), save_path)
        print(f"✅ Saved best model (MAE = {mae:.4f}) to {save_path}")


In [83]:
# =========================
# 设置训练参数
# =========================
epochs = 200
train_mae, val_mae = [], []

optimizer = torch.optim.AdamW(model.parameters(),lr=1e-4, weight_decay=1e-4)
scheduler_steplr = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=0.9 * epochs)
scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=1, total_epoch=0.05 * epochs, after_scheduler=scheduler_steplr)


# =========================
# AverageMeter 类用于记录损失
# =========================
class AverageMeter(object):
    def __init__(self):
        self.value = 0
        self.value_avg = 0
        self.value_sum = 0
        self.count = 0

    def reset(self):
        self.value = 0
        self.value_avg = 0
        self.value_sum = 0
        self.count = 0

    def update(self, value, count):
        self.value = value
        self.value_sum += value * count
        self.count += count
        self.value_avg = self.value_sum / self.count

# =========================
# Train 函数
# =========================
def train(model, train_loader, optimizer, epoch):
    train_pbar = tqdm(enumerate(train_loader), total=len(train_loader), dynamic_ncols=True)
    losses = AverageMeter()
    l1s = AverageMeter()
    l2s = AverageMeter()
    rls = AverageMeter()

    y_pred, y_true = [], []

    model.train()
    moving_baseline = 0.6
    alpha = 0.9
    
    lambda_recon = 0.3
    

        
    lambda_rl = 0.5

    for cur_iter, data in train_pbar:
        img = data['images'].to(device)
        audio = data['audio'].to(device)
        text = data['text'].to(device)
        label = data['labels'].to(device).view(-1, 1)
        batchsize = img.shape[0]

        optimizer.zero_grad()

        output, h_v_generated, log_probs, gates, probs = model(img, audio, text)

        l1 = emo_loss_fn(output, label)
        l2 = SupervisedContrastiveLoss(h_v_generated, label)

        # === REINFORCE ===
        mae_clamped = l1.detach().clamp(0, 2)
        reward = 1.0 - mae_clamped / 2.0
        advantage = reward - moving_baseline
        loss_rl = sum([-(lp.squeeze() * advantage).mean() for lp in log_probs])
        moving_baseline = alpha * moving_baseline + (1 - alpha) * reward.item()

       

        # === 总损失 ===
        loss = l1 + lambda_recon * l2 + lambda_rl * loss_rl
        loss.backward()
        optimizer.step()

        losses.update(loss.item(), batchsize)
        l1s.update(l1.item(), batchsize)
        l2s.update(l2.item(), batchsize)
        rls.update(loss_rl.item(), batchsize)

        y_pred.append(output.cpu())
        y_true.append(label.cpu())

        train_pbar.set_description(f'train [Epoch {epoch}]')
        train_pbar.set_postfix({
            'loss': '{:.5f}'.format(losses.value_avg),
            'emo_loss': '{:.4f}'.format(l1s.value_avg),
            'visual_loss': '{:.4f}'.format(l2s.value_avg),
            'RL_loss': '{:.4f}'.format(rls.value_avg),
            'λ_recon': '{:.2f}'.format(lambda_recon),
            'λ_RL': '{:.2f}'.format(lambda_rl),
            'lr': '{:.2e}'.format(optimizer.param_groups[0]['lr'])
        }, refresh=False)

    pred, true = torch.cat(y_pred), torch.cat(y_true)
    mae = torch.mean(torch.abs(pred - true)).item()
    tqdm.write(f"train MAE: {mae:.4f}")
    train_mae.append(mae)



# =========================
# Evaluate 函数
# =========================
def evaluate(model, eval_loader, optimizer, epoch):
    metric = MetricsTop()
    test_pbar = tqdm(enumerate(eval_loader), total=len(eval_loader))
    losses = AverageMeter()
    l1s = AverageMeter()
    l2s = AverageMeter()

    y_pred, y_true = [], []

    model.eval()
    with torch.no_grad():
        for cur_iter, data in test_pbar:
            img = data['images'].to(device)
            audio = data['audio'].to(device)
            text = data['text'].to(device)
            label = data['labels'].to(device).view(-1, 1)
            batchsize = img.shape[0]

            output, h_v_generated, log_probs, gates, probs = model(img, audio, text)           

            l1 = emo_loss_fn(output, label)
            l2 = SupervisedContrastiveLoss(h_v_generated, label)

            
            lambda_recon = 0.3
            
                
            loss = l1 + lambda_recon * l2
            
            y_pred.append(output.cpu())
            y_true.append(label.cpu())

            losses.update(loss.item(), batchsize)
            l1s.update(l1.item(), batchsize)
            l2s.update(l2.item(), batchsize)


            test_pbar.set_description('eval')
            test_pbar.set_postfix({
                'loss': '{:.5f}'.format(losses.value_avg),
                'emo_loss': '{:.4f}'.format(l1s.value_avg),
                'visual_loss': '{:.4f}'.format(l2s.value_avg),
                'lambda_recon': '{:.2f}'.format(lambda_recon),
                'lr': '{:.2e}'.format(optimizer.param_groups[0]['lr'])
            }, refresh=False)

        pred, true = torch.cat(y_pred), torch.cat(y_true)
        eval_func = metric.getMetics("SIMS")
        eval_results = eval_func(pred, true)
        
        print(f"Evaluation Results:")
        for key,value in eval_results.items():
            print(f"{key}: {value:.4f}")
        
        gate_stats = torch.cat(gates, dim=1)  # [B, num_layers]
        gate_ratio = gate_stats.float().mean(dim=0).cpu().numpy()

        print('Gate Activation Ratio:', gate_ratio)
        
        save_validation_metrics(epoch,eval_results)
        save_best_model(model, eval_results['MAE'])


In [None]:
for epoch in range(1, epochs + 1):
    train(model, train_loader, optimizer, epoch)
    evaluate(model, val_loader, optimizer, epoch)  
    scheduler_warmup.step()

train [Epoch 1]: 100%|██████████| 86/86 [01:20<00:00,  1.07it/s, loss=1.35668, emo_loss=0.7340, visual_loss=2.0604, RL_loss=0.0091, λ_recon=0.30, λ_RL=0.50, lr=0.00e+00]


train MAE: 0.7392


eval: 100%|██████████| 8/8 [00:09<00:00,  1.23s/it, loss=1.36490, emo_loss=0.7323, visual_loss=2.1087, lambda_recon=0.30, lr=0.00e+00]


Evaluation Results:
Mult_acc_2: 0.3048
Mult_acc_3: 0.3048
Mult_acc_5: 0.1513
F1_score: 0.4672
MAE: 0.7397
Corr: -0.0019
Gate Activation Ratio: [[       0.25]
 [      0.625]
 [        0.5]]
✅ Saved best model (MAE = 0.7397) to best_model_chsims.pt


train [Epoch 2]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=1.07665, emo_loss=0.4281, visual_loss=2.0582, RL_loss=0.0621, λ_recon=0.30, λ_RL=0.50, lr=1.00e-05]


train MAE: 0.5500


eval: 100%|██████████| 8/8 [00:09<00:00,  1.18s/it, loss=0.96359, emo_loss=0.3313, visual_loss=2.1076, lambda_recon=0.30, lr=1.00e-05]


Evaluation Results:
Mult_acc_2: 0.7390
Mult_acc_3: 0.6228
Mult_acc_5: 0.2741
F1_score: 0.7330
MAE: 0.4802
Corr: 0.5566
Gate Activation Ratio: [[      0.625]
 [       0.75]
 [      0.625]]
✅ Saved best model (MAE = 0.4802) to best_model_chsims.pt


train [Epoch 3]: 100%|██████████| 86/86 [01:17<00:00,  1.12it/s, loss=0.75355, emo_loss=0.1414, visual_loss=1.8953, RL_loss=0.0871, λ_recon=0.30, λ_RL=0.50, lr=2.00e-05]


train MAE: 0.2898


eval: 100%|██████████| 8/8 [00:10<00:00,  1.27s/it, loss=0.94416, emo_loss=0.3451, visual_loss=1.9970, lambda_recon=0.30, lr=2.00e-05]


Evaluation Results:
Mult_acc_2: 0.7303
Mult_acc_3: 0.6491
Mult_acc_5: 0.4057
F1_score: 0.7250
MAE: 0.4294
Corr: 0.5765
Gate Activation Ratio: [[       0.25]
 [      0.375]
 [       0.25]]
✅ Saved best model (MAE = 0.4294) to best_model_chsims.pt


train [Epoch 4]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.56492, emo_loss=0.0428, visual_loss=1.5857, RL_loss=0.0928, λ_recon=0.30, λ_RL=0.50, lr=3.00e-05]


train MAE: 0.1497


eval: 100%|██████████| 8/8 [00:11<00:00,  1.39s/it, loss=0.92735, emo_loss=0.3371, visual_loss=1.9674, lambda_recon=0.30, lr=3.00e-05]


Evaluation Results:
Mult_acc_2: 0.7390
Mult_acc_3: 0.6382
Mult_acc_5: 0.3991
F1_score: 0.7324
MAE: 0.4289
Corr: 0.5972
Gate Activation Ratio: [[        0.5]
 [      0.625]
 [      0.125]]
✅ Saved best model (MAE = 0.4289) to best_model_chsims.pt


train [Epoch 5]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.51054, emo_loss=0.0225, visual_loss=1.4716, RL_loss=0.0932, λ_recon=0.30, λ_RL=0.50, lr=4.00e-05]


train MAE: 0.1103


eval: 100%|██████████| 8/8 [00:09<00:00,  1.18s/it, loss=0.94289, emo_loss=0.3559, visual_loss=1.9566, lambda_recon=0.30, lr=4.00e-05]

Evaluation Results:
Mult_acc_2: 0.6952
Mult_acc_3: 0.5987
Mult_acc_5: 0.3925
F1_score: 0.6841
MAE: 0.4376
Corr: 0.5756
Gate Activation Ratio: [[       0.25]
 [        0.5]
 [      0.125]]



train [Epoch 6]: 100%|██████████| 86/86 [01:14<00:00,  1.16it/s, loss=0.48343, emo_loss=0.0167, visual_loss=1.4042, RL_loss=0.0909, λ_recon=0.30, λ_RL=0.50, lr=5.00e-05]

train MAE: 0.0949



eval: 100%|██████████| 8/8 [00:10<00:00,  1.28s/it, loss=0.92933, emo_loss=0.3399, visual_loss=1.9648, lambda_recon=0.30, lr=5.00e-05]


Evaluation Results:
Mult_acc_2: 0.7478
Mult_acc_3: 0.6579
Mult_acc_5: 0.4035
F1_score: 0.7423
MAE: 0.4249
Corr: 0.5897
Gate Activation Ratio: [[      0.375]
 [      0.625]
 [      0.125]]
✅ Saved best model (MAE = 0.4249) to best_model_chsims.pt


train [Epoch 7]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.47299, emo_loss=0.0147, visual_loss=1.3823, RL_loss=0.0873, λ_recon=0.30, λ_RL=0.50, lr=6.00e-05]


train MAE: 0.0896


eval: 100%|██████████| 8/8 [00:09<00:00,  1.19s/it, loss=0.92569, emo_loss=0.3319, visual_loss=1.9793, lambda_recon=0.30, lr=6.00e-05]

Evaluation Results:
Mult_acc_2: 0.7741
Mult_acc_3: 0.6623
Mult_acc_5: 0.4013
F1_score: 0.7774
MAE: 0.4298
Corr: 0.5741
Gate Activation Ratio: [[      0.375]
 [       0.25]
 [       0.25]]



train [Epoch 8]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.45536, emo_loss=0.0088, visual_loss=1.3507, RL_loss=0.0828, λ_recon=0.30, λ_RL=0.50, lr=7.00e-05]


train MAE: 0.0699


eval: 100%|██████████| 8/8 [00:09<00:00,  1.24s/it, loss=0.95196, emo_loss=0.3624, visual_loss=1.9651, lambda_recon=0.30, lr=7.00e-05]

Evaluation Results:
Mult_acc_2: 0.7390
Mult_acc_3: 0.6491
Mult_acc_5: 0.4342
F1_score: 0.7364
MAE: 0.4261
Corr: 0.5685
Gate Activation Ratio: [[       0.25]
 [       0.25]
 [      0.125]]



train [Epoch 9]: 100%|██████████| 86/86 [01:15<00:00,  1.15it/s, loss=0.47628, emo_loss=0.0243, visual_loss=1.3776, RL_loss=0.0775, λ_recon=0.30, λ_RL=0.50, lr=8.00e-05]


train MAE: 0.0973


eval: 100%|██████████| 8/8 [00:09<00:00,  1.17s/it, loss=1.00204, emo_loss=0.3960, visual_loss=2.0203, lambda_recon=0.30, lr=8.00e-05]

Evaluation Results:
Mult_acc_2: 0.6798
Mult_acc_3: 0.5702
Mult_acc_5: 0.3026
F1_score: 0.6702
MAE: 0.4981
Corr: 0.4545
Gate Activation Ratio: [[      0.375]
 [      0.625]
 [       0.25]]



train [Epoch 10]: 100%|██████████| 86/86 [01:17<00:00,  1.12it/s, loss=0.57731, emo_loss=0.0809, visual_loss=1.5277, RL_loss=0.0761, λ_recon=0.30, λ_RL=0.50, lr=9.00e-05]

train MAE: 0.1971



eval: 100%|██████████| 8/8 [00:10<00:00,  1.30s/it, loss=0.98490, emo_loss=0.3873, visual_loss=1.9918, lambda_recon=0.30, lr=9.00e-05]

Evaluation Results:
Mult_acc_2: 0.7303
Mult_acc_3: 0.6228
Mult_acc_5: 0.3750
F1_score: 0.7295
MAE: 0.4696
Corr: 0.5148
Gate Activation Ratio: [[       0.25]
 [       0.75]
 [      0.125]]



train [Epoch 11]: 100%|██████████| 86/86 [01:18<00:00,  1.09it/s, loss=0.49206, emo_loss=0.0328, visual_loss=1.4079, RL_loss=0.0737, λ_recon=0.30, λ_RL=0.50, lr=1.00e-04]


train MAE: 0.1246


eval: 100%|██████████| 8/8 [00:10<00:00,  1.29s/it, loss=1.06949, emo_loss=0.4613, visual_loss=2.0273, lambda_recon=0.30, lr=1.00e-04]

Evaluation Results:
Mult_acc_2: 0.6689
Mult_acc_3: 0.5877
Mult_acc_5: 0.3750
F1_score: 0.6565
MAE: 0.4957
Corr: 0.4764
Gate Activation Ratio: [[       0.25]
 [      0.375]
 [      0.375]]



train [Epoch 12]: 100%|██████████| 86/86 [01:15<00:00,  1.14it/s, loss=0.53218, emo_loss=0.0653, visual_loss=1.4547, RL_loss=0.0610, λ_recon=0.30, λ_RL=0.50, lr=1.00e-04]

train MAE: 0.1627



eval: 100%|██████████| 8/8 [00:10<00:00,  1.29s/it, loss=1.10352, emo_loss=0.4916, visual_loss=2.0397, lambda_recon=0.30, lr=1.00e-04]

Evaluation Results:
Mult_acc_2: 0.6535
Mult_acc_3: 0.5811
Mult_acc_5: 0.3289
F1_score: 0.6398
MAE: 0.5277
Corr: 0.5083
Gate Activation Ratio: [[          0]
 [      0.375]
 [        0.5]]



train [Epoch 13]: 100%|██████████| 86/86 [01:14<00:00,  1.15it/s, loss=0.53380, emo_loss=0.0642, visual_loss=1.4517, RL_loss=0.0682, λ_recon=0.30, λ_RL=0.50, lr=1.00e-04]


train MAE: 0.1761


eval: 100%|██████████| 8/8 [00:09<00:00,  1.22s/it, loss=0.97135, emo_loss=0.3639, visual_loss=2.0247, lambda_recon=0.30, lr=1.00e-04]

Evaluation Results:
Mult_acc_2: 0.7412
Mult_acc_3: 0.6250
Mult_acc_5: 0.3925
F1_score: 0.7371
MAE: 0.4528
Corr: 0.5457
Gate Activation Ratio: [[          0]
 [      0.625]
 [      0.125]]



train [Epoch 14]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.45533, emo_loss=0.0175, visual_loss=1.3508, RL_loss=0.0652, λ_recon=0.30, λ_RL=0.50, lr=1.00e-04]


train MAE: 0.0892


eval: 100%|██████████| 8/8 [00:10<00:00,  1.36s/it, loss=0.96727, emo_loss=0.3637, visual_loss=2.0119, lambda_recon=0.30, lr=1.00e-04]

Evaluation Results:
Mult_acc_2: 0.7478
Mult_acc_3: 0.6469
Mult_acc_5: 0.4123
F1_score: 0.7448
MAE: 0.4420
Corr: 0.5472
Gate Activation Ratio: [[          0]
 [       0.25]
 [       0.25]]



train [Epoch 15]: 100%|██████████| 86/86 [01:14<00:00,  1.15it/s, loss=0.43128, emo_loss=0.0057, visual_loss=1.3091, RL_loss=0.0657, λ_recon=0.30, λ_RL=0.50, lr=9.99e-05]


train MAE: 0.0536


eval: 100%|██████████| 8/8 [00:11<00:00,  1.45s/it, loss=0.98416, emo_loss=0.3817, visual_loss=2.0081, lambda_recon=0.30, lr=9.99e-05]

Evaluation Results:
Mult_acc_2: 0.7368
Mult_acc_3: 0.6272
Mult_acc_5: 0.4145
F1_score: 0.7297
MAE: 0.4450
Corr: 0.5533
Gate Activation Ratio: [[          0]
 [      0.625]
 [       0.25]]



train [Epoch 16]: 100%|██████████| 86/86 [01:17<00:00,  1.10it/s, loss=0.42742, emo_loss=0.0035, visual_loss=1.3074, RL_loss=0.0633, λ_recon=0.30, λ_RL=0.50, lr=9.99e-05]


train MAE: 0.0446


eval: 100%|██████████| 8/8 [00:10<00:00,  1.26s/it, loss=0.97992, emo_loss=0.3734, visual_loss=2.0217, lambda_recon=0.30, lr=9.99e-05]

Evaluation Results:
Mult_acc_2: 0.7434
Mult_acc_3: 0.6316
Mult_acc_5: 0.3991
F1_score: 0.7381
MAE: 0.4439
Corr: 0.5483
Gate Activation Ratio: [[          0]
 [      0.375]
 [      0.625]]



train [Epoch 17]: 100%|██████████| 86/86 [01:21<00:00,  1.05it/s, loss=0.42227, emo_loss=0.0027, visual_loss=1.2979, RL_loss=0.0605, λ_recon=0.30, λ_RL=0.50, lr=9.98e-05]


train MAE: 0.0386


eval: 100%|██████████| 8/8 [00:09<00:00,  1.20s/it, loss=0.96629, emo_loss=0.3622, visual_loss=2.0138, lambda_recon=0.30, lr=9.98e-05]

Evaluation Results:
Mult_acc_2: 0.7149
Mult_acc_3: 0.6228
Mult_acc_5: 0.3991
F1_score: 0.7058
MAE: 0.4457
Corr: 0.5492
Gate Activation Ratio: [[          0]
 [      0.375]
 [       0.25]]



train [Epoch 18]: 100%|██████████| 86/86 [01:14<00:00,  1.15it/s, loss=0.41755, emo_loss=0.0026, visual_loss=1.3015, RL_loss=0.0490, λ_recon=0.30, λ_RL=0.50, lr=9.97e-05]


train MAE: 0.0385


eval: 100%|██████████| 8/8 [00:10<00:00,  1.36s/it, loss=0.96989, emo_loss=0.3663, visual_loss=2.0119, lambda_recon=0.30, lr=9.97e-05]

Evaluation Results:
Mult_acc_2: 0.7215
Mult_acc_3: 0.6250
Mult_acc_5: 0.4123
F1_score: 0.7135
MAE: 0.4451
Corr: 0.5519
Gate Activation Ratio: [[          0]
 [      0.375]
 [       0.25]]



train [Epoch 19]: 100%|██████████| 86/86 [01:21<00:00,  1.05it/s, loss=0.41525, emo_loss=0.0019, visual_loss=1.2941, RL_loss=0.0503, λ_recon=0.30, λ_RL=0.50, lr=9.96e-05]


train MAE: 0.0320


eval: 100%|██████████| 8/8 [00:10<00:00,  1.36s/it, loss=0.97586, emo_loss=0.3711, visual_loss=2.0160, lambda_recon=0.30, lr=9.96e-05]

Evaluation Results:
Mult_acc_2: 0.7259
Mult_acc_3: 0.6250
Mult_acc_5: 0.4123
F1_score: 0.7192
MAE: 0.4422
Corr: 0.5543
Gate Activation Ratio: [[          0]
 [        0.5]
 [          0]]



train [Epoch 20]: 100%|██████████| 86/86 [01:18<00:00,  1.09it/s, loss=0.41833, emo_loss=0.0024, visual_loss=1.2929, RL_loss=0.0561, λ_recon=0.30, λ_RL=0.50, lr=9.95e-05]


train MAE: 0.0381


eval: 100%|██████████| 8/8 [00:10<00:00,  1.26s/it, loss=0.98004, emo_loss=0.3780, visual_loss=2.0068, lambda_recon=0.30, lr=9.95e-05]

Evaluation Results:
Mult_acc_2: 0.7610
Mult_acc_3: 0.6557
Mult_acc_5: 0.4320
F1_score: 0.7594
MAE: 0.4385
Corr: 0.5586
Gate Activation Ratio: [[      0.125]
 [       0.25]
 [      0.125]]



train [Epoch 21]: 100%|██████████| 86/86 [01:14<00:00,  1.16it/s, loss=0.42098, emo_loss=0.0017, visual_loss=1.2943, RL_loss=0.0619, λ_recon=0.30, λ_RL=0.50, lr=9.94e-05]


train MAE: 0.0314


eval: 100%|██████████| 8/8 [00:10<00:00,  1.30s/it, loss=0.97443, emo_loss=0.3704, visual_loss=2.0134, lambda_recon=0.30, lr=9.94e-05]

Evaluation Results:
Mult_acc_2: 0.7522
Mult_acc_3: 0.6316
Mult_acc_5: 0.4211
F1_score: 0.7489
MAE: 0.4393
Corr: 0.5517
Gate Activation Ratio: [[          0]
 [      0.125]
 [       0.25]]



train [Epoch 22]: 100%|██████████| 86/86 [01:19<00:00,  1.09it/s, loss=0.42152, emo_loss=0.0015, visual_loss=1.2892, RL_loss=0.0665, λ_recon=0.30, λ_RL=0.50, lr=9.92e-05]


train MAE: 0.0294


eval: 100%|██████████| 8/8 [00:11<00:00,  1.38s/it, loss=0.97144, emo_loss=0.3682, visual_loss=2.0106, lambda_recon=0.30, lr=9.92e-05]

Evaluation Results:
Mult_acc_2: 0.7566
Mult_acc_3: 0.6425
Mult_acc_5: 0.4232
F1_score: 0.7533
MAE: 0.4361
Corr: 0.5610
Gate Activation Ratio: [[          0]
 [        0.5]
 [        0.5]]



train [Epoch 23]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.41622, emo_loss=0.0013, visual_loss=1.2781, RL_loss=0.0629, λ_recon=0.30, λ_RL=0.50, lr=9.91e-05]

train MAE: 0.0275



eval: 100%|██████████| 8/8 [00:11<00:00,  1.45s/it, loss=0.97764, emo_loss=0.3727, visual_loss=2.0166, lambda_recon=0.30, lr=9.91e-05]

Evaluation Results:
Mult_acc_2: 0.7368
Mult_acc_3: 0.6206
Mult_acc_5: 0.4167
F1_score: 0.7309
MAE: 0.4385
Corr: 0.5605
Gate Activation Ratio: [[          0]
 [       0.25]
 [        0.5]]



train [Epoch 24]: 100%|██████████| 86/86 [01:14<00:00,  1.16it/s, loss=0.41697, emo_loss=0.0014, visual_loss=1.2849, RL_loss=0.0602, λ_recon=0.30, λ_RL=0.50, lr=9.89e-05]


train MAE: 0.0280


eval: 100%|██████████| 8/8 [00:10<00:00,  1.36s/it, loss=0.98292, emo_loss=0.3768, visual_loss=2.0205, lambda_recon=0.30, lr=9.89e-05]

Evaluation Results:
Mult_acc_2: 0.7303
Mult_acc_3: 0.6075
Mult_acc_5: 0.3991
F1_score: 0.7240
MAE: 0.4401
Corr: 0.5540
Gate Activation Ratio: [[          0]
 [          0]
 [      0.875]]



train [Epoch 25]: 100%|██████████| 86/86 [01:20<00:00,  1.07it/s, loss=0.41456, emo_loss=0.0014, visual_loss=1.2826, RL_loss=0.0568, λ_recon=0.30, λ_RL=0.50, lr=9.87e-05]


train MAE: 0.0282


eval: 100%|██████████| 8/8 [00:10<00:00,  1.26s/it, loss=0.98201, emo_loss=0.3766, visual_loss=2.0179, lambda_recon=0.30, lr=9.87e-05]

Evaluation Results:
Mult_acc_2: 0.7500
Mult_acc_3: 0.6382
Mult_acc_5: 0.4167
F1_score: 0.7461
MAE: 0.4389
Corr: 0.5541
Gate Activation Ratio: [[      0.125]
 [       0.25]
 [      0.625]]



train [Epoch 26]: 100%|██████████| 86/86 [01:19<00:00,  1.08it/s, loss=0.41578, emo_loss=0.0010, visual_loss=1.2833, RL_loss=0.0595, λ_recon=0.30, λ_RL=0.50, lr=9.85e-05]


train MAE: 0.0238


eval: 100%|██████████| 8/8 [00:10<00:00,  1.36s/it, loss=0.97936, emo_loss=0.3730, visual_loss=2.0213, lambda_recon=0.30, lr=9.85e-05]

Evaluation Results:
Mult_acc_2: 0.7325
Mult_acc_3: 0.6272
Mult_acc_5: 0.4167
F1_score: 0.7264
MAE: 0.4409
Corr: 0.5554
Gate Activation Ratio: [[          0]
 [      0.375]
 [      0.625]]



train [Epoch 27]: 100%|██████████| 86/86 [01:15<00:00,  1.13it/s, loss=0.41801, emo_loss=0.0009, visual_loss=1.2779, RL_loss=0.0674, λ_recon=0.30, λ_RL=0.50, lr=9.83e-05]


train MAE: 0.0229


eval: 100%|██████████| 8/8 [00:11<00:00,  1.42s/it, loss=0.97101, emo_loss=0.3679, visual_loss=2.0103, lambda_recon=0.30, lr=9.83e-05]

Evaluation Results:
Mult_acc_2: 0.7303
Mult_acc_3: 0.6250
Mult_acc_5: 0.4123
F1_score: 0.7234
MAE: 0.4354
Corr: 0.5667
Gate Activation Ratio: [[      0.125]
 [      0.125]
 [      0.625]]



train [Epoch 28]: 100%|██████████| 86/86 [01:13<00:00,  1.16it/s, loss=0.42013, emo_loss=0.0009, visual_loss=1.2771, RL_loss=0.0723, λ_recon=0.30, λ_RL=0.50, lr=9.81e-05]


train MAE: 0.0226


eval: 100%|██████████| 8/8 [00:09<00:00,  1.17s/it, loss=0.98211, emo_loss=0.3782, visual_loss=2.0131, lambda_recon=0.30, lr=9.81e-05]

Evaluation Results:
Mult_acc_2: 0.7478
Mult_acc_3: 0.6294
Mult_acc_5: 0.4232
F1_score: 0.7448
MAE: 0.4376
Corr: 0.5535
Gate Activation Ratio: [[      0.125]
 [      0.625]
 [       0.25]]



train [Epoch 29]: 100%|██████████| 86/86 [01:18<00:00,  1.09it/s, loss=0.42342, emo_loss=0.0010, visual_loss=1.2823, RL_loss=0.0755, λ_recon=0.30, λ_RL=0.50, lr=9.78e-05]


train MAE: 0.0239


eval: 100%|██████████| 8/8 [00:09<00:00,  1.20s/it, loss=0.97417, emo_loss=0.3702, visual_loss=2.0131, lambda_recon=0.30, lr=9.78e-05]

Evaluation Results:
Mult_acc_2: 0.7303
Mult_acc_3: 0.6206
Mult_acc_5: 0.4211
F1_score: 0.7237
MAE: 0.4403
Corr: 0.5585
Gate Activation Ratio: [[      0.125]
 [      0.625]
 [        0.5]]



train [Epoch 30]: 100%|██████████| 86/86 [01:21<00:00,  1.05it/s, loss=0.42076, emo_loss=0.0010, visual_loss=1.2730, RL_loss=0.0757, λ_recon=0.30, λ_RL=0.50, lr=9.76e-05]


train MAE: 0.0251


eval: 100%|██████████| 8/8 [00:10<00:00,  1.29s/it, loss=0.98235, emo_loss=0.3780, visual_loss=2.0144, lambda_recon=0.30, lr=9.76e-05]

Evaluation Results:
Mult_acc_2: 0.7325
Mult_acc_3: 0.6250
Mult_acc_5: 0.4123
F1_score: 0.7261
MAE: 0.4398
Corr: 0.5545
Gate Activation Ratio: [[          0]
 [      0.625]
 [       0.75]]



train [Epoch 31]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.42053, emo_loss=0.0007, visual_loss=1.2679, RL_loss=0.0788, λ_recon=0.30, λ_RL=0.50, lr=9.73e-05]


train MAE: 0.0209


eval: 100%|██████████| 8/8 [00:10<00:00,  1.31s/it, loss=0.98066, emo_loss=0.3740, visual_loss=2.0220, lambda_recon=0.30, lr=9.73e-05]

Evaluation Results:
Mult_acc_2: 0.7500
Mult_acc_3: 0.6382
Mult_acc_5: 0.4189
F1_score: 0.7457
MAE: 0.4403
Corr: 0.5514
Gate Activation Ratio: [[      0.125]
 [       0.75]
 [       0.75]]



train [Epoch 32]: 100%|██████████| 86/86 [01:17<00:00,  1.12it/s, loss=0.42027, emo_loss=0.0009, visual_loss=1.2740, RL_loss=0.0744, λ_recon=0.30, λ_RL=0.50, lr=9.70e-05]


train MAE: 0.0226


eval: 100%|██████████| 8/8 [00:10<00:00,  1.32s/it, loss=0.97510, emo_loss=0.3707, visual_loss=2.0147, lambda_recon=0.30, lr=9.70e-05]

Evaluation Results:
Mult_acc_2: 0.7193
Mult_acc_3: 0.6118
Mult_acc_5: 0.4035
F1_score: 0.7114
MAE: 0.4397
Corr: 0.5591
Gate Activation Ratio: [[      0.375]
 [        0.5]
 [       0.75]]



train [Epoch 33]: 100%|██████████| 86/86 [01:15<00:00,  1.13it/s, loss=0.41533, emo_loss=0.0008, visual_loss=1.2785, RL_loss=0.0619, λ_recon=0.30, λ_RL=0.50, lr=9.67e-05]


train MAE: 0.0211


eval: 100%|██████████| 8/8 [00:10<00:00,  1.30s/it, loss=0.98017, emo_loss=0.3704, visual_loss=2.0327, lambda_recon=0.30, lr=9.67e-05]

Evaluation Results:
Mult_acc_2: 0.7149
Mult_acc_3: 0.6184
Mult_acc_5: 0.3838
F1_score: 0.7072
MAE: 0.4458
Corr: 0.5474
Gate Activation Ratio: [[          0]
 [      0.875]
 [          1]]



train [Epoch 34]: 100%|██████████| 86/86 [01:13<00:00,  1.18it/s, loss=0.41011, emo_loss=0.0008, visual_loss=1.2751, RL_loss=0.0535, λ_recon=0.30, λ_RL=0.50, lr=9.64e-05]


train MAE: 0.0226


eval: 100%|██████████| 8/8 [00:09<00:00,  1.24s/it, loss=0.97992, emo_loss=0.3749, visual_loss=2.0166, lambda_recon=0.30, lr=9.64e-05]

Evaluation Results:
Mult_acc_2: 0.7434
Mult_acc_3: 0.6338
Mult_acc_5: 0.4145
F1_score: 0.7388
MAE: 0.4429
Corr: 0.5485
Gate Activation Ratio: [[          0]
 [          1]
 [      0.875]]



train [Epoch 35]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.41081, emo_loss=0.0010, visual_loss=1.2805, RL_loss=0.0513, λ_recon=0.30, λ_RL=0.50, lr=9.60e-05]


train MAE: 0.0246


eval: 100%|██████████| 8/8 [00:09<00:00,  1.17s/it, loss=0.99508, emo_loss=0.3889, visual_loss=2.0205, lambda_recon=0.30, lr=9.60e-05]

Evaluation Results:
Mult_acc_2: 0.7215
Mult_acc_3: 0.6206
Mult_acc_5: 0.4189
F1_score: 0.7138
MAE: 0.4453
Corr: 0.5459
Gate Activation Ratio: [[      0.125]
 [      0.875]
 [      0.875]]



train [Epoch 36]: 100%|██████████| 86/86 [01:22<00:00,  1.05it/s, loss=0.40669, emo_loss=0.0007, visual_loss=1.2757, RL_loss=0.0466, λ_recon=0.30, λ_RL=0.50, lr=9.57e-05]


train MAE: 0.0204


eval: 100%|██████████| 8/8 [00:10<00:00,  1.26s/it, loss=0.99082, emo_loss=0.3837, visual_loss=2.0238, lambda_recon=0.30, lr=9.57e-05]

Evaluation Results:
Mult_acc_2: 0.7346
Mult_acc_3: 0.6294
Mult_acc_5: 0.4189
F1_score: 0.7288
MAE: 0.4415
Corr: 0.5504
Gate Activation Ratio: [[          0]
 [          1]
 [       0.75]]



train [Epoch 37]: 100%|██████████| 86/86 [01:15<00:00,  1.15it/s, loss=0.40568, emo_loss=0.0006, visual_loss=1.2694, RL_loss=0.0485, λ_recon=0.30, λ_RL=0.50, lr=9.53e-05]

train MAE: 0.0188



eval: 100%|██████████| 8/8 [00:10<00:00,  1.32s/it, loss=0.99942, emo_loss=0.3939, visual_loss=2.0183, lambda_recon=0.30, lr=9.53e-05]

Evaluation Results:
Mult_acc_2: 0.7215
Mult_acc_3: 0.6228
Mult_acc_5: 0.4101
F1_score: 0.7147
MAE: 0.4399
Corr: 0.5492
Gate Activation Ratio: [[      0.125]
 [          1]
 [       0.75]]



train [Epoch 38]: 100%|██████████| 86/86 [01:16<00:00,  1.13it/s, loss=0.40708, emo_loss=0.0006, visual_loss=1.2781, RL_loss=0.0461, λ_recon=0.30, λ_RL=0.50, lr=9.49e-05]

train MAE: 0.0196



eval: 100%|██████████| 8/8 [00:10<00:00,  1.26s/it, loss=0.98504, emo_loss=0.3760, visual_loss=2.0301, lambda_recon=0.30, lr=9.49e-05]

Evaluation Results:
Mult_acc_2: 0.7434
Mult_acc_3: 0.6316
Mult_acc_5: 0.4145
F1_score: 0.7381
MAE: 0.4453
Corr: 0.5408
Gate Activation Ratio: [[          0]
 [          1]
 [      0.625]]



train [Epoch 39]: 100%|██████████| 86/86 [01:16<00:00,  1.13it/s, loss=0.40895, emo_loss=0.0010, visual_loss=1.2741, RL_loss=0.0516, λ_recon=0.30, λ_RL=0.50, lr=9.46e-05]


train MAE: 0.0229


eval: 100%|██████████| 8/8 [00:10<00:00,  1.29s/it, loss=0.98095, emo_loss=0.3755, visual_loss=2.0182, lambda_recon=0.30, lr=9.46e-05]

Evaluation Results:
Mult_acc_2: 0.7632
Mult_acc_3: 0.6250
Mult_acc_5: 0.4211
F1_score: 0.7609
MAE: 0.4325
Corr: 0.5518
Gate Activation Ratio: [[       0.25]
 [      0.875]
 [      0.125]]



train [Epoch 40]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.40768, emo_loss=0.0008, visual_loss=1.2758, RL_loss=0.0482, λ_recon=0.30, λ_RL=0.50, lr=9.41e-05]

train MAE: 0.0214



eval: 100%|██████████| 8/8 [00:09<00:00,  1.23s/it, loss=0.98842, emo_loss=0.3856, visual_loss=2.0094, lambda_recon=0.30, lr=9.41e-05]

Evaluation Results:
Mult_acc_2: 0.7434
Mult_acc_3: 0.6294
Mult_acc_5: 0.3991
F1_score: 0.7378
MAE: 0.4375
Corr: 0.5538
Gate Activation Ratio: [[      0.125]
 [          1]
 [       0.25]]



train [Epoch 41]: 100%|██████████| 86/86 [01:20<00:00,  1.06it/s, loss=0.41662, emo_loss=0.0026, visual_loss=1.2892, RL_loss=0.0546, λ_recon=0.30, λ_RL=0.50, lr=9.37e-05]


train MAE: 0.0299


eval: 100%|██████████| 8/8 [00:10<00:00,  1.29s/it, loss=0.95317, emo_loss=0.3478, visual_loss=2.0179, lambda_recon=0.30, lr=9.37e-05]

Evaluation Results:
Mult_acc_2: 0.7610
Mult_acc_3: 0.6250
Mult_acc_5: 0.3553
F1_score: 0.7612
MAE: 0.4511
Corr: 0.5494
Gate Activation Ratio: [[          0]
 [          1]
 [          0]]



train [Epoch 42]: 100%|██████████| 86/86 [01:19<00:00,  1.08it/s, loss=0.50675, emo_loss=0.0491, visual_loss=1.4380, RL_loss=0.0526, λ_recon=0.30, λ_RL=0.50, lr=9.33e-05]


train MAE: 0.1440


eval: 100%|██████████| 8/8 [00:10<00:00,  1.37s/it, loss=1.08421, emo_loss=0.4662, visual_loss=2.0601, lambda_recon=0.30, lr=9.33e-05]

Evaluation Results:
Mult_acc_2: 0.6447
Mult_acc_3: 0.5789
Mult_acc_5: 0.3158
F1_score: 0.6307
MAE: 0.5415
Corr: 0.4516
Gate Activation Ratio: [[      0.125]
 [          1]
 [       0.25]]



train [Epoch 43]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=0.55993, emo_loss=0.0763, visual_loss=1.5033, RL_loss=0.0653, λ_recon=0.30, λ_RL=0.50, lr=9.29e-05]


train MAE: 0.2005


eval: 100%|██████████| 8/8 [00:10<00:00,  1.35s/it, loss=1.10850, emo_loss=0.4878, visual_loss=2.0689, lambda_recon=0.30, lr=9.29e-05]

Evaluation Results:
Mult_acc_2: 0.7039
Mult_acc_3: 0.5987
Mult_acc_5: 0.3531
F1_score: 0.7049
MAE: 0.5235
Corr: 0.4124
Gate Activation Ratio: [[      0.125]
 [      0.625]
 [      0.375]]



train [Epoch 44]: 100%|██████████| 86/86 [01:18<00:00,  1.09it/s, loss=0.50640, emo_loss=0.0381, visual_loss=1.4309, RL_loss=0.0781, λ_recon=0.30, λ_RL=0.50, lr=9.24e-05]


train MAE: 0.1414


eval: 100%|██████████| 8/8 [00:11<00:00,  1.44s/it, loss=1.06178, emo_loss=0.4552, visual_loss=2.0220, lambda_recon=0.30, lr=9.24e-05]

Evaluation Results:
Mult_acc_2: 0.7105
Mult_acc_3: 0.6009
Mult_acc_5: 0.3706
F1_score: 0.7060
MAE: 0.4969
Corr: 0.4620
Gate Activation Ratio: [[      0.375]
 [      0.375]
 [      0.375]]



train [Epoch 45]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.46115, emo_loss=0.0148, visual_loss=1.3472, RL_loss=0.0844, λ_recon=0.30, λ_RL=0.50, lr=9.19e-05]


train MAE: 0.0796


eval: 100%|██████████| 8/8 [00:09<00:00,  1.23s/it, loss=1.06040, emo_loss=0.4457, visual_loss=2.0490, lambda_recon=0.30, lr=9.19e-05]

Evaluation Results:
Mult_acc_2: 0.7018
Mult_acc_3: 0.5877
Mult_acc_5: 0.3640
F1_score: 0.6975
MAE: 0.4950
Corr: 0.4527
Gate Activation Ratio: [[      0.625]
 [      0.375]
 [       0.25]]



train [Epoch 46]: 100%|██████████| 86/86 [01:15<00:00,  1.14it/s, loss=0.47062, emo_loss=0.0197, visual_loss=1.3595, RL_loss=0.0861, λ_recon=0.30, λ_RL=0.50, lr=9.15e-05]


train MAE: 0.0936


eval: 100%|██████████| 8/8 [00:09<00:00,  1.23s/it, loss=1.04805, emo_loss=0.4327, visual_loss=2.0513, lambda_recon=0.30, lr=9.15e-05]

Evaluation Results:
Mult_acc_2: 0.6974
Mult_acc_3: 0.5658
Mult_acc_5: 0.3311
F1_score: 0.6909
MAE: 0.4969
Corr: 0.4611
Gate Activation Ratio: [[        0.5]
 [          1]
 [      0.375]]



train [Epoch 47]: 100%|██████████| 86/86 [01:20<00:00,  1.07it/s, loss=0.44237, emo_loss=0.0073, visual_loss=1.3093, RL_loss=0.0845, λ_recon=0.30, λ_RL=0.50, lr=9.10e-05]


train MAE: 0.0563


eval: 100%|██████████| 8/8 [00:10<00:00,  1.26s/it, loss=1.05257, emo_loss=0.4364, visual_loss=2.0538, lambda_recon=0.30, lr=9.10e-05]

Evaluation Results:
Mult_acc_2: 0.7149
Mult_acc_3: 0.5746
Mult_acc_5: 0.3333
F1_score: 0.7088
MAE: 0.4941
Corr: 0.4575
Gate Activation Ratio: [[      0.375]
 [      0.875]
 [       0.75]]



train [Epoch 48]: 100%|██████████| 86/86 [01:19<00:00,  1.08it/s, loss=0.43338, emo_loss=0.0054, visual_loss=1.2903, RL_loss=0.0818, λ_recon=0.30, λ_RL=0.50, lr=9.05e-05]


train MAE: 0.0452


eval: 100%|██████████| 8/8 [00:10<00:00,  1.32s/it, loss=1.03779, emo_loss=0.4206, visual_loss=2.0574, lambda_recon=0.30, lr=9.05e-05]

Evaluation Results:
Mult_acc_2: 0.7237
Mult_acc_3: 0.5965
Mult_acc_5: 0.3531
F1_score: 0.7211
MAE: 0.4815
Corr: 0.4830
Gate Activation Ratio: [[       0.75]
 [      0.875]
 [      0.375]]



train [Epoch 49]: 100%|██████████| 86/86 [01:12<00:00,  1.18it/s, loss=0.42997, emo_loss=0.0026, visual_loss=1.2960, RL_loss=0.0772, λ_recon=0.30, λ_RL=0.50, lr=8.99e-05]


train MAE: 0.0381


eval: 100%|██████████| 8/8 [00:11<00:00,  1.50s/it, loss=1.05223, emo_loss=0.4342, visual_loss=2.0601, lambda_recon=0.30, lr=8.99e-05]

Evaluation Results:
Mult_acc_2: 0.7083
Mult_acc_3: 0.5833
Mult_acc_5: 0.3487
F1_score: 0.7027
MAE: 0.4918
Corr: 0.4701
Gate Activation Ratio: [[          1]
 [      0.875]
 [          1]]



train [Epoch 50]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.42159, emo_loss=0.0025, visual_loss=1.2893, RL_loss=0.0646, λ_recon=0.30, λ_RL=0.50, lr=8.94e-05]


train MAE: 0.0367


eval: 100%|██████████| 8/8 [00:12<00:00,  1.59s/it, loss=1.06444, emo_loss=0.4455, visual_loss=2.0632, lambda_recon=0.30, lr=8.94e-05]

Evaluation Results:
Mult_acc_2: 0.7083
Mult_acc_3: 0.5680
Mult_acc_5: 0.3443
F1_score: 0.7049
MAE: 0.4951
Corr: 0.4495
Gate Activation Ratio: [[      0.875]
 [      0.125]
 [          1]]



train [Epoch 51]: 100%|██████████| 86/86 [01:22<00:00,  1.05it/s, loss=0.42033, emo_loss=0.0032, visual_loss=1.2875, RL_loss=0.0618, λ_recon=0.30, λ_RL=0.50, lr=8.89e-05]


train MAE: 0.0377


eval: 100%|██████████| 8/8 [00:12<00:00,  1.54s/it, loss=1.09306, emo_loss=0.4742, visual_loss=2.0628, lambda_recon=0.30, lr=8.89e-05]

Evaluation Results:
Mult_acc_2: 0.6645
Mult_acc_3: 0.5614
Mult_acc_5: 0.3355
F1_score: 0.6531
MAE: 0.5146
Corr: 0.4279
Gate Activation Ratio: [[      0.875]
 [        0.5]
 [      0.875]]



train [Epoch 52]: 100%|██████████| 86/86 [01:20<00:00,  1.07it/s, loss=0.42976, emo_loss=0.0075, visual_loss=1.2953, RL_loss=0.0673, λ_recon=0.30, λ_RL=0.50, lr=8.83e-05]


train MAE: 0.0493


eval: 100%|██████████| 8/8 [00:09<00:00,  1.20s/it, loss=1.05692, emo_loss=0.4397, visual_loss=2.0576, lambda_recon=0.30, lr=8.83e-05]

Evaluation Results:
Mult_acc_2: 0.7127
Mult_acc_3: 0.5943
Mult_acc_5: 0.3838
F1_score: 0.7113
MAE: 0.4814
Corr: 0.4697
Gate Activation Ratio: [[      0.625]
 [       0.75]
 [          1]]



train [Epoch 53]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.42339, emo_loss=0.0035, visual_loss=1.2909, RL_loss=0.0652, λ_recon=0.30, λ_RL=0.50, lr=8.77e-05]


train MAE: 0.0360


eval: 100%|██████████| 8/8 [00:10<00:00,  1.31s/it, loss=1.06736, emo_loss=0.4438, visual_loss=2.0785, lambda_recon=0.30, lr=8.77e-05]

Evaluation Results:
Mult_acc_2: 0.7193
Mult_acc_3: 0.5680
Mult_acc_5: 0.3289
F1_score: 0.7153
MAE: 0.5047
Corr: 0.4351
Gate Activation Ratio: [[      0.875]
 [        0.5]
 [          1]]



train [Epoch 54]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=0.42174, emo_loss=0.0036, visual_loss=1.2926, RL_loss=0.0607, λ_recon=0.30, λ_RL=0.50, lr=8.72e-05]


train MAE: 0.0387


eval: 100%|██████████| 8/8 [00:11<00:00,  1.46s/it, loss=1.08442, emo_loss=0.4655, visual_loss=2.0631, lambda_recon=0.30, lr=8.72e-05]

Evaluation Results:
Mult_acc_2: 0.7039
Mult_acc_3: 0.5877
Mult_acc_5: 0.3684
F1_score: 0.6964
MAE: 0.5030
Corr: 0.4545
Gate Activation Ratio: [[          1]
 [       0.25]
 [      0.875]]



train [Epoch 55]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=0.41670, emo_loss=0.0021, visual_loss=1.2794, RL_loss=0.0617, λ_recon=0.30, λ_RL=0.50, lr=8.66e-05]


train MAE: 0.0302


eval: 100%|██████████| 8/8 [00:11<00:00,  1.42s/it, loss=1.07893, emo_loss=0.4586, visual_loss=2.0678, lambda_recon=0.30, lr=8.66e-05]

Evaluation Results:
Mult_acc_2: 0.7018
Mult_acc_3: 0.5833
Mult_acc_5: 0.3838
F1_score: 0.6966
MAE: 0.4983
Corr: 0.4589
Gate Activation Ratio: [[      0.375]
 [       0.75]
 [      0.875]]



train [Epoch 56]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.41354, emo_loss=0.0009, visual_loss=1.2824, RL_loss=0.0557, λ_recon=0.30, λ_RL=0.50, lr=8.60e-05]

train MAE: 0.0231



eval: 100%|██████████| 8/8 [00:10<00:00,  1.30s/it, loss=1.08101, emo_loss=0.4613, visual_loss=2.0658, lambda_recon=0.30, lr=8.60e-05]

Evaluation Results:
Mult_acc_2: 0.6886
Mult_acc_3: 0.5746
Mult_acc_5: 0.3662
F1_score: 0.6815
MAE: 0.4955
Corr: 0.4577
Gate Activation Ratio: [[      0.875]
 [       0.25]
 [      0.875]]



train [Epoch 57]: 100%|██████████| 86/86 [01:19<00:00,  1.08it/s, loss=0.40922, emo_loss=0.0010, visual_loss=1.2773, RL_loss=0.0500, λ_recon=0.30, λ_RL=0.50, lr=8.54e-05]


train MAE: 0.0238


eval: 100%|██████████| 8/8 [00:10<00:00,  1.31s/it, loss=1.06475, emo_loss=0.4450, visual_loss=2.0660, lambda_recon=0.30, lr=8.54e-05]

Evaluation Results:
Mult_acc_2: 0.7039
Mult_acc_3: 0.5811
Mult_acc_5: 0.3728
F1_score: 0.6991
MAE: 0.4955
Corr: 0.4607
Gate Activation Ratio: [[      0.625]
 [      0.375]
 [          1]]



train [Epoch 58]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=0.41423, emo_loss=0.0055, visual_loss=1.2832, RL_loss=0.0476, λ_recon=0.30, λ_RL=0.50, lr=8.47e-05]


train MAE: 0.0387


eval: 100%|██████████| 8/8 [00:10<00:00,  1.30s/it, loss=1.07631, emo_loss=0.4567, visual_loss=2.0655, lambda_recon=0.30, lr=8.47e-05]

Evaluation Results:
Mult_acc_2: 0.7500
Mult_acc_3: 0.6096
Mult_acc_5: 0.3816
F1_score: 0.7587
MAE: 0.4878
Corr: 0.4602
Gate Activation Ratio: [[       0.75]
 [       0.25]
 [          1]]



train [Epoch 59]: 100%|██████████| 86/86 [01:19<00:00,  1.08it/s, loss=0.45640, emo_loss=0.0260, visual_loss=1.3541, RL_loss=0.0483, λ_recon=0.30, λ_RL=0.50, lr=8.41e-05]


train MAE: 0.1002


eval: 100%|██████████| 8/8 [00:10<00:00,  1.34s/it, loss=1.07122, emo_loss=0.4545, visual_loss=2.0556, lambda_recon=0.30, lr=8.41e-05]

Evaluation Results:
Mult_acc_2: 0.7105
Mult_acc_3: 0.6053
Mult_acc_5: 0.3640
F1_score: 0.7013
MAE: 0.4979
Corr: 0.4769
Gate Activation Ratio: [[       0.75]
 [      0.125]
 [          1]]



train [Epoch 60]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=0.41896, emo_loss=0.0060, visual_loss=1.3028, RL_loss=0.0442, λ_recon=0.30, λ_RL=0.50, lr=8.35e-05]


train MAE: 0.0458


eval: 100%|██████████| 8/8 [00:10<00:00,  1.26s/it, loss=1.05580, emo_loss=0.4368, visual_loss=2.0635, lambda_recon=0.30, lr=8.35e-05]

Evaluation Results:
Mult_acc_2: 0.7105
Mult_acc_3: 0.6118
Mult_acc_5: 0.3925
F1_score: 0.7060
MAE: 0.4841
Corr: 0.4818
Gate Activation Ratio: [[      0.875]
 [      0.375]
 [          1]]



train [Epoch 61]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.41101, emo_loss=0.0026, visual_loss=1.2862, RL_loss=0.0450, λ_recon=0.30, λ_RL=0.50, lr=8.28e-05]


train MAE: 0.0345


eval: 100%|██████████| 8/8 [00:09<00:00,  1.21s/it, loss=1.05834, emo_loss=0.4424, visual_loss=2.0532, lambda_recon=0.30, lr=8.28e-05]

Evaluation Results:
Mult_acc_2: 0.6996
Mult_acc_3: 0.5877
Mult_acc_5: 0.3728
F1_score: 0.6894
MAE: 0.4852
Corr: 0.4883
Gate Activation Ratio: [[      0.625]
 [      0.375]
 [          1]]



train [Epoch 62]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=0.40724, emo_loss=0.0016, visual_loss=1.2859, RL_loss=0.0398, λ_recon=0.30, λ_RL=0.50, lr=8.21e-05]


train MAE: 0.0261


eval: 100%|██████████| 8/8 [00:10<00:00,  1.30s/it, loss=1.08365, emo_loss=0.4625, visual_loss=2.0706, lambda_recon=0.30, lr=8.21e-05]

Evaluation Results:
Mult_acc_2: 0.6754
Mult_acc_3: 0.5921
Mult_acc_5: 0.3728
F1_score: 0.6660
MAE: 0.5049
Corr: 0.4636
Gate Activation Ratio: [[       0.75]
 [      0.375]
 [          1]]



train [Epoch 63]: 100%|██████████| 86/86 [01:17<00:00,  1.12it/s, loss=0.40146, emo_loss=0.0030, visual_loss=1.2754, RL_loss=0.0317, λ_recon=0.30, λ_RL=0.50, lr=8.15e-05]


train MAE: 0.0287


eval: 100%|██████████| 8/8 [00:10<00:00,  1.25s/it, loss=1.06243, emo_loss=0.4451, visual_loss=2.0577, lambda_recon=0.30, lr=8.15e-05]

Evaluation Results:
Mult_acc_2: 0.7149
Mult_acc_3: 0.5921
Mult_acc_5: 0.3794
F1_score: 0.7100
MAE: 0.4866
Corr: 0.4652
Gate Activation Ratio: [[      0.875]
 [      0.125]
 [          1]]



train [Epoch 64]: 100%|██████████| 86/86 [01:16<00:00,  1.13it/s, loss=0.39651, emo_loss=0.0020, visual_loss=1.2669, RL_loss=0.0288, λ_recon=0.30, λ_RL=0.50, lr=8.08e-05]


train MAE: 0.0267


eval: 100%|██████████| 8/8 [00:10<00:00,  1.37s/it, loss=1.07146, emo_loss=0.4537, visual_loss=2.0593, lambda_recon=0.30, lr=8.08e-05]

Evaluation Results:
Mult_acc_2: 0.7061
Mult_acc_3: 0.6053
Mult_acc_5: 0.3904
F1_score: 0.6999
MAE: 0.4911
Corr: 0.4633
Gate Activation Ratio: [[      0.625]
 [       0.25]
 [          1]]



train [Epoch 65]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.40346, emo_loss=0.0037, visual_loss=1.2799, RL_loss=0.0316, λ_recon=0.30, λ_RL=0.50, lr=8.01e-05]


train MAE: 0.0322


eval: 100%|██████████| 8/8 [00:12<00:00,  1.51s/it, loss=1.06015, emo_loss=0.4463, visual_loss=2.0461, lambda_recon=0.30, lr=8.01e-05]

Evaluation Results:
Mult_acc_2: 0.6952
Mult_acc_3: 0.5833
Mult_acc_5: 0.3728
F1_score: 0.6843
MAE: 0.4862
Corr: 0.4841
Gate Activation Ratio: [[      0.875]
 [          0]
 [      0.875]]



train [Epoch 66]: 100%|██████████| 86/86 [01:19<00:00,  1.08it/s, loss=0.40433, emo_loss=0.0023, visual_loss=1.2843, RL_loss=0.0334, λ_recon=0.30, λ_RL=0.50, lr=7.94e-05]


train MAE: 0.0297


eval: 100%|██████████| 8/8 [00:11<00:00,  1.45s/it, loss=1.06178, emo_loss=0.4435, visual_loss=2.0609, lambda_recon=0.30, lr=7.94e-05]

Evaluation Results:
Mult_acc_2: 0.6974
Mult_acc_3: 0.5746
Mult_acc_5: 0.3596
F1_score: 0.6875
MAE: 0.4860
Corr: 0.4762
Gate Activation Ratio: [[      0.875]
 [          0]
 [          1]]



train [Epoch 67]: 100%|██████████| 86/86 [01:16<00:00,  1.12it/s, loss=0.40193, emo_loss=0.0009, visual_loss=1.2765, RL_loss=0.0362, λ_recon=0.30, λ_RL=0.50, lr=7.87e-05]


train MAE: 0.0201


eval: 100%|██████████| 8/8 [00:10<00:00,  1.27s/it, loss=1.05269, emo_loss=0.4328, visual_loss=2.0664, lambda_recon=0.30, lr=7.87e-05]

Evaluation Results:
Mult_acc_2: 0.6798
Mult_acc_3: 0.5768
Mult_acc_5: 0.3596
F1_score: 0.6689
MAE: 0.4876
Corr: 0.4870
Gate Activation Ratio: [[      0.875]
 [          0]
 [          1]]



train [Epoch 68]: 100%|██████████| 86/86 [01:18<00:00,  1.10it/s, loss=0.40243, emo_loss=0.0010, visual_loss=1.2744, RL_loss=0.0382, λ_recon=0.30, λ_RL=0.50, lr=7.80e-05]


train MAE: 0.0219


eval: 100%|██████████| 8/8 [00:11<00:00,  1.41s/it, loss=1.07227, emo_loss=0.4534, visual_loss=2.0628, lambda_recon=0.30, lr=7.80e-05]

Evaluation Results:
Mult_acc_2: 0.7259
Mult_acc_3: 0.5768
Mult_acc_5: 0.3750
F1_score: 0.7213
MAE: 0.4896
Corr: 0.4665
Gate Activation Ratio: [[      0.875]
 [          0]
 [          1]]



train [Epoch 69]: 100%|██████████| 86/86 [01:14<00:00,  1.15it/s, loss=0.40093, emo_loss=0.0007, visual_loss=1.2730, RL_loss=0.0367, λ_recon=0.30, λ_RL=0.50, lr=7.72e-05]


train MAE: 0.0195


eval: 100%|██████████| 8/8 [00:12<00:00,  1.55s/it, loss=1.07236, emo_loss=0.4526, visual_loss=2.0657, lambda_recon=0.30, lr=7.72e-05]

Evaluation Results:
Mult_acc_2: 0.6754
Mult_acc_3: 0.5680
Mult_acc_5: 0.3706
F1_score: 0.6642
MAE: 0.4947
Corr: 0.4679
Gate Activation Ratio: [[      0.875]
 [      0.125]
 [          1]]



train [Epoch 70]: 100%|██████████| 86/86 [01:17<00:00,  1.11it/s, loss=0.40153, emo_loss=0.0006, visual_loss=1.2794, RL_loss=0.0341, λ_recon=0.30, λ_RL=0.50, lr=7.65e-05]


train MAE: 0.0190


eval: 100%|██████████| 8/8 [00:11<00:00,  1.42s/it, loss=1.07519, emo_loss=0.4574, visual_loss=2.0592, lambda_recon=0.30, lr=7.65e-05]

Evaluation Results:
Mult_acc_2: 0.7259
Mult_acc_3: 0.5811
Mult_acc_5: 0.3728
F1_score: 0.7213
MAE: 0.4889
Corr: 0.4754
Gate Activation Ratio: [[       0.75]
 [      0.125]
 [          1]]



train [Epoch 71]: 100%|██████████| 86/86 [01:18<00:00,  1.09it/s, loss=0.39823, emo_loss=0.0006, visual_loss=1.2746, RL_loss=0.0305, λ_recon=0.30, λ_RL=0.50, lr=7.58e-05]


train MAE: 0.0177


eval: 100%|██████████| 8/8 [00:10<00:00,  1.28s/it, loss=1.06936, emo_loss=0.4523, visual_loss=2.0568, lambda_recon=0.30, lr=7.58e-05]

Evaluation Results:
Mult_acc_2: 0.7039
Mult_acc_3: 0.5768
Mult_acc_5: 0.3684
F1_score: 0.6971
MAE: 0.4913
Corr: 0.4716
Gate Activation Ratio: [[          1]
 [      0.125]
 [          1]]



train [Epoch 72]: 100%|██████████| 86/86 [01:18<00:00,  1.09it/s, loss=0.39437, emo_loss=0.0008, visual_loss=1.2648, RL_loss=0.0282, λ_recon=0.30, λ_RL=0.50, lr=7.50e-05]


train MAE: 0.0210


eval: 100%|██████████| 8/8 [00:11<00:00,  1.44s/it, loss=1.06667, emo_loss=0.4480, visual_loss=2.0622, lambda_recon=0.30, lr=7.50e-05]

Evaluation Results:
Mult_acc_2: 0.7039
Mult_acc_3: 0.5768
Mult_acc_5: 0.3553
F1_score: 0.6949
MAE: 0.4895
Corr: 0.4765
Gate Activation Ratio: [[      0.875]
 [      0.125]
 [          1]]



train [Epoch 73]: 100%|██████████| 86/86 [01:16<00:00,  1.13it/s, loss=0.39606, emo_loss=0.0005, visual_loss=1.2689, RL_loss=0.0298, λ_recon=0.30, λ_RL=0.50, lr=7.42e-05]


train MAE: 0.0163


eval: 100%|██████████| 8/8 [00:09<00:00,  1.23s/it, loss=1.07520, emo_loss=0.4573, visual_loss=2.0597, lambda_recon=0.30, lr=7.42e-05]

Evaluation Results:
Mult_acc_2: 0.7171
Mult_acc_3: 0.5724
Mult_acc_5: 0.3662
F1_score: 0.7102
MAE: 0.4911
Corr: 0.4783
Gate Activation Ratio: [[       0.75]
 [      0.125]
 [          1]]



train [Epoch 74]: 100%|██████████| 86/86 [01:19<00:00,  1.08it/s, loss=0.39722, emo_loss=0.0005, visual_loss=1.2706, RL_loss=0.0310, λ_recon=0.30, λ_RL=0.50, lr=7.35e-05]


train MAE: 0.0175


eval: 100%|██████████| 8/8 [00:11<00:00,  1.44s/it, loss=1.08334, emo_loss=0.4650, visual_loss=2.0610, lambda_recon=0.30, lr=7.35e-05]

Evaluation Results:
Mult_acc_2: 0.7061
Mult_acc_3: 0.5592
Mult_acc_5: 0.3706
F1_score: 0.6976
MAE: 0.4937
Corr: 0.4719
Gate Activation Ratio: [[       0.75]
 [      0.125]
 [          1]]



train [Epoch 75]: 100%|██████████| 86/86 [01:20<00:00,  1.07it/s, loss=0.39397, emo_loss=0.0005, visual_loss=1.2636, RL_loss=0.0288, λ_recon=0.30, λ_RL=0.50, lr=7.27e-05]


train MAE: 0.0169


eval: 100%|██████████| 8/8 [00:10<00:00,  1.32s/it, loss=1.08618, emo_loss=0.4662, visual_loss=2.0666, lambda_recon=0.30, lr=7.27e-05]

Evaluation Results:
Mult_acc_2: 0.6974
Mult_acc_3: 0.5680
Mult_acc_5: 0.3662
F1_score: 0.6882
MAE: 0.4938
Corr: 0.4752
Gate Activation Ratio: [[      0.875]
 [          0]
 [          1]]



train [Epoch 76]: 100%|██████████| 86/86 [01:15<00:00,  1.13it/s, loss=0.39655, emo_loss=0.0004, visual_loss=1.2667, RL_loss=0.0322, λ_recon=0.30, λ_RL=0.50, lr=7.19e-05]


train MAE: 0.0158


eval: 100%|██████████| 8/8 [00:10<00:00,  1.32s/it, loss=1.08858, emo_loss=0.4710, visual_loss=2.0586, lambda_recon=0.30, lr=7.19e-05]

Evaluation Results:
Mult_acc_2: 0.6842
Mult_acc_3: 0.5680
Mult_acc_5: 0.3772
F1_score: 0.6739
MAE: 0.4945
Corr: 0.4715
Gate Activation Ratio: [[      0.875]
 [          0]
 [          1]]



train [Epoch 77]: 100%|██████████| 86/86 [01:15<00:00,  1.14it/s, loss=0.39543, emo_loss=0.0005, visual_loss=1.2648, RL_loss=0.0310, λ_recon=0.30, λ_RL=0.50, lr=7.11e-05]


train MAE: 0.0169


eval: 100%|██████████| 8/8 [00:11<00:00,  1.44s/it, loss=1.09056, emo_loss=0.4709, visual_loss=2.0655, lambda_recon=0.30, lr=7.11e-05]

Evaluation Results:
Mult_acc_2: 0.6996
Mult_acc_3: 0.5658
Mult_acc_5: 0.3662
F1_score: 0.6904
MAE: 0.4975
Corr: 0.4709
Gate Activation Ratio: [[          1]
 [          0]
 [          1]]



train [Epoch 78]: 100%|██████████| 86/86 [01:17<00:00,  1.10it/s, loss=0.39685, emo_loss=0.0004, visual_loss=1.2703, RL_loss=0.0308, λ_recon=0.30, λ_RL=0.50, lr=7.03e-05]


train MAE: 0.0139


eval: 100%|██████████| 8/8 [00:11<00:00,  1.41s/it, loss=1.08058, emo_loss=0.4608, visual_loss=2.0658, lambda_recon=0.30, lr=7.03e-05]

Evaluation Results:
Mult_acc_2: 0.7083
Mult_acc_3: 0.5768
Mult_acc_5: 0.3728
F1_score: 0.7005
MAE: 0.4950
Corr: 0.4715
Gate Activation Ratio: [[      0.625]
 [          0]
 [          1]]



train [Epoch 79]: 100%|██████████| 86/86 [01:24<00:00,  1.02it/s, loss=0.39385, emo_loss=0.0004, visual_loss=1.2644, RL_loss=0.0283, λ_recon=0.30, λ_RL=0.50, lr=6.95e-05]


train MAE: 0.0141
