# import

In [1]:
import torch
import torch.nn as nn
from vilt.modules import heads, objectives
import vilt.modules.vision_transformer as vit
import torch.nn.functional as F
import random
from typing import OrderedDict
import os
import pandas as pd
import numpy as np
from vilt.transforms import pixelbert_transform
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
from torch.utils.data import DataLoader
import gc
import torch.optim as optim
from torch.optim import lr_scheduler
from collections import defaultdict
import wandb

from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold
import warnings

# 禁用所有警告
warnings.filterwarnings("ignore")


  from .autonotebook import tqdm as notebook_tqdm


# config

In [2]:
# Central experiment configuration. Attributes are read directly from the class
# (e.g. `config.seed`); the class is never instantiated in the visible code.
class config:
    debug = True
    sensor_only = False
    label_col = "tsm1_k2"
    class_num = 4

    exp_name = "ViST-OffRoad"
    seed = 520
    batch_size = 4096  # this is a desired batch size; pl trainer will accumulate gradients when per step batch is smaller.
    train_batch_size = 64
    valid_batch_size = 64
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    n_fold = 5
    
    # weights = torch.tensor([11491, 864], dtype=torch.float32,device=device)
    # NOTE(review): two class weights are defined here while class_num is 4 — confirm
    # that the weight vector matches the number of logits the model produces.
    weights = torch.tensor([1, 1], dtype=torch.float32,device=device)
    model_name = "sensorViLOnlyTransformerSS" # image only
    
    wandb_name = ""

    # Image setting
    train_transform_keys = ["pixelbert"]
    val_transform_keys = ["pixelbert"]
    img_size = 384
    max_image_len = -1
    patch_size = 32
    draw_false_image = 1
    image_only = False

    # Sensor
    # senser_input_num = 11 # sensor parameter count for the "Xiangguan" data (presumably another dataset)
    senser_input_num = 19 # sensor parameter count for the "Tianhang" data; overwritten by fetch_df()
    
    # Text Setting
    vqav2_label_size = 3129
    max_text_len = 40
    tokenizer = "bert-base-uncased"
    vocab_size = 30522 # vocabulary size
    whole_word_masking = False
    mlm_prob = 0.15
    draw_false_text = 0

    # Transformer Setting
    # vit = "vit_base_patch32_384"
    vit = "vit_base_patch32_384_SemanticEstimation"
    hidden_size = 768  # embedding vector size
    num_heads = 12
    num_layers = 12
    mlp_ratio = 4
    drop_rate = 0.2

    # Optimizer Setting
    optim_type = "adamw"
    learning_rate = 1e-2 #0.0015#2e-3 #
    weight_decay = 1e-2 # 0.01 ->1e-4
    decay_power = 1
    max_epoch = 10
    max_steps = 25000
    # warmup_steps = 2500
    end_lr = 0
    lr_mult = 1  # multiply lr for downstream heads
    # T_max = 8000/train_batch_size*max_epoch 
    # T_max = 4632/train_batch_size*max_epoch # total 7237.5
    # T_max = 2126/train_batch_size*max_epoch # soybean 3321.875
    # Evaluated once, while the class body runs, with max_epoch == 10; the debug
    # override of max_epoch below does NOT update T_max.
    T_max = 9884/train_batch_size*max_epoch # NOTE(review): trailing "soybean 3321.875" note looked stale — confirm 9884 is the current sample count

    # Downstream Setting
    get_recall_metric = False


    # below params varies with the environment
    data_root = ""
    log_dir = "result"
    per_gpu_batchsize = 0  # you should define this manually with per_gpu_batch_size=#
    num_gpus = 1
    num_nodes = 1
    load_path = "weights/vilt_200k_mlm_itm.ckpt"
    # load_path = "save_model_dict.pt"
    num_workers = 1
    precision = 16

    # CBP algorithm 1: Random Maclaurin Projection parameter
    RMP_d = 10000



# Shorten training when debugging.
if config.debug:
    config.max_epoch = 2
print("当前device=",config.device)

当前device= cuda:1


In [3]:
def setup_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, the current GPU and all GPUs), and asks cuDNN to use deterministic
    algorithms.

    Args:
        seed (int): Seed value shared by all generators.
    """
    # Python and NumPy global generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch: CPU generator, then the current GPU, then every GPU.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    torch.backends.cudnn.deterministic = True
    
setup_seed(config.seed)

# wandb

In [4]:
# Run Weights & Biases in offline ("dryrun") mode while debugging.
if config.debug:
    os.environ["WANDB_MODE"] = 'dryrun'
try:
    wandb.login()  # credentials are stored in the ~/.netrc file
    anonymous = None
except Exception:
    # Login failed: fall back to anonymous logging and tell the user where to get a
    # token. `Exception` (not a bare `except`) so Ctrl-C / SystemExit still propagate.
    anonymous = "must"
    print('\nGet your W&B access token from here: https://wandb.ai/authorize\n')

# os.environ["WANDB_MODE"] = 'dryrun' # 离线模式
# anonymous = None


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


# 数据

In [5]:
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
def fetch_df(label_col):
    """Load the off-road dataset, balance its classes and attach sensor features.

    Steps: read the CSV, build absolute image paths, copy ``label_col`` into a
    ``label`` column, randomly oversample every class except the majority one,
    drop rows containing NaN, standardise the selected sensor columns and pack
    them into a per-row ``sensor`` list.

    Side effect: ``config.senser_input_num`` is set to the number of sensor
    columns used.

    Args:
        label_col (str): Name of the column to use as the label, e.g.
            'tsm1_original', 'tsm1_k2', 'tsm1_k3' or 'tsm1_k4'.

    Returns:
        pandas.DataFrame: The prepared frame (first 200 rows only when
        ``config.debug`` is set).
    """
    df_off_road = pd.read_csv("/home/junsheng/ViLT/data/off_road_tsm1_label_0.csv")
    df_off_road['image_path'] = df_off_road['file_path'].map(lambda x:os.path.join('/home/junsheng/data/off_road/Images/Images',x))
    df_off_road['label'] = df_off_road[label_col]

    # Oversample every class except the majority one. The sampler is seeded
    # explicitly so repeated calls yield the same rows regardless of how much of
    # NumPy's global random state has been consumed in between.
    # NOTE(review): oversampling happens before the fold split in creat_folds(),
    # so copies of one row can land in both the training and validation folds.
    oversampler = RandomOverSampler(sampling_strategy='not majority', random_state=config.seed)
    df_resampled, labels_resampled = oversampler.fit_resample(df_off_road.drop('label', axis=1), df_off_road['label'])

    # Reassemble the resampled features and labels into one frame.
    df_balanced = pd.DataFrame(df_resampled, columns=df_off_road.columns.drop('label'))
    df_balanced['label'] = labels_resampled
    df_off_road = df_balanced

    df_off_road = df_off_road.dropna()
    # Keeps the previous index as an 'index' column (no drop=True), as before.
    df_off_road = df_off_road.reset_index()

    # Sensor columns fed to the model. An earlier, commented-out variant also
    # included the m/s^2 accelerations, position, altitude, speed, heading and
    # velocity columns.
    off_road_sensor = [
    'accel_x (counts)', 'accel_y (counts)', 'accel_z (counts)', 'calibrated_accel_x (g)', 'calibrated_accel_y (g)', 'calibrated_accel_z (g)', 'gyro_x (counts)', 'gyro_y (counts)', 'gyro_z (counts)', 'calibrated_gyro_x (deg/s)', 'calibrated_gyro_y (deg/s)', 'calibrated_gyro_z (deg/s)', 'mag_x (counts)', 'mag_y (counts)', 'mag_z (counts)'
    ]

    # Standardise the sensor columns (zero mean, unit variance over the whole frame).
    df_off_road[off_road_sensor] = StandardScaler().fit_transform(df_off_road[off_road_sensor])

    config.senser_input_num = len(off_road_sensor)

    df_off_road['sensor'] = df_off_road[off_road_sensor].values.tolist()
    print("input dim:",len(off_road_sensor))

    # Debug mode: keep a small subset. Copy it so later column assignments
    # (e.g. the fold column) do not write into a view of the full frame.
    df=df_off_road
    if config.debug:
        df = df[:200].copy()
    return df



create folds

In [6]:
def creat_folds(df):
    """Assign every row of ``df`` to one of ``config.n_fold`` validation folds.

    A ``fold`` column is written to ``df`` in place; the per-fold row count is
    printed and the same frame is returned.

    Args:
        df (pandas.DataFrame): Frame with ``filename`` and ``label`` columns.

    Returns:
        pandas.DataFrame: ``df`` with an integer ``fold`` column.
    """
    skf = StratifiedKFold(n_splits=config.n_fold, shuffle=True, random_state=config.seed)
    # split() yields *positional* indices, so collect them in an array instead of
    # using them as index labels with df.loc (only correct for a 0..n-1 index).
    fold_of_row = np.full(len(df), -1, dtype=int)
    # NOTE(review): stratification is done on `filename`, not on `label` — confirm
    # this is intended (StratifiedGroupKFold is imported but unused).
    for fold, (_, val_idx) in enumerate(skf.split(df, df.filename)):
        fold_of_row[val_idx] = fold
    df['fold'] = fold_of_row
    print(df.groupby(['fold'])['label'].count())
    return df


test


In [7]:
# Smoke test: build the frame for the configured label column and assign folds.
df = fetch_df(config.label_col)
df = creat_folds(df)

# Dump the prepared frame to disk for manual inspection.
df.to_csv("test.csv",index=False)



input dim: 15
fold
0.0    40
1.0    40
2.0    40
3.0    40
4.0    40
Name: label, dtype: int64


In [8]:
# Show the class distribution of every candidate label column.
for label_column in ('tsm1_original', 'tsm1_k2', 'tsm1_k3', 'tsm1_k4'):
    print(df[label_column].value_counts())

3.0    92
1.0    88
2.0    20
Name: tsm1_original, dtype: int64
1.0    112
0.0     88
Name: tsm1_k2, dtype: int64
2.0    92
0.0    88
1.0    20
Name: tsm1_k3, dtype: int64
3.0    92
0.0    68
1.0    20
2.0    20
Name: tsm1_k4, dtype: int64


# dataset


In [9]:
# Image preprocessing pipeline: resize to a square of config.img_size, convert to
# a tensor, then normalise each channel.
# NOTE(review): the mean/std constants are presumably dataset statistics (e.g.
# produced by get_mean_std_value) — confirm their origin.
myTransforms = transforms.Compose([
    transforms.Resize((config.img_size,config.img_size)),
    transforms.ToTensor(),
    transforms.Normalize(
    mean=[0.3552, 0.3744, 0.3293],
    std=[0.2038, 0.2201, 0.2194],
)
])

def load_img(path):
    """Open the image at ``path`` as RGB and run it through ``myTransforms``.

    Args:
        path: Location of the image file.

    Returns:
        Whatever ``myTransforms`` returns for the RGB image.
    """
    rgb_image = Image.open(path).convert('RGB')
    return myTransforms(rgb_image)

class BuildDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image, sensor[, label])`` float tensors per row.

    Args:
        df (pandas.DataFrame): Frame with ``sensor`` and ``image_path`` columns,
            plus a ``label`` column when ``label`` is true.
        label (bool): Whether items include the label.
        transforms: Stored on the instance but not applied here; images are
            loaded through ``load_img``.
    """

    def __init__(self, df, label=True, transforms=None):
        self.df         = df
        self.label      = label
        self.sensors = df['sensor'].tolist()
        self.img_paths  = df['image_path'].tolist()
        if self.label:
            self.labels = df['label'].tolist()
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # load_img returns an already-transformed image; as_tensor avoids the
        # needless copy (and warning) of re-wrapping a tensor with torch.tensor().
        img = torch.as_tensor(load_img(self.img_paths[index]), dtype=torch.float)
        # Sensor readings become a single-row matrix of shape [1, n].
        sensor = torch.tensor(self.sensors[index], dtype=torch.float).unsqueeze(0)
        if not self.label:
            return img, sensor
        label = torch.tensor(self.labels[index], dtype=torch.float)
        return img, sensor, label


# dataloader

In [10]:
def fetch_dataloader(fold:int,df):
    """Build the train/validation loaders for one cross-validation fold.

    Rows whose ``fold`` column equals ``fold`` form the validation set; all other
    rows form the training set.

    Args:
        fold (int): Index of the fold held out for validation.
        df (pandas.DataFrame): Prepared frame containing a ``fold`` column.

    Returns:
        tuple: ``(train_loader, valid_loader, train_df)``.
    """
    train_df = df.query("fold!=@fold").reset_index(drop=True)
    valid_df = df.query("fold==@fold").reset_index(drop=True)

    # The previous version re-ran fetch_df() here and discarded the result, which
    # only re-read the CSV and repeated the resampling; that call was removed.

    print("train_df.shape:",train_df.shape)
    print("valid_df.shape:",valid_df.shape)

    train_data  = BuildDataset(df=train_df,label=True)
    valid_data = BuildDataset(df=valid_df,label=True)

    train_loader = DataLoader(train_data, batch_size=config.train_batch_size,shuffle=True)
    # Validation order does not need shuffling; matches the sibling loader builders.
    valid_loader = DataLoader(valid_data, batch_size=config.valid_batch_size,shuffle=False)
    return train_loader,valid_loader,train_df

def fetch_dataloader_ubiquatous():
    """Build loaders that train on 'soybean' + 'rice' and validate on 'corn'.

    Each name is passed to ``fetch_df``; the two training frames are
    concatenated row-wise keeping only their shared columns.

    Returns:
        tuple: ``(train_loader, valid_loader)``.
    """
    train_parts = (fetch_df('soybean'), fetch_df('rice'))
    train_df = pd.concat(train_parts, axis=0, join='inner').reset_index(drop=True)
    valid_df = fetch_df('corn').reset_index(drop=True)

    for tag, frame in (("train_df.shape:", train_df), ("valid_df.shape:", valid_df)):
        print(tag, frame.shape)

    train_loader = DataLoader(
        BuildDataset(df=train_df, label=True),
        batch_size=config.train_batch_size,
        shuffle=True,
    )
    valid_loader = DataLoader(
        BuildDataset(df=valid_df, label=True),
        batch_size=config.valid_batch_size,
        shuffle=False,
    )
    return train_loader, valid_loader
def fetch_dataloader_ubiquatous_single_crop_test(fold:int,df,crop_name):
    """Build loaders that train on the folds of ``df`` and validate on one crop CSV.

    Args:
        fold (int): Fold excluded from the training rows of ``df``.
        df (pandas.DataFrame): Prepared frame containing a ``fold`` column.
        crop_name (str): One of "soybean", "corn" or "rice"; selects the CSV used
            as the validation set.

    Returns:
        tuple: ``(train_loader, valid_loader)``.

    Raises:
        ValueError: If ``crop_name`` is not a known crop (previously this
            surfaced as an UnboundLocalError further down).
    """
    crop_csv = {
        "soybean": "/home/junsheng/ViLT/data/ubiquitous_soybean.csv",
        "corn": "/home/junsheng/ViLT/data/ubiquitous_corn.csv",
        "rice": "/home/junsheng/ViLT/data/ubiquitous_rice.csv",
    }
    if crop_name not in crop_csv:
        raise ValueError(f"unknown crop_name {crop_name!r}; expected one of {sorted(crop_csv)}")

    train_df = df.query("fold!=@fold").reset_index(drop=True)
    # NOTE(review): the CSV is used as read, without going through fetch_df —
    # verify it already provides the columns BuildDataset needs.
    valid_df = pd.read_csv(crop_csv[crop_name])
    print("train_df.shape:",train_df.shape)
    print("valid_df.shape:",valid_df.shape)

    train_data  = BuildDataset(df=train_df,label=True)
    valid_data = BuildDataset(df=valid_df,label=True)

    train_loader = DataLoader(train_data, batch_size=config.train_batch_size,shuffle=True)
    valid_loader = DataLoader(valid_data, batch_size=config.valid_batch_size,shuffle=False)
    return train_loader,valid_loader

计算图像均值标准差

In [11]:
def get_mean_std_value(loader):
    """Compute the per-channel mean and standard deviation of a dataset.

    Statistics are reduced over dimensions 0, 2 and 3 of each image batch, so
    dimension 1 is treated as the channel axis. Sums are weighted by the number
    of elements in each batch, so a smaller final batch does not bias the result.

    Args:
        loader: Iterable with ``len()`` yielding ``(data, sensor, label)``
            triples; only ``data`` is used.

    Returns:
        tuple: ``(mean, std)`` tensors with one entry per channel.

    Raises:
        ValueError: If the loader yields no data.
    """
    channel_sum, channel_sq_sum = 0, 0
    values_per_channel = 0
    out_dtype = torch.float32

    pbar = tqdm(enumerate(loader), total=len(loader), desc='caculating ')
    for step,(data,sensor,label)  in pbar:
        # Accumulate in float64 to limit rounding error over many pixels.
        data64 = data.double()
        channel_sum = channel_sum + data64.sum(dim=[0,2,3])
        channel_sq_sum = channel_sq_sum + (data64**2).sum(dim=[0,2,3])
        values_per_channel += data.numel() // data.size(1)
        if data.is_floating_point():
            out_dtype = data.dtype

    if values_per_channel == 0:
        raise ValueError("loader yielded no data; cannot compute mean/std")

    mean = channel_sum/values_per_channel
    # Var = E[x^2] - E[x]^2; clamp tiny negative values caused by rounding.
    variance = (channel_sq_sum/values_per_channel - mean**2).clamp_min(0)
    std = variance**0.5
    return mean.to(out_dtype),std.to(out_dtype)
# df = fetch_df(config.label_col)
# df = creat_folds(df)
# train_loader,_ = fetch_dataloader(fold=0,df=df)
# mean,std = get_mean_std_value(train_loader)
# print('mean = {},std = {}'.format(mean,std))

# model

## model build

In [12]:
import pretrainedmodels
from efficientnet_pytorch import EfficientNet
import models

from models.CNNTransformer import CNNTransformer
from models.RiceFusion import RiceFusion
from models.RiceTransformer import RiceTransformer
from models.ViST import *
from models.resnet import *
from models.vilt_ import *
from models.DNNF1 import *
from models.DNNF2 import *
from models.RiceFusionMLP import *
from models.RiceFusionCNN import *
from models.BilinearPooling import *
from models.CompactBilinearPoolingRMP import *
from models.CompactBilinearPoolingTSP import *
from models.SemanticEstimation import SemanticEstimation

def build_model(model_name: str,pre_train):
    """Instantiate the model identified by ``model_name``.

    Args:
        model_name (str): Model identifier; see the branches below for the
            supported names.
        pre_train (bool): Only used by "efficientnet-b4": load pretrained weights
            when true, otherwise build the architecture from its name.

    Returns:
        The constructed model.

    Raises:
        Exception: If ``model_name`` matches no known model.
    """
    sensor_n = config.senser_input_num

    # --- CNN baselines -------------------------------------------------------
    # Previously written as `model_name[:6] == "resnet50"`, which can never be
    # true (a 6-character slice vs an 8-character literal), so plain ResNets were
    # unreachable. "se_resnet50" does not start with "resnet" and keeps its own branch.
    if model_name.startswith("resnet"):
        model = pretrainedmodels.__dict__[model_name](
            num_classes=1000, pretrained='imagenet')
        dim_feats = model.last_linear.in_features  # =2048
        nb_classes = 1
        model.last_linear = nn.Linear(dim_feats, nb_classes)
        return model
    if model_name == "se_resnet50":
        model = pretrainedmodels.__dict__[model_name](
            num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(204800, 1,bias=True)
        return model
    if model_name == "efficientnet-b4":
        # refer:https://github.com/lukemelas/EfficientNet-PyTorch#example-classification
        nb_classes = 1
        if pre_train:
            model = EfficientNet.from_pretrained(model_name)
        else:
            model = EfficientNet.from_name(model_name)
        model._fc = nn.Linear(1792, nb_classes)
        return model

    # --- ViST family ---------------------------------------------------------
    if model_name == "ViST":
        return ViST(sensor_class_n= sensor_n,output_class_n = config.class_num,config=config)
    if model_name == "ViST2":
        return ViST2(sensor_class_n= sensor_n,output_class_n = 1,config=config)
    if model_name == "sensorViST":
        return sensorViST(sensor_class_n= sensor_n,output_class_n = config.class_num,config=config)
    if model_name == "imageViST":
        return imageViST(sensor_class_n= sensor_n,output_class_n = 1,config=config)

    # --- ViLT-style transformers ----------------------------------------------
    if model_name == "sensorOnlyViLTransformerSS":  # sensor only
        return sensorOnlyViLTransformerSS(sensor_class_n= sensor_n,output_class_n = 1,config=config)
    if model_name == "sensorViLOnlyTransformerSS":  # ViT image only
        return sensorViLOnlyTransformerSS(sensor_class_n= sensor_n,output_class_n = 1,config=config)
    if model_name == "sensorResnet50TransformerSS":
        return sensorResnet50TransformerSS(sensor_class_n= sensor_n,output_class_n = 1,config=config)
    if model_name == "sensorResnet101TransformerSS":
        return sensorResnet101TransformerSS(sensor_class_n= sensor_n,output_class_n = 1,config=config)
    if model_name == "sensorViLTransformerSS":
        return sensorViLTransformerSS(sensor_class_n= sensor_n,output_class_n = 1,config=config)

    # --- DNNF baselines ---------------------------------------------------------
    if model_name == "DNNF1":
        return DNNF1(sensor_nums=sensor_n,config=config)
    if model_name == "DNNF1PictureOnly":
        return DNNF1PictureOnly(sensor_nums=sensor_n,config=config)
    if model_name == "DNNF1SensorOnly":
        return DNNF1SensorOnly(sensor_nums=sensor_n,config=config)
    if model_name == "DNNF2":
        return DNNF2(sensor_nums=sensor_n,config=config)
    if model_name == "DNNF2PictureOnly":
        return DNNF2PictureOnly(sensor_nums=sensor_n,config=config)
    if model_name == "DNNF2SensorOnly":
        return DNNF2SensorOnly(sensor_nums=sensor_n,config=config)

    # --- RiceFusion comparison models -------------------------------------------
    if model_name == "RiceFusionMLP":
        return RiceFusionMLP(sensor_nums=sensor_n,config=config)
    if model_name == "RiceFusionCNN":
        return RiceFusionCNN(config=config)
    if model_name == "RiceFusion":
        return RiceFusion(sensor_nums=sensor_n,config=config)
    if model_name == "RiceTransformer":
        return RiceTransformer(sensor_nums=sensor_n,config=config)
    if model_name == "CNNTransformer":
        return CNNTransformer(sensor_nums=sensor_n,config=config)

    # --- Bilinear pooling variants ----------------------------------------------
    if model_name == "BilinearPooling":
        return BilinearPooling(sensor_nums=sensor_n,config=config)
    if model_name == "CompactBilinearPoolingRMP":
        return CompactBilinearPoolingRMP(sensor_nums=sensor_n,config=config)
    if model_name == "CompactBilinearPoolingTSP":
        return CompactBilinearPoolingTSP(sensor_nums=sensor_n,config=config)

    if model_name == "SemanticEstimation":
        return SemanticEstimation(sensor_class_n=sensor_n,output_class_n = config.class_num,config=config)
    raise Exception("模型未定义")
    

# 损失函数

In [13]:
# Normalise the configured class weights so they sum to 1.
weights = config.weights / config.weights.sum()

# Weighted cross-entropy loss used for both training and validation.
# NOTE(review): the weight vector length must equal the number of logits the
# model emits — confirm against config.weights / config.class_num.
criterion = nn.CrossEntropyLoss(weight=weights) # cross-entropy loss

# L1 (mean absolute error) loss; only referenced from commented-out code in the
# validation loop.
criterion_mae = nn.L1Loss()
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error
def MAPE(y_true,y_pred):
    """Mean absolute percentage error, delegated to scikit-learn.

    The value becomes very large when a ground-truth entry is zero or close to
    zero, because it is used as the divisor.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.

    Returns:
        The result of ``mean_absolute_percentage_error(y_true, y_pred)``.
    """
    mape_value = mean_absolute_percentage_error(y_true, y_pred)
    return mape_value

def SMAPE(y_true, y_pred):
    """Symmetric mean absolute percentage error.

    Elements where prediction and target are both exactly zero contribute 0
    instead of the NaN produced by 0/0.

    Args:
        y_true (torch.Tensor): Ground-truth values.
        y_pred (torch.Tensor): Predicted values.

    Returns:
        torch.Tensor: A scalar percentage; e.g. 50 means 50%.
    """
    abs_error = torch.abs(y_pred - y_true)
    scale = torch.abs(y_pred) + torch.abs(y_true)
    # Where scale is 0 the error is 0 as well, so dividing by 1 yields the wanted 0.
    safe_scale = torch.where(scale == 0, torch.ones_like(scale), scale)
    return 2.0 * torch.mean(abs_error / safe_scale) * 100.0

def average_accuracy(predictions, targets, num_classes):
    """Mean per-class accuracy over the classes that occur in ``targets``.

    Classes with no samples in ``targets`` are skipped and do not count towards
    the average. (Previously they were skipped but the sum was still divided by
    ``num_classes``, which silently scored every absent class as 0.)

    Args:
        predictions (torch.Tensor): Model outputs (logits), one row per sample.
        targets (torch.Tensor): Ground-truth class indices.
        num_classes (int): Number of class indices to consider (0..num_classes-1).

    Returns:
        float: Average of the per-class accuracies, or 0.0 when none of the
        classes appears in ``targets``.
    """
    predictions = predictions.float()
    _, predicted_classes = torch.max(predictions, 1)

    class_accuracies = []
    for i in range(num_classes):
        class_targets = (targets == i)
        class_total_samples = class_targets.sum().item()
        if class_total_samples == 0:
            # Class absent from targets: leave it out of the average.
            continue
        class_correct_samples = ((predicted_classes == i) & class_targets).sum().item()
        class_accuracies.append(class_correct_samples / class_total_samples)

    if not class_accuracies:
        return 0.0
    return sum(class_accuracies) / len(class_accuracies)

def overall_accuracy(predictions, targets):
    """Fraction of samples whose most probable class equals the target.

    Args:
        predictions (torch.Tensor): Model outputs (logits), one row per sample.
        targets (torch.Tensor): Ground-truth class indices.

    Returns:
        float: Number of correct predictions divided by ``targets.size(0)``.
    """
    # Logits -> probabilities -> index of the most probable class per row.
    class_probs = F.softmax(predictions.float(), dim=1)
    predicted_classes = torch.max(class_probs, 1)[1]

    n_correct = (predicted_classes == targets).sum().item()
    n_total = targets.size(0)
    return n_correct / n_total



计算精确率，召回率，F1分数

In [14]:
from sklearn.metrics import precision_recall_fscore_support
def precision_recall_fscore(y_true ,y_pred ):
    """Macro-averaged precision, recall and F1 score for class predictions.

    The ``average`` argument of ``precision_recall_fscore_support`` controls how
    multi-class scores are combined:

    * ``None``: no averaging; per-class scores are returned.
    * ``'binary'``: report the scores of the designated positive class only.
    * ``'micro'``: pool true/false positives and false negatives over all
      classes, then compute the scores.
    * ``'macro'``: compute the scores per class and take their unweighted mean,
      so every class counts equally regardless of its support.
    * ``'weighted'``: like macro, but each class is weighted by its support.
    * ``'samples'``: multi-label only; average the per-instance scores.

    This function uses ``'macro'``.

    Args:
        y_true (torch.Tensor): Ground-truth class indices.
        y_pred (torch.Tensor): Model outputs, one row of class scores per sample.

    Returns:
        tuple: ``(precision, recall, f1_score)``.
    """
    # Predicted class = index of the highest score in each row.
    y_pred_labels = torch.argmax(y_pred, dim=1)

    y_pred_np = y_pred_labels.cpu().numpy()
    # Flatten to 1-D. reshape(-1) keeps a single-sample batch one-dimensional,
    # whereas squeeze() would collapse it to a 0-d array.
    y_true_np = y_true.reshape(-1).cpu().numpy()
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true_np, y_pred_np, average='macro')
    return precision, recall, f1_score
    




## Kernel Mixture Loss

In [15]:
class KernelMixtureLoss(nn.Module):
    """Loss built from a learnable mixture of exponential kernels.

    Each of the ``num_mixtures`` components has a learnable weight ``alpha`` and
    a learnable temperature ``tau``, both initialised to ones.
    """

    def __init__(self, num_mixtures):
        super().__init__()
        self.num_mixtures = num_mixtures
        self.alpha = nn.Parameter(torch.ones(num_mixtures))
        self.tau = nn.Parameter(torch.ones(num_mixtures))

    def forward(self, q, k_plus, k_negatives):
        """Return ``-log(numerator / denominator).mean()``.

        The inputs must broadcast against the ``(num_mixtures, 1)`` views of
        ``alpha`` and ``tau``; the positive term is summed over dim 1 and the
        negative term over dim 2.
        """
        mix_weight = self.alpha.unsqueeze(1)
        temperature = self.tau.unsqueeze(1)

        # Weighted kernel response for the positive pair.
        positive_kernel = torch.exp(q * k_plus / temperature)
        numerator = torch.sum(mix_weight * positive_kernel, dim=1)

        # Weighted kernel response for the negatives.
        negative_kernel = torch.exp(q * k_negatives / temperature)
        denominator = torch.sum(mix_weight * negative_kernel, dim=2)

        log_ratio = torch.log(numerator / denominator)
        return -log_ratio.mean()

In [16]:
# Kernel-mixture loss instance with 10 mixture components.
# NOTE(review): not referenced by the training/validation loops visible here — confirm it is still needed.
kernel_mixture_loss = KernelMixtureLoss(10)



In [17]:
# # 示例数据
# predictions_example = torch.tensor([[1.0, 2.0, 0.0, 3.0], [4.0, 1.0, 2.0, 0.0]])
# targets_example = torch.tensor([3, 0])  # True labels
# num_classes_example = 4

# # 计算平均类别准确率
# average_acc = average_accuracy(predictions_example, targets_example, num_classes_example)
# print(f"Average Class Accuracy: {average_acc}")

# train one epoch

In [18]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Train ``model`` for one pass over ``dataloader``.

    For every batch: forward pass, cross-entropy loss via the module-level
    ``criterion``, backward pass, optimizer step and scheduler step. The batch
    loss and current learning rate are logged to wandb.

    Args:
        model: Called with ``{"image": img, "sensor": sensor}``; must return a
            mapping containing ``'cls_output'``.
        optimizer: Optimizer updating the model parameters.
        scheduler: Learning-rate scheduler, stepped once per batch.
        dataloader: Yields ``(img, sensor, label)`` batches.
        device: Unused here; labels are moved to ``config.device``.
        epoch: Unused here; kept for the caller's interface.

    Returns:
        float: Sample-weighted mean training loss (0.0 if the loader is empty).
    """
    model.train()
    dataset_size = 0
    running_loss = 0.0
    epoch_loss = 0.0  # defined up front so an empty dataloader cannot leave it unbound

    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Train ')
    for step, (img, sensor,label) in pbar:
        batch_size = img.size(0)
        batch = {"image":img,"sensor":sensor}

        y_pred = model(batch)
        # CrossEntropyLoss expects integer class indices.
        label = label.to(config.device).long()
        loss = criterion(y_pred['cls_output'], label)

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Use a plain float from here on so the autograd graph is not kept alive.
        loss_value = loss.item()
        running_loss += (loss_value * batch_size)
        dataset_size += batch_size
        epoch_loss = running_loss / dataset_size
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(train_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_mem=f'{mem:0.2f} GB')

        wandb.log({"Training Loss": loss_value,
                "Training lr": scheduler.get_last_lr()[0]
                })

    torch.cuda.empty_cache()
    gc.collect()

    return epoch_loss

# valid one epoch

In [19]:
from sklearn.metrics import precision_recall_fscore_support
import collections
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, optimizer):
    model.eval()
    
    dataset_size = 0
    running_loss = 0.0
    
    # 计算overall_accuracy
    predictions= torch.empty(0, config.class_num).to(config.device)
    targets = torch.empty(0).to(config.device)
    running_loss_mae = 0.0
    running_loss_smape = 0.0
    running_loss_mape = 0.0
    running_overall_accuracy = 0.0
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Valid ')
    for step, (img, sensor,label) in pbar:               
        
        
        batch_size = img.size(0)
        batch = {"image":img,"sensor":sensor}

        y_pred  = model(batch)
        label = label.to(config.device)
        label = label.long()
        loss = criterion(y_pred['cls_output'], label)
        
        targets = torch.cat((targets, label), dim=0)
        predictions = torch.cat((predictions, y_pred['cls_output']), dim=0)

        running_overall_accuracy = overall_accuracy(y_pred['cls_output'], label)
        running_average_accuracy = average_accuracy(y_pred['cls_output'], label,config.class_num)
        # loss_mae = criterion_mae(y_pred['cls_output'], label)
        # loss_smape = SMAPE(label,y_pred['cls_output'])
        # loss_mape = MAPE(label.cpu(),y_pred['cls_output'].cpu())
        
        running_loss += (loss.item() * batch_size)
        # running_loss_mae += (loss_mae.item() * batch_size)
        # running_loss_smape += (loss_smape.item() * batch_size)
        # running_loss_mape += (loss_mape.item() * batch_size)

        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        # epoch_loss_mae = running_loss_mae / dataset_size
        # epoch_loss_smape = running_loss_smape / dataset_size
        # epoch_loss_mape = running_loss_mape / dataset_size
        # print("验证过程中日志记录")
        # print("当前loss:",epoch_loss)
        # print("当前overall_accuracy:",running_overall_accuracy)
        # print("当前average_accuracy:",running_average_accuracy)
        # print("当前targets",targets.cpu().tolist())
        # print("当前predictions",predictions.cpu().tolist())

        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(valid_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_memory=f'{mem:0.2f} GB',
                        overall_accuracy=f'{running_overall_accuracy:0.4f}',
                        average_accuracy=f'{running_average_accuracy:0.4f}',
        )
        # wandb.log({"Validing Loss": loss,
        #             "Validing OAcc": running_overall_accuracy,
        #             "Validing AAcc": running_average_accuracy,
        #         })
    torch.cuda.empty_cache()
    gc.collect()
    
    precision, recall, f1_score = precision_recall_fscore(targets,predictions)


    overall_acc = overall_accuracy(predictions,targets)
    average_acc = average_accuracy(predictions,targets,config.class_num)

    Metrics = collections.namedtuple('Metrics', ['val_loss', 'overall_acc', 'average_acc','precision', 'recall', 'f1_score'])
    metrics = Metrics(epoch_loss, overall_acc, average_acc, precision, recall, f1_score)
    return metrics


# train

In [20]:

def run_training(model, optimizer, scheduler, device, num_epochs,train_loader,valid_loader):
    """Train `model` for `num_epochs` epochs, validating and logging to wandb after each one.

    Args:
        model: network handed to `train_one_epoch` / `valid_one_epoch` and watched by wandb.
        optimizer: optimizer forwarded to both epoch helpers.
        scheduler: LR scheduler forwarded to `train_one_epoch`; its last LR is logged per epoch.
        device: device forwarded to both epoch helpers.
        num_epochs: number of epochs to run (epochs are numbered starting at 1).
        train_loader: dataloader used for the training pass.
        valid_loader: dataloader used for the validation pass.

    Returns:
        tuple: `(model, history)` where `history` is a `defaultdict(list)` holding one
        value per epoch under the keys 'Train Loss', 'Valid Loss', 'Valid OAcc',
        'Valid AAcc', 'Valid Precision', 'Valid recall' and 'Valid f1_score'.
    """
    import shutil  # function-scope import: only the notebook snapshot below needs it

    history = defaultdict(list)
    # +inf instead of a magic 9999 so any real first validation loss is an improvement.
    best_valid_loss = float("inf")

    # Using the run as a context manager closes it even when an epoch raises,
    # instead of leaving a dangling wandb run behind in the kernel.
    with wandb.init(project=config.exp_name,
                    config={k: v for k, v in dict(vars(config)).items() if '__' not in k},
                    anonymous=anonymous,
                    name=config.wandb_name,
                    ) as run:
        wandb.watch(model, log_freq=100)

        if torch.cuda.is_available():
            print("cuda: {}\n".format(torch.cuda.get_device_name()))

        for epoch in range(1, num_epochs + 1):
            gc.collect()
            print(f'Epoch {epoch}/{num_epochs}', end='')
            train_loss = train_one_epoch(model, optimizer, scheduler,
                                         dataloader=train_loader,
                                         device=device, epoch=epoch)
            valid_metrics = valid_one_epoch(model,valid_loader,device=device,optimizer=optimizer)

            history['Train Loss'].append(train_loss)
            history['Valid Loss'].append(valid_metrics.val_loss)
            history['Valid OAcc'].append(valid_metrics.overall_acc)
            history['Valid AAcc'].append(valid_metrics.average_acc)
            history['Valid Precision'].append(valid_metrics.precision)
            history['Valid recall'].append(valid_metrics.recall)
            history['Valid f1_score'].append(valid_metrics.f1_score)

            wandb.log({"Train Loss": train_loss,
                       "Valid Loss": valid_metrics.val_loss,
                       "Valid OAcc": valid_metrics.overall_acc,
                       "Valid AAcc": valid_metrics.average_acc,
                       "Valid Precision": valid_metrics.precision,
                       "Valid recall": valid_metrics.recall,
                       "Valid f1_score": valid_metrics.f1_score,
                       "lr": scheduler.get_last_lr()[0]
                       })

            if valid_metrics.val_loss < best_valid_loss:
                best_valid_loss = valid_metrics.val_loss
                run.summary["Best Epoch"] = epoch
                # Checkpoint saving is currently disabled:
                # model_file_path = os.path.join(wandb.run.dir,"epoch-best.bin")
                # torch.save(model.state_dict(), model_file_path)

        # Keep a copy of the notebook next to the run files. This stays best-effort
        # (a missing notebook must not discard a finished training), but the failure
        # is now reported instead of being swallowed by a shell `cp`.
        notebook_path = "/home/junsheng/ViLT/my_vilt_total_off_road.ipynb"
        try:
            shutil.copy(notebook_path, run.dir)
        except OSError as err:
            print(f"notebook snapshot skipped ({notebook_path}): {err}")

    return model, history

Run training

In [21]:
def run(label_col:str,model_name:str,wandb_name:str,sensor_only:bool):
    """Run one end-to-end training job for a single task.

    Stores the task choices on the shared `config`, loads the data frame for
    `label_col`, splits it into folds, trains on fold 0 with SGD and a cosine
    annealing schedule, and hands everything to `run_training`.

    Args:
        label_col: label column name passed to `fetch_df`.
        model_name: stored in `config.model_name` and passed to `build_model`.
        wandb_name: stored in `config.wandb_name` (used as the wandb run name).
        sensor_only: stored in `config.sensor_only`.

    Returns:
        None. The trained model and history from `run_training` are not returned.
    """
    # Publish the per-task settings on the global config read by the helpers.
    config.model_name, config.wandb_name, config.sensor_only = (
        model_name, wandb_name, sensor_only
    )

    folded_df = creat_folds(fetch_df(label_col))
    # Constructed here but not referenced again in this function.
    kernel_mixture_loss = KernelMixtureLoss(10)

    # Only fold 0 is used as the validation split.
    train_loader, valid_loader = fetch_dataloader(fold=0, df=folded_df)

    net = build_model(config.model_name, True)
    net.to(config.device)
    print(config.device)

    # SGD with momentum; Adam with the same lr / weight_decay is the alternative
    # that was tried previously.
    sgd = optim.SGD(
        net.parameters(),
        lr=config.learning_rate,
        weight_decay=config.weight_decay,
        momentum=0.9,
    )
    cosine_schedule = lr_scheduler.CosineAnnealingLR(
        sgd,
        T_max=config.T_max,
        eta_min=5e-6,
    )

    trained_model, history = run_training(
        net,
        sgd,
        cosine_schedule,
        device=config.device,
        num_epochs=config.max_epoch,
        train_loader=train_loader,
        valid_loader=valid_loader,
    )

## tasks

In [22]:
# ViST tasks, keyed by label column: one entry per (tsm1 | tsm2) x (original, k2, k3, k4).
# Every entry uses the "ViST" model with images enabled (sensor_only=False) and a
# wandb run name of the form "vist|<label_col>|OffRoad".
vist_task_dict = {
    label: {
        "label_col": label,
        "model_name": "ViST",
        "wandb_name": f"vist|{label}|OffRoad",
        "sensor_only": False,
    }
    for label in (
        f"{target}_{variant}"
        for target in ("tsm1", "tsm2")
        for variant in ("original", "k2", "k3", "k4")
    )
}


In [23]:
# SemanticEstimation tasks, keyed by label column: one entry per
# (tsm1 | tsm2) x (original, k2, k3, k4), all with images enabled, plus one
# sensor-only entry ("sensorViST") that trains on the tsm1_k2 label.
semantic_task_dict = {
    **{
        label: {
            "label_col": label,
            "model_name": "SemanticEstimation",
            "wandb_name": f"Semantic|{label}|OffRoad",
            "sensor_only": False,
        }
        for label in (
            f"{target}_{variant}"
            for target in ("tsm1", "tsm2")
            for variant in ("original", "k2", "k3", "k4")
        )
    },
    "sensorViST": {
        "label_col": "tsm1_k2",
        "model_name": "sensorViST",
        # Fixed: the run name used to say "tsm2_k2" although the label column
        # trained on is tsm1_k2, so the run was logged under the wrong label.
        "wandb_name": "sensorViST|tsm1_k2|OffRoad",
        "sensor_only": True,
    },
}

Run tasks


In [24]:
# Sensor-only test (currently disabled)
# task = semantic_task_dict["sensorViST"]
# config.class_num = 2
# config.max_epoch = 10
# config.learning_rate = 0.001
# run(task["label_col"],task["model_name"],task["wandb_name"],task["sensor_only"])

In [25]:


# Three-class experiments: train ViST on the k3 label of tsm1, then of tsm2,
# both with the same learning rate.
config.class_num = 3
config.learning_rate = 0.0001
for task in (vist_task_dict["tsm1_k3"], vist_task_dict["tsm2_k3"]):
    run(
        label_col=task["label_col"],
        model_name=task["model_name"],
        wandb_name=task["wandb_name"],
        sensor_only=task["sensor_only"],
    )




input dim: 15
fold
0.0    40
1.0    40
2.0    40
3.0    40
4.0    40
Name: label, dtype: int64
input dim: 15
train_df.shape: (160, 40)
valid_df.shape: (40, 40)


ValueError: too many values to unpack (expected 2)

#  test


In [None]:
def test():
    """Run the validation loop over the soybean CSV with a saved ViST checkpoint.

    Steps visible here: build a "ViST" model, load weights from a hard-coded
    wandb run directory, move the model to `config.device`, read the soybean
    CSV, pack the 17 listed sensor columns into a single 'sensor' column, and
    feed the resulting dataset (unshuffled) to `valid_one_epoch`.
    """
    model = build_model("ViST",True)
    # Checkpoint path is hard-coded to one specific wandb run.
    state_dict = torch.load('/home/junsheng/ViLT/wandb/run-20230111_141431-tb52bngc/files/epoch-best.bin')
    model.load_state_dict(state_dict)
    model.to(config.device)
    test_df = pd.read_csv("/home/junsheng/ViLT/data/ubiquitous_soybean.csv")
    # Sensor feature columns; each row's values become one list in test_df['sensor'].
    tianhang_sensor = ['co2', 'stemp', 'stemp2', 'stemp3', 'stemp5', 'shumi', 'shumi2', 'shumi3', 'shumi5', 'humi', 'pm10', 'pm25', 'press', 'solar', 'temp', 'wind_d', 'wind_sp']
    test_df['sensor'] = test_df[tianhang_sensor].values.tolist()
    test_data = BuildDataset(df=test_df,label=True)
    test_loader = DataLoader(test_data, batch_size=config.valid_batch_size,shuffle=False)
    # NOTE(review): elsewhere in this notebook valid_one_epoch is called with
    # `optimizer=...` and its result is read as a 6-field Metrics namedtuple
    # (val_loss, overall_acc, average_acc, precision, recall, f1_score). This call
    # passes no optimizer and unpacks 4 values, so it presumably fails — TODO confirm
    # against the current valid_one_epoch signature and update.
    val_loss,val_loss_mae,val_loss_smape,val_loss_mape = valid_one_epoch(model,test_loader,device=config.device) # expected (older API): epoch_loss, epoch_loss_mae, epoch_loss_smape, epoch_loss_mape