# import

In [78]:
import torch
import torch.nn as nn
from vilt.modules import heads, objectives
import vilt.modules.vision_transformer as vit
import torch.nn.functional as F
import random
from typing import OrderedDict
import os
import pandas as pd
import numpy as np
from vilt.transforms import pixelbert_transform
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
from torch.utils.data import DataLoader
import gc
import torch.optim as optim
from torch.optim import lr_scheduler
from collections import defaultdict
import wandb
import pretrainedmodels
from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold

from models import DNNF1,DNNF2,resnet,vilt

# config

In [79]:


class config:
    debug = False
    sensor_only = False

    exp_name = "vilt"
    seed = 101
    batch_size = 4096  # this is a desired batch size; pl trainer will accumulate gradients when per step batch is smaller.
    train_batch_size = 32
    valid_batch_size = 4
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # root_path = r'E:\\Download\\xiangguan' # 存放数据的根目录
    root_path = r'/home/junsheng/data/xiangguan' # 存放数据的根目录
    n_fold = 5

    #ViLT
    # model_name = "sensorViLOnlyTransformerSS" #仅vilt图像
    # model_name = "sensorOnlyViLTransformerSS"  #仅vilt传感器
    model_name = "sensorViLTransformerSS"  #vilt图像+传感器

    # model_name = "DNNF1"  #DNNF1图像＋传感器
    # model_name = "DNNF1PictureOnly"  #DNNF1仅图像
    # model_name = "DNNF1SensorOnly"  #DNNF1仅图像

    # model_name = "DNNF2"  #DNNF2图像＋传感器
    # model_name = "DNNF2PictureOnly"  #DNNF2仅图像
    # model_name = "DNNF2SensorOnly"  #DNNF2仅传感器

    # wandb 
    wandb_name = "vilt|水稻|290图像加传感器"
    # wandb_name = "vilt|水稻|290仅传感器"
    # wandb_name = "vilt|水稻|290仅图像"

    # wandb_name = "DNNF1|水稻|290图像加传感器"
    # wandb_name = "DNNF1|水稻|290仅图像"
    # wandb_name = "DNNF1|水稻|290仅传感器"
    
    # wandb_name = "DNNF2|水稻|290图像加传感器"
    # wandb_name = "DNNF2|水稻|290仅图像"
    # wandb_name = "DNNF2|水稻|290仅传感器"
    # 
    # Image setting
    train_transform_keys = ["pixelbert"]
    val_transform_keys = ["pixelbert"]
    img_size = 384
    max_image_len = -1
    patch_size = 32
    draw_false_image = 1
    image_only = False

    # Sensor
    # senser_input_num = 11 # 翔冠的传感器参数
    senser_input_num = 19 # 天航的传感器参数
    
    # Text Setting
    vqav2_label_size = 3129
    max_text_len = 40
    tokenizer = "bert-base-uncased"
    vocab_size = 30522 # vocabulary词汇数量
    whole_word_masking = False
    mlm_prob = 0.15
    draw_false_text = 0

    # Transformer Setting
    vit = "vit_base_patch32_384"
    hidden_size = 768  # 嵌入向量大小
    num_heads = 12
    num_layers = 12
    mlp_ratio = 4
    drop_rate = 0.1

    # Optimizer Setting
    optim_type = "adamw"
    learning_rate = 1e-4#1e-3 #0.0015#2e-3 #
    weight_decay = 1e-4 # 0.01 ->1e-4
    decay_power = 1
    max_epoch = 50
    # T_max = 8000/train_batch_size*max_epoch 
    T_max = 1000/train_batch_size*max_epoch 

    # Downstream Setting
    get_recall_metric = False


    # below params varies with the environment
    data_root = ""
    log_dir = "result"
    per_gpu_batchsize = 0  # you should define this manually with per_gpu_batch_size=#
    num_gpus = 1
    num_nodes = 1
    load_path = "weights/vilt_200k_mlm_itm.ckpt"
    # load_path = "save_model_dict.pt"
    num_workers = 1
    precision = 16

# config = vars(config)
# config = dict(config)
config

if config.debug:
    config.max_epoch = 5

In [80]:
config.device

device(type='cuda', index=0)

In [81]:
def setup_seed(seed):

    torch.manual_seed(seed)  # 为CPU设置随机种子
    np.random.seed(seed)  # Numpy module.
    random.seed(seed)  # Python random module.
    # torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed(seed)  # 为当前GPU设置随机种子
    torch.cuda.manual_seed_all(seed)  # 为所有GPU设置随机种子
    #os.environ['PYTHONHASHSEED'] = str(seed)
    
setup_seed(config.seed)

# wandb

In [82]:
# os.environ["WANDB_MODE"] = 'dryrun' # 离线模式
# os.environ["WANDB_MODE"] = 'online' # 在线模式
if config.debug:
    os.environ["WANDB_MODE"] = 'dryrun' # 离线模式
try:
    # wandb.log(key="*******") # if debug
    wandb.login() # storage in ~/.netrc file
    anonymous = None
except:
    anonymous = "must"
    print('\nGet your W&B access token from here: https://wandb.ai/authorize\n')


# 数据

In [83]:
df_tianhang = pd.read_csv("/home/junsheng/ViLT/data/290-tianhang-rice.csv")
df_tianhang['image_path'] = df_tianhang['pic_key'].map(lambda x:os.path.join('/home/junsheng/data/tianhang_rice',x.split('/')[-1]))
df_tianhang['label'] = df_tianhang['LAI']
df_tianhang = df_tianhang.dropna()
df_tianhang = df_tianhang.reset_index()
df_tianhang.head()

Unnamed: 0,index,pic_key,date_hour,date,co2,stemp,stemp2,stemp3,stemp4,stemp5,...,pm10,pm25,press,solar,temp,wind_d,wind_sp,LAI,image_path,label
0,282,/789/1655496854_1655496673_4.jpg,2022-06-18 04,2022-06-18,624.0,19.8,19.6,19.8,19.3,19.1,...,6.0,6.0,991.1,2.52,17.26,274.3,3.75,1.626667,/home/junsheng/data/tianhang_rice/1655496854_1...,1.626667
1,283,/789/1655496854_1655496673_4.jpg,2022-06-18 04,2022-06-18,624.0,19.8,19.6,19.8,19.3,19.1,...,7.0,7.0,991.2,5.93,17.18,268.7,2.67,1.626667,/home/junsheng/data/tianhang_rice/1655496854_1...,1.626667
2,284,/789/1655504090_1655503874_4.jpg,2022-06-18 06,2022-06-18,617.0,19.5,19.5,19.6,19.1,19.0,...,5.0,5.0,991.9,8.84,17.75,248.6,2.07,1.626667,/home/junsheng/data/tianhang_rice/1655504090_1...,1.626667
3,285,/789/1655504090_1655503874_4.jpg,2022-06-18 06,2022-06-18,617.0,19.5,19.4,19.5,19.1,19.0,...,3.0,3.0,992.0,9.2,17.83,265.7,2.95,1.626667,/home/junsheng/data/tianhang_rice/1655504090_1...,1.626667
4,286,/789/1655511249_1655511073_4.jpg,2022-06-18 08,2022-06-18,604.0,19.3,19.2,19.4,19.1,18.9,...,1.0,1.0,992.6,17.75,18.98,275.4,3.62,1.626667,/home/junsheng/data/tianhang_rice/1655511249_1...,1.626667


数据检查

In [84]:
# 检查图片下载的全不全
# pic = df_tianhang.image_path.map(lambda x:x.split('/')[-1]).unique()
# len(pic)
# file_ls = os.listdir("/home/junsheng/data/tianhang_rice")
# len(file_ls)
# ret = list(set(pic) ^ set(file_ls))
# ret #差集
# assert len(pic)==len(file_ls),"请检查下载的图片，缺了{}个".format(len(pic)-len(file_ls))


归一化非object列

In [85]:
list(df_tianhang)

['index',
 'pic_key',
 'date_hour',
 'date',
 'co2',
 'stemp',
 'stemp2',
 'stemp3',
 'stemp4',
 'stemp5',
 'shumi',
 'shumi2',
 'shumi3',
 'shumi4',
 'shumi5',
 'ts',
 'insert_time',
 'humi',
 'pm10',
 'pm25',
 'press',
 'solar',
 'temp',
 'wind_d',
 'wind_sp',
 'LAI',
 'image_path',
 'label']

In [86]:
number_title = []
recorder = {}
for title in df_tianhang:
    # print(df_xiangguan[title].head())
    if title == 'raw_label':
        continue
    if df_tianhang[title].dtype != "object":
        
        number_title.append(title)
        x_min = df_tianhang[title].min()
        x_max = df_tianhang[title].max()
        # print(x_min,x_max)
        recorder[title] = (x_min,x_max)
        df_tianhang[title] = df_tianhang[title].map(lambda x:(x-x_min)/(x_max - x_min))
number_title
recorder

{'index': (282, 1493),
 'co2': (0.0, 1175.0),
 'stemp': (13.3, 24.0),
 'stemp2': (14.1, 22.8),
 'stemp3': (14.1, 23.2),
 'stemp4': (14.3, 22.3),
 'stemp5': (14.7, 21.8),
 'shumi': (73.9, 76.9),
 'shumi2': (70.4, 74.8),
 'shumi3': (67.5, 69.2),
 'shumi4': (72.2, 74.2),
 'shumi5': (69.8, 71.8),
 'humi': (31.0, 100.0),
 'pm10': (0.0, 1333.0),
 'pm25': (0.0, 1333.0),
 'press': (981.1, 1009.0),
 'solar': (0.0, 200.0),
 'temp': (7.39, 32.0),
 'wind_d': (0.0, 359.8),
 'wind_sp': (0.0, 9.41),
 'LAI': (1.3458333333333334, 2.2466666666666666),
 'label': (1.3458333333333334, 2.2466666666666666)}

In [87]:
# xiangguan_sensor = ['temperature', 'humidity', 'illuminance', 'soil_temperature', 'soil_humidity', 'pressure', 'wind_speed', 'photosynthetic', 'sun_exposure_time', 'COz', 'soil_ph']
tianhang_sensor = ['co2', 'stemp', 'stemp2', 'stemp3', 'stemp4', 'stemp5', 'shumi', 'shumi2', 'shumi3', 'shumi4', 'shumi5', 'humi', 'pm10', 'pm25', 'press', 'solar', 'temp', 'wind_d', 'wind_sp']

df_tianhang['sensor'] = df_tianhang[tianhang_sensor].values.tolist()
print("input dim:",len(tianhang_sensor))

input dim: 19


仅传感器？

In [88]:
df_tianhang.shape

(1082, 29)

In [89]:
if config.sensor_only:
# del df_tianhang['pic_key']
    df_tianhang.drop_duplicates(subset=['pic_key'],inplace=True,ignore_index=True)


In [90]:
print("*********************df shape:",df_tianhang.shape)

*********************df shape: (541, 29)


In [91]:
df=df_tianhang
if config.debug:
    df = df[:100]
df.shape

(541, 29)

create folds

In [92]:
skf = StratifiedKFold(n_splits=config.n_fold, shuffle=True, random_state=config.seed)  
for fold, (train_idx, val_idx) in enumerate(skf.split(df,df.date)):
    df.loc[val_idx, 'fold'] = fold
df.groupby(['fold'])['label'].count()# ???



fold
0.0    109
1.0    108
2.0    108
3.0    108
4.0    108
Name: label, dtype: int64

In [93]:
df.head()
df.to_csv("test_fold.csv",index=False)

# dataset


In [94]:
myTransforms = transforms.Compose([
    transforms.Resize((config.img_size,config.img_size)),
    transforms.ToTensor(), 
    transforms.Normalize(
    mean=[0.4452, 0.5014, 0.4292],
    std=[0.1973, 0.2035, 0.1830],
    
)
])

def load_img(path):
    img =  Image.open(path).convert('RGB')
    img = myTransforms(img)
    return img

class BuildDataset(torch.utils.data.Dataset):
    def __init__(self, df, label=True, transforms=None):
        self.df         = df
        self.label      = label
        self.sensors = df['sensor'].tolist()
        self.img_paths  = df['image_path'].tolist()   
        if self.label:
            self.labels = df['label'].tolist()
        self.transforms = transforms
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        img_path  = self.img_paths[index]
        img = load_img(img_path)
        sensor = self.sensors[index]
        sensor = torch.tensor(sensor).unsqueeze(0) #[1,n]
        if self.label:
            label = self.labels[index]
            return torch.tensor(img).to(torch.float), torch.tensor(sensor).to(torch.float),torch.tensor(label).to(torch.float)
        else:
            return torch.tensor(img).to(torch.float), torch.tensor(sensor).to(torch.float)

# dataloader

In [95]:
def fetch_dataloader(fold:int):
    train_df = df.query("fold!=@fold").reset_index(drop=True)

    valid_df = df.query("fold==@fold").reset_index(drop=True)
    print("train_df.shape:",train_df.shape)
    print("valid_df.shape:",valid_df.shape)

    train_data  = BuildDataset(df=train_df,label=True)
    valid_data = BuildDataset(df=valid_df,label=True)

    train_loader = DataLoader(train_data, batch_size=config.train_batch_size,shuffle=True)
    valid_loader = DataLoader(valid_data, batch_size=config.valid_batch_size,shuffle=False)
    # test_loader = DataLoader(test_data, batch_size=config.test_batch_size,shuffle=False)
    return train_loader,valid_loader


In [96]:
# train_dataset = BuildDataset(df=df)
# train_loader = DataLoader(train_dataset, batch_size=config.train_batch_size,shuffle=True)
# valid_loader = DataLoader(train_dataset, batch_size=config.valid_batch_size,shuffle=True)
train_loader,valid_loader = fetch_dataloader(fold=0)


train_df.shape: (432, 30)
valid_df.shape: (109, 30)


In [97]:
img,sensor,label = next(iter(train_loader))

  return torch.tensor(img).to(torch.float), torch.tensor(sensor).to(torch.float),torch.tensor(label).to(torch.float)


计算图像均值标准差

In [98]:
def get_mean_std_value(loader):
    '''
    求数据集的均值和标准差
    :param loader:
    :return:
    '''
    data_sum,data_squared_sum,num_batches = 0,0,0
    
    for data,sensor,label  in loader:
        # data: [batch_size,channels,height,width]
        # 计算dim=0,2,3维度的均值和，dim=1为通道数量，不用参与计算
        # data_sum += torch.mean(data,dim=[0,2,3])    # [batch_size,channels,height,width]
        data_sum += torch.mean(data,dim=[0,2,3])    # [batch_size,height,width,channels]
        # 计算dim=0,2,3维度的平方均值和，dim=1为通道数量，不用参与计算
        # data_squared_sum += torch.mean(data**2,dim=[0,2,3])  # [batch_size,channels,height,width]
        data_squared_sum += torch.mean(data**2,dim=[0,2,3])  # [batch_size,height,width,channels]
        # 统计batch的数量
        num_batches += 1

       
    # 计算均值
    mean = data_sum/num_batches
    # 计算标准差
    std = (data_squared_sum/num_batches - mean**2)**0.5
    return mean,std
# mean,std = get_mean_std_value(train_loader)
# print('mean = {},std = {}'.format(mean,std))

  return torch.tensor(img).to(torch.float), torch.tensor(sensor).to(torch.float),torch.tensor(label).to(torch.float)


mean = tensor([0.4452, 0.5014, 0.4292]),std = tensor([0.1973, 0.2035, 0.1830])


# model

## model build

In [60]:
import pretrainedmodels
from efficientnet_pytorch import EfficientNet

def build_model(model_name: str,pre_train):
    if model_name[:6] == "resnet50":
        model = pretrainedmodels.__dict__[config.model_name](
            num_classes=1000, pretrained='imagenet')
        dim_feats = model.last_linear.in_features  # =2048
        nb_classes = 1
        model.last_linear = nn.Linear(dim_feats, nb_classes)
        return model
    if model_name == "se_resnet50":
        model = pretrainedmodels.__dict__[config.model_name](
            num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(204800, 1,bias=True)
        return model
    if model_name == "efficientnet-b4": # efficient net
        # refer:https://github.com/lukemelas/EfficientNet-PyTorch#example-classification
        nb_classes = 1
        if pre_train:
            model = EfficientNet.from_pretrained(config.model_name)# 'efficientnet-b4'
        else:
            model = EfficientNet.from_name(config.model_name)# 'efficientnet-b4'
        model._fc = nn.Linear(1792, nb_classes)
        return model

    if model_name == "sensorOnlyViLTransformerSS": #仅传感器
        model = vilt.sensorOnlyViLTransformerSS(sensor_class_n= config.senser_input_num,output_class_n = 1,config=config)
        return model
    if model_name == "sensorViLOnlyTransformerSS": # 仅vit图像
        model = vilt.sensorViLOnlyTransformerSS(sensor_class_n= config.senser_input_num,output_class_n = 1,config=config)
        return model
        
    if model_name == "sensorResnet50TransformerSS":
        model = resnet.sensorResnet50TransformerSS(sensor_class_n= config.senser_input_num,output_class_n = 1,config=config)
        return model
    if model_name == "sensorResnet101TransformerSS":
        model = resnet.sensorResnet101TransformerSS(sensor_class_n= config.senser_input_num,output_class_n = 1,config=config)
        return model

    if model_name == "sensorViLTransformerSS":
        model = vilt.sensorViLTransformerSS(sensor_class_n= config.senser_input_num,output_class_n = 1,config=config)
        return model

    if model_name == "DNNF1":
        model = DNNF1.DNNF1(sensor_nums=config.senser_input_num,config=config)
        return model
    if model_name == "DNNF1PictureOnly":
        model = DNNF1.DNNF1PictureOnly(sensor_nums=config.senser_input_num,config=config)
        return model
    if model_name == "DNNF1SensorOnly":
        model = DNNF1.DNNF1SensorOnly(sensor_nums=config.senser_input_num,config=config)
        return model
        
    if model_name == "DNNF2":
        model = DNNF2.DNNF2(sensor_nums=config.senser_input_num,config=config)
        return model
    if model_name == "DNNF2PictureOnly":
        model = DNNF2.DNNF2PictureOnly(sensor_nums=config.senser_input_num,config=config)
        return model
    if model_name == "DNNF2SensorOnly":
        model = DNNF2.DNNF2SensorOnly(sensor_nums=config.senser_input_num,config=config)
        return model

model = build_model(config.model_name,True)
model.to(config.device)
print(config.device)
# for i,m in enumerate(model.modules()):
#     print(i,m)

No pretrained weights exist or were found for this model. Using random initialization.


cuda:0
0 sensorOnlyViLTransformerSS(
  (sensor_linear): Linear(in_features=19, out_features=768, bias=True)
  (token_type_embeddings): Embedding(2, 768)
  (transformer): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
    )
    (pos_drop): Dropout(p=0.1, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.1, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate=none)
          (fc2): Linear(in

test

In [61]:

# sensor = torch.rand(config.senser_input_num)
# # sensor = torch.ones(config.senser_input_num)
# print(sensor)
# sensor =  torch.tensor(sensor).unsqueeze(0).unsqueeze(0) # torch.Size([1, 1, 3])
# batch = {}
# batch['sensor'] = sensor
# batch['image'] = "/home/junsheng/data/xiangguan/pic/xiangguanD4-2021-05-24-10-00-25.jpeg"
# model(batch)

# 损失函数

In [62]:
criterion = F.mse_loss #均方误差损失函数
criterion_mae = nn.L1Loss()


# train one epoch

In [63]:



def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    model.train()
    dataset_size = 0
    running_loss = 0.0
    
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Train ')
    for step, (img, sensor,label) in pbar:         
        # img = img.to(device, dtype=torch.float)
        # sensor  = sensor.to(device, dtype=torch.float)
        # label  = label.to(device, dtype=torch.float)
        batch_size = img.size(0)
        
        batch = {"image":img,"sensor":sensor}

        y_pred = model(batch)
        label = label.to(config.device).unsqueeze(1)
        loss = criterion(y_pred['cls_output'], label)
        
        #一坨优化
        optimizer.zero_grad()#每一次反向传播之前都要归零梯度
        loss.backward()      #反向传播
        optimizer.step()     #固定写法
        scheduler.step()
     
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        epoch_loss = running_loss / dataset_size
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(epoch=f'{epoch}',train_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_mem=f'{mem:0.2f} GB')

    
        
        
    torch.cuda.empty_cache()
    gc.collect()
    
    return epoch_loss

# valid one epoch

In [64]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, optimizer):
    model.eval()
    
    dataset_size = 0
    running_loss = 0.0
    
    running_loss_mae = 0.0
    
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Valid ')
    for step, (img, sensor,label) in pbar:               
        
        
        batch_size = img.size(0)
        batch = {"image":img,"sensor":sensor}

        y_pred  = model(batch)
        label = label.to(config.device).unsqueeze(1)

        loss = criterion(y_pred['cls_output'], label)
        loss_mae = criterion_mae(y_pred['cls_output'], label)

        running_loss += (loss.item() * batch_size)
        running_loss_mae += (loss_mae.item() * batch_size)

        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        epoch_loss_mae = running_loss_mae / dataset_size
        
        
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(valid_loss=f'{epoch_loss:0.4f}',
        valid_loss_mae=f'{epoch_loss_mae:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_memory=f'{mem:0.2f} GB')
    torch.cuda.empty_cache()
    gc.collect()
    
    return epoch_loss,epoch_loss_mae#MSE，MAE

# train

In [65]:

def run_training(model, optimizer, scheduler, device, num_epochs):
     # init wandb
    run = wandb.init(project="vilt",
                    config={k: v for k, v in dict(vars(config)).items() if '__' not in k},
                    # config={k: v for k, v in dict(config).items() if '__' not in k},
                    anonymous=anonymous,
                    # name=f"vilt|fold-{config.valid_fold}",
                    name=config.wandb_name,
                    # group=config.wandb_group,
                    )
    wandb.watch(model, log_freq=100)

    best_loss = 9999
    best_valid_loss = 9999
    history = defaultdict(list)
    if torch.cuda.is_available():
        print("cuda: {}\n".format(torch.cuda.get_device_name()))
    
    for epoch in range(1, num_epochs + 1): 
        gc.collect()
        print(f'Epoch {epoch}/{num_epochs}', end='')
        train_loss = train_one_epoch(model, optimizer, scheduler, 
                                           dataloader=train_loader, 
                                           device=device, epoch=epoch)
        val_loss,val_loss_mae = valid_one_epoch(model,valid_loader,device=device,optimizer=optimizer)
        history['Train Loss'].append(train_loss)
        history['Valid Loss'].append(val_loss)
        history['Valid Loss MAE'].append(val_loss_mae)

        wandb.log({"Train Loss": train_loss,
                    "Valid Loss": val_loss,
                    "Valid Loss MAE": val_loss_mae,
                "lr": scheduler.get_last_lr()[0]
                })
                
        if best_valid_loss > val_loss:
            best_valid_loss = val_loss
            # 保存模型
            # model_file_path = os.path.join(wandb.run.dir,"epoch-{}-{}.bin".format(epoch,wandb.run.id))
            # model_file_path = os.path.join(wandb.run.dir,"epoch-best.bin")
            # run.summary["Best Epoch"] = epoch
            # torch.save(model.state_dict(), model_file_path)
            # print("model save to", model_file_path)
            
    os.system("cp /home/junsheng/ViLT/my_vilt_tianhang_rice.ipynb {}".format(wandb.run.dir))
    run.finish()
    return model, history

optimizer

In [66]:
optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=config.T_max, 
                                                   eta_min=1e-5)


run train

In [67]:

model, history = run_training(model, optimizer, scheduler,device=config.device,num_epochs=config.max_epoch)



cuda: NVIDIA GeForce RTX 3090

Epoch 1/5

  return torch.tensor(img).to(torch.float), torch.tensor(sensor).to(torch.float),torch.tensor(label).to(torch.float)
Train : 100%|██████████| 3/3 [00:05<00:00,  1.71s/it, gpu_mem=1.60 GB, lr=0.00100, train_loss=0.1407]
Valid : 100%|██████████| 5/5 [00:01<00:00,  4.46it/s, gpu_memory=1.58 GB, lr=0.00100, valid_loss=0.3857, valid_loss_mae=0.6196]


Epoch 2/5

Train : 100%|██████████| 3/3 [00:04<00:00,  1.52s/it, gpu_mem=1.60 GB, lr=0.00100, train_loss=0.3325]
Valid : 100%|██████████| 5/5 [00:01<00:00,  4.51it/s, gpu_memory=1.58 GB, lr=0.00100, valid_loss=0.1463, valid_loss_mae=0.3801]


Epoch 3/5

Train : 100%|██████████| 3/3 [00:04<00:00,  1.53s/it, gpu_mem=1.60 GB, lr=0.00100, train_loss=0.1932]
Valid : 100%|██████████| 5/5 [00:01<00:00,  4.45it/s, gpu_memory=1.58 GB, lr=0.00100, valid_loss=0.1464, valid_loss_mae=0.3802]


Epoch 4/5

Train : 100%|██████████| 3/3 [00:04<00:00,  1.50s/it, gpu_mem=1.60 GB, lr=0.00100, train_loss=0.1961]
Valid : 100%|██████████| 5/5 [00:01<00:00,  4.51it/s, gpu_memory=1.58 GB, lr=0.00100, valid_loss=0.0044, valid_loss_mae=0.0595]


Epoch 5/5

Train : 100%|██████████| 3/3 [00:04<00:00,  1.58s/it, gpu_mem=1.60 GB, lr=0.00100, train_loss=0.1719]
Valid : 100%|██████████| 5/5 [00:01<00:00,  4.23it/s, gpu_memory=1.58 GB, lr=0.00100, valid_loss=0.0855, valid_loss_mae=0.2910]


0,1
Train Loss,▁█▃▃▂
Valid Loss,█▄▄▁▂
Valid Loss MAE,█▅▅▁▄
lr,█▇▆▄▁

0,1
Train Loss,0.17187
Valid Loss,0.08554
Valid Loss MAE,0.291
lr,0.001


# infer

In [68]:
# for (img,sensor,label) in valid_loader:
#     print(img.shape,sensor.shape,label)
#     break

In [69]:
# torch.save(model.state_dict(), 'embedding_test_dict.pt')
# print(model)

# model.load_state_dict(torch.load("/home/junsheng/ViLT/wandb/offline-run-20220811_120519-nzfb1xoz/files/epoch-best.bin"))
# model.eval()
# device = config.device
# model.to(device)
# def infer(img_filename, sensor):
#     try:
#         img_path = os.path.join('pictures',img_filename)
#         image = Image.open(img_path).convert("RGB")
#         img = pixelbert_transform(size=384)(image) # 将图像数据归一化torch.Size([3, 384, 576])
#         img = torch.tensor(img)
#         img = torch.unsqueeze(img, 0) # torch.Size([1, 3, 384, 576])
#         img = img.to(device)
#         print("img.shape:",img.shape)
#     except :
#         print("图片加载失败！")
#         raise

#     batch = dict()
#     batch["image"] = img

#     batch['sensor_masks'] = torch.ones(1,1).to(device)
#     with torch.no_grad():
#         batch['sensor'] = sensor.to(device)       
#         infer = model(batch)

#         print(infer)
#         sensor_emb, img_emb = infer["sensor_feats"], infer["image_feats"]# torch.Size([1, 23, 768]) torch.Size([1, 217, 768])
#         cls_output = infer['cls_output']
        

#     return [cls_output]


random test

In [None]:

# examples=[
#             "/home/junsheng/data/xiangguan/pic/xiangguanD4-2021-05-24-10-00-25.jpeg", #0
            
#             "/home/junsheng/data/xiangguan/pic/xiangguanD4-2021-07-18-04-22-30-preset-18.jpeg", # 3
#     ]



# n = 1
# sensor = torch.rand(config.senser_input_num)
# # sensor = torch.ones(config.senser_input_num)
# print(sensor)
# sensor =  torch.tensor(sensor).unsqueeze(0).unsqueeze(0) # torch.Size([1, 1, 3])
# out = infer(examples[0],sensor)
# print("out:",out,"000\n")
# print("out0.shape:",out[0].shape)
# cv2.imwrite('output.png',out[0])



In [None]:
# print(out[0].cpu().numpy()[0][0])
#0.00031266143

test by valid

选择三组生长期不同的数据去验证训练的结果

In [None]:
# df_test = df.query("fold==0").reset_index(drop=True)
# df_test.to_csv("test_by_valid.csv",index=False)
# sensor_test_list = df_test.sensor.tolist()
# image_test_list = df_test.image_path.tolist()

In [None]:
# idx = 64
# sensor =  torch.tensor(sensor_test_list[idx]).unsqueeze(0).unsqueeze(0)
# out = infer(image_test_list[idx],sensor)

In [None]:
# idx = 876
# sensor =  torch.tensor(sensor_test_list[idx]).unsqueeze(0).unsqueeze(0)
# out = infer(image_test_list[idx],sensor)

In [None]:
# idx = 1817
# sensor =  torch.tensor(sensor_test_list[idx]).unsqueeze(0).unsqueeze(0)
# out = infer(image_test_list[idx],sensor)