In [None]:
from avcv.all import *
import torch

In [None]:
class Label:
    """Action annotations of one video, queryable by frame index or timestamp.

    ``label`` is the loaded annotation dict; the code reads
    ``label['annotation']['actionAnnotationList']`` (items with ``start``,
    ``end`` and ``action`` keys) and ``label['config']['actionLabelData']``
    (items with ``id`` and ``name`` keys).  ``video`` only needs an ``fps``
    attribute.
    """

    def __init__(self, label, video):
        self.label = label
        self.video = video

    @property
    def actionid2name(self):
        """Dict mapping action id -> action name, built on first access and cached."""
        if not hasattr(self, '_actionid2name'):
            self._actionid2name = {
                entry['id']: entry['name']
                for entry in self.label['config']['actionLabelData']
            }
        return self._actionid2name

    def check_action_at_frame_idx(self, i):
        """Return the names of all actions whose ``[start, end)`` span contains ``i``.

        An ``int`` is treated as a frame index and divided by ``video.fps``;
        any other value is compared against ``start``/``end`` as-is
        (presumably a timestamp in seconds -- confirm against the label tool).
        """
        moment = i / self.video.fps if isinstance(i, int) else i
        annotations = self.label['annotation']['actionAnnotationList']
        return [
            self.actionid2name[span['action']]
            for span in annotations
            if span['start'] <= moment < span['end']
        ]

In [None]:
def get_data(label_path, label_type='food'):
    """Resolve the four files that belong to one labelled video.

    Args:
        label_path: Path to the action-label ``.json`` file of a video.
        label_type: ``'food'`` keeps ``label_path`` as given;
            ``'phone/cigarret'`` rebuilds ``label_path`` under the hard-coded
            ``.../training/yoon/<smoking|mobile_usage>/`` directory.

    Returns:
        dict with keys ``pred_json_path``, ``label_path``, ``video_path`` and
        ``raw_feat_path``.

    Raises:
        NotImplementedError: ``label_type`` is not one of the two known values.
        AssertionError: one of the four files does not exist (message is
            ``'404 <path>'``).
    """
    if label_type not in ('food', 'phone/cigarret'):
        raise NotImplementedError(f'Unsupported label_type: {label_type!r}')

    # The video name drives every derived path.  The original only computed it
    # in the 'food' branch, so the 'phone/cigarret' branch crashed with an
    # UnboundLocalError before doing anything.
    video_name = get_name(label_path)
    pred_json_path = f'/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/{video_name}/annotations/pred_mb2_face_food.json'

    if label_type == 'phone/cigarret':
        t = 'smoking' if 'smoking' in video_name else 'mobile_usage'
        # NOTE(review): the annotator directory 'yoon' is hard-coded here -- confirm
        # this is intended for every phone/cigarette clip.
        label_path = f'/data/DMS_Behavior_Detection/mobile_cigarret_foreignerUS/training/yoon/{t}/{video_name}.json'

    # The video sits next to its label; raw features sit next to the predictions.
    video_path = label_path.replace('.json', '.mp4')
    raw_feat_path = pred_json_path.replace('.json', '_2_raw_outputs.pkl')

    assert osp.exists(pred_json_path), f'404 {pred_json_path}'
    assert osp.exists(label_path), f'404 {label_path}'
    assert osp.exists(video_path), f'404 {video_path}'
    assert osp.exists(raw_feat_path), f'404 {raw_feat_path}'

    return dict(
        pred_json_path=pred_json_path,
        label_path=label_path,
        video_path=video_path,
        raw_feat_path=raw_feat_path,
    )

In [None]:
# Collect the resolved file bundle of every video whose four files all exist;
# videos with a missing file are reported (the 404 message) and skipped.
list_json_paths = []

# Eating clips.
label_paths = glob('/data/DMS_Behavior_Detection/RawVideos/Action_Eating/**/*.json', recursive=True)
for label_path in label_paths:
    try:
        json_paths = get_data(label_path, 'food')
    except Exception as e:
        print(e)
    else:
        list_json_paths.append(json_paths)

# Phone / cigarette clips -- resolved with the generic 'food' layout as well.
label_paths = glob('/data/DMS_Behavior_Detection/mobile_cigarret_foreignerUS/*/**/*.json', recursive=True)
print(f'{len(label_paths)=}')
for label_path in label_paths:
    try:
        json_paths = get_data(label_path, 'food')
    except Exception as e:
        print(e)
    else:
        list_json_paths.append(json_paths)
len(list_json_paths)

404 /home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_eating_0017/annotations/pred_mb2_face_food_2_raw_outputs.pkl
404 /home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hoangnh42_Sensing_Session0_CAMc_1b_2b_3a_4c_5b_6b_7b_8a_9a_10b_11b_12b_13b_14a_15b_16f_17a_18a_19a_20b_eating_0011/annotations/pred_mb2_face_food_2_raw_outputs.pkl
len(label_paths)=67


107

In [None]:
list_json_paths[0]

{'pred_json_path': '/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_eating_0003/annotations/pred_mb2_face_food.json',
 'label_path': '/data/DMS_Behavior_Detection/RawVideos/Action_Eating/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_eating_0003.json',
 'video_path': '/data/DMS_Behavior_Detection/RawVideos/Action_Eating/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_eating_0003.mp4',
 'raw_feat_path': '/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_ea

## Read 2d video feat

In [None]:
def convert_flatten_to_2d_feature(flatten_sample, input_size=416, strides=(8, 16, 32)):
    """Split a flattened multi-scale output back into one square map per stride.

    Args:
        flatten_sample: Array/tensor whose first axis concatenates the cells of
            every scale, in the order given by ``strides``.
        input_size: Side length of the network input; each map has side
            ``input_size // stride``.  Defaults to the previously hard-coded 416.
        strides: Downsampling factor of each scale.  Defaults to the previously
            hard-coded ``(8, 16, 32)``.

    Returns:
        list with one ``(side, side, -1)`` view per stride.  Rows beyond the
        last scale are ignored, as before.
    """
    feats = []
    start = 0
    for stride in strides:
        side = input_size // stride
        stop = start + side * side
        feats.append(flatten_sample[start:stop].reshape(side, side, -1))
        start = stop
    return feats

def read_raw_feat_one_video(path):
    """Load the raw per-frame output file at ``path`` and return it as a plain dict.

    The content is returned exactly as stored; no reshaping into 2-D maps is
    done here.  Callers index the result by image id.
    """
    loaded = mmcv.load(path)
    return dict(loaded)

In [None]:
# 

In [None]:
def anns2tensor(img, anns):
    """Pack detections of one image into an ``(N, 6)`` array.

    Args:
        img: Image record with ``'width'`` and ``'height'`` keys.
        anns: Iterable of annotation dicts with ``'bbox'`` (x, y, w, h),
            ``'score'`` and ``'category_id'`` keys.

    Returns:
        Array of rows ``[x, y, w, h, score, category_id]`` where x/w are divided
        by the image width and y/h by the image height.  With no annotations the
        result has shape ``(0, 6)`` so that column indexing keeps working.
    """
    # The original bound h/w to the image size and then immediately shadowed
    # them with the bbox values; use distinct names instead.
    img_w, img_h = img['width'], img['height']
    rows = []
    for ann in anns:
        x, y, w, h = ann['bbox']
        rows.append([x / img_w, y / img_h, w / img_w, h / img_h,
                     ann['score'], ann['category_id']])
    # reshape is a no-op for non-empty input and turns the empty case into (0, 6).
    return np.array(rows).reshape(-1, 6)

In [None]:
def collect_data(inp):
    """Build the per-frame records of one video.

    Args:
        inp: ``(index, json_paths)`` pair, where ``json_paths`` is a dict with
            ``video_path``, ``label_path``, ``pred_json_path`` and
            ``raw_feat_path`` keys.

    Returns:
        list of tuples ``(None, action, index, img_path, img_id, raw_feature)``,
        one per decoded frame.  ``action`` is the first labelled action at that
        frame, or ``'none'`` when nothing is labelled.
    """
    index, json_paths = inp
    video = mmcv.VideoReader(json_paths['video_path'])
    label = Label(mmcv.load(json_paths['label_path']), video)
    cc = CocoDataset(json_paths['pred_json_path'])
    raw_feat2d = read_raw_feat_one_video(json_paths['raw_feat_path'])

    records = []
    # The decoded frame itself is not used; the loop only walks frame indices,
    # which are also used as image ids into the prediction dataset.
    for frame_idx, _frame in enumerate(video):
        actions = label.check_action_at_frame_idx(frame_idx)
        _anns = cc.gt.imgToAnns[frame_idx]  # looked up but unused (anns2tensor is disabled)
        img = cc.gt.imgs[frame_idx]
        img_path = osp.join(cc.img_dir, img['file_name'])
        action = actions[0] if actions else 'none'
        records.append(
            (None, action, index, img_path, img['id'], raw_feat2d[img['id']])
        )
    return records


data = multi_thread(collect_data, list(enumerate(list_json_paths)))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 107/107 [01:41<00:00,  3.84it/s]2022-09-20 05:01:48.873 | INFO     | avcv.process:multi_thread:33 - multi_thread
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 107/107 [01:41<00:00,  1.05it/s]


In [None]:
# Flatten the per-video record lists into one list of frame records.
all_data = [record for per_video in data for record in per_video]

df = pd.DataFrame(all_data, columns=['tensor', 'action', 'video_index', 'img_path', 'img_id', 'feat1d'])
# Normalise the misspelled label found in some annotation files.
ids = df.index[df['action'] == 'smocking']
df.loc[ids, 'action'] = 'smoking'

In [None]:
# exp = get_exp_by_file('exps/dms/mb2_face_food.py')
# model = exp.get_model()
# st = torch.load('./YOLOX_outputs/mb2_face_food/best_ckpt.pth')['model']
# res = model.load_state_dict(st)
# print(res)
# model.requires_grad_(False).eval().cuda();

In [None]:
# Class ids follow the order in which action names first appear in df.
_id2action = {
    class_id: name
    for class_id, name in enumerate(df['action'].apply(str).unique().tolist())
}
action2id = {name: class_id for class_id, name in _id2action.items()}
print(action2id)

def get_y(actions):
    """Return the integer class id of an action name via the module-level ``action2id``."""
    key = str(actions)
    return action2id[key]

def get_x_tensor(tensor):
    """Reduce a detection array to one row per category id 1..6.

    For every category the row with the highest value in the second-to-last
    column (the score) is kept; categories with no row -- or an empty input --
    contribute a row of zeros.  The result always has shape ``(6, 6)``.
    """
    placeholder = np.zeros([1, 6], dtype=np.float32)
    picked = []
    for cat in range(1, 7):
        best = placeholder
        if len(tensor) != 0:
            candidates = tensor[tensor[:, -1] == cat]
            if len(candidates):
                # [None] keeps the selected row two-dimensional for concatenation.
                best = candidates[candidates[:, -2].argmax()][None]
        picked.append(best)
    return np.concatenate(picked)

{'eating': 0, 'none': 1, 'mobile usage': 2, 'smoking': 3}


In [None]:
# Model input is the stored raw per-frame feature; the hand-crafted
# per-category features (see the disabled call in the trailing comment) are not used.
df['x'] = df.feat1d#multi_process(get_x, df.tensor, 15)

In [None]:
# Integer class target for every frame, looked up through get_y / action2id.
df['y'] = df['action'].apply(get_y)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
labels = df.action.unique().tolist()
labels

['eating', 'none', 'mobile usage', 'smoking']

In [None]:
# Split by video, not by frame: every 5th video goes to validation so that
# frames of one video never appear on both sides.
df_train = df[df.video_index % 5 != 0]
df_val = df[df.video_index % 5 == 0]
print(f'{len(df_train)=}, {len(df_val)=}')

len(df_train)=65748, len(df_val)=18277


In [None]:
def df2xy(df):
    """Return ``(x, y)`` arrays from a frame with ``x`` and ``y`` columns.

    ``x`` is stacked and flattened to shape ``(len(df), -1)``; ``y`` is a 1-D
    array of the target column.
    """
    n_rows = len(df)
    features = np.array(df.x.values.tolist()).reshape(n_rows, -1)
    targets = np.array(df.y.tolist())
    return features, targets

In [None]:
# Materialise the split as numpy arrays; DS / PLData read these module-level names.
xtrain, ytrain = df2xy(df_train)
xval, yval = df2xy(df_val)

In [None]:
# from sklearn.datasets import load_diabetes
# from sklearn.model_selection import cross_val_score
# from sklearn.tree import DecisionTreeClassifier
# # regressor = DecisionTreeRegressor(random_state=0)
# # cross_val_score(regressor, X_train, y_train, cv=10)

# clf = DecisionTreeClassifier(max_depth=3, random_state = 42)

# clf.fit(xtrain, ytrain)

# list_new_categories = ['cigarette', 'food/drink', 'phone', 'face', 'eye', 'mouth']

# len(df_train), len(df_val)

# list_cats = [_['name'] for _ in mmcv.load(list_json_paths[0]['pred_json_path'])['categories']]

# from sklearn import tree

# import matplotlib.pyplot as plt

# plt.figure(figsize=(30,10), facecolor ='k')
# feature_names = []
# for i in range(6):
#     for j in range(6):
#         part = list_cats
#         name = ['x', 'y', 'w', 'h', 'score', 'category'][j]
#         feature_names.append('{}-{}'.format(part, name))

# a = tree.plot_tree(clf,

#                    feature_names = feature_names,

#                    class_names = labels,

#                    rounded = True,

#                    filled = False,

#                    fontsize=14
#                   )
# plt.show()
# test_pred_decision_tree = clf.predict(xval)

In [None]:
df.groupby('action').size()

action
eating           5358
mobile usage    22372
none            50551
smoking          5744
dtype: int64

In [None]:
# from sklearn import metrics

# import seaborn as sns

# import matplotlib.pyplot as plt

# confusion_matrix = metrics.confusion_matrix(yval,  

#                                             test_pred_decision_tree)

# matrix_df = pd.DataFrame(confusion_matrix)

# ax = plt.axes()

# sns.set(font_scale=1.3)

# plt.figure(figsize=(10,7))

# sns.heatmap(matrix_df, annot=True, fmt="g", ax=ax, cmap="magma")

# ax.set_title('Confusion Matrix - Decision Tree')

# ax.set_xlabel("Predicted label", fontsize =15)

# ax.set_xticklabels(labels)

# ax.set_ylabel("True Label", fontsize=15)

# ax.set_yticklabels(list(labels), rotation = 0)

# plt.show()

# Simple classifier

In [None]:
from ple.all import *

## SimpleCLS

In [None]:
import torch.nn as nn, torch
class SimpleCLS(nn.Module):
    """Small MLP classifier: two hidden layers (256, 128) with BatchNorm and ReLU.

    Args:
        in_features: Width of the flat input vector.  Defaults to the
            previously hard-coded 36.
        num_classes: Number of output logits.  Defaults to the previously
            hard-coded 4.

    The order of modules inside ``self.layers`` is unchanged, so state-dict
    keys (``layers.0.weight`` ... ``layers.7.bias``) stay compatible with
    existing checkpoints.
    """

    def __init__(self, in_features=36, num_classes=4):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)`` for ``(batch, in_features)`` input."""
        return self.layers(x)

In [None]:
def get_fuse_conv(s, out_channel=16):
    """Return a one-layer ``nn.Sequential`` holding a strided 11-channel convolution.

    Kernel size and stride are both ``(s, s)``, so the spatial size shrinks by
    a factor of ``s``.
    """
    conv = nn.Conv2d(11, out_channel, kernel_size=(s, s), stride=(s, s))
    return nn.Sequential(conv)
import torch.nn as nn, torch
m 


In [None]:
# Image.open(df.iloc[0].img_path)

In [None]:
# Scratch batch for shape experiments: 100 randomly sampled frames (unseeded, so
# the selection differs between runs) stacked into one float tensor.
_df = df.sample(100)
xraw = x = torch.from_numpy(np.array(_df.feat1d.values.tolist())).cpu().float()


# x1 = nn.functional.max_pool2d(x1, 4)
# x2 = nn.functional.interpolate(x2, (52, 52))
# x3 = nn.functional.interpolate(x3, (52, 52))
# # x.shape
# # fuse = m(x)
# # fuse.shape

In [None]:
# SimpleCLS2D()(x).shape

[{'id': 1, 'name': 'cigarette'},
 {'id': 2, 'name': 'food/drink'},
 {'id': 3, 'name': 'phone'},
 {'id': 4, 'name': 'face'},
 {'id': 5, 'name': 'eye'},
 {'id': 6, 'name': 'mouth'}]

In [None]:
# x3[0, 5+0].max()

# i=6
# Image.open(_df.iloc[i].img_path)

# lcats = [{'id': 1, 'name': 'cigarette'}, {'id': 2, 'name': 'food/drink'}, {'id': 3, 'name': 'phone'}, {'id': 4, 'name': 'face'}, {'id': 5, 'name': 'eye'}, {'id': 6, 'name': 'mouth'}]

# for j in range(6):
#     _x = x_fuse[i, j]
#     print(lcats[j]['name'], _x.max().item(), _x.shape)
#     plt.imshow(_x)
#     plt.show()

## MyLit

In [None]:
class MyLit(LitModel):
    """Project-local alias of ``LitModel``; it adds no behaviour of its own."""
#     def training_step(self, b, i):
        
#         x, y = b
#         y = y.reshape(-1, 1).float()
#         p = self(x)
#         with torch.no_grad():
#             acc = ((p.sigmoid()>0.5).float()==y.float()).float().mean()
#         loss = nn.functional.binary_cross_entropy_with_logits(p.reshape_as(y), y.float())
#         self.log('train_acc', acc, prog_bar=True, on_step=True, on_epoch=True)
#         return loss
    
#     def validation_step(self, b, i):
#         x, y = b
#         y = y.reshape(-1, 1).float()
#         p = self(x)
#         # loss = nn.functional.binary_cross_entropy_with_logits(p.reshape_as(y), y.float())
#         # acc = (p.sigmoid()>0.5).float().mean()
#         # acc = ((p.sigmoid()>0.5).float()==y.float()).float().mean()
#         self.log('val_loss', loss, prog_bar=True, on_epoch=True)
#         self.log('val_acc', acc, prog_bar=True, on_epoch=True)
#         return loss

## PLData

In [None]:
from ple.all import *
import torch.utils.data as td
import pytorch_lightning as pl
from fastcore.all import *

def convert_feat18_to_feat15(x):
    """Drop every 6th column (indices 5, 11, 17) of an 18-column array and return a copy."""
    kept_columns = [col for col in range(18) if col % 6 != 5]
    return x[:, kept_columns].copy()

class DS:
    """Minimal map-style dataset over parallel feature and target arrays."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(feature, target)``: feature as float32 reshaped to ``(-1, 11)``, target as int64."""
        feature = self.x[idx].astype(np.float32).reshape(-1, 11)
        target = self.y[idx].astype(np.int64)
        return feature, target

class PLData(pl.LightningDataModule):
    """Lightning data module over the module-level ``xtrain/ytrain`` and ``xval/yval`` arrays.

    Keyword arguments are stored as attributes; the loaders read
    ``self.batch_size`` and ``self.num_workers``.
    """

    def __init__(self, **kwargs):
        super().__init__()
        store_attr(**kwargs)

    def train_dataloader(self):
        """Shuffled training loader; the incomplete last batch is dropped."""
        dataset = DS(xtrain, ytrain)
        return td.DataLoader(dataset, self.batch_size, num_workers=self.num_workers, drop_last=True, shuffle=True)

    def val_dataloader(self):
        """Validation loader over *all* validation samples, in order.

        ``drop_last`` is off here: dropping the tail batch would silently
        exclude up to ``batch_size - 1`` samples from every validation metric.
        """
        dataset = DS(xval, yval)
        return td.DataLoader(dataset, self.batch_size, num_workers=self.num_workers, drop_last=False)

In [None]:
ds = DS(xtrain, ytrain)

In [None]:
type(model)

yolox.models.yolox.YOLOX

tensor([0.0484, 0.4297, 0.2828, 0.2391])

In [None]:
from tools.demo import Predictor
predictor = Predictor(model, exp)
@patch
def img_preproc(self:Predictor, img):
    """Turn an image (path or already-loaded array) into a batched network input.

    Args:
        img: Either a file path, read with ``cv2.imread``, or an image array.

    Returns:
        ``(tensor, img_info)``: the output of ``self.preproc`` with a leading
        batch axis as float (moved to CUDA, and to half precision when
        ``self.fp16``, if ``self.device == "gpu"``), plus a dict with ``id``,
        ``file_name``, ``height``, ``width``, ``raw_img`` and ``ratio``.
    """
    file_name = None
    if isinstance(img, str):
        file_name = os.path.basename(img)
        img = cv2.imread(img)

    height, width = img.shape[:2]
    img_info = {
        "id": 0,
        "file_name": file_name,
        "height": height,
        "width": width,
        "raw_img": img,
        # Scale factor that fits the image inside test_size.
        "ratio": min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]),
    }

    resized, _ = self.preproc(img, None, self.test_size)
    batch = torch.from_numpy(resized).unsqueeze(0).float()
    if self.device == "gpu":
        batch = batch.cuda()
        if self.fp16:
            batch = batch.half()  # to FP16

    return batch, img_info

torch.Size([1, 3, 416, 416])

In [None]:
img, img_info = predictor.img_preproc(df.iloc[0].img_path)

'/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_eating_0003/images/000001.jpg'

torch.Size([1, 3549, 11])

## TriStageExp

In [None]:
class TriStageExp(BaseExp):
    """Experiment definition wiring the model, data module and trainer together.

    Besides the named arguments, extra keyword arguments are stored as
    attributes; ``get_model`` reads ``self.model``, ``self.max_epochs`` and
    ``self.num_lr_cycles`` (the last one is presumably supplied by ``BaseExp``
    when not passed -- TODO confirm).
    """

    def __init__(self, exp_name='EXPNAME', 
                 batch_size=64, 
                 num_workers=2, 
                 devices=2,
                 strategy='dp', 
                 **kwargs):
        super().__init__()
        store_attr(**kwargs)

    def get_data_loader(self):
        """Return a fresh ``PLData`` configured with this experiment's batch size and workers."""
        return PLData(batch_size=self.batch_size, num_workers=self.num_workers)

    def get_model(self):
        """Wrap ``self.model`` in ``MyLit`` with Adam, a cosine LR schedule and focal loss."""
        train_loader = self.get_data_loader().train_dataloader()
        lr_schedule_fn = fn_schedule_cosine_with_warmpup_decay_timm(
            num_epochs=self.max_epochs,
            num_steps_per_epoch=len(train_loader) // self.devices,
            num_epochs_per_cycle=self.max_epochs // self.num_lr_cycles,
            min_lr=1/100,
            cycle_decay=0.7,
        )

        def make_optimizer(params):
            return torch.optim.Adam(params)

        return MyLit(
            self.model,
            create_optimizer_fn=make_optimizer,
            create_lr_scheduler_fn=lr_schedule_fn,
            loss_fn=FocalLoss(),
        )

    def get_trainer(self, **kwargs):
        """Build the trainer for this experiment; extra kwargs are forwarded unchanged."""
        from ple.trainer import get_trainer
        trainer_options = dict(
            max_epochs=self.max_epochs,
            gpus=self.devices,
            strategy=self.strategy,
        )
        return get_trainer(self.exp_name, **trainer_options, **kwargs)
exp = TriStageExp(exp_name='simple_nn', batch_size=256, devices=1, model=SimpleCLS2D(), max_epochs=30)
# print(exp)

In [None]:
lit_model = exp.get_model()


2022-09-20 06:48:55.766 | INFO     | ple.lit_model:fn_schedule_cosine_with_warmpup_decay_timm:66 - num_cycles=3
2022-09-20 06:48:55.769 | INFO     | ple.trainer:get_trainer:34 - Log root dir: lightning_logs/simple_nn/40
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
trainer = exp.get_trainer()
trainer.fit(lit_model, exp.get_data_loader());

Missing logger folder: lightning_logs/simple_nn/39/tb_logs/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name    | Type        | Params
----------------------------------------
0 | model   | SimpleCLS2D | 294 K 
1 | loss_fn | FocalLoss   | 0     
----------------------------------------
294 K     Trainable params
0         Non-trainable params
294 K     Total params
1.176     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
lit_model.state_dict().keys()

odict_keys(['model.layers.0.weight', 'model.layers.0.bias', 'model.layers.1.weight', 'model.layers.1.bias', 'model.layers.1.running_mean', 'model.layers.1.running_var', 'model.layers.1.num_batches_tracked', 'model.layers.3.weight', 'model.layers.3.bias', 'model.layers.4.weight', 'model.layers.4.bias', 'model.layers.4.running_mean', 'model.layers.4.running_var', 'model.layers.4.num_batches_tracked', 'model.layers.7.weight', 'model.layers.7.bias'])

In [None]:
# Restore the trained classifier for inference.
lit_model = exp.get_model()

# load_from_checkpoint is a classmethod that returns the restored module; the
# original discarded that return value and only worked because the new instance
# shared (and therefore overwrote) lit_model.model.  Keep the returned module.
lit_model = MyLit.load_from_checkpoint('lightning_logs/simple_nn/39/ckpts/epoch=7-val_acc=0.73.ckpt', model=lit_model.model)
lit_model = lit_model.cuda().requires_grad_(False).eval()

2022-09-20 06:49:04.058 | INFO     | ple.lit_model:fn_schedule_cosine_with_warmpup_decay_timm:66 - num_cycles=3


In [None]:
from dms_drowsiness.video_writer import Board

In [None]:
# lit_model(feat)

In [None]:
# Pick one random held-out video to visualise.  The ids must come from df_val:
# the original read them from df_train, so the "validation" preview was drawn
# from videos the classifier had been trained on.
val_ids = df_val.video_index.unique().tolist()
val_json_paths = [list_json_paths[i] for i in val_ids]
# json_paths = list_json_paths[val_ids[-4]]
# json_paths = [j for j in val_json_paths if '' in get_name(j['label_path'])][0]
# val_json_paths
json_paths = np.random.choice(val_json_paths)
json_paths

{'pred_json_path': '/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hamid_smoking_0158/annotations/pred_mb2_face_food.json',
 'label_path': '/data/DMS_Behavior_Detection/mobile_cigarret_foreignerUS/training/hamid/smoking/hamid_smoking_0158.json',
 'video_path': '/data/DMS_Behavior_Detection/mobile_cigarret_foreignerUS/training/hamid/smoking/hamid_smoking_0158.mp4',
 'raw_feat_path': '/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hamid_smoking_0158/annotations/pred_mb2_face_food_2_raw_outputs.pkl'}

Unsupported operator aten::add encountered 10 time(s)
Unsupported operator aten::sigmoid encountered 6 time(s)
The following submodules of the model were never called during the trace of the graph. They may be unused, or they were accessed by direct calls to .forward() or via other python methods. In the latter case they will have zeros for statistics, though their statistics will still contribute to their parent calling module.
head.bcewithlog_loss, head.iou_loss, head.l1_loss


DAT CODE- FLOP: 0.2902592


2022-09-20 07:06:14.019 | INFO     | __main__:get_model:20 - loading checkpoint done.


input size: torch.Size([1, 416, 416, 3])


In [None]:
# exp = get_exp_by_file('exps/dms/mb2_face_food.py')
# model = exp.get_model()
# st = torch.load('YOLOX_outputs/mb2_face_food/best_ckpt.pth')['model']
# model.load_state_dict(st)
# model.eval().requires_grad_(False);
# mb2_yolox = model.cpu()

class ModelWrapper(nn.Module):
    """Chain the detector and the action classifier into one exportable module.

    Args:
        detector: Module applied to the image first.  Defaults to the
            module-level ``mb2_yolox`` (the previous, implicit behaviour).
        classifier: Module applied to the detector output.  Defaults to the
            module-level ``lit_model.model``.  It is switched to eval mode and
            moved to CPU, as before.
    """

    def __init__(self, detector=None, classifier=None):
        super().__init__()
        # Fall back to the notebook globals only when nothing is passed, so the
        # wrapper can also be built (and tested) without hidden kernel state.
        self.mb2_yolox = mb2_yolox if detector is None else detector
        head = lit_model.model if classifier is None else classifier
        self.cls = head.eval().cpu()

    def forward(self, img):
        """Return the classifier output (no softmax is applied here) for ``img``."""
        x = self.mb2_yolox(img)
        x = self.cls(x)
        return x
    
model_wraper = ModelWrapper().cpu()

In [None]:
# Export the detector+classifier wrapper to ONNX using a dummy single-channel
# 416x416 input.
# NOTE(review): the output path has no '.onnx' extension, and the name says
# 'softmax' although ModelWrapper.forward applies none -- confirm both are intended.
torch.onnx.export(model_wraper,
                    torch.randn(1, 1, 416, 416),
                    'yolox_mb2_classifer_4_softmax',
                    export_params=True,
                    opset_version=11,
                    # do_constant_folding=True,
                    input_names = ['input'],
                    output_names = ['output'])

'/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_eating_0003/images/000001.jpg'

In [None]:
input = predictor.img_preproc('/home/anhvth8/gitprojects/YOLOX/.cache/raw_video_predict_face_food/hungng_Sensing_Session0_CAMc_1b_2c_3a_4c_5b_6b_7b_8a_9a_10a_11b_12a_13a_14b_15b_16f_17a_18a_19a_20b_eating_0003/images/000001.jpg')[0]

In [None]:
model_wraper.eval();

In [None]:
model_wraper(input[:,:1]).squeeze().softmax(0).max(0)

torch.return_types.max(
values=tensor(0.9986, grad_fn=<MaxBackward0>),
indices=tensor(0))

In [None]:
# img = cc.gt.imgs[0] 
# img_path = osp.join(cc.img_dir, img['file_name'])
# inp = predictor.img_preproc(img_path)[0].cuda()
# model_wraper(inp)
# img_path = osp.join(cc.img_dir, img['file_name'])

In [None]:
model_wraper(inp).max(0)

torch.return_types.max(
values=tensor(0.9235, device='cuda:0'),
indices=tensor(3, device='cuda:0'))

In [None]:
from IPython.display import clear_output
cc = CocoDataset(json_paths['pred_json_path'])
label = Label(mmcv.load(json_paths['label_path']), mmcv.VideoReader(json_paths['video_path']))
# lit_model.cpu()
model_wraper.cuda()
vis_list = []
raw_feat = read_raw_feat_one_video(json_paths['raw_feat_path'])
def fv(img_id):
    """Render one frame: detections, predicted action (if not 'none') and ground-truth label."""
    board = Board(num_lines=4, line_w=500)
    frame = cc.visualize(img_id, score_thr=0.05)
    img = cc.gt.imgs[img_id]
    img_path = osp.join(cc.img_dir, img['file_name'])
    # NOTE(review): the export/sanity cells feed a single channel (input[:, :1]);
    # here the full preprocessed tensor is passed -- confirm which one the
    # detector expects.
    inp = predictor.img_preproc(img_path)[0].cuda()
    # The wrapper returns un-normalised scores with a leading batch axis.  Taking
    # .max(0) directly reduced over the batch axis and left one value per class,
    # so .item() raised "only one element tensors can be converted to Python
    # scalars".  Drop the batch axis and normalise first.
    with torch.no_grad():
        probs = model_wraper(inp).squeeze().softmax(0)
    score, pred_cls = probs.max(0)
    action = _id2action[pred_cls.item()]
    if action != 'none':
        board.set_line_text(1, action, score.item())
    lbl = label.check_action_at_frame_idx(img_id)
    if len(lbl):
        board.set_line_text(2, f'Label: {lbl[0]}')

    vis = board.img_concat(frame)
    return vis
vis_list = multi_thread(fv, cc.img_ids, 1)
clear_output()
images_to_video(vis_list, 'vis.mp4', output_size=(800, 300))


  0%|                                                                                                                                                                                                                    | 0/917 [00:00<?, ?it/s][A

ValueError: only one element tensors can be converted to Python scalars

In [None]:
import torch

In [None]:
torch.