In [1]:
!./setup.sh

Looking in indexes: http://mirrors.tencentyun.com/pypi/simple
Collecting albumentations
  Downloading http://mirrors.tencentyun.com/pypi/packages/b0/be/3db3cd8af771988748f69eace42047d5edebf01eaa7e1293f3b3f75f989e/albumentations-1.0.0-py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 7.2 MB/s eta 0:00:011
[?25hCollecting scikit-image>=0.16.1
  Downloading http://mirrors.tencentyun.com/pypi/packages/0e/ba/53e1bfbdfd0f94514d71502e3acea494a8b4b57c457adbc333ef386485da/scikit_image-0.17.2-cp36-cp36m-manylinux1_x86_64.whl (12.4 MB)
[K     |████████████████████████████████| 12.4 MB 38.6 MB/s eta 0:00:01
Collecting opencv-python-headless>=4.1.1
  Downloading http://mirrors.tencentyun.com/pypi/packages/cc/09/3ed889f37b1bb1dff85f10d91b1f9e8b8a812a7e8413c4e906c21aab9469/opencv_python_headless-4.5.2.52-cp36-cp36m-manylinux2014_x86_64.whl (38.2 MB)
[K     |████████████████████████████████| 38.2 MB 17.2 MB/s eta 0:00:01
Collecting networkx>=2.0
  Downloading http://mirrors.

In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms as T
import torchvision.models as models
import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
# torchvision preprocessing: resize to 256, center-crop to 224, convert to a tensor.
transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
])

# albumentations preprocessing applied to every decoded video frame:
# resize to 1024x1024, normalize with the ImageNet statistics, convert to a tensor.
A_transform = A.Compose([
    A.Resize(height=1024, width=1024),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],  # mean on ImageNet
        std=[0.229, 0.224, 0.225],  # std on ImageNet
    ),
    ToTensorV2(),
])

In [2]:
import cv2
import sys
import numpy as np
import torch
import torch.nn as nn
def frame_iterator_list(filename, every_ms=1000, max_num_frames=300):
    """Decode a video and return preprocessed frames sampled at a fixed interval.

    Args:
        filename: Path of the video file to open with OpenCV.
        every_ms: Minimum spacing, in milliseconds of video time, between two
            retrieved frames.
        max_num_frames: Upper bound on the number of frames returned.

    Returns:
        A list with one entry per retrieved frame. Each entry is the output of
        the notebook-level ``A_transform`` pipeline with a leading batch
        dimension added by ``unsqueeze(0)`` (with the ``A_transform`` defined in
        this notebook that is a ``[1, 3, 1024, 1024]`` tensor). The list is
        empty when the file cannot be opened or contains no frames.
    """
    video_capture = cv2.VideoCapture()
    try:
        if not video_capture.open(filename):
            # Report on stderr; the previous ``print(sys.stderr, ...)`` printed
            # the stream object itself to stdout (Python 2 leftover).
            print('Error: Cannot open video file ' + filename, file=sys.stderr)
            # Always hand back a list so callers can use len() / slicing.
            return []

        last_ts = -99999  # The timestamp of last retrieved frame.
        frame_all = []
        while len(frame_all) < max_num_frames:
            # Skip frames until `every_ms` of video time has elapsed since the
            # last retrieved frame.
            while video_capture.get(cv2.CAP_PROP_POS_MSEC) < every_ms + last_ts:
                if not video_capture.read()[0]:
                    return frame_all

            last_ts = video_capture.get(cv2.CAP_PROP_POS_MSEC)
            has_frames, frame = video_capture.read()
            if not has_frames:
                break

            # Convert OpenCV's BGR layout to RGB exactly once. The previous code
            # additionally reversed the channel axis right after this call,
            # which flipped the frame back to BGR before the ImageNet (RGB)
            # normalization in A_transform.
            # NOTE(review): features saved by earlier runs were computed from
            # BGR input; re-extract them if consistency with new runs matters.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            augmented = A_transform(image=frame)
            frame_all.append(augmented['image'].unsqueeze(0))

        return frame_all
    finally:
        # Free the decoder handle on every exit path.
        video_capture.release()

In [3]:
import torch
import os
def extract_features_from_videos(file_name,save_path,oom_files,save_flag=False,video_dir=None):
    """Run the global ``model`` over sampled frames of one video and optionally save the features.

    Args:
        file_name: Video file name (with extension) inside the video directory.
        save_path: Output directory prefix; it is concatenated directly with the
            video id, so it must end with a path separator.
        oom_files: Collection of file names that previously failed; those are skipped.
        save_flag: When true, the feature array is written to
            ``save_path + <video id> + '.npy'``.
        video_dir: Directory prefix of the video. Defaults to the notebook-level
            ``video_path`` global, which is what the function always used before.

    Returns:
        0 if the video was skipped (features already on disk, listed in
        ``oom_files``, or no frame could be decoded), 1 if features were
        computed and saved, 2 if features were computed but not saved.
    """
    # splitext handles extensions of any length (the old `[:-4]` assumed 3 letters).
    video_id = os.path.splitext(file_name)[0]
    save_name = save_path+video_id+'.npy'
    if os.path.exists(save_name) or file_name in oom_files:
        return 0

    if video_dir is None:
        video_dir = video_path  # notebook-level global set by the calling cell
    frame_list = frame_iterator_list(video_dir+file_name,every_ms=1000)
    if not frame_list:
        # Unreadable or empty video: nothing to batch. Previously this surfaced
        # as a TypeError (None) or as a RuntimeError mistaken for an OOM.
        print('No frames decoded, skipping ' + file_name)
        return 0

    # Run on whichever device the model lives on instead of assuming cuda:0.
    try:
        first_param = next(iter(model.parameters()), None)
    except AttributeError:
        first_param = None
    device = first_param.device if first_param is not None else torch.device('cuda')

    # To avoid exhausting GPU memory, feed frame_list to the model in three passes.
    # Same boundaries as before; empty slices (videos with < 3 frames) are skipped
    # because torch.cat() rejects an empty list.
    third = len(frame_list)//3
    bounds = [0, third, third*2, len(frame_list)]
    chunk_features = []
    with torch.no_grad():
        for start, stop in zip(bounds[:-1], bounds[1:]):
            if start == stop:
                continue
            frame_batch = torch.cat(frame_list[start:stop],dim=0).to(device)
            pred = model(frame_batch)
            chunk_features.append(pred.detach().cpu().numpy())
            # Drop the device tensors before emptying the cache; while they are
            # still referenced the allocator cannot release their memory.
            del frame_batch, pred
            if device.type == 'cuda':
                torch.cuda.empty_cache()

    video_feature = np.concatenate(chunk_features,axis=0)
    if save_flag:
        out_dir = os.path.dirname(save_name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        np.save(save_name,video_feature)
        return 1
    return 2

In [2]:
import timm
import torch.nn as nn
class CustomResNext(nn.Module):
    """timm ``resnext50_32x4d`` backbone with its ``fc`` head replaced by identity.

    The forward pass therefore returns whatever the backbone feeds into its
    final fully connected layer instead of class logits.

    Args:
        pretrained: Forwarded to ``timm.create_model``. Defaults to True (the
            previous hard-coded behaviour); pass False to skip the weight
            download, e.g. when weights are loaded from a local checkpoint.
    """
    def __init__(self, pretrained=True):
        super(CustomResNext, self).__init__()
        self.model = timm.create_model('resnext50_32x4d',pretrained=pretrained)
        # Drop the classification head so the network acts as a feature extractor.
        self.model.fc = nn.Identity()

    def forward(self,x):
        """Return the backbone output for input batch ``x``."""
        return self.model(x)
class CustomEfficientNet(nn.Module):
    """timm ``efficientnet_b4`` backbone with its ``classifier`` head replaced by identity.

    The forward pass returns the input of the removed classifier (the original
    author's note gives the feature width as 1792).

    Args:
        pretrained: Forwarded to ``timm.create_model``. Defaults to True (the
            previous hard-coded behaviour); pass False to skip the weight
            download, e.g. when weights are loaded from a local checkpoint.
    """
    def __init__(self, pretrained=True):
        super(CustomEfficientNet, self).__init__()
        self.model = timm.create_model('efficientnet_b4',pretrained=pretrained) # 1792
        # Drop the classification head so the network acts as a feature extractor.
        self.model.classifier = nn.Identity()

    def forward(self,x):
        """Return the backbone output for input batch ``x``."""
        return self.model(x)
# Build the EfficientNet feature extractor on the GPU and switch it to inference mode.
model = CustomEfficientNet().to('cuda')
model.eval()

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b4_ra2_320-7eb33cd5.pth" to /home/tione/.cache/torch/checkpoints/efficientnet_b4_ra2_320-7eb33cd5.pth


URLError: <urlopen error EOF occurred in violation of protocol (_ssl.c:833)>

In [5]:
from tqdm import tqdm
import torch
import os
import cv2

# Extract and save features for every training video.
video_path = '../dataset/videos/video_5k/train_5k/'
# Sorted so the processing order is the same on every run.
file_names = sorted(os.listdir(video_path))
save_path = '../dataset/eff_frames/train_5k/'
# np.save does not create missing directories.
os.makedirs(save_path, exist_ok=True)

torch.cuda.empty_cache()
oom_files = []  # videos whose extraction raised a RuntimeError (typically CUDA OOM)
with torch.no_grad():
    for file_name in tqdm(file_names):
        try:
            extract_features_from_videos(file_name,save_path,oom_files,save_flag=True)
        except RuntimeError as err:
            # Keep going, but show the reason: not every RuntimeError is an OOM.
            oom_files.append(file_name)
            print(file_name + ': ' + str(err))
            torch.cuda.empty_cache()
            


100%|██████████| 5000/5000 [02:50<00:00, 29.30it/s]


In [6]:
from tqdm import tqdm
import torch
import os
import cv2

# Extract and save features for every test video.
video_path = '../dataset/videos/video_5k/test_5k/'
# Sorted so the processing order is the same on every run.
file_names = sorted(os.listdir(video_path))
save_path = '../dataset/eff_frames/test_5k/'
# np.save does not create missing directories.
os.makedirs(save_path, exist_ok=True)

torch.cuda.empty_cache()
oom_files = []  # videos whose extraction raised a RuntimeError (typically CUDA OOM)
with torch.no_grad():
    for file_name in tqdm(file_names):
        try:
            extract_features_from_videos(file_name,save_path,oom_files,save_flag=True)
        except RuntimeError as err:
            # Keep going, but show the reason: not every RuntimeError is an OOM.
            oom_files.append(file_name)
            print(file_name + ': ' + str(err))
            torch.cuda.empty_cache()
            

100%|██████████| 5000/5000 [02:05<00:00, 39.94it/s] 


In [1]:
# Create the corresponding path-list .txt file
import linecache
import os
# Rewrite val_good.txt into val_resnet.txt: each record keeps its lines, but the
# first line (the video path) is re-rooted under ../dataset/frames/train_5k/.
file_name = '../dataset/tagging/GroundTruth/datafile/val_resnet.txt'
train_good = '/home/tione/notebook/dataset/tagging/GroundTruth/datafile/train_good.txt'
val_good = '/home/tione/notebook/dataset/tagging/GroundTruth/datafile/val_good.txt'
data_num_per_sample  = 6  # lines per record in the input, including the blank separator

# linecache keeps file contents for the life of the kernel; drop a stale copy
# in case val_good was rewritten since it was last read.
linecache.checkcache(val_good)

with open(file_name,'w') as f:
    for index in range(500):
        first_line = data_num_per_sample*index+1
        # Read every line of the record except its trailing blank separator.
        data_list = [
            linecache.getline(val_good,line_i).strip('\r\n')
            for line_i in range(first_line, first_line+data_num_per_sample-1)
        ]
        if not data_list[0]:
            # linecache returns '' past the end of the file: stop instead of
            # writing empty records.
            break
        video_file_name = os.path.basename(data_list[0])
        data_list[0] = '../dataset/frames/train_5k/'+video_file_name

        for line in data_list:
            f.write(line+'\r\n')
        f.write('\r\n')

In [4]:
# Create the corresponding path-list .txt file
import linecache
import os
# Rewrite self_sup_VAT.txt into self_sup_VAT_R.txt: each record keeps its lines,
# but the first line (the video path) is re-rooted under the train_5k or test_5k
# frames directory depending on the fourth component of the original path.
file_name = '../dataset/tagging/GroundTruth/datafile/self_sup_VAT_R.txt'
selfsup_file = './self_sup_VAT.txt'
data_num_per_sample  = 4  # lines per record in the input, including the blank separator

# linecache keeps file contents for the life of the kernel; drop a stale copy
# in case self_sup_VAT.txt was regenerated since it was last read.
linecache.checkcache(selfsup_file)

with open(file_name,'w') as f:
    for index in range(10000):
        first_line = data_num_per_sample*index+1
        # Read every line of the record except its trailing blank separator.
        data_list = [
            linecache.getline(selfsup_file,line_i).strip('\r\n')
            for line_i in range(first_line, first_line+data_num_per_sample-1)
        ]
        if not data_list[0]:
            # linecache returns '' past the end of the file: stop here instead of
            # failing with IndexError on the split below.
            break
        video_file_name = os.path.basename(data_list[0])
        if data_list[0].split('/')[3]=='tagging_dataset_train_5k':
            data_list[0] = '../dataset/frames/train_5k/'+video_file_name
        else:
            data_list[0] = '../dataset/frames/test_5k/'+video_file_name
        for line in data_list:
            f.write(line+'\r\n')
        f.write('\r\n')

In [11]:
# Count the lines of the self-supervised path list. Text mode already applies
# universal newlines, so the deprecated 'rU' flag (removed in Python 3.11) is
# not needed, and the `with` block closes the handle.
with open(selfsup_file) as f:
    count = sum(1 for _ in f)

  if __name__ == '__main__':


In [12]:
count

36315

In [1]:
from dataloader.dataloader import TestingDataset,MultimodaFeaturesDataset
import yaml
import linecache
# Build ./self_sup_VAT.txt: video / audio / text paths of the training records
# listed in train_full.txt, followed by those of the samples in TestingDataset.
# Every record is three path lines plus a blank separator line.
config_path = './config/config.yaml'
# NOTE(review): safe_load replaces the Loader-less yaml.load(), which warns on
# PyYAML 5.x and raises on PyYAML >= 6. Switch to
# yaml.load(..., Loader=yaml.FullLoader) if the config relies on non-standard tags.
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)
dataset = TestingDataset(config['DatasetConfig'])
train_full_path = '/home/tione/notebook/dataset/tagging/GroundTruth/datafile/train_full.txt'
train_test = './self_sup_VAT.txt'

# linecache keeps file contents for the life of the kernel; drop a stale copy.
linecache.checkcache(train_full_path)

with open(train_test,'w') as f:
    # Part 1: records from train_full.txt (6 lines each, the last one blank).
    for index in range(5000):
        data_list = [
            linecache.getline(train_full_path,line_i).strip('\r\n')
            for line_i in range(6*index+1,6*(index+1))
        ]
        if not data_list[0]:
            # Past the end of train_full.txt: stop instead of writing empty records.
            break
        # Lines 0, 1 and 3 of a record are written as the video, audio and text
        # entries. The temporaries deliberately avoid the name `video_path`,
        # which the feature-extraction cells use as a global directory.
        video_line = data_list[0]+'\r\n'
        audio_line = data_list[1]+'\r\n'
        text_line = data_list[3]+'\r\n'
        f.write(video_line)
        f.write(audio_line)
        f.write(text_line)
        f.write('\r\n')

    # Part 2: records from the testing dataset, appended to the same file.
    for index in range(5000):
        feat_dict = dataset[index]
        f.write(feat_dict['video_path']+'\r\n')
        f.write(feat_dict['audio_path']+'\r\n')
        f.write(feat_dict['text_path']+'\r\n')
        f.write('\r\n')



In [2]:
# Count the lines of the generated path list. Text mode already applies
# universal newlines, so the deprecated 'rU' flag (removed in Python 3.11) is
# not needed, and the `with` block closes the handle.
with open('./self_sup_VAT.txt') as f:
    count = sum(1 for _ in f)

  if __name__ == '__main__':


In [3]:
count

40000