In [1]:
import torch
print(torch.__version__)
import torchvision
print(torchvision.__version__)
import numpy as np
import transform as T
def normalize(tensor):
    """Center a tensor on its mean and scale it into the interval [-1, 1].

    Args:
        tensor: Floating-point ``torch.Tensor`` of any shape.

    Returns:
        ``tensor - tensor.mean()`` divided by its largest absolute value.
        When every element is equal, the centered tensor is all zeros and is
        returned as-is instead of the NaNs that 0/0 would produce.
    """
    centered = tensor - tensor.mean()
    peak = centered.abs().max()
    # A constant input has peak == 0; dividing would fill the result with NaN
    # and propagate through every later computation.
    if peak == 0:
        return centered
    return centered / peak
# Clip preprocessing pipeline; Compose applies the steps in the order listed.
# `normalize` is the mean-centering helper defined in this notebook, and the
# `T.*` steps come from the `transform` module imported as `T`.
# NOTE(review): judging by their names, the flip and crop steps are random,
# yet this notebook only runs inference — confirm that non-deterministic
# preprocessing is intended here.
transform_video = torchvision.transforms.Compose(
    [
        T.ToFloatTensorInZeroOne(),
        T.Resize((128, 171)),
        T.RandomHorizontalFlip(),
        normalize,
        T.RandomCrop((112, 112)),
    ]
)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")


import xml.etree.ElementTree as ET
# Parse ACCEDEmovies.xml; `root` is the root element of that document.
root = ET.parse('/root/yangsen-data/LIRIS-ACCEDE-movies/ACCEDEmovies.xml').getroot()
# Filled by the loop below: text of each <movie> tag -> its <length> in seconds.
movie_length = {}
def get_sec(time_str: str) -> int:
    """Convert an ``H:M:S`` timestamp into a total number of seconds.

    Raises ``ValueError`` when the string does not consist of exactly three
    colon-separated integer fields.
    """
    hours, minutes, seconds = time_str.split(':')
    # Horner form of hours * 3600 + minutes * 60 + seconds.
    return (int(hours) * 60 + int(minutes)) * 60 + int(seconds)
# Record every movie's duration in whole seconds, keyed by the text of its
# <movie> tag.
for movie_node in root:
    title = movie_node.find('movie').text
    movie_length[title] = get_sec(movie_node.find('length').text)

# [movie name, number] pairs to run inference on; the number is presumably the
# movie's frame rate (per the variable name). Only the name (index 0) is read
# by the inference loop in this notebook. The full list is kept commented out
# below; only the first entry is active.
fpsMovie = [['After_The_Rain',23.976]]
# fpsMovie = [['After_The_Rain',23.976],
#     ['Attitude_Matters',29.97],
#     ['Barely_legal_stories',23.976],
#     ['Between_Viewings',25],
#     ['Big_Buck_Bunny',24],
#     ['Chatter',24],
#     ['Cloudland',25],
#     ['Damaged_Kung_Fu',25],
#     ['Decay',23.976],
#     ['Elephant_s_Dream',24],
#     ['First_Bite',25],
#     ['Full_Service',29.97],
#     ['Islands',23.976],
#     ['Lesson_Learned',29.97],
#     ['Norm',25],
#     ['Nuclear_Family',23.976],
#     ['On_time',30],
#     ['Origami',24],
#     ['Parafundit',24],
#     ['Payload',25],
#     ['Riding_The_Rails',23.976],
#     ['Sintel',24],
#     ['Spaceman',23.976],
#     ['Superhero',29.97],
#     ['Tears_of_Steel',24],
#     ['The_room_of_franz_kafka',29.786],
#     ['The_secret_number',23.976],
#     ['To_Claire_From_Sonny',23.976],
#     ['Wanted',25],
#     ['You_Again',29.97]]

import pandas as pd
import numpy as np
import torchaudio
print(torchaudio.__version__)
# Directory holding the movie files. MOVIE.__getitem__ joins it to the file
# name by plain string concatenation, so the trailing slash is required.
path_prefix = '/root/yangsen-data/LIRIS-ACCEDE-movies/movies/'
class MOVIE(torch.utils.data.Dataset):
    """Dataset that slices one movie file into consecutive fixed-length clips.

    Item ``idx`` covers seconds ``idx * clip_seconds`` through
    ``(idx + 1) * clip_seconds`` of the movie. Each clip is passed through
    ``transform_video`` and then truncated or padded along dimension 1 (the
    frame axis) to exactly ``max_length`` entries.

    Args:
        name: Movie file name; must be a key of ``movie_length`` and name a
            file under ``path_prefix``.
        clip_seconds: Duration of each clip in seconds (default 8).
    """

    def __init__(self, name: str, clip_seconds: int = 8):
        # Zero-argument super() initialises the Dataset base class;
        # `super(MOVIE).__init__()` only initialised an unbound super object.
        super().__init__()
        self.name = name
        self.clip_seconds = clip_seconds
        self.max_length = 200
        # Audio settings; not used by __getitem__ in this class.
        self.max_audio_length = 400000
        self.mfcc_transformer = torchaudio.transforms.MFCC()
        self.seconds_length = movie_length[name]

    def __len__(self):
        """Return the number of whole clips; a trailing partial clip is dropped."""
        return self.seconds_length // self.clip_seconds

    def __getitem__(self, idx: int):
        """Load, transform and length-normalize clip ``idx``.

        Returns:
            Dict with ``'name'`` (movie file name), ``'start'`` (clip start in
            seconds) and ``'video'`` (the transformed clip tensor).
        """
        start = idx * self.clip_seconds
        # Read from self.name, not a module-level `name`: the dataset must load
        # its own movie regardless of what the surrounding notebook has bound.
        data = torchvision.io.read_video(
            path_prefix + self.name,
            start_pts=start,
            end_pts=start + self.clip_seconds,
            pts_unit='sec',
        )
        transformed_video = transform_video(data[0])

        n_frames = transformed_video.shape[1]
        if n_frames > self.max_length:
            transformed_video = transformed_video[:, :self.max_length, :, :]
        elif n_frames < self.max_length:
            # Pad short clips by repeating the final frame.
            last_frame = transformed_video[:, -1:, :, :]
            padding = last_frame.repeat(1, self.max_length - n_frames, 1, 1)
            transformed_video = torch.cat([transformed_video, padding], 1)

        return {'name': self.name,
                'start': start,
                'video': transformed_video}


# Build an r3d_18 network whose final fully connected layer is replaced by a
# 2-output head (read later in this notebook as valence and arousal), then
# load the saved weights into it.
video_model = torchvision.models.video.r3d_18(pretrained=False, progress=True)
num_ftrs = video_model.fc.in_features
video_model.fc = torch.nn.Linear(num_ftrs, 2)
# map_location lets a checkpoint that was saved from a GPU load on a CPU-only
# machine, matching the CPU fallback used when `device` is chosen.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
video_model.load_state_dict(torch.load('video_model.model', map_location=device))
# Inference mode: disables dropout and freezes batch-norm statistics.
video_model.eval()
video_model = video_model.to(device)

# Run the model over every clip of every listed movie.
# VA maps "<movie>.mp4" -> numpy array with one row of model outputs per clip.
VA = {}
for movie_entry in fpsMovie:
    name = movie_entry[0] + '.mp4'
    dataset = MOVIE(name)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
    batch_outputs = []
    for data in dataloader:
        inputs = data['video'].to(device)
        # Skip batches that have a zero-sized dimension. The shape is tested
        # directly because a `continue` inside a loop over the dimensions only
        # advances that inner loop and never skips the batch.
        if 0 in inputs.shape:
            continue
        with torch.no_grad():
            print (inputs.shape)
            outputs = video_model(inputs)
        batch_outputs.append(outputs)
    # Concatenate once per movie instead of re-copying the running tensor on
    # every batch; fall back to an empty tensor when nothing was processed.
    line = torch.cat(batch_outputs) if batch_outputs else torch.tensor([]).to(device)
    VA[name] = line.cpu().numpy()

1.4.0
0.5.0
0.4.0
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([8, 3, 200, 112, 112])
torch.Size([1, 3, 200, 112, 112])
skip bad movie "After_The_Rain.mp4", which read into a empty image [3, 0, 1, 1]


In [11]:
# Split each movie's prediction array into two plain-Python float lists:
# column 0 -> valence, column 1 -> arousal.
VA_list = {}
for movie_name, predictions in VA.items():
    valence = [float(row[0]) for row in predictions]
    arousal = [float(row[1]) for row in predictions]
    VA_list[movie_name] = [valence, arousal]

In [13]:
import json

# Echo the predictions, then write them to predictVA.json.
print(f'answer: {VA_list}')
with open('predictVA.json', 'w') as out_file:
    json.dump(VA_list, out_file)

answer: {'After_The_Rain.mp4': [[2.542762041091919, 2.546539545059204, 2.5694050788879395, 2.6589908599853516, 2.574186325073242, 2.6745223999023438, 2.6937193870544434, 2.614956855773926, 2.7494566440582275, 2.732647657394409, 2.7044167518615723, 2.756711006164551, 2.7459716796875, 2.743102550506592, 2.592294692993164, 2.6651525497436523, 2.723682403564453, 2.622361898422241, 2.6853630542755127, 2.66196346282959, 2.684624671936035, 2.7410292625427246, 2.720621109008789, 2.6948142051696777, 2.783475160598755, 2.716914653778076, 2.6863441467285156, 2.70951509475708, 2.671013832092285, 2.556947708129883, 2.597607135772705, 2.5896177291870117, 2.707080125808716, 2.7352237701416016, 2.746755599975586, 2.6125869750976562, 2.538053274154663, 2.5534443855285645, 2.7414751052856445, 2.715134620666504, 2.7519211769104004, 2.654019355773926, 2.6285557746887207, 2.758132219314575, 2.620696544647217, 2.427410125732422, 2.7634353637695312, 2.7480051517486572, 2.630345344543457, 2.720594644546509, 2

In [14]:
# Read the saved predictions back; json.load returns the parsed object directly.
with open('predictVA.json', 'r') as saved_file:
    VA_load = json.load(saved_file)

In [15]:
# Display the object reloaded from predictVA.json.
VA_load

{'After_The_Rain.mp4': [[2.542762041091919,
   2.546539545059204,
   2.5694050788879395,
   2.6589908599853516,
   2.574186325073242,
   2.6745223999023438,
   2.6937193870544434,
   2.614956855773926,
   2.7494566440582275,
   2.732647657394409,
   2.7044167518615723,
   2.756711006164551,
   2.7459716796875,
   2.743102550506592,
   2.592294692993164,
   2.6651525497436523,
   2.723682403564453,
   2.622361898422241,
   2.6853630542755127,
   2.66196346282959,
   2.684624671936035,
   2.7410292625427246,
   2.720621109008789,
   2.6948142051696777,
   2.783475160598755,
   2.716914653778076,
   2.6863441467285156,
   2.70951509475708,
   2.671013832092285,
   2.556947708129883,
   2.597607135772705,
   2.5896177291870117,
   2.707080125808716,
   2.7352237701416016,
   2.746755599975586,
   2.6125869750976562,
   2.538053274154663,
   2.5534443855285645,
   2.7414751052856445,
   2.715134620666504,
   2.7519211769104004,
   2.654019355773926,
   2.6285557746887207,
   2.7581322193145

In [10]:
# json.load() already returns parsed Python objects, so VA_load is normally a
# dict, and json.loads() accepts only str/bytes/bytearray. Decode only when
# VA_load still holds raw JSON text; otherwise use it as-is.
a = json.loads(VA_load) if isinstance(VA_load, (str, bytes, bytearray)) else VA_load
a

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)