# 動画の一時的な保存と展開

In [1]:
from pathlib import Path
import cv2
import numpy as np

In [2]:
import h5py

In [3]:
import shutil

## HDF5への動画の保存(np.uint8として)

In [4]:
# Create (or truncate) the HDF5 file that will hold the movie frames and
# add the "/movie" group the dataset is stored under.
hdf5_path = Path("movie_data.hdf5")
f = h5py.File(hdf5_path, mode="w")
group = f.create_group(name="/movie")

### データセットの作成に必要なデータの取得

In [5]:
video_path = Path("movies/mini_movie.mp4")
cap = cv2.VideoCapture(str(video_path))

In [6]:
# Query width, height and frame count from the capture (in that order);
# OpenCV reports them as floats, so each value is converted to int.
w, h, frame_num = (
    int(cap.get(prop))
    for prop in (
        cv2.CAP_PROP_FRAME_WIDTH,
        cv2.CAP_PROP_FRAME_HEIGHT,
        cv2.CAP_PROP_FRAME_COUNT,
    )
)
print(w, h, frame_num)

640 480 500


In [7]:
# One uint8 entry per frame, laid out as (frame, height, width, channel).
movie_shape = (frame_num, h, w, 3)
dataset = group.create_dataset("one_movie", shape=movie_shape, dtype=np.uint8)

In [8]:
# Copy the decoded frames into the dataset, one slot per frame index.
for i in range(frame_num):
    ret, frame = cap.read()
    if frame is None:
        # Nothing was decoded for this read; leave the slot unwritten.
        continue
    dataset[i] = frame

# The capture is no longer needed once every frame has been requested.
cap.release()

In [9]:
%timeit first_frame = dataset[1]

The slowest run took 4.15 times longer than the fastest. This could mean that an intermediate result is being cached.
1.31 ms ± 795 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
f.close()

### 既存のdatasetの参照 

In [11]:
f = h5py.File(hdf5_path, "r")

In [12]:
dataset = f["movie/one_movie"]

In [13]:
first_frame = dataset[0]
first_frame

array([[[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       ...,

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]]

In [14]:
f.close()

データが大きすぎるので削除

In [15]:
hdf5_path.unlink()

### 動画から取得した画像のarray 

In [91]:
class MovieImageArray:
    """
    Frames of a movie stored in an HDF5 file and exposed as a sequence of images.

    ``read_movie`` decodes the movie once and writes the frames to the
    dataset ``/movie/movie_dataset``; indexing then reads single frames
    back from that file, which saves memory at the cost of storage space.

    Instances can be used as context managers; ``close`` is called on exit.
    """
    def __init__(self, movie_path, is_temp=False, temp_dir=Path("")):
        """
        movie_path: pathlib.Path or str or None
            Path of the movie to read. None is accepted for instances that
            only open an already stored file (see ``from_file``).
        is_temp: bool
            Whether the stored data is deleted when ``close`` is called.
        temp_dir: pathlib.Path or str
            Directory in which the stored data is placed.
        """
        if movie_path is not None:
            self.movie_path = Path(movie_path)
            movie_name = self.movie_path.stem
        else:
            self.movie_path = None
            movie_name = ""
        self.is_temp = is_temp

        self.hdf5_path = Path(temp_dir) / "hdf5_{}.hdf5".format(movie_name)

    def read_movie(self, is_update=False):
        """
        Load the movie data.

        is_update: bool
            Whether to rebuild the stored data when it already exists.
            When False and the HDF5 file exists, it is opened read-only
            and the movie itself is not decoded.

        Raises ValueError when no movie path was given and OSError when
        the movie cannot be opened.
        """
        if self.movie_path is None:
            raise ValueError("movie path is not setted")

        # Close a handle left over from an earlier call before the same
        # file is opened again (in particular before truncating it).
        self._close_file()

        if self.hdf5_path.exists() and not is_update:  # reuse the stored data
            self.f = h5py.File(self.hdf5_path, "r")
            self.dataset = self.f["/movie/movie_dataset"]
            return None

        cap = cv2.VideoCapture(str(self.movie_path))
        try:
            # Check before touching the HDF5 file so that an unreadable
            # movie does not replace existing data with an empty store.
            if not cap.isOpened():
                raise OSError("cannot open movie: {}".format(self.movie_path))

            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            self.f = h5py.File(self.hdf5_path, "w")
            group = self.f.create_group("/movie")
            self.dataset = group.create_dataset(name="movie_dataset",
                                                shape=(frame_num, h, w, 3),
                                                dtype=np.uint8
                                               )
            self.dataset.attrs["length"] = frame_num

            # Frames are packed consecutively and counted, so "length"
            # ends up as the number of frames that were really decoded.
            stored = 0
            for _ in range(frame_num):
                ret, frame = cap.read()
                if frame is not None:
                    self.dataset[stored] = frame
                    stored += 1
            self.dataset.attrs["length"] = stored
        finally:
            cap.release()

    @classmethod
    def from_file(cls, hdf5_path, is_temp=False):
        """
        Open stored data that was written earlier.

        hdf5_path: pathlib.Path or str
            Path of the stored data.
        is_temp: bool
            Whether the stored data is deleted when ``close`` is called.
        """
        instance = cls(None, is_temp)
        # Normalise to Path: close() and saved_path rely on Path methods.
        instance.hdf5_path = Path(hdf5_path)
        instance.f = h5py.File(instance.hdf5_path, "r")
        instance.dataset = instance.f["/movie/movie_dataset"]
        return instance

    def _close_file(self):
        """Close the HDF5 handle if one has been opened."""
        handle = getattr(self, "f", None)
        if handle is not None:
            handle.close()

    def close(self):
        """
        Close the HDF5 file and, when ``is_temp`` is set, delete it.

        Attributes are looked up defensively because ``__del__`` also calls
        this method, possibly on an instance whose construction failed.
        """
        self._close_file()
        if getattr(self, "is_temp", False):
            hdf5_path = getattr(self, "hdf5_path", None)
            if hdf5_path is not None and Path(hdf5_path).exists():
                Path(hdf5_path).unlink()

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __len__(self):
        """Number of frames, as recorded in the dataset's "length" attribute."""
        return int(self.dataset.attrs["length"])

    def __getitem__(self, i):
        """
        Return frame ``i``; negative indices count from the last frame.

        Raises IndexError when ``i`` is outside the stored frames.
        """
        length = len(self)
        if i >= length or i < -length:
            raise IndexError("index out of range")
        if i < 0:
            i += length

        return self.dataset[i]

    def __iter__(self):
        for i in range(len(self)):
            yield self[i]

    @property
    def saved_path(self):
        """Path of the HDF5 file backing this instance."""
        return self.hdf5_path

In [92]:
image_array1 = MovieImageArray("movies/mini_movie.mp4", is_temp=False)

In [93]:
image_array1.read_movie(is_update=True)

In [94]:
len(image_array1)

500

In [95]:
image_array1[0]

array([[[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       ...,

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]]

In [96]:
%%time
for i in range(len(image_array1)):
    img = image_array1[i]
    pass

Wall time: 11.8 s


In [97]:
%%time
for image in image_array1:
    pass

Wall time: 11.4 s


In [101]:
image_array1.close()

In [98]:
image_array1_from_file = MovieImageArray.from_file(image_array1.saved_path, is_temp=True)

In [99]:
image_array1_from_file[0]

array([[[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       ...,

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]]

In [102]:
image_array1_from_file.close()

## 画像ファイルで保存 

In [35]:
# Directory that receives one PNG per frame. exist_ok makes the call a
# no-op when the directory is already there, without a separate check.
dir_path = Path("movie_images")
dir_path.mkdir(parents=True, exist_ok=True)

In [36]:
video_path = Path("movies/mini_movie.mp4")
cap = cv2.VideoCapture(str(video_path))
frame_num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

In [37]:
ret, frame = cap.read()

In [38]:
# Rewind first: the capture may already have been advanced by an earlier
# read(), which would drop the leading frame and shift the numbering.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

counter = 0
image_name_base = "image"

# Write every decoded frame as <image_name_base>_<counter>.png; counter
# only advances for frames that were actually decoded.
for i in range(frame_num):
    ret, frame = cap.read()
    if frame is None:
        continue
    image_path = dir_path / "{}_{}.png".format(image_name_base, counter)
    cv2.imwrite(str(image_path), frame)
    counter += 1

In [39]:
cap.release()

In [40]:
%timeit cv2.imread("movie_images/image_0.png")

59.7 ms ± 27.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [41]:
shutil.rmtree(dir_path)

### 動画から取得した画像のarray(直接pngを作成)

In [4]:
class MovieImageArrayFile:
    """
    Same interface as MovieImageArray, but every frame is stored as a PNG file.

    ``read_movie`` writes the frames into a directory and indexing loads
    the matching file again, which uses storage space instead of memory.

    Instances can be used as context managers; ``close`` is called on exit.
    """
    def __init__(self, movie_path, is_temp=False, temp_dir=Path("")):
        """
        movie_path: pathlib.Path or str or None
            Path of the movie to read. None is accepted for instances that
            only open an already stored directory (see ``from_file``).
        is_temp: bool
            Whether the stored data is deleted when ``close`` is called.
        temp_dir: pathlib.Path or str
            Directory in which the stored data is placed.
        """
        if movie_path is not None:
            self.movie_path = Path(movie_path)
            movie_name = self.movie_path.stem
        else:
            self.movie_path = None
            movie_name = ""

        self.is_temp = is_temp
        self.dir_path = Path(temp_dir) / "images_{}".format(movie_name)
        self.image_name_base = "image"
        self.image_paths = []

    def read_movie(self, is_update=False):
        """
        Load the movie data.

        is_update: bool
            Whether to rebuild the stored data when it already exists.
            When False and the directory exists, its images are reused
            and the movie itself is not decoded.

        Raises ValueError when no movie path was given and OSError when
        the movie cannot be opened or an image cannot be written.
        """
        if self.movie_path is None:
            raise ValueError("movie path is not setted")

        if self.dir_path.exists() and not is_update:  # reuse the stored images
            self._read_from_dir()
            return None

        cap = cv2.VideoCapture(str(self.movie_path))
        try:
            # Check before deleting anything so that an unreadable movie
            # does not wipe images stored by an earlier run.
            if not cap.isOpened():
                raise OSError("cannot open movie: {}".format(self.movie_path))
            frame_num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Start from an empty directory, whether or not one existed.
            if self.dir_path.exists():
                shutil.rmtree(self.dir_path)
            self.dir_path.mkdir(parents=True, exist_ok=True)

            counter = 0
            for _ in range(frame_num):
                ret, frame = cap.read()
                if frame is None:
                    continue
                image_path = self.dir_path / "{}_{}.png".format(self.image_name_base, counter)
                # imwrite reports failure through its return value.
                if not cv2.imwrite(str(image_path), frame):
                    raise OSError("cannot write image: {}".format(image_path))
                counter += 1
        finally:
            cap.release()
        self._read_from_dir()

    @staticmethod
    def _frame_order(image_path):
        """
        Sort key that orders files by the number after the last underscore.

        Files without such a number are placed after the numbered ones and
        ordered by name.
        """
        suffix = image_path.stem.rpartition("_")[2]
        if suffix.isdecimal():
            return (0, int(suffix), image_path.name)
        return (1, 0, image_path.name)

    def _read_from_dir(self):
        """
        Collect the PNG files of the directory in frame order.

        glob() gives no ordering guarantee and plain name sorting would put
        "image_10" before "image_2", so the numeric suffix is used.
        """
        self.image_paths = sorted(Path(self.dir_path).glob("*.png"),
                                  key=self._frame_order)

    @classmethod
    def from_file(cls, dir_path, is_temp=False):
        """
        Open stored data that was written earlier.

        dir_path: pathlib.Path or str
            Path of the stored data.
        is_temp: bool
            Whether the stored data is deleted when ``close`` is called.
        """
        instance = cls(None, is_temp)
        # Normalise to Path: the other methods rely on Path methods.
        instance.dir_path = Path(dir_path)
        instance._read_from_dir()
        return instance

    def close(self):
        """
        Delete the image directory when ``is_temp`` is set.

        Attributes are looked up defensively because ``__del__`` also calls
        this method, possibly on an instance whose construction failed.
        """
        if getattr(self, "is_temp", False):
            dir_path = getattr(self, "dir_path", None)
            if dir_path is not None and Path(dir_path).exists():
                shutil.rmtree(Path(dir_path))

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __len__(self):
        """Number of image files found in the directory."""
        return len(self.image_paths)

    def __getitem__(self, i):
        """Load and return the image stored for index ``i``."""
        image_path = self.image_paths[i]
        image = cv2.imread(str(image_path))
        return image

    def __iter__(self):
        for i in range(len(self)):
            yield self[i]

    @property
    def saved_path(self):
        """Directory holding the image files of this instance."""
        return self.dir_path

In [5]:
image_array2 = MovieImageArrayFile("movies/mini_movie.mp4", is_temp=False)

In [6]:
image_array2.read_movie(is_update=False)

In [106]:
image_array2[0]

array([[[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       ...,

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]]

In [107]:
%%time
for image in image_array2:
    pass

Wall time: 25 s


In [108]:
image_array2.close()

In [109]:
image_array2_from_file = MovieImageArrayFile.from_file(image_array2.saved_path, is_temp=True)

In [110]:
image_array2_from_file[0]

array([[[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       ...,

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]]

In [111]:
image_array2_from_file.close()

## 動画から取得した画像のarray(生画像) 

In [48]:
class MovieImageArrayRaw:
    """
    Same interface as MovieImageArray, but all frames are kept in memory.

    Access is fast, at the price of holding every decoded frame at once.

    Instances can be used as context managers; ``close`` is called on exit.
    """
    def __init__(self, movie_path, *args, **kwargs):
        """
        movie_path: pathlib.Path or str or None
            Path of the movie to read.

        Extra positional and keyword arguments are accepted and ignored so
        that the call signature matches the sibling classes.
        """
        self.movie_path = Path(movie_path) if movie_path is not None else None
        self.images = []

    def read_movie(self, *args, **kwargs):
        """
        Load the movie data into ``self.images``.

        Extra arguments are accepted and ignored.

        Raises ValueError when no movie path was given and OSError when
        the movie cannot be opened.
        """
        if self.movie_path is None:
            raise ValueError("movie path is not setted")

        cap = cv2.VideoCapture(str(self.movie_path))
        try:
            # Fail loudly instead of silently ending up with no frames.
            if not cap.isOpened():
                raise OSError("cannot open movie: {}".format(self.movie_path))
            frame_num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            images_list = []
            for _ in range(frame_num):
                ret, frame = cap.read()
                if frame is not None:
                    images_list.append(frame)
        finally:
            # Release the capture even when reading fails part-way.
            cap.release()
        self.images = images_list

    def close(self):
        """Nothing to release; present for interface compatibility."""
        pass

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __len__(self):
        """Number of frames currently held in memory."""
        return len(self.images)

    def __getitem__(self, i):
        """Return the frame stored at index ``i``."""
        return self.images[i]

    def __iter__(self):
        for i in range(len(self)):
            yield self[i]

以下，メモリ注意

In [54]:
image_array3 = MovieImageArrayRaw("movies/mini_movie.mp4")

In [55]:
image_array3.read_movie()

In [51]:
image_array3[0]

array([[[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       [[  0,   4,   3],
        [  0,   4,   3],
        [  0,   4,   3],
        ...,
        [192, 173, 181],
        [192, 173, 181],
        [192, 173, 181]],

       ...,

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]],

       [[120, 134,  82],
        [120, 134,  82],
        [120, 134,  82],
        ...,
        [  0,   3,   2],
        [  0,   2,   1],
        [  0,   2,   1]]

In [52]:
%%time
for image in image_array3:
    pass

Wall time: 2 ms


In [56]:
del image_array3