In [1]:
import os, glob, time, datetime, multiprocessing
import cv2
import matplotlib.pyplot as plt

class Videos2Imgs:
    """
    Extract every ``frameInterval``-th frame of a list of videos as resized
    JPEG images, spreading the videos over a pool of worker processes.

    Usage:
        inputPathL = sorted(glob.glob("/home/jovyan/data-vol-2/HAR/C10/20220810/*.mp4"))
        obj = Videos2Imgs( inputPathL, "../_data/20220810" )
        obj.fetch()

    Images are written as ``<outputPath>/<video name up to its first dot>_<frame index, 5 digits>.jpg``.
    """
    def __init__(self, inputPathL, outputPath, divide=2, frameInterval=4, printInfo=True,
                 imgSize=(568, 320), maxProcesses=32):
        """
        Args:
            inputPathL:    iterable of video file paths.
            outputPath:    directory that receives the extracted JPEG files.
            divide:        stored on the instance; not used by any method of this class.
            frameInterval: keep the frames whose index is a multiple of this value.
            printInfo:     when truthy, print per-video metadata immediately.
            imgSize:       (width, height) passed to ``cv2.resize`` for every saved frame.
            maxProcesses:  upper bound on the number of worker processes.
        """
        # Materialise once so generators are not exhausted by getOverallInfo().
        self.inputPathL = list(inputPathL)
        self.outputPath = outputPath
        self.divide     = divide
        self.frameInterval = frameInterval
        self.imgSize       = tuple(imgSize)
        self.maxProcesses  = maxProcesses
        if printInfo:
            self.getOverallInfo(self.inputPathL)

    def getOverallInfo(self, videoPathL):
        """Print frame count, fps, height and width as reported by OpenCV for each video."""
        for i, videoPath in enumerate(videoPathL):
            cap = cv2.VideoCapture(videoPath)
            try:
                frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
                fps    = cap.get(cv2.CAP_PROP_FPS)
                height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
                width  = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
            finally:
                # Release the handle even if a property query raises.
                cap.release()
            print(f"{i}, {videoPath.split('/')[-1]}, frames={frames}, fps={fps}, height={height}, width={width}")

    @staticmethod
    def _splitEvenly(pathL, n):
        """Split ``pathL`` into ``n`` lists whose lengths differ by at most one.

        Each list gets a contiguous slice of ``len(pathL)//n`` items; the
        ``len(pathL)%n`` leftover items at the tail are handed out one each
        to the first lists.
        """
        q, r = divmod(len(pathL), n)
        return [ pathL[q*i:q*(i+1)] + ([pathL[q*n+i]] if i < r else []) for i in range(n) ]

    def fetch(self):
        """Extract frames from all input videos in parallel and print a summary.

        Creates ``outputPath`` if needed. With no input videos it returns
        right after creating the directory instead of dividing by zero.
        """
        # Local import keeps this class self-contained; the file header does not import subprocess.
        import subprocess

        overallStart = datetime.datetime.now()
        os.makedirs(self.outputPath, exist_ok=True)
        if not self.inputPathL:
            print("no input videos, nothing to fetch")
            return
        #
        # One process per video, capped at maxProcesses (and never below one).
        n = max(1, min(len(self.inputPathL), self.maxProcesses))
        amortizedL = self._splitEvenly(self.inputPathL, n)
        print(f"lanuch {len(amortizedL)} processed, with each videos: {[ len(ele) for ele in amortizedL ]}")
        # The context manager tears the workers down once the blocking map() has returned.
        with multiprocessing.Pool(processes=n) as pool:
            pool.map(self.fetchVideos, amortizedL)
        #
        # Drop the fractional seconds; split() is also correct when microseconds are exactly 0.
        elapsed = str(datetime.datetime.now() - overallStart).split(".")[0]
        print(f"\nOverallTime = {elapsed}")
        print(f"images = { len(os.listdir(self.outputPath)) }" )
        # Argument list instead of a shell string, so paths with spaces or metacharacters are safe.
        try:
            size = subprocess.run(["du", "-sh", self.outputPath],
                                  stdout=subprocess.PIPE, universal_newlines=True).stdout
        except OSError:
            # `du` is unavailable on this platform; report an empty size like the shell call would.
            size = ""
        print(f"size = {size}")

    def fetchVideos(self, videoPathL):
        """Worker body: save every ``frameInterval``-th frame of each video in ``videoPathL``."""
        for videoPath in videoPathL:
            cap = cv2.VideoCapture(videoPath)
            try:
                if not cap.isOpened():
                    print(f"skip {videoPath}: cannot be opened")
                    continue
                prefix = os.path.basename(videoPath).split(".")[0]
                length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                for i in range(length):
                    ok, img = cap.read()
                    if not ok:
                        # The reported frame count can exceed the decodable frames; stop at end of stream.
                        break
                    if i % self.frameInterval == 0:
                        target = os.path.join(self.outputPath, f"{prefix}_{i:05d}.jpg")
                        cv2.imwrite(target, cv2.resize(img, self.imgSize))
            finally:
                # Always free the decoder, including on the `continue` path and on errors.
                cap.release()

In [None]:
# Gather every .mp4 matched by the pattern, in sorted order (sorted() already returns a new list).
inputPathL = sorted(glob.glob("/home/jovyan/nas-dataset/HAR/C10/20220826/*.mp4"))
# printInfo=False skips the per-video metadata listing; fetch() then writes the frames.
obj = Videos2Imgs(inputPathL, "../_data/imgs/20220826/", printInfo=False)
obj.fetch()

lanuch 32 processed, with each videos: [8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
