# Import necessary libraries

In [79]:
%pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\jiray\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [80]:
import cv2
import os
import matplotlib.pyplot as plt
import re
import numpy as np
from tqdm import tqdm
import pickle
import datetime as dt
import time


import pandas as pd

# Init constants

In [81]:
IMAGE_POOL_PATH = 'F:/image_pool' # folder where frames extracted from videos are saved and later read back for analysis
DICT_PATH = 'F:/dict' # folder holding the pickled info dictionary (info_dict.pkl) built by the image analysis

# Create extract function

In [82]:

def fetch_and_show_images(clip_path, 
                          save_dir, 
                          capture_interval=20,
                          replace=False,
                          verbose=True
                          ):
    """
    Sample frames from a video clip at a fixed time interval and save them as PNG files.

    Each sampled frame is written to ``save_dir`` as ``<clip name>-<second>.png``,
    where ``<second>`` is the frame's offset from the start of the clip in whole
    seconds. Despite the function name, nothing is displayed: frames are only
    written to disk.

    The frame rate reported by OpenCV is truncated to an integer, so for clips with
    a non-integer frame rate the second offsets in the file names are approximate.

    Parameters:
    - clip_path (str): Path to the video clip.
    - save_dir (str): Folder to save the fetched images. Created if it does not exist.
    - capture_interval (int): Seconds between fetched frames. Default is 20 seconds.
    - replace (bool): If True, replace existing images with the same name. Default is False.
    - verbose (bool): If True, print out the process. Default is True.

    Raises:
    - IOError: If the clip cannot be opened.
    - ValueError: If the clip reports no usable frame rate, or if capture_interval
      is not positive.
    """
    # basename copes with whichever path separators the OS accepts; the name is
    # still cut at the first dot because get_datetime parses file names that way
    clip_name = os.path.basename(clip_path).split('.')[0]

    # Make sure the save directory exists
    os.makedirs(save_dir, exist_ok=True)

    # Initialize video reader
    clip_reader = cv2.VideoCapture(clip_path)
    fetched_count = 0
    skipped_count = 0
    try:
        # VideoCapture does not raise on a bad path; it just reports "not opened"
        if not clip_reader.isOpened():
            raise IOError(f"Cannot open video clip: {clip_path}")

        frame_total = int(clip_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = int(clip_reader.get(cv2.CAP_PROP_FPS))
        if fps <= 0:
            raise ValueError(f"Clip reports an invalid frame rate ({fps}): {clip_path}")

        # Number of frames between two captures; must be positive for the modulo below
        frame_step = capture_interval * fps
        if frame_step <= 0:
            raise ValueError(f"capture_interval must be positive, got {capture_interval}")

        duration = frame_total // fps

        if verbose:
            print(f"Extracting {clip_name}")
            print(f"Frames available in the clip: {frame_total} ({duration} sec.)")
            print(f"Fetching every {capture_interval} second or {fps * capture_interval} frames")

        current_frame = 0

        while True:
            status, image = clip_reader.read()

            # If reading frame fails (normally: end of clip), exit the loop
            if not status:
                break

            if current_frame % frame_step == 0:
                image_filename = f"{clip_name}-{int(current_frame/fps)}.png"
                image_filepath = save_dir + '/' + image_filename

                if not os.path.exists(image_filepath) or replace:
                    # imwrite signals failure through its return value, not an exception.
                    # The frame is written as read (BGR), which is what imwrite expects.
                    if cv2.imwrite(image_filepath, image):
                        fetched_count += 1
                        if verbose:
                            print(f"Fetched frame {current_frame}({current_frame / fps}) as {image_filename}")
                    else:
                        print(f"Failed to write {image_filepath}")
                else:
                    skipped_count += 1
                    if verbose:
                        print(f"Image {image_filename} already exists, skipping...")

            current_frame += 1
    finally:
        # Always free the capture handle, even when an error is raised above
        clip_reader.release()

    if verbose:
        print(f"Process complete! Fetched {fetched_count} frames from the clip.")
        if skipped_count:
            print(f"Skipped {skipped_count} frames that already existed.")



# Extract info from image

## Extract images from video (VDO) into the image pool

In [83]:
# Sample one frame every 10 seconds from the clip into the shared image pool,
# leaving any frames that were already extracted untouched.
fetch_and_show_images(
    'E:/indiv_vdo/2021_0607_184742_013.MOV',
    IMAGE_POOL_PATH,
    capture_interval=10,
    replace=False,
    verbose=True,
)

Extracting 2021_0607_184742_013
Frames available in the clip: 79094 (2636 sec.)
Fetching every 10 second or 300 frames
Image 2021_0607_184742_013-0.png already exists, skipping...


Image 2021_0607_184742_013-10.png already exists, skipping...
Image 2021_0607_184742_013-20.png already exists, skipping...
Image 2021_0607_184742_013-30.png already exists, skipping...
Image 2021_0607_184742_013-40.png already exists, skipping...
Image 2021_0607_184742_013-50.png already exists, skipping...
Image 2021_0607_184742_013-60.png already exists, skipping...
Image 2021_0607_184742_013-70.png already exists, skipping...
Image 2021_0607_184742_013-80.png already exists, skipping...
Image 2021_0607_184742_013-90.png already exists, skipping...
Image 2021_0607_184742_013-100.png already exists, skipping...
Image 2021_0607_184742_013-110.png already exists, skipping...
Image 2021_0607_184742_013-120.png already exists, skipping...
Image 2021_0607_184742_013-130.png already exists, skipping...
Image 2021_0607_184742_013-140.png already exists, skipping...
Image 2021_0607_184742_013-150.png already exists, skipping...
Image 2021_0607_184742_013-160.png already exists, skipping...
I

## Extract info from image pool
1. Date and time of each image in the image pool
1. Average intensity of each image in the image pool

In [87]:
# Make sure the folder that holds the pickled dictionary is present
if os.path.exists(DICT_PATH) is False:
    os.makedirs(DICT_PATH)

# Load the stored info dictionary. On the very first run there is no file yet,
# so start from an empty dictionary and persist it straight away.
# NOTE: pickle.load executes arbitrary code from the file; only load files you created.
if os.path.exists(DICT_PATH + '/info_dict.pkl'):
    with open(DICT_PATH + '/info_dict.pkl', 'rb') as pkl_in:
        info_dict = pickle.load(pkl_in)
else:
    info_dict = {}
    with open(DICT_PATH + '/info_dict.pkl', 'wb') as pkl_out:
        pickle.dump(info_dict, pkl_out)


In [88]:
def get_datetime(img):
    '''
    Work out when an image was captured, based purely on its file name.

    The file name is assumed to follow the format 'yyyy_mmdd_hhmmss_fff-t.png'
    where
        yyyy_mmdd_hhmmss_fff is the datetime at which the recording started
            (fff is parsed with %f, i.e. as a fraction of a second)
        t is the offset of the image from the start of the recording (sec.)

    Returns the recording start datetime shifted by t seconds.
    '''

    # everything before the first dot is the name without its extension
    stem = img.split('.')[0]

    # the name has two parts: recording start stamp, then offset in seconds
    name_parts = stem.split('-')

    recording_start = dt.datetime.strptime(name_parts[0], '%Y_%m%d_%H%M%S_%f')
    offset = dt.timedelta(seconds=int(name_parts[1]))

    return recording_start + offset


In [98]:
# Fill info_dict with one record per image in the pool: capture datetime,
# average grayscale intensity, and the name of the video it came from.
# NOTE(review): image_name_list is not defined in the visible cells; presumably an
# earlier cell builds it from the file names in IMAGE_POOL_PATH. Confirm it is
# defined when running on a fresh kernel.
looper = tqdm(image_name_list, unit='image')
for image_name in looper:
    looper.set_description(f'Processing {image_name}')

    ## get avg intensity
    gray_image = cv2.imread(IMAGE_POOL_PATH + '/' + image_name, cv2.IMREAD_GRAYSCALE)
    if gray_image is None:
        # cv2.imread returns None for a missing or unreadable file instead of raising
        tqdm.write(f'Could not read {image_name}, skipping...')
        continue

    # Always key the record by the current loop variable, creating it on first use
    image_info = info_dict.setdefault(image_name, dict())

    ## get datetime from image
    image_info['datetime'] = get_datetime(image_name)

    image_info['avg_intensity'] = np.mean(gray_image)

    ## get original vdo name
    image_info['vdo_name'] = image_name.split('-')[0] + '.MOV'

# save info_dict
with open(DICT_PATH + '/info_dict.pkl', 'wb') as f:
    pickle.dump(info_dict, f)

Processing 2021_0607_184742_013-2630.png: 100%|██████████| 264/264 [00:05<00:00, 44.24image/s]


In [97]:
# Spot-check the stored record for the first extracted frame of the clip
info_dict['2021_0607_184742_013-0.png']

{'datetime': datetime.datetime(2021, 6, 7, 18, 47, 42, 13000),
 'avg_intensity': 58.359539930555556,
 'vdo_name': '2021_0607_184742_013.MOV'}