# Import necessary libraries

In [1]:
%pip install opencv-python




In [1]:
import cv2
import os
import numpy as np
from tqdm import tqdm
import pickle
import datetime as dt

from indiv_image_utils import fetch_and_show_images

# Init constants

In [2]:
# Video clips to sample frames from.
CLIP_LIST = [
    'E:/indiv_vdo/2021_0607_184742_013.MOV',
    'E:/indiv_vdo/Top.MOV',
]

# Folder that receives the frames extracted from the video clips.
IMAGE_POOL_PATH = 'E:/indiv_vdo/extracted/image_pool'
# Folder that holds the pickled dictionary produced by the image analysis step.
DICT_PATH = 'E:/indiv_vdo/extracted/dict'

# Extract info from image

In [None]:
def fetch_and_show_images(clip_path,
                          save_dir,
                          capture_interval=20,
                          replace=False,
                          verbose=True,
                          resolution=(512, 256)
                          ):
    """
    Sample frames from a video clip at a fixed time interval and save them as PNG files.

    Despite its name, this function only writes files; it does not display anything.
    Each sampled frame is resized to `resolution` and written to
    `<save_dir>/<clip_name>-<t>.png`, where `clip_name` is the clip's file name up to
    its first '.' and `t` is the frame's offset from the start of the clip in whole
    seconds.

    Parameters:
    - clip_path (str): Path to the video clip.
    - save_dir (str): Folder to save the fetched images. Created if it does not exist.
    - capture_interval (int): Time between sampled frames, in seconds. Default is 20.
    - replace (bool): If True, replace existing images with the same name. Default is False.
    - verbose (bool): If True, print out the process. Default is True.
    - resolution (tuple): Target size handed to cv2.resize as `dsize`, i.e. (width, height).
      Default is (512, 256).

    Returns:
    - None

    Raises:
    - IOError: If the clip cannot be opened.
    - ValueError: If capture_interval is not positive or the clip reports a
      non-positive frame rate.
    """
    if capture_interval <= 0:
        raise ValueError(f"capture_interval must be positive, got {capture_interval}")

    # The name still ends at the first '.', so images written by earlier runs
    # keep being recognised as "already exists".
    clip_name = os.path.basename(clip_path).split('.')[0]

    os.makedirs(save_dir, exist_ok=True)

    clip_reader = cv2.VideoCapture(clip_path)
    try:
        if not clip_reader.isOpened():
            raise IOError(f"Cannot open video clip: {clip_path}")

        frame_total = int(clip_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
        # second offsets in the file names drift away from the real timestamps.
        fps = clip_reader.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            raise ValueError(f"Clip {clip_path} reports an invalid frame rate: {fps}")

        duration = int(frame_total / fps)
        frame_step = max(1, int(round(capture_interval * fps)))

        if verbose:
            print(f"Extracting {clip_name}")
            print(f"Frames available in the clip: {frame_total} ({duration} sec.)")
            print(f"Fetching every {capture_interval} second or {frame_step} frames")

        current_frame = 0
        sample_index = 0        # how many sampling points have been reached so far
        next_sample_frame = 0   # frame index of the next sampling point
        fetched_count = 0
        skipped_count = 0

        while True:
            # grab() only advances to the next frame; the costly decode happens in
            # retrieve(), which is called only for frames that must be written.
            if not clip_reader.grab():
                break

            if current_frame == next_sample_frame:
                elapsed_sec = int(sample_index * capture_interval)
                image_filename = f"{clip_name}-{elapsed_sec}.png"
                image_filepath = os.path.join(save_dir, image_filename)

                if replace or not os.path.exists(image_filepath):
                    status, image = clip_reader.retrieve()
                    if not status:
                        break

                    image = cv2.resize(image, resolution)
                    if cv2.imwrite(image_filepath, image):
                        fetched_count += 1
                        if verbose:
                            # Some containers do not report a frame count.
                            progress = 100 * current_frame / frame_total if frame_total > 0 else 0.0
                            print(f"Fetched frame {current_frame}({progress:.2f}%) as {image_filename}")
                    else:
                        print(f"Failed to write {image_filepath}")
                else:
                    skipped_count += 1
                    if verbose:
                        print(f"Image {image_filename} already exists, skipping...")

                sample_index += 1
                # Always move forward at least one frame so a tiny interval cannot stall.
                next_sample_frame = max(current_frame + 1,
                                        int(round(sample_index * capture_interval * fps)))

            current_frame += 1
    finally:
        # Release the reader even on errors or a keyboard interrupt.
        clip_reader.release()

    if verbose:
        print(f"Process complete! Fetched {fetched_count} frames from the clip "
              f"({skipped_count} already existed).")

## Extract images from videos into the image pool

In [3]:
# Sample one frame every 10 seconds from each clip into the image pool,
# keeping any image that was already extracted by an earlier run.
extract_options = dict(
    save_dir=IMAGE_POOL_PATH,
    capture_interval=10,
    replace=False,
    verbose=True,
)

for video_path in CLIP_LIST:
    fetch_and_show_images(clip_path=video_path, **extract_options)

Extracting 2021_0607_184742_013
Frames available in the clip: 79094 (2636 sec.)
Fetching every 10 second or 300 frames
Image 2021_0607_184742_013-0.png already exists, skipping...
Image 2021_0607_184742_013-10.png already exists, skipping...
Image 2021_0607_184742_013-20.png already exists, skipping...
Image 2021_0607_184742_013-30.png already exists, skipping...
Image 2021_0607_184742_013-40.png already exists, skipping...
Image 2021_0607_184742_013-50.png already exists, skipping...
Image 2021_0607_184742_013-60.png already exists, skipping...
Image 2021_0607_184742_013-70.png already exists, skipping...
Image 2021_0607_184742_013-80.png already exists, skipping...


KeyboardInterrupt: 

## Extract info from the image pool
1. Date and time of each image in the image pool
1. Average intensity of each image in the image pool
1. Day/night label for each image (7pm to 6am is night; any other time is day)

In [6]:
info_dict_file = DICT_PATH + '/info_dict.pkl'

# Make sure the dictionary folder is there before touching the pickle file.
if not os.path.exists(DICT_PATH):
    os.makedirs(DICT_PATH)

# First run: seed the pickle file with an empty dictionary.
if not os.path.exists(info_dict_file):
    with open(info_dict_file, 'wb') as fh:
        pickle.dump({}, fh)

# Load whatever has been collected so far.
with open(info_dict_file, 'rb') as fh:
    info_dict = pickle.load(fh)


In [7]:
def get_datetime(img):
    '''
    Derive the capture datetime of an extracted frame from its file name.

    The name is expected to follow the format 'yyyy_mmdd_hhmmss_fff-t.png', where
        yyyy_mmdd_hhmmss_fff is the datetime at which the recording started
        t is the offset of the frame from that start, in whole seconds

    Returns the start datetime plus the offset, or None when the name does not
    follow this format (e.g. 'Top-990.png') or is not a string.
    '''
    try:
        # drop the file extension (everything from the first '.')
        stem = img.split('.')[0]
        parts = stem.split('-')

        # NOTE(review): the trailing 'fff' is parsed with %f, i.e. as a fraction of a
        # second - confirm it is not a file sequence number.
        start_record_datetime = dt.datetime.strptime(parts[0], '%Y_%m%d_%H%M%S_%f')

        # offset of this frame from the start of the recording
        time_from_start_record = dt.timedelta(seconds=int(parts[1]))

        return start_record_datetime + time_from_start_record
    except (AttributeError, IndexError, OverflowError, TypeError, ValueError):
        # Only parsing failures mean "no datetime in this name"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return None

In [10]:
# Hours (24h clock) bounding the night period: 19:00 up to, but not including, 06:00.
NIGHT_START_HOUR = 19
NIGHT_END_HOUR = 6

image_name_list = os.listdir(IMAGE_POOL_PATH)

looper = tqdm(image_name_list, unit='image')
for image_name in looper:
    looper.set_description(f'Processing {image_name}')

    image_path = IMAGE_POOL_PATH + '/' + image_name

    # cv2.imread returns None (it does not raise) when a file cannot be decoded,
    # e.g. a stray non-image file in the pool; skip those instead of crashing.
    image = cv2.imread(image_path)
    gray_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None or gray_image is None:
        looper.write(f'Skipping {image_name}: not a readable image')
        continue

    info = info_dict.setdefault(image_name, dict())

    ## get datetime from image
    img_datetime = get_datetime(image_name)
    info['datetime'] = img_datetime

    ## get image shape
    info['shape'] = image.shape

    ## get avg intensity
    info['avg_intensity'] = np.mean(gray_image)

    ## get original vdo name
    info['vdo_name'] = image_name.split('-')[0] + '.MOV'

    ## label it's day or night (7pm - 6am)
    if img_datetime is None:
        # No timestamp in the file name. Images from Top.MOV are treated as day;
        # anything else gets an explicit 'unknown' so the key always exists.
        info['day_night'] = 'day' if info['vdo_name'] == 'Top.MOV' else 'unknown'
    elif img_datetime.hour >= NIGHT_START_HOUR or img_datetime.hour < NIGHT_END_HOUR:
        # `< NIGHT_END_HOUR`: 06:00-06:59 counts as day, matching "7pm - 6am".
        info['day_night'] = 'night'
    else:
        info['day_night'] = 'day'

# save info_dict
with open(DICT_PATH + '/info_dict.pkl', 'wb') as f:
    pickle.dump(info_dict, f)

Processing Top-990.png: 100%|██████████| 673/673 [00:06<00:00, 102.55image/s]                  


# Summary of the image pool

In [22]:
import tqdm

In [11]:
# Reload the info dictionary from its pickle file in DICT_PATH.
with open(DICT_PATH + '/info_dict.pkl', 'rb') as pkl_file:
    info_dict = pickle.load(pkl_file)

In [23]:
night_image_name_list = []
day_image_name_list = []
other_image_name_list = []   # images whose label is neither 'night' nor 'day', or is missing
total_image_amount = len(info_dict)
original_vdo_count = dict()

# `tqdm` is the module here (rebound by the `import tqdm` cell above), hence tqdm.tqdm.
for image_name, info in tqdm.tqdm(info_dict.items()):
    # Sort the image into its bucket. `.get` keeps entries that were stored
    # without a 'day_night' key from raising KeyError; they land in the "other" list.
    label = info.get('day_night')
    if label == 'night':
        night_image_name_list.append(image_name)
    elif label == 'day':
        day_image_name_list.append(image_name)
    else:
        other_image_name_list.append(image_name)

    # Count how many images came from each original vdo.
    vdo_name = info['vdo_name']
    original_vdo_count[vdo_name] = original_vdo_count.get(vdo_name, 0) + 1

100%|██████████| 673/673 [00:00<00:00, 672407.48it/s]


In [28]:
def _percentage(count, total):
    """Return count as a percentage of total, or 0.0 when total is zero (empty pool)."""
    return count / total * 100 if total else 0.0


day_amount = len(day_image_name_list)
night_amount = len(night_image_name_list)
other_amount = len(other_image_name_list)

print(f'[x] Total amount of image: {total_image_amount}')
print(f'[x] Amount of day image: {day_amount}({_percentage(day_amount, total_image_amount):.2f}%)')
print(f'[x] Amount of night image: {night_amount}({_percentage(night_amount, total_image_amount):.2f}%)')
# Only mention unlabeled images when there are some, so the day/night
# percentages visibly account for the whole pool.
if other_amount:
    print(f'[x] Amount of unlabeled image: {other_amount}({_percentage(other_amount, total_image_amount):.2f}%)')

print('[x] Count of image from each original vdo')
for vdo_name, image_count in original_vdo_count.items():
    print(f'\t- {vdo_name}: {image_count}')

[x] Total amount of image: 673
[x] Amount of day image: 483(71.77%)
[x] Amount of night image: 190(28.23%)
[x] Count of image from each original vdo
	- 2021_0607_184742_013.MOV: 264
	- Top.MOV: 409
