In [1]:
import os
import re
import json
import glob
import pandas as pd

In [2]:
# Project root. Defaults to the original local checkout, but can be overridden
# through the VN_VIDEO_SEARCH_ROOT environment variable so the notebook also
# runs on machines where the project lives elsewhere.
root = os.environ.get('VN_VIDEO_SEARCH_ROOT', r'D:\VN_Multi_User_Video_Search')
# Directory that receives every JSON dictionary produced by this notebook.
des_path = os.path.join(root, 'dict')

# Part 1: Build the scene index (scene_id2info.json) and the keyframe index (id2img_fps.json)

In [3]:
# Keyframe images live under the frontend's public data folder.
data_root = os.path.join(root, 'frontend', 'ai', 'public', 'data')
keyframes_root = os.path.join(data_root, 'Keyframes')  # <-- this is the directory that gets walked

# The remaining inputs all sit under <root>/dict.
metadata_root, scene_root, map_keyframes_root, fps_path = (
    os.path.join(root, 'dict', leaf)
    for leaf in ('metadata', 'SceneJSON', 'map_keyframes', 'fps.json')
)

# Make sure the output directory exists before later cells write into it.
os.makedirs(des_path, exist_ok=True)

# fps_dict is queried with "<Lxx>_<Vxxx>" keys by the cells below.
with open(fps_path, encoding='utf-8') as f:
    fps_dict = json.load(f)

In [4]:
# scene_id2info[Lxx][Vxxx] = {'video_metadata': {...}, 'lst_shot': {shot_no: {...}}}
# Shots are created here with empty keyframe lists; a later cell fills them.
scene_id2info = {}

for lxx in sorted(os.listdir(scene_root)):
    lxx_path = os.path.join(scene_root, lxx)
    if not os.path.isdir(lxx_path):
        continue

    videos = scene_id2info[lxx] = {}
    json_names = [name for name in sorted(os.listdir(lxx_path)) if name.lower().endswith('.json')]
    for video_json in json_names:
        vxxx = os.path.splitext(video_json)[0]  # e.g. "V001"
        video_key = f'{lxx}_{vxxx}'

        # Without an FPS value the shot times cannot be computed, so skip the video.
        video_fps = fps_dict.get(video_key)
        if video_fps is None:
            print(f'[WARN] Missing FPS for {lxx}_{vxxx}')
            continue

        with open(os.path.join(lxx_path, video_json), 'r', encoding='utf-8') as f:
            video_scene_info = json.load(f)  # [[start_frame, end_frame], ...]

        # Metadata is optional; fall back to an empty dict when the file is absent.
        meta_file = os.path.join(metadata_root, f'{video_key}.json')
        video_metadata = {}
        if os.path.exists(meta_file):
            with open(meta_file, 'r', encoding='utf8') as f:
                video_metadata = json.load(f)

        lst_shot = {}
        for shot_no, frame_range in enumerate(video_scene_info):
            first_frame = int(frame_range[0])
            last_frame = int(frame_range[1])
            lst_shot[str(shot_no)] = {
                'shot_range': [first_frame, last_frame],
                'shot_time': [first_frame / video_fps, last_frame / video_fps],  # in seconds
                'lst_keyframe_paths': [],
                'lst_keyframe_idxs': [],
                # a per-keyframe 'lst_keyframe_secs' list may be added later
            }

        videos[vxxx] = {
            'video_metadata': video_metadata,
            'lst_shot': lst_shot,
        }

In [5]:
# Walk every keyframe folder, assign each keyframe a global id, attach it to the
# shot that contains its frame index, and record per-keyframe info
# (path, shot, frame index, timestamp) in id2img_fps_list.

DEFAULT_FPS = 30.0  # assumed frame rate when a video is missing from fps_dict

global_index = 0
id2img_fps_list = []

# Absolute prefix stripped from keyframe paths so the frontend sees "/data/Keyframes/...".
public_root = os.path.join(root, 'frontend', 'ai', 'public')


def num_key(name: str) -> int:
    """Sort key for keyframe files: the numeric file stem; non-numeric names sort last."""
    try:
        return int(os.path.splitext(name)[0])
    except ValueError:
        return 10**9


# Visit every "[A-Z]xx_Vxxx" folder inside Keyframes.
for data_part in sorted(os.listdir(keyframes_root)):
    # Folder name must look like [A-Z]xx_Vxxx.
    if not re.match(r'^[A-Z]\d+_V\d+$', data_part, flags=re.IGNORECASE):
        continue

    lxx, vxxx = data_part.split('_', 1)
    video_key = f'{lxx}_{vxxx}'
    data_part_path = os.path.join(keyframes_root, data_part)

    # Load the matching SceneJSON (list of [start_frame, end_frame]).
    scene_json_path = os.path.join(scene_root, lxx, f'{vxxx}.json')
    if not os.path.exists(scene_json_path):
        print(f"[WARN] Không tìm thấy SceneJSON: {scene_json_path}")
        continue

    with open(scene_json_path, 'r', encoding='utf-8') as f:
        video_scene_info = json.load(f)

    # Load the keyframe mapper CSV (if any) once and turn the needed columns into
    # plain dicts keyed by keyframe number "n", so the per-file loop below does
    # O(1) lookups instead of repeated DataFrame.loc calls.
    mapper = None
    frame_idx_by_n, pts_time_by_n, fps_by_n = {}, None, None
    mapper_path = os.path.join(map_keyframes_root, f'{video_key}.csv')
    if os.path.exists(mapper_path):
        mapper = pd.read_csv(mapper_path, index_col='n')
        frame_idx_by_n = mapper['frame_idx'].to_dict()
        if 'pts_time' in mapper.columns:
            pts_time_by_n = mapper['pts_time'].to_dict()
        if 'fps' in mapper.columns:
            fps_by_n = mapper['fps'].to_dict()
    else:
        print(f"[WARN] Không có mapper cho {lxx}_{vxxx}: {mapper_path}")

    # FPS is needed for the timestamp fallback; say so when the default is used
    # instead of silently producing timestamps for an assumed frame rate.
    video_fps = fps_dict.get(video_key)
    if video_fps is None:
        print(f'[WARN] Missing FPS for {video_key}; assuming {DEFAULT_FPS} fps')
        video_fps = DEFAULT_FPS

    # Order files by frame number rather than lexicographically.
    file_names = sorted(os.listdir(data_part_path), key=num_key)

    # Shot pointer; it only moves forward because keyframes are visited in order.
    scene_track = 0
    scene_ranges = [list(map(int, r)) for r in video_scene_info]  # [[s, e], ...]
    # Nothing to attach keyframes to when the video has no scenes.
    if not scene_ranges:
        continue

    # Keep the cell idempotent: if this video already has shots (from the init
    # cell or from a previous run of this cell), start from empty keyframe lists
    # so re-running does not append every keyframe a second time.
    known_video = scene_id2info.get(lxx, {}).get(vxxx)
    if known_video is not None:
        for known_shot in known_video['lst_shot'].values():
            known_shot['lst_keyframe_paths'] = []
            known_shot['lst_keyframe_idxs'] = []

    for file_name in file_names:
        stem = os.path.splitext(file_name)[0]
        if not stem.isdigit():
            continue
        n = int(stem)  # keyframe number taken from the file name

        # Map n -> frame index and timestamp.
        if n in frame_idx_by_n:
            frame_idx = int(frame_idx_by_n[n])
            if pts_time_by_n is not None:
                # Prefer the presentation timestamp when the mapper provides it.
                sec = float(pts_time_by_n[n])
            else:
                row_fps = fps_by_n[n] if fps_by_n is not None else video_fps
                sec = frame_idx / float(row_fps)
        else:
            # Fallback: no mapper row -> treat n as an approximate frame index
            # and derive the time from the video-level FPS.
            frame_idx = n
            sec = frame_idx / float(video_fps)

        # Frontend-relative path: /data/Keyframes/Lxx_Vxxx/NNN.jpg
        # (backslashes from Windows paths are normalised to '/').
        full_path = os.path.join(data_part_path, file_name)
        rel_path = full_path.replace(public_root, '').replace('\\', '/')

        # Advance the shot pointer until the current shot's end is not before frame_idx.
        # Only the shot end is checked, so a frame lying before the shot's start is
        # still attributed to that shot.
        while scene_track < len(scene_ranges) and frame_idx > scene_ranges[scene_track][1]:
            scene_track += 1
        if scene_track >= len(scene_ranges):
            # Past the end of the last shot.
            continue

        # Make sure the target nodes exist (the init cell skips videos without FPS).
        if lxx not in scene_id2info:
            scene_id2info[lxx] = {}
        if vxxx not in scene_id2info[lxx]:
            scene_id2info[lxx][vxxx] = {'video_metadata': {}, 'lst_shot': {}}
        shots = scene_id2info[lxx][vxxx]['lst_shot']
        shot_key = str(scene_track)
        if shot_key not in shots:
            s, e = scene_ranges[scene_track]
            shots[shot_key] = {
                'shot_range': [int(s), int(e)],
                'shot_time': [int(s) / video_fps, int(e) / video_fps],
                'lst_keyframe_paths': [],
                'lst_keyframe_idxs': []
            }

        shots[shot_key]['lst_keyframe_paths'].append(rel_path)
        shots[shot_key]['lst_keyframe_idxs'].append(global_index)
        # (Optional) to also store each keyframe's time per shot:
        # shots[shot_key].setdefault('lst_keyframe_secs', []).append(sec)

        # id -> keyframe info ('sec' lets the backend/frontend seek to the right position).
        id2img_fps_list.append({
            "image_path": rel_path,
            "scene_idx": f'{lxx}/{vxxx}/lst_shot/{scene_track}',
            "frame_idx": frame_idx,
            "sec": sec
        })
        global_index += 1

In [6]:
# Key every keyframe record by its position in the list, i.e. its global id.
id2img_fps = {img_id: info for img_id, info in enumerate(id2img_fps_list)}

In [7]:
# Persist both dictionaries as UTF-8 JSON, keeping non-ASCII characters readable.
for out_name, payload in (
    ('scene_id2info.json', scene_id2info),
    ('id2img_fps.json', id2img_fps),
):
    with open(os.path.join(des_path, out_name), 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False)

print(f'Saved to {des_path}')
print(f'Number of Index: {len(id2img_fps)}')

Saved to D:\VN_Multi_User_Video_Search\dict
Number of Index: 382299


In [8]:
# Sanity check: the counter is incremented once per record appended to
# id2img_fps_list, so it should equal the "Number of Index" printed above.
global_index

382299

# Part 2: Export the keyframe-number → frame-index map per video (map_keyframes.json)

In [9]:
# map_keyframes_dict["<Lxx>_<Vxxx>"][n] = frame_idx, read from each mapper CSV.
map_keyframes_dict = dict()
for video_path in sorted(os.listdir(map_keyframes_root)):
    # The key is the file stem; anything that is not a CSV (stray files,
    # sub-directories) is skipped instead of breaking the loop.
    key, ext = os.path.splitext(video_path)
    if ext.lower() != '.csv':
        continue

    mapper = pd.read_csv(os.path.join(map_keyframes_root, video_path), index_col='n')

    # Iterate the column once rather than doing a .loc lookup per row; cast to
    # plain ints so the result is JSON-serialisable.
    map_keyframes_dict[key] = {
        int(n): int(frame_idx) for n, frame_idx in mapper['frame_idx'].items()
    }

In [10]:
# Serialise the keyframe map straight into the output file.
with open(f'{des_path}/map_keyframes.json', 'w') as f:
    json.dump(map_keyframes_dict, f)

# Part 3: Map each audio segment to the keyframes of the shots it overlaps (audio_id2img_id.json)

In [11]:
# scene_id2info.json is written as UTF-8 with ensure_ascii=False, so read it back
# as UTF-8 explicitly; the platform default encoding (e.g. a Windows code page)
# can fail on or garble non-ASCII metadata.
with open(f'{des_path}/scene_id2info.json', 'r', encoding='utf-8') as f:
    SceneID2Info = json.load(f)

In [12]:
audios_detection_dir = f'{des_path}/audio_detection'

# For every audio interval (in file order) collect the global keyframe ids of
# all shots whose time range overlaps it. check_error counts processed intervals.
check_error = 0
audio_id2img_fps = []
for data_part in sorted(os.listdir(audios_detection_dir)):
    for audio_detection_path in sorted(os.listdir(f'{audios_detection_dir}/{data_part}')):
        audio_id = audio_detection_path.replace('.json', '')
        scene_info = SceneID2Info[data_part][audio_id]['lst_shot']

        with open(f'{audios_detection_dir}/{data_part}/{audio_detection_path}', 'r') as f:
            audio_shots = json.load(f)

        # Shot pointer shared by all intervals of this file: it only moves forward.
        i = 0
        scene_info_len = len(scene_info)
        for audio_interval in audio_shots:
            start, end = audio_interval
            result = []

            while i < scene_info_len:
                shot_start, shot_end = scene_info[str(i)]['shot_time']
                if end <= shot_start:
                    # Interval finishes before this shot begins.
                    break
                if start < shot_end:
                    # Overlap: take every keyframe of this shot.
                    result.extend(scene_info[str(i)]['lst_keyframe_idxs'])
                    if end <= shot_end:
                        # Interval ends inside this shot; keep the pointer here.
                        break
                    # Interval continues past this shot; resume from the shot's end.
                    start = shot_end
                i += 1

            audio_id2img_fps.append(result)
            check_error += 1

In [13]:
# One result must have been stored per processed audio interval.
if len(audio_id2img_fps) != check_error:
    print("Some thing not good!!")
else:
    print("Successfully")

# Re-key the list by audio id. Guarded so that re-running this cell does not
# enumerate the keys of the already-converted dict and lose the keyframe lists.
if not isinstance(audio_id2img_fps, dict):
    audio_id2img_fps = dict(enumerate(audio_id2img_fps))

Successfully


In [14]:
# Serialise the audio-id -> keyframe-ids mapping straight into the output file.
with open(f'{des_path}/audio_id2img_id.json', 'w') as f:
    json.dump(audio_id2img_fps, f)

# Part 5: Collect all keyframe ids per video (video_id2img_id.json)

In [15]:
# scene_id2info.json is written as UTF-8 with ensure_ascii=False, so read it back
# as UTF-8 explicitly; the platform default encoding (e.g. a Windows code page)
# can fail on or garble non-ASCII metadata.
with open(f'{des_path}/scene_id2info.json', 'r', encoding='utf-8') as f:
    SceneID2Info = json.load(f)

In [16]:
# "<Lxx>_<Vxxx>" -> list of global keyframe ids; filled in by the next cell.
video_id2img_id = {}

In [17]:
# Flatten the keyframe ids of every shot of a video into one list per video,
# keeping the shot order of the loaded dictionary.
for data_part, videos in SceneID2Info.items():
    for video_id, video_info in videos.items():
        video_id2img_id[f'{data_part}_{video_id}'] = [
            img_id
            for shot in video_info['lst_shot'].values()
            for img_id in shot['lst_keyframe_idxs']
        ]

In [18]:
# Serialise the video -> keyframe-ids mapping straight into the output file.
with open(f'{des_path}/video_id2img_id.json', 'w') as f:
    json.dump(video_id2img_id, f)

# Part 6: Map each keyframe id to its two nearest audio segments (img_id2audio_id.json)

In [19]:
# scene_id2info.json is written as UTF-8 with ensure_ascii=False, so read it back
# as UTF-8 explicitly; the platform default encoding (e.g. a Windows code page)
# can fail on or garble non-ASCII metadata.
with open(f'{des_path}/scene_id2info.json', 'r', encoding='utf-8') as f:
    SceneID2Info = json.load(f)

In [20]:
import numpy as np
from tqdm import tqdm

In [21]:
def find_nearest(array, value, k=2):
    """Return the positions of the ``k`` entries of ``array`` closest to ``value``.

    Args:
        array: 1-D sequence of numbers (may be empty).
        value: Number to compare against.
        k: How many nearest entries to return; defaults to 2, the original
            fixed behaviour.

    Returns:
        A list of at most ``k`` integer positions into ``array``, sorted in
        ascending position order (not by distance). Fewer than ``k`` positions
        are returned when ``array`` is shorter than ``k``; an empty list is
        returned for empty input or ``k <= 0``.
    """
    if k <= 0:
        return []
    array = np.asarray(array)
    # A stable sort makes ties deterministic: among equally distant entries the
    # one that appears first in `array` wins.
    nearest = np.abs(array - value).argsort(kind='stable')[:k]
    return sorted(nearest.tolist())

In [22]:
audios_detection_dir = f'{des_path}/audio_detection'

# Give every keyframe the global ids of the audio intervals (of the same video)
# whose midpoints lie closest to the midpoint of the keyframe's shot.
# Audio ids are global: they keep counting across files in sorted order.
audio_global_id = 0
img_id2audio_id = dict()
for data_part in tqdm(sorted(os.listdir(audios_detection_dir))):
    for audio_detection_path in sorted(os.listdir(f'{audios_detection_dir}/{data_part}')):
        audio_id = audio_detection_path.replace('.json', '')
        scene_info = SceneID2Info[data_part][audio_id]['lst_shot']

        with open(f'{audios_detection_dir}/{data_part}/{audio_detection_path}', 'r') as f:
            audio_shots = json.load(f)

        # Midpoint of each audio interval.
        audio_pivot_shots = [(start + end) / 2 for start, end in audio_shots]

        for shot in scene_info.values():
            shot_begin, shot_finish = shot['shot_time'][0], shot['shot_time'][1]
            shot_center = (shot_begin + shot_finish) / 2
            nearest_audio = [
                audio_global_id + local_id
                for local_id in find_nearest(audio_pivot_shots, shot_center)
            ]
            # All keyframes of the shot share the same nearest-audio list.
            img_id2audio_id.update(dict.fromkeys(shot['lst_keyframe_idxs'], nearest_audio))

        audio_global_id += len(audio_shots)

100%|██████████| 30/30 [00:00<00:00, 64.91it/s]


In [23]:
# Serialise the keyframe-id -> audio-ids mapping straight into the output file.
with open(f'{des_path}/img_id2audio_id.json', 'w') as f:
    json.dump(img_id2audio_id, f)