# Video Metadata Inspector

This notebook inspects every video in the `video-data` folder and extracts:
- **Framerate** (FPS)
- **Number of frames**
- **Length** (duration in seconds)

It also computes summary statistics for each of these fields.

In [1]:
import os

import cv2
import pandas as pd

## Video Inspection Function

In [2]:
def get_video_metadata(video_path: str) -> dict:
    """
    Extract metadata from a video file.
    
    Args:
        video_path: Path to the video file
        
    Returns:
        Dictionary containing filename, framerate, frame_count, and duration
    """
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        return {
            'filename': os.path.basename(video_path),
            'framerate': None,
            'frame_count': None,
            'duration_seconds': None,
            'error': 'Could not open video'
        }
    
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    duration = frame_count / fps if fps > 0 else 0
    
    cap.release()
    
    return {
        'filename': os.path.basename(video_path),
        'framerate': fps,
        'frame_count': frame_count,
        'duration_seconds': duration,
        'error': None
    }

## Inspect All Videos

In [3]:
video_dir = './video-data'

video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.webm')
video_files = [
    os.path.join(video_dir, f) 
    for f in os.listdir(video_dir) 
    if f.lower().endswith(video_extensions)
]

print(f"Found {len(video_files)} video files in '{video_dir}'")

Found 77 video files in './video-data'


In [4]:
metadata_list = []

for video_path in sorted(video_files):
    metadata = get_video_metadata(video_path)
    metadata_list.append(metadata)
    
df = pd.DataFrame(metadata_list)

## Video Metadata Table

In [5]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

df_display = df[['filename', 'framerate', 'frame_count', 'duration_seconds']].copy()
df_display['duration_seconds'] = df_display['duration_seconds'].round(2)
df_display

Unnamed: 0,filename,framerate,frame_count,duration_seconds
0,1_dksksjfwijf.mp4,29.839151,169,5.66
1,2_dfsaeklnvvalkej.mp4,29.526883,129,4.37
2,2_difficult_2.mp4,29.814735,173,5.8
3,2_difficult_sdafkljsalkfj.mp4,29.606801,114,3.85
4,2_dkdjwkndkfw.mp4,29.97003,200,6.67
5,2_dkdmkejkeimdh.mp4,29.658922,174,5.87
6,2_dkjd823kjf.mp4,23.60741,241,10.21
7,2_dsalkfjalwkenlke.mp4,23.640824,208,8.8
8,2_kling_20251205_Text_to_Video_On_a_sandy_4976...,24.0,121,5.04
9,2_kling_20251206_Text_to_Video_Generate_a_71_1...,24.0,96,4.0


## Summary Statistics

In [6]:
df_valid = df[df['error'].isna()].copy()

stats_columns = ['framerate', 'frame_count', 'duration_seconds']

summary_stats_df = df_valid[stats_columns].describe().round(2)
summary_stats_df

Unnamed: 0,framerate,frame_count,duration_seconds
count,77.0,77.0,77.0
mean,26.34,205.84,8.12
std,5.72,44.69,2.34
min,23.23,96.0,2.74
25%,23.98,184.0,6.15
50%,24.0,218.0,9.23
75%,29.67,241.0,10.04
max,59.43,325.0,13.56


In [7]:
unique_fps = sorted(df_valid['framerate'].unique())
most_common_fps = df_valid['framerate'].mode().values[0]
min_frame_idx = df_valid['frame_count'].idxmin()
max_frame_idx = df_valid['frame_count'].idxmax()
total_duration_sec = df_valid['duration_seconds'].sum()
total_duration_min = total_duration_sec / 60
min_duration_idx = df_valid['duration_seconds'].idxmin()
max_duration_idx = df_valid['duration_seconds'].idxmax()

insights_data = {
    'Category': [
        'Overview', 'Overview',
        'Framerate', 'Framerate',
        'Frame Count', 'Frame Count', 'Frame Count',
        'Duration', 'Duration', 'Duration'
    ],
    'Metric': [
        'Total videos analyzed',
        'Videos with errors',
        'Unique framerates',
        'Most common FPS',
        'Total frames (all videos)',
        'Shortest video',
        'Longest video',
        'Total duration',
        'Shortest video',
        'Longest video'
    ],
    'Value': [
        str(len(df_valid)),
        str(len(df) - len(df_valid)),
        ', '.join([f"{fps:.2f}" for fps in unique_fps]),
        f"{most_common_fps:.2f}",
        f"{df_valid['frame_count'].sum():,}",
        f"{df_valid.loc[min_frame_idx, 'filename']} ({df_valid['frame_count'].min()} frames)",
        f"{df_valid.loc[max_frame_idx, 'filename']} ({df_valid['frame_count'].max()} frames)",
        f"{total_duration_sec:.2f} seconds ({total_duration_min:.2f} minutes)",
        f"{df_valid.loc[min_duration_idx, 'filename']} ({df_valid['duration_seconds'].min():.2f}s)",
        f"{df_valid.loc[max_duration_idx, 'filename']} ({df_valid['duration_seconds'].max():.2f}s)"
    ]
}

insights_df = pd.DataFrame(insights_data)
insights_df

Unnamed: 0,Category,Metric,Value
0,Overview,Total videos analyzed,77
1,Overview,Videos with errors,0
2,Framerate,Unique framerates,"23.23, 23.60, 23.61, 23.61, 23.63, 23.63, 23.6..."
3,Framerate,Most common FPS,24.00
4,Frame Count,Total frames (all videos),15850
5,Frame Count,Shortest video,2_kling_20251206_Text_to_Video_Generate_a_71_1...
6,Frame Count,Longest video,4_pushup_1f2da596-7619-4d55-9376-069e15a42a1a_...
7,Duration,Total duration,625.27 seconds (10.42 minutes)
8,Duration,Shortest video,3_dsjlaeijlksjdfie.mp4 (2.74s)
9,Duration,Longest video,4_pushup_1f2da596-7619-4d55-9376-069e15a42a1a_...


## Grouped Statistics by Label

In [8]:
df_valid['label'] = df_valid['filename'].apply(lambda x: x.split('_')[0])

grouped_df = df_valid.groupby('label').agg({
    'filename': 'count',
    'framerate': ['mean', 'std'],
    'frame_count': ['mean', 'std', 'min', 'max'],
    'duration_seconds': ['mean', 'std', 'min', 'max']
}).round(2)

grouped_df.columns = ['_'.join(col).strip() for col in grouped_df.columns.values]
grouped_df = grouped_df.rename(columns={'filename_count': 'video_count'})
grouped_df

Unnamed: 0_level_0,video_count,framerate_mean,framerate_std,frame_count_mean,frame_count_std,frame_count_min,frame_count_max,duration_seconds_mean,duration_seconds_std,duration_seconds_min,duration_seconds_max
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1,29.84,,169.0,,169,169,5.66,,5.66,5.66
2,16,27.48,3.05,169.94,44.38,96,241,6.27,1.95,3.85,10.21
3,34,25.81,5.61,206.35,40.15,121,241,8.27,2.11,2.74,10.21
4,21,24.2,1.3,235.1,36.66,113,325,9.78,1.69,3.79,13.56
5,2,44.55,21.04,192.0,11.31,184,200,4.92,2.58,3.1,6.74
6,2,26.95,4.17,213.5,38.89,186,241,8.13,2.7,6.22,10.04
7,1,29.85,,198.0,,198,198,6.63,,6.63,6.63
