# Remove <keyword>Commercials</keyword> with <keyword>ffmpeg</keyword> and <keyword>PySceneDetect</keyword>
*Managing non-contiguous sections*
<created>01/11/2022</created>
<updated></updated>

# Background

I wanted to remove commercials from several hundred TV episodes present in a handful of recordings. Additionally, I wanted to split them per episode to be added to a media library.

## Scene Detection

Detecting the transition from content to commercial is made easy using [PySceneDetect](https://pyscenedetect.readthedocs.io/en/latest/).

The default settings were nearly perfect at detecting the scene transition.

### Usage

PySceneDetect is easily called from the command line. However, as I was doing this in batches and wanted a way to incorporate reviewing the scenes and episode annotation with the conversion, I decided to use a Jupyter Notebook.


In [None]:
from subprocess import Popen

def get_scenes(folder, video):
    """Run the ``scenedetect`` command line tool on ``video``.

    The process is started with ``folder`` as its working directory and this
    function blocks until it exits. The sub-commands requested are
    ``detect-threshold``, ``list-scenes``, ``save-images`` and ``export-html``
    (the latter with ``-w 320 -h 180``).

    Args:
        folder: Working directory for the ``scenedetect`` process.
        video: Input video passed to ``scenedetect -i``.
    """
    html_size_opts = ["-w", "320", "-h", "180"]
    command = [
        "scenedetect", "-i", video,
        "detect-threshold",
        "list-scenes",
        "save-images",
        "export-html", *html_size_opts,
    ]
    with Popen(command, cwd=folder) as proc:
        # No pipes are attached, so this simply waits for the tool to finish.
        proc.communicate()

As you see from the above function, PySceneDetect will generate a report of scenes detected, along with thumbnails to confirm their accuracy.

With the report finished, I used some helper functions to work with the report and integrate it into Jupyter.

In [None]:
import pandas as pd
from glob import glob
from PIL import Image
from pathlib import Path
from base64 import b64encode
from IPython.display import display, HTML
from io import BytesIO
from itertools import chain

# Neither glob() nor a subprocess launched without a shell expands "~", so the
# home directory is resolved explicitly before any of these paths are used.
videos_dir = Path("~/Videos").expanduser()
video = str(videos_dir / "movie.mkv")
scenes = glob(str(videos_dir / "*.jpg"))  # Thumbnails from PySceneDetect
scene_list = glob(str(videos_dir / "Scenes.csv"))  # Scene Report

def sort_scene(f):
    """Return the ``(scene_number, image_index)`` sort key for a thumbnail.

    The key is read from the last two ``-``-separated fields of the file name
    (without extension), e.g. ``movie-Scene-003-02.jpg`` -> ``(3, 2)``.
    Reading from the right keeps this working when the video name itself
    contains hyphens.

    Args:
        f: Path (or file name) of a thumbnail image.

    Returns:
        A ``(scene_num, scene_idx)`` tuple of ints.

    Raises:
        ValueError: If the name does not end in two integer fields.
    """
    stem = Path(f).stem
    # rsplit from the right so hyphens in the video name are left untouched.
    scene_num, scene_idx = [int(x) for x in stem.rsplit('-', 2)[-2:]]
    return scene_num, scene_idx

def get_scene_image(scene, imgs):
    """Build a single side-by-side image from a scene's thumbnails.

    Thumbnails whose scene number (per ``sort_scene``) equals ``scene`` are
    ordered by their sort key and pasted left to right onto a new RGB canvas.

    Args:
        scene: Scene number to collect thumbnails for.
        imgs: Iterable of thumbnail file paths.

    Returns:
        A PIL image as wide as all thumbnails combined and as tall as the
        tallest one.
    """
    thumb_paths = sorted(
        (path for path in imgs if sort_scene(path)[0] == scene),
        key=sort_scene,
    )
    frames = [Image.open(path) for path in thumb_paths]
    total_width = sum(frame.width for frame in frames)
    tallest = max(frame.height for frame in frames)
    strip = Image.new('RGB', (total_width, tallest))
    offset = 0
    for frame in frames:
        # Each thumbnail starts where the previous one ended.
        strip.paste(frame, (offset, 0))
        offset += frame.width
    return strip

def encode_b64_img(img):
    """Return ``img`` as a PNG ``data:`` URI for embedding in HTML.

    Args:
        img: Object with a PIL-style ``save(fp, format=...)`` method.

    Returns:
        A ``data:image/png;base64,...`` string.
    """
    with BytesIO() as buffer:
        img.save(buffer, format='png')
        payload = b64encode(buffer.getvalue())
    return "data:image/png;base64," + payload.decode('ascii')

def embed_scene_image(scene, imgs, df):
    """Display a scene's combined thumbnail together with its timing info.

    Args:
        scene: Scene number to show.
        imgs: Iterable of thumbnail file paths.
        df: Scene report DataFrame with a ``'Scene Number'`` column. Values
            are read by position from columns 3, 6 and 9 (presumably start,
            end and length as written by PySceneDetect; confirm against the
            report header).
    """
    img_b64 = encode_b64_img(get_scene_image(scene, imgs=imgs))
    row = df.loc[df['Scene Number'] == scene]
    scene_start, scene_end, scene_length = (
        row.iat[0, col] for col in (3, 6, 9)
    )
    markup = f'''<div>
    <h2>Scene: {scene}</h2>
    <h3>{scene_start} - {scene_end} ({scene_length})</h3>
    <img src="{img_b64}"/></div>'''
    display(HTML(markup))

def parse_scene(scene, is_end, df):
    """Return the start or end time of one scene from the scene report.

    Args:
        scene: Scene number to look up in the ``'Scene Number'`` column.
        is_end: If true, return the scene's end time; otherwise its start.
        df: Scene report DataFrame. Times are read by position: column 3 for
            the start and column 6 for the end (presumably the seconds
            columns of PySceneDetect's report; confirm against the header).

    Returns:
        The value stored in the selected column of the first matching row.

    Raises:
        IndexError: If no row matches ``scene`` or the column is missing.
    """
    scene_data = df.loc[df['Scene Number'] == scene]
    # Only fetch the value actually returned; the scene length is not needed.
    column = 6 if is_end else 3
    return scene_data.iat[0, column]

def parse_scenes(ends, df):
    """Translate ``(first_scene, last_scene)`` pairs into time ranges.

    Args:
        ends: Iterable of ``(start_scene, end_scene)`` scene-number pairs.
        df: Scene report DataFrame passed through to ``parse_scene``.

    Returns:
        A list of ``(start, end)`` tuples: the start time of the first scene
        and the end time of the last scene of each pair.
    """
    return [
        (parse_scene(first, False, df), parse_scene(last, True, df))
        for first, last in ends
    ]

def parse_ss(ends, df, as_scenes=True):
    """Re-base time ranges on an offset shortly before the earliest one.

    The offset is the smallest time found in ``ends`` minus 100, clamped at 0
    (presumably meant as an ffmpeg ``-ss`` seek position in seconds; confirm
    with the caller).

    Args:
        ends: ``(start, end)`` pairs. These are scene numbers when
            ``as_scenes`` is true, otherwise times.
        df: Scene report DataFrame; only used when ``as_scenes`` is true.
        as_scenes: Whether to convert ``ends`` with ``parse_scenes`` first.

    Returns:
        ``(pieces, offset)`` where ``pieces`` is a list of ``(start, end)``
        strings relative to the offset and ``offset`` is itself a string, all
        formatted with four decimal places.
    """
    spans = parse_scenes(ends, df) if as_scenes else ends
    earliest = min(chain.from_iterable(spans))
    ss_start = max(earliest - 100, 0)
    shifted = [
        (f"{begin - ss_start:.04f}", f"{finish - ss_start:.04f}")
        for begin, finish in spans
    ]
    return shifted, f"{ss_start:.04f}"

def parse_scene_annotation(s):
    """Parse one ``"<scenes>...<output>"`` annotation string.

    ``<scenes>`` is a comma-separated list of scene numbers or
    ``first-last`` ranges; ``<output>`` is the output file name, to which
    ``.mp4`` is appended when missing. Example:
    ``"2-3,10-11...S06E36"`` -> ``[[(2, 3), (10, 11)], "S06E36.mp4"]``.

    Args:
        s: The annotation string.

    Returns:
        ``[scenes, output]`` where ``scenes`` is a list of ``(start, end)``
        int tuples, or ``None`` if parsing failed (the input and the error
        are printed in that case).
    """
    def _span(token):
        # A bare number N is treated as the one-scene range (N, N).
        token = token.strip()
        if "-" in token:
            first, last = [part.strip() for part in token.split("-")]
            return (int(first), int(last))
        return (int(token), int(token))

    try:
        scenes_str, output = s.split("...")
        if not output.endswith(".mp4"):
            output = f"{output}.mp4"
        return [[_span(token) for token in scenes_str.split(",")], output]
    except Exception as e:
        # Best effort: report the bad annotation and fall through to None.
        print(s, e)


There's a lot of "plumbing code" going on above. But it serves to allow me to write the instructions like:

```python
episodes = [
     "2-3,10-11...S06E36",
    ]
```

Which is interpreted as:
"2-3,10-11...S06E36"  -> Select Scenes 2, 3, 10, 11 and Output as S06E36.mp4


## Hooking in ffmpeg

<keyword>ffmpeg</keyword> can trim and rejoin these non-contiguous sections, but the filtergraph is... very verbose!

Below is some additional plumbing code that writes the filtergraph out based on the scene and episode command.

In [None]:
def make_filtergraph_pieces(ends, n):
    """Return the trim/atrim filter chains for one segment.

    Args:
        ends: Sequence whose first two items are the segment start and end.
        n: Segment index, used to label the outputs ``[{n}v]`` and ``[{n}a]``.

    Returns:
        The video chain followed by the audio chain, each ending in ``;``.
        Both reset timestamps with ``PTS-STARTPTS``.
    """
    start, end = ends[0], ends[1]
    video_chain = f"[0:v]trim=start={start}:end={end},setpts=PTS-STARTPTS[{n}v];"
    audio_chain = f"[0:a]atrim=start={start}:end={end},asetpts=PTS-STARTPTS[{n}a];"
    return "".join((video_chain, audio_chain))

def make_concat_filtergraph(n):
    """Return the ``concat`` filter that joins ``n`` trimmed segments.

    Args:
        n: Number of segments; inputs are labelled ``[0v][0a]`` up to
            ``[{n-1}v][{n-1}a]``.

    Returns:
        The input labels followed by ``concat=n={n}:v=1:a=1[outv][outa]``.
    """
    labels = "".join(f"[{idx}v][{idx}a]" for idx in range(n))
    return f"{labels}concat=n={n}:v=1:a=1[outv][outa]"

def make_filtergraph(ends):
    """Assemble the full filtergraph string for a list of segments.

    Args:
        ends: Sequence of ``(start, end)`` pairs, one per segment.

    Returns:
        The per-segment trim chains from ``make_filtergraph_pieces`` followed
        by the concat filter from ``make_concat_filtergraph``.
    """
    count = len(ends)
    trims = "".join(
        make_filtergraph_pieces(span, idx) for idx, span in enumerate(ends)
    )
    return trims + make_concat_filtergraph(count)


## Ready

In [None]:
from subprocess import Popen, STDOUT, PIPE

# `episodes` is a list of annotation strings, while parse_scene_annotation
# handles a single string, so each entry is parsed on its own. Entries that
# fail to parse come back as None (after being printed) and are left out.
episode_commands = [
    command
    for command in map(parse_scene_annotation, episodes)
    if command is not None
]

def extract_pieces(pieces, input_file, output_file):
    """Cut the given scene ranges out of a video and join them with ffmpeg.

    Decoding uses ``-hwaccel cuda`` and encoding uses ``h264_nvenc``; swap
    these options out as needed. Scene numbers are resolved to times through
    the module-level ``df`` scene report, and the function blocks until
    ffmpeg exits.

    Args:
        pieces: Iterable of ``(start_scene, end_scene)`` pairs.
        input_file: Source video passed to ``ffmpeg -i``.
        output_file: Destination file for the joined result.
    """
    spans = parse_scenes(pieces, df)

    decode_opts = ['-vsync', '0', '-hwaccel', 'cuda']
    encode_opts = ["-c:v", "h264_nvenc", "-preset", "slow", "-movflags", "+faststart"]
    command = [
        'ffmpeg', *decode_opts,
        '-i', input_file,
        '-filter_complex', make_filtergraph(spans),
        "-map", "[outv]", "-map", "[outa]",
        *encode_opts,
        output_file,
    ]

    with Popen(command) as proc:
        proc.communicate()


In [None]:
from tqdm.auto import tqdm
import os

# scene_list comes from glob() and is therefore a list of matches, whereas
# read_csv needs a single path: use the first match and fail clearly if the
# report is missing.
if isinstance(scene_list, (list, tuple)):
    if not scene_list:
        raise FileNotFoundError("No scene report CSV found; run scene detection first")
    scene_csv = scene_list[0]
else:
    scene_csv = scene_list

# The first row of the report is skipped so the column header row is used.
df = pd.read_csv(scene_csv, skiprows=1)
df['Start Timecode'] = pd.to_timedelta(df['Start Timecode'])
df['End Timecode'] = pd.to_timedelta(df['End Timecode'])

# ffmpeg is launched without a shell, so "~" has to be expanded here.
episodes_dir = os.path.expanduser("~/Videos/Episodes")

# The loop variable is deliberately not called `scenes`, which would overwrite
# the module-level list of thumbnail paths.
for episode_scenes, out in tqdm(episode_commands):
    mv_out = os.path.join(episodes_dir, out)
    extract_pieces(episode_scenes, video, mv_out)