In [None]:
"""Take the video at fullpath and transcode it to h264 mp4 with AAC audio using reasonable presets.
    Write the new file to the equivalent location in dst, so a video from F:\blah\vid.wmv will go to 
    H:\stuff\blah\vid.mp4 """

from subprocess import run

dst = "H:\\stuff"

def transcode_and_copy(fullpath):

        
    print("starting transcoding:", fullpath)
    tail = fullpath[3:]  # strip off the drive letter
    newtail, _ = os.path.splitext(tail)
    newtail += ".mp4"  # make a destination file name with the correct file extension as we are transcoding
    dest_dir, _ = os.path.split(tail)
    try:
        os.makedirs(os.path.join(dst, dest_dir))  # makes the destination location if it doesn't already exist
    except FileExistsError:
        pass
    dest = os.path.join(dst, newtail)
    cmd = f'''start /low /wait /b ffmpeg -i "{fullpath}" -c:v libx264 -crf 23 -c:a aac -q 100 "{dest}"'''
    # the Windows "start" command lets us set low priority so we can run this in the background without ffmpeg
    # consuming all the resources
    # low priority, /b is use the same window, /wait stops the "start" command from just returning immediately
    # so that this will work in a "for" loop
    run(cmd, shell=True)  # need shell = True for run to find the "start" executable in windows
    # otherwise we'll get a "file not found" error
    
    with open("done.txt", "a") as f:
        f.write(fullpath)
        f.write("\n")
    print("done")

In [None]:
"""Code for determining whether a video has hard-coded black bars, and fixing them with an ffmpeg command"""

import videoobject as vo  # from video-tagger project, uses PIL images

locs = [(1, 1), (120, 4), (200, 8), (4, 230)]


def black_test(img):
    """Report whether the sampled pixels of ``img`` all share one value.

    ``img`` must provide ``getpixel(loc)``; it is queried at every coordinate
    in the module-level ``locs``. Returns True only when all samples are
    equal. The pixel value itself is not compared against black.
    """
    sampled = {img.getpixel(loc) for loc in locs}
    return len(sampled) == 1
    
def full_test(my_video_object):
    """Return True when every image of the video object passes ``black_test``.

    Every entry of ``my_video_object.images`` is tested; there is no
    short-circuit on the first failure.
    """
    verdicts = [black_test(frame) for frame in my_video_object.images]
    return False not in verdicts
    
def test_video(apath):
    """Build a ``vo.VideoObject`` from ``apath`` and run ``full_test`` on it."""
    return full_test(vo.VideoObject(apath))

#trim 90 from top and bottom
#crop=in_h-2*90

import subprocess as sp

cmd = "ffmpeg -i {infile} -vf crop=in_w:in_h-2*90 -c:a copy {outfile}"
#  this was done with a series of files where the hardcoded black bars were consistently 90 px

for x in os.listdir(os.path.join(path,"aspect_ratio")):
    vidpath = os.path.join(path,"aspect_ratio", x)
    dest = os.path.join(path,x)
    sp.run(cmd.format(infile=vidpath,outfile=dest))

In [None]:
"""Code for fixing videos where the DAR (display aspect ratio) is wrong, leading to the videos being
displayed in a wacky aspect ratio. Doesn't transcode the video."""

# ffprobe command template; "{}" is filled with the video path.
# Selects only the first video stream (v:0) and prints the requested stream entries in CSV form.
probe_cmd = '''ffprobe -v error -select_streams v:0 -show_entries stream=height,width,display_aspect_ratio -of csv {}'''
# outputs something like: stream,960,540,0:1
# presumably: section name, width, height, display aspect ratio (the 1440/1080 check in the loop
# further down this cell reads the fields in that order)
# this was an easy way to get the width and height to stdout

import subprocess as sp
import os

fix_cmd = "ffmpeg -i {} -vf setdar=dar=16/9 {}"
# command that sets the DAR to an assumed regular widescreen format

def get_res(path):
    """Run ``probe_cmd`` on ``path`` and return its stdout split on commas.

    Returns:
        List of strings such as ``["stream", "960", "540", "0:1"]``. The
        stderr output of ffprobe is captured and discarded.
    """
    quoted = f'"{path}"'  # quote the path in case the filename contains spaces
    proc = sp.Popen(
        probe_cmd.format(quoted),
        stdout=sp.PIPE,
        stderr=sp.PIPE,
        bufsize=10 ** 8,
    )
    raw_out, _ = proc.communicate()
    # drop the trailing line ending, then split e.g. "stream,960,540,0:1" into fields
    return raw_out.decode("utf-8").rstrip("\r\n").split(",")


def fix_res(path):
    """Run ``fix_cmd`` on ``path``, writing the result next to it as ``ar_fixed_<name>``.

    The output lands in the same directory as the input; both paths are
    wrapped in double quotes before being substituted into ``fix_cmd``.
    """
    print(f"fixing resolution of {path}...", end="")
    folder, filename = os.path.split(path)
    out_path = os.path.join(folder, "ar_fixed_" + filename)
    quoted_in = '"' + path + '"'
    quoted_out = '"' + out_path + '"'
    sp.run(fix_cmd.format(quoted_in, quoted_out))
    print("video processed")
    
root = "E:\\path_to_videos"
    
for x in os.listdir(root):
    full_path = os.path.join(root, x)
    _, ext = os.path.splitext(x)

    if ext == ".mp4":
        a,b,c,d = get_res(full_path)
        if b=="1440" and c == "1080" and not d == "16:9":
            # detect videos where the DAR is not correctly set
            # these videos were a total shitshow anyway, encoded at a zany resolution and relying on the DAR
            # to be displayed correctly. Hopefully these kinds of situations are rare.
            try:
                fix_res(full_path)
            except KeyboardInterrupt:
                exit = True
            else:
                exit = False
            with open("done.txt","a") as f:
                f.write("{}\n".format(full_path))
            if exit:
                print("Interrupted, exiting")
                break

In [None]:
"""Chop a video up into multiple scenes, assuming that the scenes are separated with fades-to-black.
This code detects the black screens and uses them to establish the time points for cutting.

Everything in this block is actually designed to run as a standalone script from the command line. It expects
on stdin the output of ffmpeg using the "blackdetect" filter. This outputs a bunch of lines with timestamps
where black screens were detected. Unfortunately I didn't save the exact blackdetect command I used anywhere,
but something like

ffmpeg -i inputfile -vf blackdetect=d=0.1:pix_th=.1 -f rawvideo -y NUL

gives an output like

[blackdetect @ 00000000026269a0] black_start:0 black_end:2.29396 black_duration:2.29396
[blackdetect @ 00000000026269a0] black_start:18.1431 black_end:18.2266 black_duration:0.0834168
[blackdetect @ 00000000026269a0] black_start:19.5612 black_end:19.6446 black_duration:0.0834168
[blackdetect @ 00000000026269a0] black_start:186.854 black_end:188.73 black_duration:1.8768847x
[blackdetect @ 00000000026269a0] black_start:215.632 black_end:215.716 black_duration:0.0834168
[blackdetect @ 00000000026269a0] black_start:215.924 black_end:216.175 black_duration:0.25025
[blackdetect @ 00000000026269a0] black_start:217.176 black_end:217.259 black_duration:0.0834168
[blackdetect @ 00000000026269a0] black_start:218.051 black_end:218.26 black_duration:0.208542
[blackdetect @ 00000000026269a0] black_start:265.057 black_end:265.182 black_duration:0.125125x
[blackdetect @ 00000000026269a0] black_start:276.193 black_end:277.528 black_duration:1.33467
[blackdetect @ 00000000026269a0] black_start:779.655 black_end:781.49 black_duration:1.8351748x

the exact parameters of blackdetect will need tweaking for the specific application though.


"""

import sys
import re

def get_transitions(gen):
    """Pick one cut point per cluster of black-screen detections.

    Timestamps less than or equal to 60 seconds apart are grouped into one
    cluster (one "transition"); the middle timestamp of each cluster is
    reported.

    Args:
        gen: Iterable of text lines, e.g. ffmpeg ``blackdetect`` output. The
            first ``t:<number>`` found on a line is used as that line's
            timestamp in seconds (on blackdetect lines this is the
            ``black_start`` value). Lines without a parsable number are
            skipped.

    Returns:
        List of floats, one per cluster, in input order. Empty when no line
        carries a timestamp.
    """
    found = []
    current = []  # timestamps of the cluster being collected

    for line in gen:
        match = re.search(r"t:([0-9.]+)", line)
        if not match:
            continue
        try:
            t = float(match.group(1))
        except ValueError:
            continue  # digits and dots that do not form a number, e.g. "1.2.3"

        if current and t - current[-1] > 60.0:
            # gap is large enough: close the previous cluster before starting a new one,
            # so ``t`` is counted in the cluster it actually belongs to
            found.append(current[len(current) // 2])  # middle of the transition
            current = []
        current.append(t)

    if current:
        # flush the final cluster; guarded so empty input does not raise IndexError
        found.append(current[len(current) // 2])

    return found

# script starts here
# the blackdetect output is expected on stdin; read it all, then extract the cut points
stdin_lines = sys.stdin.readlines()
timepoints = get_transitions(stdin_lines)

# sys.argv[1] is the path of the video to split, passed in from the command line
print(f"Splitting video at {sys.argv[1]}")

# ffmpeg templates: one for a segment of known duration, one for the last segment (runs to the end)
cut_cmd = 'ffmpeg -ss {start_time} -t {duration} -i "{source}" "{dest}"'
final_cut_cmd = 'ffmpeg -ss {start_time} -i "{source}" "{dest}"'


def cut_video(timepoints):
    """Split the video named by ``sys.argv[1]`` into numbered parts at ``timepoints``.

    Each part is written as ``path_to_output\\<name>_part<N>.mp4``. A cut
    point less than 90 seconds after the start of the current segment is
    skipped, so that stretch is folded into the following part. After the
    loop, one more part is written from the last used cut point to the end
    of the video.

    Args:
        timepoints: Cut positions in seconds, in ascending order.
    """
    videopath = sys.argv[1]
    stem_path, _ = os.path.splitext(videopath)
    videoname = os.path.basename(stem_path)
    out_prefix = f"path_to_output\\{videoname}"

    segment_start = 0
    part_no = 1
    for cut_at in timepoints:
        length = cut_at - segment_start
        if length < 90:
            continue  # ignore very short segments, probably title cards or previews
        part_file = f"{out_prefix}_part{part_no}.mp4"
        segment_cmd = cut_cmd.format(
            start_time=segment_start,
            duration=length,
            source=videopath,
            dest=part_file,
        )
        print(f"Encoding part {part_no}")
        sp.run(segment_cmd)
        part_no += 1
        segment_start = cut_at

    # whatever remains after the last cut point becomes the final part
    part_file = f"{out_prefix}_part{part_no}.mp4"
    sp.run(final_cut_cmd.format(start_time=segment_start, source=videopath, dest=part_file))

    
print("got timepoints")
print(timepoints)
cut_video(timepoints)