All the config variables

In [None]:
# Root folder on the mounted Google Drive; every other path hangs off it.
WHISPERDIR = "/content/drive/MyDrive/whispercut"

# input:   folder that is scanned for the video to process
# workdir: per-project scratch space (extracted audio, transcript)
# output:  where the finished cuts are written
# archive: where the source video is moved once processing is done
INPUTDIR, WORKDIR, OUTPUTDIR, ARCHIVEDIR = (
    f"{WHISPERDIR}/{folder}"
    for folder in ("input", "workdir", "output", "archive")
)

# Spoken phrases that are searched for in the transcript.
START_PHRASES = [
    "Rant time",
    "good topic",
    "tissue",
]
END_PHRASES = [
    "rant over",
    "subscribe",
    "don't leave",
    "muted fixed",
]
EDITOR_PHRASES = [
    "editor",
]


Check which GPU we got, because why not (a Tesla T4 has been fine for me so far)

In [None]:
# List the GPUs attached to this runtime (-L prints one line per device).
!nvidia-smi -L

Connect to Google Drive work folder and pick a video to work on

In [None]:
from google.colab import runtime
from google.colab import drive
from glob import glob
import os

drive.mount('/content/drive')

# Keep regular files only (a sub-folder cannot be processed) and sort them:
# the order glob() returns depends on the file system, so without sorting the
# "first" video would be picked arbitrarily.
inputfiles = sorted(path for path in glob(f"{INPUTDIR}/*") if os.path.isfile(path))

if len(inputfiles) == 0:
  print("Please put a video into", INPUTDIR)
  runtime.unassign()
  # Stop this cell with a clear message instead of an IndexError further down.
  raise FileNotFoundError(f"No input video found in {INPUTDIR}")

# for now, we're gonna trust the user to only put video files into the folder
INPUTVIDEO = inputfiles[0]
print("Processing started for:", INPUTVIDEO)

# splitext() splits on the LAST dot only, so "my.holiday.video.mp4" yields the
# project name "my.holiday.video" and the format "mp4".
PROJECTNAME, _extension = os.path.splitext(os.path.basename(INPUTVIDEO))
VIDEOFORMAT = _extension.lstrip(".")
if not VIDEOFORMAT:
  # The cuts are written as "<nn>.<VIDEOFORMAT>", so an extension is required.
  raise ValueError(f"Input file has no extension, cannot determine the video format: {INPUTVIDEO}")

# setup some directories
PROJECTDIR = f'{WORKDIR}/{PROJECTNAME}'  # scratch space for this video
os.makedirs(PROJECTDIR, exist_ok=True)
RESULTSDIR = f'{OUTPUTDIR}/{PROJECTNAME}'  # per-video results folder
os.makedirs(RESULTSDIR, exist_ok=True)
os.makedirs(f'{ARCHIVEDIR}', exist_ok=True)

Let's install OpenAI Whisper and ffmpeg

In [None]:
# Install setuptools-rust first: it may be needed to build one of whisper's
# dependencies from source, so it has to be present before whisper is installed.
!pip install setuptools-rust
!pip install git+https://github.com/openai/whisper.git
# -y answers apt's confirmation prompt; without it the install aborts in a
# non-interactive notebook cell whenever a package actually has to be installed.
!sudo apt update && sudo apt install -y ffmpeg

Convert video to mp3 and feed into whisper ai

In [None]:
# Extract the audio track as mp3 into the project folder (-y overwrites an
# earlier run). Both paths are quoted so file names with spaces stay intact.
!ffmpeg -y -i "{INPUTVIDEO}" "{PROJECTDIR}/audio.mp3"

In [None]:
# Transcribe the extracted audio; whisper writes its output files into the
# project folder. Paths are quoted so project names with spaces stay intact.
!whisper "{PROJECTDIR}/audio.mp3" --language en --model medium.en -o "{PROJECTDIR}"

In [None]:
import json
import re

# Depending on the installed whisper version the subtitle file is named after
# the audio file's stem ("audio.srt") or after its full name ("audio.mp3.srt").
srt_candidates = [f"{PROJECTDIR}/audio.srt", f"{PROJECTDIR}/audio.mp3.srt"]
srt_path = next((path for path in srt_candidates if os.path.exists(path)), None)
if srt_path is None:
  raise FileNotFoundError(f"No whisper subtitle file found, looked for: {srt_candidates}")

# Parse the SRT into {number: {"before", "after", "text"}}.
# Entries are separated by blank lines; splitting on those (instead of reading
# a fixed four lines per entry) keeps the parser in sync when a subtitle has
# several text lines or no text at all.
lines = {}
with open(srt_path, encoding="utf-8") as f:
  srt_entries = re.split(r"\n\s*\n", f.read().strip())

for entry in srt_entries:
  rows = [row.strip() for row in entry.splitlines()]
  if len(rows) < 2:
    continue  # empty file or stray fragment without a timestamp row
  number = int(rows[0])
  # Timestamp row looks like "00:00:01,000 --> 00:00:04,000".
  before, _, after = rows[1].partition("-->")
  before = before.strip()
  after = after.strip()
  # Multi-line subtitle text is folded into a single line.
  text = " ".join(row for row in rows[2:] if row)
  lines[number] = {"before": before, "after": after, "text": text}
  print(number, before, after, text)

with open(f"{PROJECTDIR}/lines.json", "w") as f:
  json.dump(lines, f)

Now we go through the lines and find the start and end markers, looking at two lines at a time in case a start phrase wraps over a line break.

In [None]:
def search_for_phrase(phrase, subtitle_lines=None):
  """Find every subtitle line, or pair of adjacent lines, that contains `phrase`.

  Matching is case-insensitive. A phrase split across a line break is found by
  also searching each pair of neighbouring lines joined with a single space.

  Args:
    phrase: the text to look for.
    subtitle_lines: dict mapping 1..n to {"before", "after", "text"} dicts.
      Defaults to the notebook-level `lines` parsed from the transcript.

  Returns:
    A list of hits in ascending index order. Each hit is a dict with
    "index", "before", "after" and "text". A hit inside line i has index i;
    a hit that only exists across lines i and i+1 has index i + 0.5 and spans
    from the first line's "before" to the second line's "after".
  """
  if subtitle_lines is None:
    subtitle_lines = lines

  total = len(subtitle_lines.keys())
  if total == 0:
    return []

  needle = phrase.lower()
  results = []

  for i in range(1, total + 1):
    current = subtitle_lines[i]
    in_current = needle in current["text"].lower()
    if in_current:
      # Every line is tested on its own, so a hit is reported even when the
      # previous line contains the phrase as well.
      results.append({
          "index": i,
          "before": current["before"],
          "after": current["after"],
          "text": current["text"]})

    if i == total:
      break  # the last line has no neighbour to pair with

    # search doublelines because activation phrases might be on a line break
    following = subtitle_lines[i+1]
    if in_current or needle in following["text"].lower():
      continue  # already covered by a single-line hit
    doubleline = f'{current["text"]} {following["text"]}'
    if needle in doubleline.lower():
      results.append({
          "index": i + 0.5,
          "before": current["before"],
          "after": following["after"],
          "text": doubleline})
  #endfor
  return results

def _collect_markers(phrases):
  """Search the transcript for each phrase and return all hits sorted by index.

  Prints every phrase followed by the hits found for it.
  """
  found = []
  for phrase in phrases:
    print(phrase)
    for hit in search_for_phrase(phrase):
      print(hit)
      found.append(hit)
  found.sort(key=lambda marker: marker["index"])
  return found

print("Start markers")
start_markers = _collect_markers(START_PHRASES)

print()
print("End markers")
end_markers = _collect_markers(END_PHRASES)
print(end_markers)

print()
print("Editor markers")
editor_markers = _collect_markers(EDITOR_PHRASES)
  

Evaluate the markers

In [None]:
def get_end_marker(start_index):
  """Return the end marker that closes a cut starting at `start_index`.

  That is the first entry of `end_markers` whose index is greater than
  `start_index`. When there is none, a marker built from the last transcript
  line is returned instead.
  """
  following = next(
      (candidate for candidate in end_markers if candidate["index"] > start_index),
      None,
  )
  if following is not None:
    return following

  # No end phrase after the start: fall back to the final transcript line.
  last = len(lines.keys())
  return {
      "index": last,
      **{field: lines[last][field] for field in ("before", "after", "text")},
  }

# One cut per start marker, running up to the matching end marker.
# The transcript timestamps use "," before the milliseconds; it is swapped for
# "." before the values are handed to ffmpeg.
cuts = []
for start_marker in start_markers:
  closing = get_end_marker(start_marker["index"])
  cut = {
      "before": start_marker["before"].replace(",", "."),
      "after": closing["after"].replace(",", "."),
      "text": start_marker["text"],
  }
  print(cut["before"], cut["text"], cut["after"])
  cuts.append(cut)

Make the cuts with ffmpeg

In [None]:
import subprocess
# Cut each segment out of the input video without re-encoding (stream copy).
for index, cut in enumerate(cuts):
  print("Processing cut", index+1, "of", len(cuts))
  # Write into this video's own results folder so cuts from different videos
  # do not overwrite each other in the shared output folder.
  target = f'{RESULTSDIR}/{index:0>2d}.{VIDEOFORMAT}'
  completed = subprocess.run([
      "ffmpeg",
      "-y",  # overwrite results of an earlier run instead of waiting on a prompt
      "-i",
      INPUTVIDEO,
      "-ss",
      cut["before"],
      "-to",
      cut["after"],
      "-c:v",
      "copy",
      "-c:a",
      "copy",
      target
    ])
  if completed.returncode != 0:
    # Keep going with the remaining cuts, but make the failure visible.
    print("ffmpeg failed for cut", index+1, "with exit code", completed.returncode)

Finally, move video to archive

In [None]:
# Quote both paths so a video file name containing spaces is moved correctly.
!mv "{INPUTVIDEO}" "{ARCHIVEDIR}/"

And disconnect the runtime

In [None]:
# Release the Colab runtime now that processing is finished.
runtime.unassign()