<a href="https://colab.research.google.com/github/jermwatt/bleep_that_sht/blob/main/beep_that_sht_walkthrough.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import os
from IPython.display import HTML 
from base64 import b64encode

# if running in collab pull repo and install requirements
if os.getenv("COLAB_RELEASE_TAG"):
    !git clone https://github.com/jermwatt/bleep_that_sht.git
    %cd ocr_preprocessing
    !pip install -r requirements.txt
    !sudo apt install tesseract-ocr

# make sure video can be played on ubuntu
def display_video(path):
    """Render the video file at ``path`` inline in the notebook.

    The file is read fully into memory, base64-encoded, and embedded in a
    ``<video>`` tag as a ``data:video/mp4`` URL, so the player does not depend
    on the notebook server being able to serve the file by relative path.

    Args:
        path: location of the video file to show.
    """
    # context manager guarantees the file handle is released even if read() fails
    with open(path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    display(
      HTML(
      """
          <video width=200 controls>
                <source src="%s" type="video/mp4">
          </video>
      """ % data_url
           )
    )

# Bleep That Sh*t

- use whisper to build a fun app that makes anyone sound naughty
- first pre-process your video - stripping off the audio
- transcribe the audio with whisper
- choose a set of "bleep words" that you want to replace in the audio
- post-process the audio locally, replacing each instance of your "bleep words" with a bleep
- post-process locally - sewing your new bleep audio back onto the original video

# 1.  pre-processing utilities

In [1]:
from moviepy.editor import VideoFileClip

def extract_audio(*, local_file_path: str, audio_filepath: str) -> None:
    """Write the audio track of a video file to ``audio_filepath``.

    If the video has no audio track nothing is written and the function
    returns silently.

    Args:
        local_file_path: path of the video to read.
        audio_filepath: destination path for the extracted audio.

    Raises:
        ValueError: if the video cannot be opened or the audio cannot be
            written; the underlying exception is chained as ``__cause__``.
    """
    video = None
    try:
        video = VideoFileClip(local_file_path)
        audio = video.audio
        if audio is not None:
            audio.write_audiofile(audio_filepath, verbose=False, logger=None)
            audio.close()
    except Exception as e:
        raise ValueError(f"error extracting audio from video {local_file_path}, exception: {e}") from e
    finally:
        # always release the clip - including when there is no audio track or
        # when writing failed - so its readers are not leaked
        if video is not None:
            video.close()

In [13]:
# extract audio
# videopath is the input video; audiopath is where its audio track is written
# (the transcription cell below reads the same .mp3 path)
videopath = "data/input/bleep_test_1.mp4"
audiopath = "data/input/bleep_test_1.mp3"
extract_audio(local_file_path=videopath, audio_filepath=audiopath)

In [16]:
# display the input video inline via the display_video helper (embeds the file as a base64 data URL)
display_video(videopath)

In [20]:
%%HTML
<!-- plays the same input video through a relative file path instead of an embedded data URL -->
<video width="320" height="240" controls>
  <source src="data/input/bleep_test_1.mp4" type="video/mp4">
</video>

# 2.  compute transcription using whisper

In [4]:
import whisper_timestamped as whisper

# load the "tiny" whisper model on the CPU
model = whisper.load_model("tiny", device="cpu")
# audio to transcribe - the same path the extract-audio cell wrote to
local_file_path = "data/input/bleep_test_1.mp3"
# process_output is indexed by "text" and "segments" in the next cell
process_output = whisper.transcribe(model, local_file_path, verbose=False)

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.

Detected language: English


100%|██████████| 3093/3093 [00:04<00:00, 692.34frames/s] 


In [8]:
# extract transcript and timestamped transcript from process_output
#   "text"     -> transcript
#   "segments" -> timestamped_transcript; query_transcript reads each entry's "words" list,
#                 and bleep_that_sht picks this variable up as a notebook global
transcript = process_output["text"]
timestamped_transcript = process_output["segments"]

# 3.  post-processing utilities

In [9]:
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
from pydub import AudioSegment 
# load the bleep sample from bleep_that_sht/bleep.mp3
bleep_sound = AudioSegment.from_mp3("bleep_that_sht/bleep.mp3")
# keep the 1000-2000 slice of the sample (pydub slices are in milliseconds);
# NOTE(review): despite the name, this is the second second of the file, not the first
bleep_first_sec = bleep_sound[1 * 1000: 2 * 1000] 

# simple word cleaner - keep only letters and digits, lower-cased
def word_cleaner(word: str) -> str:
    """Normalise ``word`` for comparison.

    Every character that is not alphanumeric (punctuation, whitespace,
    symbols) is dropped and the remainder is lower-cased.
    """
    kept_chars = filter(str.isalnum, word)
    # no strip needed: whitespace is never alphanumeric, so none survives the filter
    return "".join(kept_chars).lower()

# collect all timestamped instances of bleep_word in transcript
def query_transcript(bleep_words: list,
                     timestamped_transcript: list) -> list:
    """Find every transcript word that matches one of ``bleep_words``.

    Words are compared after normalisation with ``word_cleaner``, so case and
    punctuation are ignored.

    Args:
        bleep_words: words to look for.
        timestamped_transcript: list of segments; each segment is a dict with
            a ``"words"`` list whose entries carry at least ``"text"`` and
            ``"start"``.

    Returns:
        The matching word entries, each at most once, sorted by ``"start"``.
    """
    # a set gives one membership test per transcript word and guarantees that
    # bleep words which normalise to the same string (e.g. "ice" / "ICE") do
    # not return the same instance twice
    targets = {word_cleaner(bleep_word) for bleep_word in bleep_words}
    # a bleep word made only of punctuation cleans to "" and would otherwise
    # match every punctuation-only token in the transcript
    targets.discard("")

    detected_bleep_words = [
        word
        for segment in timestamped_transcript
        for word in segment["words"]
        if word_cleaner(word["text"]) in targets
    ]
    return sorted(detected_bleep_words, key=lambda d: d["start"])

def _fit_bleep(bleep, duration_ms: int):
    """Return a bleep clip exactly ``duration_ms`` long, looping the sample if it is shorter."""
    sample_ms = len(bleep)
    if sample_ms == 0:
        raise ValueError("bleep sample is empty, cannot build a bleep clip")
    if duration_ms <= sample_ms:
        return bleep[:duration_ms]
    repeats = duration_ms // sample_ms + 1
    return (bleep * repeats)[:duration_ms]


def _splice_bleeps(sound, bleep, instances, pad_ms: int = 50) -> list:
    """Cut ``sound`` into clips with a bleep substituted over every instance; total length is preserved."""
    total_ms = len(sound)
    clips = []
    cursor = 0  # end (in ms) of the audio already accounted for
    for instance in instances:
        # word boundaries in milliseconds, widened by pad_ms on both sides.
        # The start is clamped to the cursor so that (a) a word beginning in
        # the first pad_ms never yields a negative index, which pydub would
        # interpret as "from the end", and (b) padded windows of adjacent
        # words cannot overlap and insert extra audio.
        start_ms = max(int(instance["start"] * 1000) - pad_ms, cursor)
        end_ms = min(int(instance["end"] * 1000) + pad_ms, total_ms)
        if end_ms <= start_ms:
            # window already fully covered by the previous bleep (or out of range)
            continue
        if start_ms > cursor:
            clips.append(sound[cursor:start_ms])
        clips.append(_fit_bleep(bleep, end_ms - start_ms))
        cursor = end_ms
    if cursor < total_ms:
        clips.append(sound[cursor:])
    if not clips:
        # zero-length input: keep something to export
        clips.append(sound)
    return clips


def bleep_that_sht(og_video_path: str,
                   og_audio_path: str,
                   final_video_path: str,
                   final_audio_path: str,
                   bleep_words: list,
                   transcript_segments: list = None) -> None:
    """Replace every occurrence of ``bleep_words`` in a video's audio with a bleep.

    Steps: extract the audio of ``og_video_path`` to ``og_audio_path``, locate
    the bleep words in the timestamped transcript, splice a bleep over each
    one (padded by 50 ms on both sides), export the result to
    ``final_audio_path`` and write a copy of the video with that audio to
    ``final_video_path``.

    Args:
        og_video_path: input video.
        og_audio_path: where the extracted original audio is written.
        final_video_path: where the bleeped video is written.
        final_audio_path: where the bleeped audio is written.
        bleep_words: words to bleep.
        transcript_segments: timestamped transcript segments for this video
            (each with a ``"words"`` list). When omitted, the notebook-level
            ``timestamped_transcript`` variable is used, as before. The
            transcript must belong to ``og_video_path`` or the bleeps will
            land at the wrong times.
    """
    if transcript_segments is None:
        # backward-compatible fallback to the notebook global
        transcript_segments = timestamped_transcript

    # extract and save audio from original video
    extract_audio(local_file_path=og_video_path, audio_filepath=og_audio_path)

    # input og audio file for splicing
    og_sound = AudioSegment.from_mp3(og_audio_path)

    # find bleep_words in timestamped transcript
    bleep_word_instances = query_transcript(bleep_words, transcript_segments)

    # thread bleeps into the original audio and save the result;
    # sum() works because the audio clips support addition starting from 0
    bleeped_sound = sum(_splice_bleeps(og_sound, bleep_first_sec, bleep_word_instances))
    bleeped_sound.export(final_audio_path, format="mp3")

    # load in og video, overlay with bleeped audio; close both clips afterwards
    # so their file readers are released even if writing fails
    og_video = VideoFileClip(og_video_path)
    try:
        bleep_audio = AudioFileClip(final_audio_path)
        try:
            og_video.audio = CompositeAudioClip([bleep_audio])
            og_video.write_videofile(final_video_path,
                                     codec='libx264',
                                     audio_codec='aac',
                                     temp_audiofile='temp-audio.m4a',
                                     remove_temp=True)
        finally:
            bleep_audio.close()
    finally:
        og_video.close()

In [10]:
# input / output locations for this run
og_video_path = "data/input/bleep_test_1.mp4"
og_audio_path = "data/input/bleep_test_1.mp3"
final_video_path = "data/output/bleep_test_1.mp4"
final_audio_path = "data/output/bleep_test_1.mp3"

# words to replace with a bleep
bleep_words = ["Treetz", "ice", "cream", "chocolate", "syrup", "cookie", "hooked"]

# run the pipeline: extract audio, bleep the chosen words, re-attach audio to the video
bleep_that_sht(
    og_video_path=og_video_path,
    og_audio_path=og_audio_path,
    final_video_path=final_video_path,
    final_audio_path=final_audio_path,
    bleep_words=bleep_words,
)

Moviepy - Building video data/output/bleep_test_1.mp4.
MoviePy - Writing audio in temp-audio.m4a


                                                                    

MoviePy - Done.
Moviepy - Writing video data/output/bleep_test_1.mp4



                                                                

Moviepy - Done !
Moviepy - video ready data/output/bleep_test_1.mp4


In [11]:
# input / output locations for this run (same files as the previous run)
og_video_path = "data/input/bleep_test_1.mp4"
og_audio_path = "data/input/bleep_test_1.mp3"
final_video_path = "data/output/bleep_test_1.mp4"
final_audio_path = "data/output/bleep_test_1.mp3"

# a different set of words to replace with a bleep
bleep_words = ["wiring", "stuff", "man"]

# run the pipeline again with the new word list
bleep_that_sht(
    og_video_path=og_video_path,
    og_audio_path=og_audio_path,
    final_video_path=final_video_path,
    final_audio_path=final_audio_path,
    bleep_words=bleep_words,
)

ValueError: error extracting audio from video data/input/test_1.mp4, exception: MoviePy error: the file data/input/test_1.mp4 could not be found!
Please check that you entered the correct path.

In [None]:
# define bleep_words
# The inputs point at bleep_test_1: the only transcript computed in this
# notebook is for data/input/bleep_test_1.mp3, so bleep timestamps are only
# valid for that video, and a recorded run with data/input/test_1.mp4 failed
# because that file could not be found.
og_video_path = "data/input/bleep_test_1.mp4"
og_audio_path = "data/input/bleep_test_1.mp3"
final_video_path = "data/output/bleep_test_1.mp4"
final_audio_path = "data/output/bleep_test_1.mp3"

bleep_words = ["wiring"]
bleep_that_sht(og_video_path,
               og_audio_path,
               final_video_path,
               final_audio_path,
               bleep_words)

Moviepy - Building video bleep_test_1.mp4.
MoviePy - Writing audio in temp-audio.m4a


                                                                    

MoviePy - Done.
Moviepy - Writing video bleep_test_1.mp4



                                                               

Moviepy - Done !
Moviepy - video ready bleep_test_1.mp4
