<a href="https://colab.research.google.com/github/utkar22/CSE508_Winter2023_Group2_Project/blob/main/Censoring/VOSK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip3 install vosk
!pip install pydub
!apt install ffmpeg

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting vosk
  Downloading vosk-0.3.45-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m61.1 MB/s[0m eta [36m0:00:00[0m
Collecting websockets
  Downloading websockets-11.0.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.7/129.7 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting srt
  Downloading srt-3.5.3.tar.gz (28 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: srt
  Building wheel for srt (setup.py) ... [?25l[?25hdone
  Created wheel for srt: filename=srt-3.5.3-py3-none-any.whl size=22445 sha256=83c7afb1aaa87e145c6439993cad7475c469e3a8d8e8b76adbca6d0aad6f7cdd
  Stored in directory: /root/.cache/p

In [2]:
import pandas as pd
import io

from vosk import Model, KaldiRecognizer, SetLogLevel
from pydub import AudioSegment

import wave
from scipy.io import wavfile

from pydub.silence import split_on_silence

In [3]:
import nltk
nltk.download('punkt')  # Download the Punkt tokenizer data
nltk.download('averaged_perceptron_tagger')  # Download the POS tagger data
# NOTE(review): 'porter_test' is unpacked under stemmers/ (see cell output) and
# looks like test data rather than something PorterStemmer needs at runtime.
# None of the visible cells use the tokenizer or the tagger either; confirm and
# drop the downloads that are not needed.
nltk.download('porter_test')  # Download the 'porter_test' stemmer package

from nltk.stem import PorterStemmer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package porter_test to /root/nltk_data...
[nltk_data]   Unzipping stemmers/porter_test.zip.


In [4]:
# Mount Google Drive inside the Colab VM at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Make the project folder the working directory: the audio and CSV paths used
# further down are relative and resolve against it.
%cd /content/drive/MyDrive/IR_Project

Mounted at /content/drive
/content/drive/MyDrive/IR_Project


In [5]:
def mp3_to_wav(mp3_audio):
    """Export ``mp3_audio`` as WAV into a new in-memory buffer.

    Args:
        mp3_audio: Object exposing ``export(file_like, format=...)``
            (presumably a pydub ``AudioSegment`` -- verify against caller).

    Returns:
        The ``io.BytesIO`` buffer that the WAV data was exported into.
    """
    buffer = io.BytesIO()
    mp3_audio.export(buffer, format="wav")
    return buffer

In [6]:
def remove_punct(word):
    """Return ``word`` with every non-alphabetic character removed.

    Only characters for which ``str.isalpha()`` is true are kept, so digits,
    whitespace and punctuation are all dropped.
    """
    return "".join(ch for ch in word if ch.isalpha())

In [7]:
# Shared stemmer instance; clean_word() reads this module-level name.
stemmer = PorterStemmer()

In [8]:
def clean_word(word):
    """Normalize ``word`` for matching: lower-case, letters only, stemmed.

    The steps run in that order: ``str.lower()``, then ``remove_punct``, then
    the module-level ``stemmer``.
    """
    letters_only = remove_punct(word.lower())
    return stemmer.stem(letters_only)

In [9]:
def get_se_dict(se_df):
    """Index the transcript by normalized word.

    Args:
        se_df: DataFrame whose columns ``0``, ``1`` and ``2`` hold the start,
            the end and the word of each row (presumably seconds, given the
            x1000 scaling -- confirm against the CSV producer).

    Returns:
        dict mapping ``clean_word(word)`` to a list of ``[start, end]`` pairs,
        each value being the corresponding column multiplied by 1000, in row
        order.
    """
    spans_by_word = {}
    for _, entry in se_df.iterrows():
        key = clean_word(entry[2])
        # setdefault creates the bucket on first sight of a word.
        spans_by_word.setdefault(key, []).append([entry[0] * 1000, entry[1] * 1000])
    return spans_by_word

In [10]:
def generate_se_dict(path_to_csv):
    """Load the header-less timestamp CSV and build the word index from it.

    Args:
        path_to_csv: Path to a CSV without a header row; columns 0, 1 and 2
            are consumed by ``get_se_dict`` as start, end and word.

    Returns:
        ``(se_dict, se_df)``: the dict produced by ``get_se_dict`` and the raw
        DataFrame it was built from.
    """
    se_df = pd.read_csv(
        path_to_csv,
        header=None,
        # pandas' default NA sentinels include ordinary spoken words
        # ("null", "nan", "none", "na", ...). Left enabled, such transcript
        # words are parsed as float NaN, which crashes clean_word() and makes
        # them impossible to censor.
        keep_default_na=False,
        # Blank cells in the two timing columns still parse as NaN, so those
        # columns stay numeric as before.
        na_values={0: [""], 1: [""]},
    )
    se_dict = get_se_dict(se_df)
    return se_dict, se_df

In [26]:
def find_phrase(removal_word, se_df):
    """Locate every non-overlapping occurrence of a multi-word phrase.

    Words are compared after ``clean_word`` normalization. The transcript is
    scanned left to right; after a match the scan resumes at the row following
    the matched phrase.

    Args:
        removal_word: Phrase to look for; split on whitespace into tokens.
        se_df: DataFrame whose columns ``0``, ``1`` and ``2`` hold the start,
            the end and the word of each row.

    Returns:
        List of ``[start, end]`` pairs, where ``start`` is column 0 of the
        first matched row and ``end`` is column 1 of the last matched row,
        both multiplied by 1000. Empty when the phrase has no tokens or never
        occurs.
    """
    phrase = removal_word.split()
    if not phrase:
        # An empty/whitespace-only phrase used to raise IndexError.
        return []

    # Normalize the phrase once instead of once per transcript row.
    target = [clean_word(token) for token in phrase]

    words, starts, ends = [], [], []
    for _, row in se_df.iterrows():
        words.append(clean_word(row[2]))
        starts.append(row[0] * 1000)
        ends.append(row[1] * 1000)

    timestamps = []
    width = len(target)
    pos = 0
    # Sliding-window comparison: every row is tried as a phrase start, so a
    # failed partial match (e.g. "the the cat" vs. "the cat") no longer hides
    # the real occurrence that begins inside it.
    while pos + width <= len(words):
        if words[pos:pos + width] == target:
            timestamps.append([starts[pos], ends[pos + width - 1]])
            pos += width
        else:
            pos += 1
    return timestamps

In [27]:
def add_bleep(wav_audio, removal_word, se_dict, se_df):
    """Censor every occurrence of a word or phrase in ``wav_audio``.

    A single word is looked up in ``se_dict`` after ``clean_word``
    normalization; a multi-word phrase is located with ``find_phrase``.

    Args:
        wav_audio: Audio object exposing ``overlay`` (presumably a pydub
            ``AudioSegment`` -- verify against caller).
        removal_word: Word or whitespace-separated phrase to censor.
        se_dict: Mapping from normalized word to a list of ``[start, end]``
            pairs, as built by ``get_se_dict``.
        se_df: Transcript DataFrame passed through to ``find_phrase``.

    Returns:
        The audio with each matched span overlaid. ``wav_audio`` itself is
        returned when there is nothing to censor.
    """
    tokens = removal_word.split()
    if not tokens:
        # Empty/whitespace-only input: nothing to censor (previously this fell
        # through to find_phrase and crashed).
        return wav_audio

    if len(tokens) == 1:
        # .get(): a word that never occurs in the transcript is not an error,
        # it simply yields no spans (previously a KeyError).
        timestamps = se_dict.get(clean_word(removal_word), [])
    else:
        timestamps = find_phrase(removal_word, se_df)

    bleep_volume = 30
    overlay_gain = -30  # gain applied to the original audio under the overlay

    for start, end in timestamps:
        duration = end - start

        # NOTE(review): the overlay is built from AudioSegment.silent(), so the
        # fades and the gain below presumably act on an all-zero signal and add
        # no audible tone; the censoring effect then comes from
        # gain_during_overlay alone. Confirm whether a real tone was intended.
        bleep_sound = AudioSegment.silent(duration=duration).fade_in(50).fade_out(50)
        bleep_sound = bleep_sound.apply_gain(bleep_volume)

        # Overlay the segment on the span occupied by the word/phrase.
        wav_audio = wav_audio.overlay(
            bleep_sound, position=start, loop=False, gain_during_overlay=overlay_gain
        )

    return wav_audio

In [13]:
def get_wav_audio(path_to_audio):
    """Load a ``.wav`` or ``.mp3`` file as an ``AudioSegment``.

    MP3 input is decoded and then re-read through an in-memory WAV export
    (``mp3_to_wav``). The extension check is case-insensitive.

    Args:
        path_to_audio: Path ending in ``.wav`` or ``.mp3``.

    Returns:
        The loaded ``AudioSegment``.

    Raises:
        ValueError: If the path has any other extension. The original fell
            through to ``return audio`` with ``audio`` unassigned and failed
            with an opaque UnboundLocalError.
    """
    extension = path_to_audio[-4:].lower()

    if extension == ".wav":
        return AudioSegment.from_wav(path_to_audio)

    if extension == ".mp3":
        mp3_segment = AudioSegment.from_mp3(path_to_audio)
        return AudioSegment.from_wav(mp3_to_wav(mp3_segment))

    raise ValueError(
        f"Unsupported audio file (expected .wav or .mp3): {path_to_audio!r}"
    )

In [14]:
def run(path_to_audio, path_to_csv, bleeped_word):
    """Censor ``bleeped_word`` in an audio file and save the result as WAV.

    The transcript is loaded from ``path_to_csv``, the audio from
    ``path_to_audio``, and the censored audio is exported next to the input
    with the last four characters of the path (the extension) replaced by
    ``_bleeped.wav``.

    Returns:
        The censored audio object returned by ``add_bleep``.
    """
    word_index, transcript = generate_se_dict(path_to_csv)
    source_audio = get_wav_audio(path_to_audio)
    censored = add_bleep(source_audio, bleeped_word, word_index, transcript)

    output_path = f"{path_to_audio[:-4]}_bleeped.wav"
    censored.export(output_path, format="wav")
    return censored

In [31]:
# Input files, given relative to the current working directory.
path_to_audio = "new_audio.mp3"
path_to_csv = "timestamp.csv"

# Word (or whitespace-separated phrase) to censor.
bleeped_word = "gunfire"

# run() also exports the result as "<audio path minus extension>_bleeped.wav".
new_audio = run(path_to_audio, path_to_csv, bleeped_word)

In [32]:
# Bare last expression: the notebook displays the censored audio object.
new_audio