In [1]:
from pydub import AudioSegment
from pydub.effects import normalize
from pydub.effects import compress_dynamic_range
import re
import __future__
import os
from multiprocessing import Pool
import cyrtranslit
import glob
import logging as log
import MailDownloader
from google_classifier import Classifier

In [2]:
# Vars:
# Length, in milliseconds, of the leading slice that add_silence_to_file()
# attenuates and attaches to the start and end of each recording.
SILENCE_AT_BEGINNING_AND_END_MS = 3000
# Not referenced by name in the visible cells; presumably the label matching the
# "Applause" key of the classification results — TODO confirm.
APPLAUSE = "Applause"

In [3]:

# Base directory under which create_folder() creates its working sub-folder.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
output_path = "C:\\Users\\Eitan\\Music\\dev\\Handclap detection"
# Not referenced in the visible cells; presumably the intended audio format — TODO confirm.
preferred_codec = 'wav'

# Maps each file name to the value returned by classify_single_audio_file();
# filled in by classify_all_audio_files_if_needed().
classification_results = {}

In [None]:
def create_folder():
    """Make sure the working sub-folder exists under ``output_path`` and chdir into it.

    Prints whether the folder was already present or had to be created.
    """
    # TODO: change when going back to prod
    subfolder_name = "test"
    target_dir = os.path.join(output_path, subfolder_name)

    already_there = os.path.exists(target_dir)
    if not already_there:
        os.makedirs(target_dir)
    print('Folder exists' if already_there else 'Created folder ' + subfolder_name)

    os.chdir(target_dir)


def parse_email(mail):
    """Extract the URL that begins a line of the e-mail, without its playlist suffix.

    Args:
        mail: One line of the e-mail body.

    Returns:
        The http(s) URL found at the very start of ``mail`` with everything from
        ``&list=`` onwards removed, or ``None`` when ``mail`` is not a string or
        does not start with a URL.
    """
    # Explicit checks replace the former bare ``except: pass``, which hid every
    # error (including KeyboardInterrupt) behind a silent ``None``.
    if not isinstance(mail, str):
        return None

    # re.match anchors at the start of the line; raw strings keep ``\s`` a regex
    # escape instead of an invalid string escape.
    found = re.match(r"(https?://[^\s]+)", mail)
    if found is None:
        return None

    return re.sub(r"&list=.*", "", found.group(0))


def match_target_amplitude(sound, target_dBFS):
    """Apply to ``sound`` a gain of ``target_dBFS - sound.dBFS`` and return the result."""
    gain_delta = target_dBFS - sound.dBFS
    return sound.apply_gain(gain_delta)


def print_exceptions(exceptions):
    """Log, at error level, the collected WAV and Video exceptions.

    ``exceptions`` is indexed by the keys ``'WAV'`` and ``'Video'``. Each non-empty
    collection is logged under its banner line, one entry per log record.
    """
    banners = (
        ('WAV', "### WAV Exceptions ###"),
        ('Video', "### Video Exceptions ###"),
    )
    for key, banner in banners:
        entries = exceptions[key]
        if len(entries) == 0:
            continue
        log.error(banner)
        for entry in entries:
            log.error(entry)


def print_found_urls(urls):
    """Print a header line with the number of URLs, then each URL on its own line."""
    total = len(urls)
    print("1. Found the following URIs, {0} in total:".format(total))
    for uri in urls:
        print(uri)

        
def normalize_all_filenames():
    """Run normalize_filename() over every file returned by get_file_list()."""
    wav_names = get_file_list()
    print("3. Normalizing all file names")
    for wav_name in wav_names:
        normalize_filename(wav_name)


def normalize_filename(file):
    """Rename a .wav file to a transliterated, punctuation-free name.

    The name is passed through ``cyrtranslit.to_latin(..., "ru")``, every run of
    non-word characters is collapsed to a single space, and the trailing " wav"
    left by that clean-up is turned back into the ".wav" extension.

    Args:
        file: Name of the file to rename.

    Returns:
        The new file name, or ``None`` (nothing renamed) when ``file`` does not
        end with ".wav".

    Raises:
        OSError: Propagated from ``os.replace`` (e.g. ``file`` does not exist).
    """
    if not file.endswith('.wav'):
        return None

    print("3.1 Normalizing {0}".format(file))
    # fix cyrillic characters
    new_name = re.sub(r'\W+', ' ', cyrtranslit.to_latin(file, "ru"))
    # Anchor on the end of the name: the unanchored ' wav' pattern also rewrote
    # words that merely start with "wav" ("ocean waves.wav" -> "ocean.waves.wav").
    new_name = re.sub(r' wav$', ".wav", new_name)
    os.replace(file, new_name)
    print("3.1 Replaced. Old file name: {0}, New file name: {1}".format(file, new_name))
    return new_name


def compress_all_files():
    """Run compress_file() over every file from get_file_list() in a process pool."""
    pending = get_file_list()
    print("4. Compressing all files")
    with Pool() as workers:
        workers.map(compress_file, pending)
    print("4. Finished compressing")


def compress_file(file):
    """Apply dynamic-range compression to a .wav file and replace it with "<name>_C.wav".

    Args:
        file: Name of the file to compress.

    Returns:
        Name of the compressed file, or ``None`` when ``file`` does not end with
        ".wav" (such files are left untouched).
    """
    if not file.endswith('.wav'):
        return None

    print(f'4.1 Compressing: -- {file}')
    raw_sound = AudioSegment.from_file(file)
    # Positional arguments are pydub's threshold (-20) and ratio (6).
    compressed_sound = compress_dynamic_range(raw_sound, -20, 6)
    # Swap only the trailing extension; the unanchored pattern rewrote every
    # ".wav" occurring anywhere in the name.
    new_file = re.sub(r"\.wav$", "_C.wav", file)
    compressed_sound.export(new_file, format='wav')

    print("Deleting " + file)
    os.remove(file)

    print(f"4.1 Compressed {new_file}")
    return new_file


def normalize_all_audio_files():
    """Run normalize_audio_file() over every file from get_file_list() in a process pool."""
    pending = get_file_list()
    print("5. Normalizing files...")
    with Pool() as workers:
        workers.map(normalize_audio_file, pending)
    print("5. Finished normalizing")


def normalize_audio_file(file):
    """Normalize a .wav file with pydub and replace it with "<name>N.wav".

    Args:
        file: Name of the file to normalize.

    Returns:
        Name of the normalized file, or ``None`` when ``file`` does not end with
        ".wav" (such files are left untouched).
    """
    if not file.endswith('.wav'):
        return None

    print(f"5.1 Normalizing {file}")
    raw_sound = AudioSegment.from_file(file)
    # NOTE(review): pydub's normalize() takes `headroom` in dB below full scale;
    # -1 requests a peak 1 dB above full scale — confirm that 1 was not intended.
    normalized_file = normalize(raw_sound, -1)
    # Swap only the trailing extension; the unanchored pattern rewrote every
    # ".wav" occurring anywhere in the name.
    new_file = re.sub(r"\.wav$", "N.wav", file)
    normalized_file.export(new_file, format='wav')

    # BUG (original author's note): not deleting N object
    print("Deleting " + file)
    os.remove(file)
    print(f"5.1 Normalized {new_file}")

    return new_file


def add_ms_of_silence_to_all_files():
    """Run add_silence_to_file() over every file from get_file_list() in a process pool."""
    pending = get_file_list()
    print("6. Add silence to files")
    with Pool() as workers:
        workers.map(add_silence_to_file, pending)
    print("6. Finished adding silence ")


def add_silence_to_file(file):
    """Pad a .wav file with near-silence at both ends and replace it with "<name>S.wav".

    The padding is the first ``SILENCE_AT_BEGINNING_AND_END_MS`` milliseconds of
    the recording with its gain lowered by 70 (pydub's ``segment - n``), so a
    recording shorter than that receives correspondingly shorter padding.

    Args:
        file: Name of the file to pad.

    Returns:
        Name of the padded file, or ``None`` when ``file`` does not end with
        ".wav" (such files are left untouched).
    """
    if not file.endswith('.wav'):
        # Without a ".wav" suffix the output name would equal the input name:
        # the export would overwrite the source and the clean-up below would
        # then delete it.
        return None

    print(f"6.1 Adding silence to file {file}")
    raw_sound = AudioSegment.from_file(file)
    sample = raw_sound[:SILENCE_AT_BEGINNING_AND_END_MS]
    sample = sample - 70
    output = sample + raw_sound + sample
    # Swap only the trailing extension; the unanchored pattern rewrote every
    # ".wav" occurring anywhere in the name.
    new_file = re.sub(r"\.wav$", "S.wav", file)
    output.export(new_file, format='wav')

    print("Deleting " + file)
    os.remove(file)

    print(f"6.1 Added silence to {new_file}")
    return new_file


def read_email():
    """Return the hard-coded sample e-mail body as a list of its lines."""
    mail = """
Peter Heidrich - Happy Birthday Variations (excerpts)
https://www.youtube.com/watch?v=BmA0bbZSDN4 from 1.07 cut applause in the end please
Gidon Kremer, violin. Vadim Repin, Misha Maisky etc (10-12)
Tchaikovsky - Waltz from Sleeping Beauty ----- Чайковский - Вальс из балета "Спящая красавица"
https://www.youtube.com/watch?v=Gb8Xtix7jc8 4.47
Handel, Birthday Ode for Queen Anne - 5. DUET & CHORUS, "Let rolling streams" (score)
https://www.youtube.com/watch?v=DsXBTMabMaA 2.46
Kathleen Battle & Wynton Marsalis, 'Eternal Source of Light Divine' (G. F. Haendel)          (Ode for the birthday of Queen Anne in 1713).
https://www.youtube.com/watch?v=2MuCCbg0k_0 3.08         и сопрано, и трубач- негры
    """

    # str.split already yields a fresh list with one entry per line.
    return mail.split('\n')


def parse_all_email_lines(email_content):
    """Return the parse_email() result for every line, in order (unmatched lines included)."""
    return [parse_email(line) for line in email_content]


def find_urls_in_email(email_content):
    """Return the truthy results of parse_all_email_lines(), preserving their order."""
    candidates = parse_all_email_lines(email_content)
    return [candidate for candidate in candidates if candidate]


def get_file_list():
    """List the names in the current working directory that match ``*.wav``."""
    return list(glob.iglob("*.wav"))
    

def convert_to_mp3():
    """Export every file from get_file_list() as .mp3 and delete the .wav source."""
    for file in get_file_list():
        audio = AudioSegment.from_wav(file)
        # Swap only the extension: str.replace("wav", "mp3") also rewrote "wav"
        # inside the base name ("ocean waves.wav" -> "ocean mp3es.mp3").
        filename = os.path.splitext(file)[0] + ".mp3"
        audio.export(filename, format='mp3')

        print("Deleting " + file)
        os.remove(file)


def configure_log():
    """Configure logging at INFO level with a ``debug.log`` file handler and a stream handler."""
    record_layout = '%(asctime)s,%(funcName)s,%(levelname)s,%(message)s'
    sinks = [log.FileHandler("debug.log"), log.StreamHandler()]
    log.basicConfig(level=log.INFO, format=record_layout, handlers=sinks)

def classify_all_audio_files_if_needed():
    """Classify every file from get_file_list(), store each result, then log them all.

    Results are written into the module-level ``classification_results`` dict,
    keyed by file name.
    """
    audio_files = get_file_list()
    log.info("7. CLASSIFYING FILES")
    for audio_file in audio_files:
        classification_results[audio_file] = classify_single_audio_file(audio_file)
    log.info("7. END CLASSIFYING")
    log_classification_results()


def classify_single_audio_file(audio_file):
    """Build a ``Classifier`` for ``audio_file`` and return what its ``identify()`` gives back."""
    return Classifier(audio_file).identify()


def log_classification_results():
    """Log, per file, the "Applause", "Silence" and "Speech" entries of ``classification_results``."""
    report_template = """Timestamps classification results for {0}:
                    Applause: {1}
                    Silence: {2}
                    Speech: {3}"""
    for filename in classification_results:
        labels = classification_results[filename]
        log.info(report_template.format(filename, labels["Applause"], labels["Silence"], labels["Speech"]))



In [None]:
# Step 1: split the sample e-mail into lines, pull the URL out of each line that
# starts with one, and print what was found.
email_content = read_email()
urls = find_urls_in_email(email_content)
print_found_urls(urls)

1. Found the following URIs, 4 in total:
https://www.youtube.com/watch?v=BmA0bbZSDN4
https://www.youtube.com/watch?v=Gb8Xtix7jc8
https://www.youtube.com/watch?v=DsXBTMabMaA
https://www.youtube.com/watch?v=2MuCCbg0k_0


In [None]:
# Processing steps applied, in order, to each downloaded file:
def apply_logic_to_file(file):
    """Run the per-file pipeline: rename, compress, normalize, then pad with silence.

    Each stage receives the file name returned by the previous one.

    Args:
        file: Name of the file to process.

    Returns:
        The name of the final file (previously computed but discarded), or
        ``None`` as soon as a stage returns ``None``.
    """
    stages = (normalize_filename, compress_file, normalize_audio_file, add_silence_to_file)

    filename = file
    for stage in stages:
        filename = stage(filename)
        if filename is None:
            # The stages return None for names that do not end with ".wav";
            # handing that None to the next stage would raise on ``.endswith``.
            return None
    return filename

In [7]:
# Work inside the 'parallel' folder, resolved against the current working directory.
# NOTE(review): the path is relative, so re-running this cell in the same kernel
# looks for 'parallel' inside 'parallel'.
os.chdir('parallel')
MailDownloader.download_all_uris(urls, log)
files = get_file_list()
# Run the pipeline exactly once over all files. The former outer
# `for file in files:` loop repeated this whole map once per file, and every pass
# after the first was handed names the first pass had already renamed or deleted.
with Pool() as pool:
    pool.map(apply_logic_to_file, files)

classify_all_audio_files_if_needed()
convert_to_mp3()

[youtube] Extracting URL: https://www.youtube.com/watch?v=BmA0bbZSDN4
[youtube] BmA0bbZSDN4: Downloading webpage
[youtube] BmA0bbZSDN4: Downloading android player API JSON
[info] BmA0bbZSDN4: Downloading 1 format(s): 251
[dashsegments] Total fragments: 1
[download] Destination: Peter Heidrich -  Happy Birthday Variations (excerpts) [BmA0bbZSDN4].webm
[download] 100% of    7.90MiB in 00:00:00 at 31.88MiB/s              
[ExtractAudio] Destination: Peter Heidrich -  Happy Birthday Variations (excerpts) [BmA0bbZSDN4].wav
Deleting original file Peter Heidrich -  Happy Birthday Variations (excerpts) [BmA0bbZSDN4].webm (pass -k to keep)
[youtube] Extracting URL: https://www.youtube.com/watch?v=Gb8Xtix7jc8
[youtube] Gb8Xtix7jc8: Downloading webpage
[youtube] Gb8Xtix7jc8: Downloading android player API JSON
[info] Gb8Xtix7jc8: Downloading 1 format(s): 251
[dashsegments] Total fragments: 1
[download] Destination: Tchaikovsky - Waltz from Sleeping Beauty ----- Чайковский - Вальс из балета ＂Спяща