<a href="https://colab.research.google.com/github/danielraffel/dodgylegally/blob/main/dodgylegally.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# script hacked together by Daniel Raffel based on original work here https://x.com/ColugoMusic/status/1726001266180956440?s=20

# Install the required packages
!pip install pydub yt-dlp glob2 nltk

# Import necessary libraries
from google.colab import drive
import os
import nltk
from pydub import AudioSegment
import random

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Project root inside Google Drive (edit this path to relocate the project)
base_dir = '/content/drive/My Drive/dodgylegally'

# Make sure the project root exists, then make it the working directory so
# the relative 'wavs/...' and 'birdwater.txt' paths below resolve against it
if not os.path.exists(base_dir):
    os.makedirs(base_dir)
os.chdir(base_dir)

# Absolute locations of the working folders
raw_dir = os.path.join(base_dir, 'wavs/raw')
loop_dir = os.path.join(base_dir, 'wavs/processed/loop')
oneshot_dir = os.path.join(base_dir, 'wavs/processed/oneshot')

# Create whichever working folders are missing
for folder in (loop_dir, oneshot_dir, raw_dir):
    os.makedirs(folder, exist_ok=True)

# First run only: seed 'birdwater.txt' with 200 words sampled at random from
# the nltk 'words' corpus, one word per line
word_list_file = os.path.join(base_dir, 'birdwater.txt')
if not os.path.exists(word_list_file):
    nltk.download('words')
    from nltk.corpus import words
    word_list = random.sample(words.words(), 200)
    with open(word_list_file, 'w') as file:
        file.writelines(word + '\n' for word in word_list)

# ColugoMusic script with a slight modification for BATCH_SIZE
from pydub import AudioSegment, effects
from yt_dlp import YoutubeDL
import glob
import random

# BATCH_SIZE is read interactively: it is the number of search/download
# rounds main() performs. A blank or non-numeric reply falls back to 10
# instead of aborting the cell with a ValueError.
_batch_size_reply = input("Enter the number of samples (default 10): ").strip()
try:
    BATCH_SIZE = int(_batch_size_reply) if _batch_size_reply else 10
except ValueError:
    print('Invalid number "{}"; using the default of 10'.format(_batch_size_reply))
    BATCH_SIZE = 10
# NOTE(review): MAX_SEARCH_RESULTS is not referenced by the code visible here.
MAX_SEARCH_RESULTS = 10
# Paths below are relative to the current working directory.
DOWNLOAD_DIR = 'wavs/raw'
LOOP_OUTPUT_DIR = 'wavs/processed/loop'
ONESHOT_OUTPUT_DIR = 'wavs/processed/oneshot'
# Text file holding the search vocabulary, one entry per line.
WORD_LIST = 'birdwater.txt'

def read_lines(file):
  """Return the contents of the text file at path `file` as a list of lines.

  Line terminators are stripped; blank lines are kept as empty strings.
  The file is opened with the platform default encoding and is closed
  before returning (the previous version left the handle open).
  """
  with open(file) as handle:
    return handle.read().splitlines()

class download_range_func:
  """Callable handed to the downloader as its 'download_ranges' option.

  When called with a video's info dict it yields a single range dict whose
  'start_time' and 'end_time' are both the midpoint of the video.
  """

  def __call__(self, info_dict, ydl):
    """Yield one range dict positioned at the midpoint of the video.

    `ydl` is accepted for signature compatibility and is not used.
    """
    timestamp = self.make_timestamp(info_dict)
    # NOTE(review): start and end are identical, so the requested range has
    # zero length. Presumably the downloader still returns a short chunk
    # around this point - confirm before changing.
    yield {
        'start_time': timestamp,
        'end_time': timestamp,
    }

  @staticmethod
  def make_timestamp(info):
    """Return half of info['duration'], or 0 when the duration is unknown.

    "Unknown" covers both a missing 'duration' key and an explicit None;
    the previous version raised KeyError when the key was absent.
    """
    duration = info.get('duration')
    if duration is None:
      return 0
    return duration / 2

def make_random_search_phrase(word_list):
  """Build a two-word search phrase from `word_list`.

  Two entries are drawn without replacement via random.sample, joined with
  a single space, echoed to stdout and returned. random.sample raises
  ValueError when `word_list` holds fewer than two entries.
  """
  chosen = random.sample(word_list, 2)
  search_phrase = ' '.join(chosen)
  print(f'Search phrase: "{search_phrase}"')
  return search_phrase

def make_download_options(phrase):
  """Assemble the options dict that main() passes to YoutubeDL for `phrase`.

  The phrase is reduced to alphanumerics plus '.', '_', '-' and space, and
  that sanitised form becomes the prefix of the output filename template.
  Files are placed under DOWNLOAD_DIR, a fresh download_range_func instance
  selects the range, and an FFmpegExtractAudio post-processor entry
  requests 'wav' as the preferred codec.
  """
  allowed_punctuation = "._- "
  safe_phrase = ''.join(
      ch for ch in phrase if ch.isalnum() or ch in allowed_punctuation)

  extract_wav = {
      'key': 'FFmpegExtractAudio',
      'preferredcodec': 'wav',
  }
  options = {
      'format': 'bestaudio/best',
      'paths': {'home': DOWNLOAD_DIR},
      'outtmpl': {'default': f'{safe_phrase}-%(id)s.%(ext)s'},
      'download_ranges': download_range_func(),
      'postprocessors': [extract_wav],
  }
  return options

def make_oneshot(sound, phrase, output_filepath):
  """Export a normalised one-shot cut from the start of `sound`.

  The clip keeps at most the first 2000 units of len(sound) (presumably
  milliseconds, pydub's convention - TODO confirm), fades out over its
  last quarter, is normalised and written to `output_filepath` as wav.
  `phrase` is accepted but not used.
  """
  clip_length = min(len(sound), 2000)
  fade_length = int(clip_length / 4)
  clip = sound[:clip_length].fade_out(duration=fade_length)
  effects.normalize(clip).export(output_filepath, format="wav")

def make_loop(sound, phrase, output_filepath):
    """Export a normalised loop built from the start of `sound`.

    With span = min(2000, len(sound)): the head is sound[:span/2] with a
    fade-in lasting span/4, and the tail is the span/4 of audio that
    follows the head, faded out over its whole length. The tail is
    overlaid on the head, and the result is normalised and written to
    `output_filepath` as wav. `phrase` is accepted but not used.
    """
    span = min(len(sound), 2000)
    midpoint = int(span / 2)
    fade = int(span / 4)

    head = sound[:midpoint].fade_in(duration=fade)
    tail = sound[midpoint:][:fade].fade_out(duration=fade)

    looped = effects.normalize(head.overlay(tail))
    looped.export(output_filepath, format="wav")

def process_file(filepath, phrase):
  """Render the one-shot and loop versions of a raw wav, then delete it.

  Output names are 'oneshot_' / 'loop_' followed by the sanitised phrase,
  a dash and the raw file's basename. A rendering is produced only when
  the audio is longer than 500 (in len(sound) units) and the target file
  does not exist yet. The raw file is removed afterwards whether or not
  anything was rendered. Any exception is reported on stdout and
  swallowed, in which case the raw file is left in place.
  """
  try:
    cleaned = ''.join(ch for ch in phrase if ch.isalnum() or ch in "._- ")
    suffix = f'{cleaned}-{os.path.basename(filepath)}'
    # (renderer, destination) pairs, in the order they are produced
    targets = (
        (make_oneshot, os.path.join(ONESHOT_OUTPUT_DIR, f'oneshot_{suffix}')),
        (make_loop, os.path.join(LOOP_OUTPUT_DIR, f'loop_{suffix}')),
    )

    sound = AudioSegment.from_file(filepath, "wav")
    if len(sound) > 500:
      for render, destination in targets:
        if not os.path.exists(destination):
          render(sound, phrase, destination)
    os.remove(filepath)
  except Exception as err:
    print(f"Failed to process '{filepath}' ({err})")

def create_combined_loop():
    """Concatenate every processed loop into one versioned wav file.

    Starts from 100 of silence (AudioSegment.silent duration units), then
    appends each '*.wav' found in LOOP_OUTPUT_DIR repeated 3 or 4 times
    (chosen at random per file). The result is written under
    <base_dir>/wavs/processed/combined as 'combined_loop_v<N>.wav', where N
    is the first number from 1 upward that is not already taken.
    """
    combined_dir = os.path.join(base_dir, 'wavs/processed/combined')
    os.makedirs(combined_dir, exist_ok=True)

    combined_loop = AudioSegment.silent(duration=100)
    loop_pattern = os.path.join(LOOP_OUTPUT_DIR, '*.wav')
    for filepath in glob.glob(loop_pattern):
        combined_loop += AudioSegment.from_file(filepath, format="wav") * random.randint(3, 4)

    # Probe v1, v2, ... until an unused filename is found
    version = 1
    while True:
        output_filepath = os.path.join(
            combined_dir, "combined_loop_v{}.wav".format(version))
        if not os.path.exists(output_filepath):
            break
        version += 1

    combined_loop.export(output_filepath, format="wav")
def setup():
    """Ensure the loop and one-shot output directories exist.

    Uses os.makedirs(..., exist_ok=True) rather than an exists-then-create
    check, so there is no window between the check and the creation, and
    the call matches how the other working folders are created at the top
    of the script.
    """
    for directory in (LOOP_OUTPUT_DIR, ONESHOT_OUTPUT_DIR):
        os.makedirs(directory, exist_ok=True)

def main():
    """Run BATCH_SIZE search/download/process rounds, then build the combined loop.

    Each round picks a random two-word phrase, downloads the first search
    hit for it, and passes every resulting wav in DOWNLOAD_DIR to
    process_file. Any exception aborts the remaining rounds and is reported
    on stdout as a fatal error; nothing is re-raised.
    """
    try:
        setup()
        word_list = read_lines(WORD_LIST)
        for _ in range(BATCH_SIZE):
            phrase    = make_random_search_phrase(word_list)
            video_url = f'ytsearch1:"{phrase}"'
            options   = make_download_options(phrase)
            YoutubeDL(options).download([video_url])

            # make_download_options names the files after the *sanitised*
            # phrase, so match on that same form. Matching on the raw phrase
            # misses the files whenever sanitising removed a character.
            # glob.escape keeps characters such as '[' or '*' literal.
            safe_phrase = ''.join(x for x in phrase if x.isalnum() or x in "._- ")
            pattern = os.path.join(DOWNLOAD_DIR, glob.escape(safe_phrase) + '-*.wav')
            for filepath in glob.glob(pattern):
                process_file(filepath, phrase)

        create_combined_loop()  # Stitch all processed loops into one file

    except Exception as err:
        print('FATAL ERROR: {}'.format(err))

# Run the main function
if __name__ == '__main__':
    main()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Enter the number of samples (default 10): 20
Search phrase: "Vermiformia woman"
[youtube:search] Extracting URL: ytsearch1:"Vermiformia woman"
[download] Downloading playlist: "Vermiformia woman"
[youtube:search] query ""Vermiformia woman"": Downloading web client config
[youtube:search] query ""Vermiformia woman"" page 1: Downloading API JSON
[youtube:search] Playlist "Vermiformia woman": Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=LSeASpvgmwE
[youtube] LSeASpvgmwE: Downloading webpage
[youtube] LSeASpvgmwE: Downloading ios player API JSON
[youtube] LSeASpvgmwE: Downloading android player API JSON
[youtube] LSeASpvgmwE: Downloading m3u8 information
[info] LSeASpvgmwE: Downloading 1 format(s): 251
[info] LSeASpvgmwE: Downloading 1 time ranges: 749.5-749.5
[download] Destination: wavs/ra