In [32]:
import tarfile
import os
import shutil

from pydub import AudioSegment
from pydub.playback import play
import IPython
import wave
from scipy.io import wavfile
from scipy.signal import resample
import librosa
import soundfile as sf

import numpy as np
import random

In [2]:
data_dir = "/Users/glebmokeev/Rask/data"

### Calculate total length of music/speech

In [3]:
def mean_length(directory):
    """Calculates the number of WAV files and their total length in seconds within a directory.

    Args:
        directory (str): The path to the directory containing WAV files.

    Returns:
        tuple: A tuple containing two elements:
            - int: The total number of WAV files found in the directory.
            - float: The total length of all WAV files in seconds.
    """
    wav_count = 0
    sum_length = 0
    failed_files = 0
    for root, dirs, files in os.walk(directory):
        for filename in files:
            try:
                if filename.endswith(".wav"):
                    wav_count += 1
                    with wave.open(os.path.join(root, filename), 'rb') as wav_file:
                        # Get number of frames and sample rate
                        num_frames = wav_file.getnframes()
                        sample_rate = wav_file.getframerate()
                        # Calculate duration in seconds
                        duration = num_frames / float(sample_rate)
                        sum_length += duration
            except:
                failed_files += 1 
    print(f"Failed to proccess {failed_files}")
    return wav_count, sum_length

##### music/

In [19]:
music_count, music_length = mean_length(os.path.join(data_dir, "music/"))
print(f"Number of wavs: {music_count}\nTotal length: {(round(music_length/60/60, 3))} hours \nMean length {round(music_length/music_count, 3)} seconds")

Failed to proccess 473
Number of wavs: 996
Total length: 41.423 hours 
Mean length 149.721 seconds


##### music/clean

In [20]:
music_count, music_length = mean_length(os.path.join(data_dir, "music/clean"))
print(f"Number of wavs: {music_count}\nTotal length: {(round(music_length/60/60, 3))} hours \nMean length {round(music_length/music_count, 3)} seconds")

Failed to proccess 75
Number of wavs: 200
Total length: 16.294 hours 
Mean length 293.286 seconds


##### vocals

In [5]:
speech_count, speech_length = mean_length(os.path.join(data_dir, "vocals"))
print(f"Number of wavs: {speech_count}\nTotal length: {(round(speech_length/60/60, 3))} hours \nMean length {round(speech_length/speech_count, 3)} seconds")

Failed to proccess 426
Number of wavs: 5689
Total length: 68.984 hours 
Mean length 43.653 seconds


In [22]:
def get_wav_params(wav_file):
  with wave.open(wav_file, 'rb') as wav:
    # Get parameters as a tuple
    params = wav.getparams()

    # Convert parameters to dictionary for easier access
    wav_params = {
        "sample_rate": params[0],
        "channels": params[1],
        "sample_width": params[2],
        "nframes": params[3],
    }

  return wav_params

In [23]:
get_wav_params("/Users/glebmokeev/Rask/data/music/clean/music-hd-0055.wav")

{'sample_rate': 1, 'channels': 2, 'sample_width': 16000, 'nframes': 1991993}

### Resample wavs so they are combinable

In [61]:
def resample_dir(folder_path, target_sr):
    os.makedirs(os.path.join(folder_path, "resampled"))
    for filename in os.listdir(folder_path):
      if filename.endswith(".wav") and not(filename.startswith("._")):
        # Create the full path to the wav file
        filepath = os.path.join(folder_path, filename)
    
        # Load the audio data
        y, sr = librosa.load(filepath)
    
        # Check if resampling is needed
        if sr != target_sr:
          # Resample the audio to the target rate
          y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
    
          new_filename = f"{filename[:-4]}_resampled.wav"  
    
          # Save the resampled audio (overwrite or create new file)
          sf.write(os.path.join(folder_path, "resampled", new_filename), y_resampled, target_sr)
          print(f"Resampled '{filename}' to {target_sr} Hz (saved as '{new_filename}')")
        else:
          print(f"'{filename}' already has sample rate {sr} Hz")

In [62]:
target_sr = 44100
resample_dir("/Users/glebmokeev/Rask/data/music/clean", target_sr)

Resampled 'music-hd-0056.wav' to 44100 Hz (saved as 'music-hd-0056_resampled.wav')
Resampled 'musdb-sample97.wav' to 44100 Hz (saved as 'musdb-sample97_resampled.wav')
Resampled 'musdb-sample83.wav' to 44100 Hz (saved as 'musdb-sample83_resampled.wav')
Resampled 'music-hd-0042.wav' to 44100 Hz (saved as 'music-hd-0042_resampled.wav')
Resampled 'musdb-sample54.wav' to 44100 Hz (saved as 'musdb-sample54_resampled.wav')
Resampled 'musdb-sample40.wav' to 44100 Hz (saved as 'musdb-sample40_resampled.wav')
Resampled 'musdb-sample68.wav' to 44100 Hz (saved as 'musdb-sample68_resampled.wav')
Resampled 'musdb-sample69.wav' to 44100 Hz (saved as 'musdb-sample69_resampled.wav')
Resampled 'musdb-sample41.wav' to 44100 Hz (saved as 'musdb-sample41_resampled.wav')
Resampled 'musdb-sample55.wav' to 44100 Hz (saved as 'musdb-sample55_resampled.wav')
Resampled 'music-hd-0043.wav' to 44100 Hz (saved as 'music-hd-0043_resampled.wav')
Resampled 'musdb-sample82.wav' to 44100 Hz (saved as 'musdb-sample82_re

In [63]:
target_sr = 44100
resample_dir("/Users/glebmokeev/Rask/data/vocals/mixed/audio", target_sr)

Resampled 'speech-librivox-0076.wav' to 44100 Hz (saved as 'speech-librivox-0076_resampled.wav')
Resampled 'speech-librivox-0062.wav' to 44100 Hz (saved as 'speech-librivox-0062_resampled.wav')
Resampled 'speech-librivox-0089.wav' to 44100 Hz (saved as 'speech-librivox-0089_resampled.wav')
Resampled 'speech-librivox-0102.wav' to 44100 Hz (saved as 'speech-librivox-0102_resampled.wav')
Resampled 'speech-librivox-0116.wav' to 44100 Hz (saved as 'speech-librivox-0116_resampled.wav')
Resampled 'speech-librivox-0117.wav' to 44100 Hz (saved as 'speech-librivox-0117_resampled.wav')
Resampled 'speech-librivox-0103.wav' to 44100 Hz (saved as 'speech-librivox-0103_resampled.wav')
Resampled 'speech-librivox-0088.wav' to 44100 Hz (saved as 'speech-librivox-0088_resampled.wav')
Resampled 'speech-librivox-0063.wav' to 44100 Hz (saved as 'speech-librivox-0063_resampled.wav')
Resampled 'speech-librivox-0077.wav' to 44100 Hz (saved as 'speech-librivox-0077_resampled.wav')
Resampled 'speech-librivox-004

### Cut 

In [64]:
def split_wav(input_file, output_dir, fragment_length=10):
  """
  Splits a wav file into 10-second fragments (or specified length) and saves them in the output directory.

  Args:
      input_file (str): Path to the wav file to split.
      output_dir (str): Path to the directory where fragments will be saved.
      fragment_length (int, optional): Length of each fragment in seconds. Defaults to 10.
  """
  with wave.open(input_file, mode="rb") as wav_file:
    # Get file parameters
    num_channels = wav_file.getnchannels()
    sample_width = wav_file.getsampwidth()
    framerate = wav_file.getframerate()

    # Calculate frame count per fragment
    frames_per_fragment = int(framerate * fragment_length)

    # Read frames in chunks
    frames = wav_file.readframes(frames_per_fragment)
    fragment_count = 0

    while frames:
      # Create output filename with format (original_filename}_{fragment_count}.wav
      output_filename = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(input_file))[0]}_{fragment_count}.wav")
      
      # Open output file in write mode
      with wave.open(output_filename, mode="wb") as output_wav:
        output_wav.setnchannels(num_channels)
        output_wav.setsampwidth(sample_width)
        output_wav.setframerate(framerate)
        output_wav.writeframes(frames)

      fragment_count += 1
      frames = wav_file.readframes(frames_per_fragment)

In [65]:
os.makedirs("/Users/glebmokeev/Rask/data/music/clean/frag")

In [67]:
wav_folder = "/Users/glebmokeev/Rask/data/music/clean/resampled"
output_folder = "/Users/glebmokeev/Rask/data/music/clean/frag"

for filename in os.listdir(wav_folder):
  if filename.endswith(".wav"):
    full_path = os.path.join(wav_folder, filename)
    split_wav(full_path, output_folder)

In [68]:
os.makedirs("/Users/glebmokeev/Rask/data/vocals/mixed/audio/frag")

In [69]:
wav_folder = "/Users/glebmokeev/Rask/data/vocals/mixed/audio/resampled"
output_folder = "/Users/glebmokeev/Rask/data/vocals/mixed/audio/frag"

for filename in os.listdir(wav_folder):
  if filename.endswith(".wav"):
    full_path = os.path.join(wav_folder, filename)
    split_wav(full_path, output_folder)

### Calculate new mean length

In [70]:
music_count, music_length = mean_length(os.path.join(data_dir, "music/clean/frag"))
print(f"Mean length {round(music_length/music_count, 3)} seconds")

Failed to proccess 0
Mean length 9.91 seconds


In [71]:
music_count, music_length = mean_length(os.path.join(data_dir, "vocals/mixed/audio/frag"))
print(f"Mean length {round(music_length/music_count, 3)} seconds")

Failed to proccess 0
Mean length 9.89 seconds


### Play sounds using pydub

In [72]:
random_dir = os.path.join(data_dir, "music/clean/frag")
all_files = [f for f in os.listdir(random_dir) if not f.startswith('.')]
chosen_files = random.sample(all_files, 2)
full_paths = [os.path.join(random_dir, f) for f in chosen_files]
print(full_paths)
for path in full_paths:
    sound = AudioSegment.from_wav(path)
    play(sound)

['/Users/glebmokeev/Rask/data/music/clean/frag/music-hd-0074_resampled_15.wav', '/Users/glebmokeev/Rask/data/music/clean/frag/musdb-sample56_resampled_74.wav']


Input #0, wav, from '/var/folders/14/gktzbf5x5_18n56bjls0y7cw0000gn/T/tmpt4y9fium.wav':
  Duration: 00:00:10.00, bitrate: 705 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 1 channels, s16, 705 kb/s
   9.95 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/var/folders/14/gktzbf5x5_18n56bjls0y7cw0000gn/T/tmpgvaxw_n0.wav':
  Duration: 00:00:10.00, bitrate: 705 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 1 channels, s16, 705 kb/s
   9.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




### IPython display audio

In [60]:
IPython.display.Audio("/Users/glebmokeev/Rask/data/music/clean/frag/musdb-sample48_resampled_12.wav")