In [1]:
import os
import subprocess
import soundfile as sf
import pyrubberband as pyrb
from pydub import AudioSegment
from pydub import AudioSegment
from pydub.effects import normalize
import math


In [2]:
trackone_filename = "vocals.m4a"
tracktwo_filename = "music.mp3"
output_format = "mp3"

# NOISE REMOVAL
track_one_noise_separator = True
track_two_noise_separator = True

trackone = AudioSegment.from_file(trackone_filename)
tracktwo = AudioSegment.from_file(tracktwo_filename)

In [3]:
raw_data = tracktwo.raw_data
samples = list(tracktwo.get_array_of_samples())
max_sample = max(samples)
min_sample = min(samples)
max_amplitude = max(abs(max_sample), abs(min_sample))
min_amplitude = min(abs(max_sample), abs(min_sample))
max_db = 20 * math.log10(max_amplitude)
min_db = 20 * math.log10(min_amplitude)
print("Maximum volume (dB):", max_db)
print("Minimum volume (dB):", min_db)

Maximum volume (dB): 77.17435229720583
Minimum volume (dB): 75.9231575381383


In [4]:
reverb_trackone = True
reverb_tracktwo = True

In [5]:
# Reverb the track
if reverb_trackone:
    # Prepare edited audio file with new filename
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
       trackone_filename,
        "-af",
        "aecho=0.8:0.9:1000|500:0.3|0.2",
        "edited_" + os.path.splitext(trackone_filename)[0] + '.wav'
    ]
    # Prepare edited audio file with new filename
    filename = os.path.splitext(trackone_filename)[0]
    filename = "edited_" + filename + '.wav'
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Command returned non-zero exit status", e.returncode)
        print("Error output:\n", e.stderr)
    else:
        print("Command executed successfully.")

if reverb_tracktwo:
    # Prepare edited audio file with new filename
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        tracktwo_filename,
        "-af",
        "aecho=0.8:0.9:1000|500:0.3|0.2",
        "edited_" + os.path.splitext(tracktwo_filename)[0] + '.wav'
    ]
    # Prepare edited audio file with new filename
    filename = os.path.splitext(tracktwo_filename)[0]
    filename = "edited_" + filename + '.wav'
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Command returned non-zero exit status", e.returncode)
        print("Error output:\n", e.stderr)
    else:
        print("Command executed successfully.")

Command returned non-zero exit status 1
Error output:
 ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 14.0.0 (clang-1400.0.29.202)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/6.0 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --en

In [18]:
if track_one_noise_separator:
    cmd = ["python3", "-m", "spleeter", "separate", trackone_filename, "-o", "./", "-f", "{filename}_{instrument}.{codec}"]

    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Command returned non-zero exit status", e.returncode)
        print("Error output:\n", e.stderr)
    else:
        print("Command executed successfully.")

if track_two_noise_separator:
    cmd = ["python3", "-m", "spleeter", "separate", tracktwo_filename, "-o", "./", "-f", "{filename}_{instrument}.{codec}"]

    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Command returned non-zero exit status", e.returncode)
        print("Error output:\n", e.stderr)
    else:
        print("Command executed successfully.")

Command executed successfully.
Command executed successfully.


In [19]:
# SLICING OR TRIM TRACK
slice_trackone = False
slice_tracktwo = False

slice_start_track_one = 0
slice_end_track_one = 10
slice_start_track_two = 0
slice_end_track_two = 10

if track_one_noise_separator:
    trackone_filename = "vocals_vocals.wav"

if track_one_noise_separator:
    tracktwo_filename = "music_accompaniment.wav"

# Extract edited audio properties using PyDub
trackone = AudioSegment.from_file(trackone_filename)
tracktwo = AudioSegment.from_file(tracktwo_filename)

In [20]:
if slice_trackone:
    # Slicing and trimming
    slice_start = float(slice_start_track_one) * 1000
    slice_end = float(slice_end_track_one) * 1000
    trackone = trackone[slice_start:slice_end]

    # Export the edited audio file
    trackone.export(trackone_filename, format='wav')

if slice_tracktwo:
    # Slicing and trimming
    slice_start = float(slice_start_track_two) * 1000
    slice_end = float(slice_end_track_two) * 1000
    tracktwo = tracktwo[slice_start:slice_end]

    # Export the edited audio file
    tracktwo.export(tracktwo_filename, format='wav')

In [21]:
# CHANGING SAMPLE RATE AND CHANNEL
change_trackone_sample_rate = True
change_tracktwo_sample_rate = True

trackone_sample_rate = 44100
tracktwo_sample_rate = 44100

trackone_channel = 1
tracktwo_channel = 1

# Extract edited audio properties using PyDub
trackone = AudioSegment.from_file(trackone_filename)
tracktwo = AudioSegment.from_file(tracktwo_filename)

In [22]:
if change_trackone_sample_rate:
    trackone = trackone.set_frame_rate(int(trackone_sample_rate))
    trackone = trackone.set_channels(int(trackone_channel))

    # Export the edited audio file
    trackone.export(trackone_filename, format='wav')

if change_tracktwo_sample_rate:
    tracktwo = tracktwo.set_frame_rate(int(tracktwo_sample_rate))
    tracktwo = tracktwo.set_channels(int(tracktwo_channel))

    # Export the edited audio file  
    tracktwo.export(tracktwo_filename, format='wav')

In [23]:
change_track_one_tempo = True
change_track_two_tempo = True

track_one_tempo = 1
track_two_tempo = 1

In [24]:
if change_track_one_tempo:
    data, samplerate = sf.read(trackone_filename)

    # Apply time stretching to adjust the tempo without affecting pitch
    audio = pyrb.time_stretch(data, samplerate, float(track_one_tempo))

    # Export the edited audio file
    sf.write(trackone_filename, audio, samplerate, format='wav')


if change_track_two_tempo:
    data, samplerate = sf.read(tracktwo_filename)

    # Apply time stretching to adjust the tempo without affecting pitch
    audio = pyrb.time_stretch(data, samplerate, float(track_two_tempo))

    # Export the edited audio file
    sf.write(tracktwo_filename, audio, samplerate, format='wav')

In [25]:
change_track_one_pitch = True
change_track_two_pitch = True

track_one_pitch = 1
track_two_pitch = 1

# Extract edited audio properties using PyDub
trackone = AudioSegment.from_file(trackone_filename)
tracktwo = AudioSegment.from_file(tracktwo_filename)

In [26]:
if change_track_one_pitch:
    data, samplerate = sf.read(trackone_filename)

    # Apply time stretching to adjust the pitch without affecting tempo
    audio = pyrb.pitch_shift(data, samplerate, float(track_one_pitch))

    # Export the edited audio file
    sf.write(trackone_filename, audio, samplerate, format='wav')

if change_track_two_pitch:
    data, samplerate = sf.read(tracktwo_filename)

    # Apply time stretching to adjust the pitch without affecting tempo
    audio = pyrb.pitch_shift(data, samplerate, float(track_two_pitch))

    # Export the edited audio file
    sf.write(tracktwo_filename, audio, samplerate, format='wav')

In [29]:
concatenation_format = "overlay" # overlay and append
concatenation_loop_track = 2

change_trackone_volume = True
change_tracktwo_volume = True
trackone_volume = 0
tracktwo_volume = 0


normalize_trackone = True
normalize_tracktwo = True
normalize_trackone_headroom = -12
normalize_tracktwo_headroom = -12

fade_in_duration_trackone = 5000
fade_out_duration_trackone = 5000
fade_in_duration_tracktwo = 5000
fade_out_duration_tracktwo = 5000

# Extract edited audio properties using PyDub
trackone = AudioSegment.from_file(trackone_filename)
tracktwo = AudioSegment.from_file(tracktwo_filename)

panning_value = 0
enable_track_filter = False
track_audio_filters_pass = "low-pass" #low-pass, high-pass, band-pass
track_low_pass = 0
track_high_pass = 0
track_band_pass_low = 0
track_band_pass_high = 0

crossfade = True
crossfade_duration = 0

In [30]:
if concatenation_format == "overlay":
    if concatenation_loop_track == '1':
        # Calculate the number of repetitions required
        repetitions = int(tracktwo.duration_seconds / trackone.duration_seconds) + 1
        # Create the looped version of the shorter track
        trackone = trackone * repetitions
        # Overlay the section on top of the base audio

        if change_trackone_volume:
            trackone = trackone + trackone_volume

        if change_tracktwo_volume:
            tracktwo = tracktwo + tracktwo_volume

        # Normalize the track
        if normalize_trackone:
            trackone = normalize(trackone, headroom=normalize_trackone_headroom)

        # Normalize the track
        if normalize_tracktwo:
            tracktwo = normalize(tracktwo, headroom=normalize_tracktwo_headroom)

        combined_audio = tracktwo.overlay(trackone.fade_in(fade_in_duration_trackone).fade_out(fade_out_duration_trackone))

    elif concatenation_loop_track == '2':
        # Create a silent audio segment with the desired duration
        silent_segment = AudioSegment.silent(duration=3000)

        trackone = silent_segment + trackone + silent_segment

        # Calculate the number of repetitions required
        repetitions = int(trackone.duration_seconds / tracktwo.duration_seconds) + 1

        # Create the looped version of the shorter track
        tracktwo = tracktwo * repetitions

        if change_trackone_volume:
            trackone = trackone + trackone_volume

        if change_tracktwo_volume:
            tracktwo = tracktwo + tracktwo_volume

        # Normalize the track
        if normalize_trackone:
            trackone = normalize(trackone, headroom=normalize_trackone_headroom)

        # Normalize the track
        if normalize_tracktwo:
            tracktwo = normalize(tracktwo, headroom=normalize_tracktwo_headroom)

        # Overlay the section on top of the base audio
        combined_audio = trackone.overlay(tracktwo.fade_in(fade_in_duration_tracktwo).fade_out(fade_out_duration_tracktwo))

    else:
        if change_trackone_volume:
            trackone = trackone + trackone_volume

        if change_tracktwo_volume:
            tracktwo = tracktwo + tracktwo_volume

        # Normalize the track
        if normalize_trackone:
            trackone = normalize(trackone, headroom=normalize_trackone_headroom)

        # Normalize the track
        if normalize_tracktwo:
            tracktwo = normalize(tracktwo, headroom=normalize_tracktwo_headroom)

        combined_audio = trackone.overlay(tracktwo)

    # Apply the panning effect
    combined_audio = combined_audio.pan(panning_value)

    if enable_track_filter:
        if track_audio_filters_pass == "low-pass":                    
            lowpass_cutoff = int(track_low_pass)
            combined_audio = combined_audio.low_pass_filter(lowpass_cutoff)
        elif track_audio_filters_pass == "high-pass":                                    
            highpass_cutoff = int(track_high_pass)
            combined_audio = combined_audio.high_pass_filter(highpass_cutoff)
        elif track_audio_filters_pass == "band-pass":                                    
            bandpass_lowcut = int(track_band_pass_low)
            bandpass_highcut = int(track_band_pass_high)
            combined_audio = combined_audio.low_pass_filter(bandpass_highcut).high_pass_filter(bandpass_lowcut)

    # Export the appended audio to a new file
    combined_audio.export("final_audio." + output_format, format=output_format)            
else:
    if change_trackone_volume:
        trackone = trackone + trackone_volume

    if change_tracktwo_volume:
        tracktwo = tracktwo + tracktwo_volume

    # Normalize the track
    if normalize_trackone:
        trackone = normalize(trackone, headroom=normalize_trackone_headroom)

    # Normalize the track
    if normalize_tracktwo:
        tracktwo = normalize(tracktwo, headroom=normalize_tracktwo_headroom)

    # Append the audio files
    if crossfade:
        # Extract the overlapping portions for crossfading
        overlap1 = trackone[-crossfade_duration:]
        overlap2 = tracktwo[:crossfade_duration]

        # Apply crossfading by manipulating volume levels
        fade_in = overlap2.fade(to_gain=-120, start=0, duration=crossfade_duration)
        fade_out = overlap1.fade(from_gain=-120, start=0, duration=crossfade_duration)

        # Crossfade the audio segments
        appended_audio = trackone[:-crossfade_duration] + fade_out + fade_in + tracktwo[crossfade_duration:]
    else:
        appended_audio = trackone + tracktwo

    # Apply the panning effect
    appended_audio = appended_audio.pan(panning_value)

    if enable_track_filter:
        if track_audio_filters_pass == "low-pass":                    
            lowpass_cutoff = int(track_low_pass)
            appended_audio = appended_audio.low_pass_filter(lowpass_cutoff)
        elif track_audio_filters_pass == "high-pass":                                    
            highpass_cutoff = int(track_high_pass)
            appended_audio = appended_audio.high_pass_filter(highpass_cutoff)
        elif track_audio_filters_pass == "band-pass":                                    
            bandpass_lowcut = int(track_band_pass_low)
            bandpass_highcut = int(track_band_pass_high)
            appended_audio = appended_audio.low_pass_filter(bandpass_highcut).high_pass_filter(bandpass_lowcut)

    # Export the appended audio to a new file
    appended_audio.export("final_audio." + output_format, format=output_format)            