In [1]:
import glob
import importlib
import logging
import os
import subprocess
import types

import pandas as pd

from soundclash.preprocessing import detect_silences, generate_split_commands, analyze_audio_chunks

def find_package_location(package_name_or_module: str | types.ModuleType) -> str:
    """
    Find the location of a package, whether it's already imported or not.

    This function takes either a package name as a string or an already imported
    module object and returns the file path of the package.

    Parameters
    ----------
    package_name_or_module : Union[str, types.ModuleType]
        Either a string representing the package name or an imported module object.

    Returns
    -------
    str
        The file path of the package or an explanatory message if the package
        cannot be found or imported.

    Raises
    ------
    TypeError
        If the input is neither a string nor a module object.

    Examples
    --------
    >>> find_package_location('numpy')
    '/path/to/site-packages/numpy'
    >>> import pandas
    >>> find_package_location(pandas)
    '/path/to/site-packages/pandas'

    Notes
    -----
    The function assumes that the package is installed in a standard location
    within site-packages. It may not work correctly for packages installed in
    non-standard locations or for built-in modules.
    """

    package: types.ModuleType

    if isinstance(package_name_or_module, str):
        # If it's a string, treat it as a package name and try to import
        try:
            package = importlib.import_module(package_name_or_module)
        except ImportError:
            return f"Package '{package_name_or_module}' is not installed or cannot be imported."
    elif isinstance(package_name_or_module, types.ModuleType):
        # If it's already a module object, use it directly
        package = package_name_or_module
    else:
        raise TypeError("Argument must be a string (package name) or a module object.")
    
    # Get the file path of the package
    try:
        # Split the path and remove the last two components (usually 'site-packages' and the package name)
        package_path: str = "/".join(package.__file__.split("/")[:-3])
        return package_path
    except AttributeError:
        # Handle cases where __file__ is not available (e.g., for built-in modules)
        return f"Unable to determine the location of '{package.__name__}'. It might be a built-in module."
    
def run_command(command: str) -> None:
    """
    Run a shell command (used here for ffmpeg) and log how it went.

    Parameters
    ----------
    command : str
        The full command line, passed to the shell as-is.

    Returns
    -------
    None

    Notes
    -----
    Nothing is raised to the caller: a non-zero exit status is logged as an
    error together with the captured stderr, a zero exit status is logged at
    info level, and any exception raised while running the command is logged
    as an error.
    """
    try:
        # Capture both streams as text; only stderr is reported, and only on failure.
        completed = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if completed.returncode == 0:
            logging.info(f"Successfully executed: {command}")
        else:
            logging.error(f"Error executing command: {command}")
            logging.error(f"stderr: {completed.stderr}")
    except Exception as exc:
        logging.error(f"Exception occurred while executing command: {command}")
        logging.error(str(exc))

In [2]:
# Resolve the directory that holds the soundclash checkout; the data files
# used below live underneath it.
host_dir: str = find_package_location("soundclash")
# find_package_location reports failures by returning a message string instead
# of raising, so make sure we actually got a directory before building paths on it.
assert os.path.isdir(host_dir), host_dir
host_dir

'/home/asabaal/asabaal_ventures/repos/soundclash'

In [3]:
# Collect the source recordings so silences can be detected in them and used
# to derive song lengths. Sorted so that the order is stable between runs.
data_files: list[str] = sorted(glob.glob(os.path.join(host_dir, "data", "*.mp3")))
data_files

['/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings1.mp3',
 '/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings2.mp3',
 '/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings3.mp3',
 '/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings4.mp3',
 '/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings5.mp3',
 '/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings6.mp3',
 '/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings7.mp3']

In [4]:
# Detect the silent gaps in the first recording.
# NOTE(review): presumably the threshold is in dB and the minimum length in
# seconds -- confirm against soundclash.preprocessing.detect_silences.
silences_1 = detect_silences(
    data_files[0],
    silence_thresh=-40,
    min_silence_len=1.5,
)

In [5]:
# Directory that receives the chunks split out of the first recording.
# Path components are passed separately, like the other os.path.join calls in
# this notebook, so no separator is hard-coded.
output_dir = os.path.join(host_dir, "data", "detected_audio_1")

In [6]:
# Display the resolved output directory as a sanity check.
output_dir

'/home/asabaal/asabaal_ventures/repos/soundclash/data/detected_audio_1'

In [7]:
# Build the ffmpeg command lines that cut the first recording at the detected
# silences and write the pieces into output_dir.
split_commands = generate_split_commands(
    data_files[0],
    silences_1,
    output_dir,
)

In [8]:
# Start from a clean output directory: create it if it does not exist yet and
# delete files left over from a previous run. Only regular files are removed,
# since os.remove would raise on a sub-directory.
os.makedirs(output_dir, exist_ok=True)
for stale_path in glob.glob(os.path.join(output_dir, "*")):
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# Execute commands
# Only commands that carry an explicit end time ("-to") are run.
# NOTE(review): presumably this drops an open-ended trailing segment -- confirm
# against generate_split_commands.
split_commands = [cmd for cmd in split_commands if "-to" in cmd]
for i, command in enumerate(split_commands):
    print(i, command)
    run_command(command)

0 ffmpeg -i "/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings1.mp3" -ss 3.310 -to 254.752 -c copy "/home/asabaal/asabaal_ventures/repos/soundclash/data/detected_audio_1/chunk_000.mp3"
1 ffmpeg -i "/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings1.mp3" -ss 257.329 -to 621.528 -c copy "/home/asabaal/asabaal_ventures/repos/soundclash/data/detected_audio_1/chunk_001.mp3"
2 ffmpeg -i "/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings1.mp3" -ss 624.226 -to 779.584 -c copy "/home/asabaal/asabaal_ventures/repos/soundclash/data/detected_audio_1/chunk_002.mp3"
3 ffmpeg -i "/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings1.mp3" -ss 783.728 -to 1388.940 -c copy "/home/asabaal/asabaal_ventures/repos/soundclash/data/detected_audio_1/chunk_003.mp3"
4 ffmpeg -i "/home/asabaal/asabaal_ventures/repos/soundclash/data/SoundclashRecordings1.mp3" -ss 1393.580 -to 1603.310 -c copy "/home/asabaal/asabaal_venture

In [9]:
# Load the contest winners table that the audio chunks are compared against.
winners_csv_path = os.path.join(host_dir, "data", "SoundclashRemixContestWinners.csv")
df = pd.read_csv(winners_csv_path)

In [10]:
# Display the winners table as loaded from the CSV.
df

Unnamed: 0,User,Track Length,Track Name,Prompt
0,I+AI,4:14,Sound the Violin,"chanber ensemble House,female voices, voilins,..."
1,Hotpot of Genres,3:29,I hate this Tune!,"Orchestra, Violin, Cello, Piano, Goa, Hardstyl..."
2,Stei Camel,2:39,Raptor Siren,"Velocirapter Thirst, Psychotic, [Gothic, Baroq..."
3,,2:34,,"Miku voice, Vocaloid, complex electroswing, ee..."
4,Sebastiaan VW,10:09,(Start 1:14 SoundWash),"Slow melancholy waves crashing, cold, organ gl..."
...,...,...,...,...
494,Coda,3:44,I'm Alive,"Dark atmospheric metal with slow, depressive r..."
495,EchoingPercussionists0703,3:03,,"Dance, Electronic, house,EDM, Energetic Beat, ..."
496,MrmediamanX,1:59,Soundclash of the Gods,"Glitch hop, Turntablism, bounce, dubstep"
497,FutureSounds243,5:29,,"House, tech, Cachengue"


In [11]:
# Probe every chunk written to output_dir. The directory variable is reused
# instead of rebuilding the same path by hand, so the two cannot drift apart.
# NOTE(review): the recorded run reported an ffprobe failure for chunk_024.mp3
# -- TODO investigate that file.
chunks_df = analyze_audio_chunks(output_dir)

Error processing file chunk_024.mp3: Command '['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', '-show_streams', '/home/asabaal/asabaal_ventures/repos/soundclash/data/detected_audio_1/chunk_024.mp3']' returned non-zero exit status 1.


In [12]:
# Display the per-chunk audio properties returned by analyze_audio_chunks.
chunks_df

Unnamed: 0,chunk_number,duration,bit_rate,sample_rate,channels
0,0,251.454694,128014,44100,2
1,1,364.199184,128010,44100,2
2,2,155.350204,128023,44100,2
3,3,605.204898,128006,44100,2
4,4,209.737143,128017,44100,2
5,5,374.543673,128009,44100,2
6,6,134.321633,128027,44100,2
7,7,246.230204,128015,44100,2
8,8,103.993469,128035,44100,2
9,9,226.115918,128016,44100,2


In [13]:
# Expose the row position as an explicit "track_number" column so it can be
# used as a merge key. Guarded so that re-running this cell does not add a
# second, duplicate "track_number" column.
if "track_number" not in df.columns:
    df = df.reset_index().rename(columns={"index": "track_number"})

In [14]:
# Pair each contest entry with the chunk whose number equals its track number,
# then keep at most the first 44 matched rows.
# NOTE(review): presumably 44 is the number of tracks in the first recording -- confirm.
df_1_comp = (
    df.merge(chunks_df, left_on="track_number", right_on="chunk_number")
    .iloc[:44]
)

In [15]:
from datetime import timedelta

# Turn the raw duration in seconds into timedeltas so it reads like the
# "Track Length" column. The conversion only runs while the column is still
# numeric, so re-running this cell is a no-op rather than an error.
if pd.api.types.is_numeric_dtype(df_1_comp["duration"]):
    df_1_comp["duration"] = df_1_comp["duration"].apply(
        lambda seconds: timedelta(seconds=seconds)
    )

In [16]:
# Display the merged table to compare the listed "Track Length" with the measured chunk duration.
df_1_comp

Unnamed: 0,track_number,User,Track Length,Track Name,Prompt,chunk_number,duration,bit_rate,sample_rate,channels
0,0,I+AI,4:14,Sound the Violin,"chanber ensemble House,female voices, voilins,...",0,0 days 00:04:11.454694,128014,44100,2
1,1,Hotpot of Genres,3:29,I hate this Tune!,"Orchestra, Violin, Cello, Piano, Goa, Hardstyl...",1,0 days 00:06:04.199184,128010,44100,2
2,2,Stei Camel,2:39,Raptor Siren,"Velocirapter Thirst, Psychotic, [Gothic, Baroq...",2,0 days 00:02:35.350204,128023,44100,2
3,3,,2:34,,"Miku voice, Vocaloid, complex electroswing, ee...",3,0 days 00:10:05.204898,128006,44100,2
4,4,Sebastiaan VW,10:09,(Start 1:14 SoundWash),"Slow melancholy waves crashing, cold, organ gl...",4,0 days 00:03:29.737143,128017,44100,2
5,5,Zero Nanashi,3:31,Station,Catchy instrumental inro. [electro swing- witc...,5,0 days 00:06:14.543673,128009,44100,2
6,6,MrWholesome,2:51,Sleep,"Power Metal,Melodic,Epic Metal,Explosive,Drama...",6,0 days 00:02:14.321633,128027,44100,2
7,7,LightJourner,3:23,LJ,rapcrack-glitch[speedphonk candydiscoamor [pos...,7,0 days 00:04:06.230204,128015,44100,2
8,8,Renhaul,2:17,STOP IT!!,"tech-house, rap",8,0 days 00:01:43.993469,128035,44100,2
9,9,Teemuth,5:54,Soundlash Hype,Teemuth,9,0 days 00:03:46.115918,128016,44100,2
