Data adopted from:

## [FMA: A Dataset For Music Analysis](https://github.com/mdeff/fma)
1. Go through the [paper](https://arxiv.org/abs/1612.01840) to understand what the data is about.
2. Dataset downloaded from <https://github.com/mdeff/fma>.



## Imports

In [None]:
%matplotlib inline

import os
import shutil
import json

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display

import utils

# Default figure size (width, height) applied to every matplotlib figure created afterwards.
plt.rcParams['figure.figsize'] = (17, 5)

## Load Data

In [None]:
# Root folder that holds the mp3 files.
AUDIO_DIR = 'data/fma_files/'

# Read the four metadata/feature tables in one go; the order of the paths
# matches the order of the names being bound.
_tracks, _genres, _features, _echonest = [
    utils.load(csv_path)
    for csv_path in (
        'data/fma_metadata/tracks.csv',
        'data/fma_metadata/genres.csv',
        'data/fma_metadata/features.csv',
        'data/fma_metadata/echonest.csv',
    )
]

# Sanity checks: features and tracks must share exactly the same index, and
# every echonest row must refer to a known track.
np.testing.assert_array_equal(_features.index, _tracks.index)
assert _echonest.index.isin(_tracks.index).all()

# Cell output: the shape of each loaded table.
tuple(table.shape for table in (_tracks, _genres, _features, _echonest))

## Inspect data

In [None]:
# Keep the rows whose ('set', 'subset') value compares <= 'small'.
# NOTE(review): this presumably relies on the column being an ordered
# categorical set up by utils.load — confirm.
in_small_subset = _tracks['set', 'subset'] <= 'small'
tracks = _tracks[in_small_subset]

print('Shape of data: ', tracks.shape)
# Show a single row (position 2) as a sample of the available fields.
print(tracks.iloc[2])
# print(len(tracks.columns))
# print(tracks.columns)

## Prepare data

- Collect the audio files into the `data/audiofiles` directory.
- Write metadata describing each audio file to `data/metadata.json`.

In [None]:
class AudioMetadata:
    """Plain record describing a single audio file.

    The constructor stores its arguments unchanged as attributes:
    ``file`` (required) plus ``title``, ``artist`` and ``album``, each of
    which defaults to ``None``.
    """

    def __init__(self, file, title=None, artist=None, album=None):
        self.file, self.title = file, title
        self.artist, self.album = artist, album

# Get track IDs
track_ids = tracks.index.tolist()

print('Tracks: ',len(track_ids))

# Copying every mp3 into data/audiofiles is slow and only has to happen once,
# so it is opt-in: set this to True to (re)populate the directory.
COPY_AUDIO_FILES = False
destination_dir = 'data/audiofiles'  # Destination directory
if COPY_AUDIO_FILES:
    os.makedirs(destination_dir, exist_ok=True)


def _none_if_missing(value):
    """Return ``None`` for a missing pandas value (NaN/None), else ``value``.

    Without this, ``json.dump`` writes missing values as the bare token
    ``NaN``, which is not valid JSON and is rejected by strict parsers.
    """
    return None if pd.isna(value) else value


# Initialize list for metadata objects
metadata = []

# Iterate the three metadata columns in lockstep with the track IDs instead
# of doing three separate .loc lookups per track.
for track_id, title_, artist_, album_ in zip(
    track_ids,
    tracks['track', 'title'],
    tracks['artist', 'name'],
    tracks['album', 'title'],
):
    file_ = str(track_id) + '.mp3'

    # Create an instance of AudioMetadata with multiple parameters
    metadata_object = AudioMetadata(
        file=file_,
        title=_none_if_missing(title_),
        artist=_none_if_missing(artist_),
        album=_none_if_missing(album_),
    )

    # Organize audio files in data/audiofiles (only when explicitly enabled).
    if COPY_AUDIO_FILES:
        filepath_ = utils.get_audio_path(AUDIO_DIR, track_id)
        shutil.copy(filepath_, os.path.join(destination_dir, file_))

    # Add the object to the list
    metadata.append(metadata_object)

# Convert the list of objects to a list of dictionaries
filenames_dict_list = [
    {
        'file': 'fma_dataset/' + obj.file,
        'title': obj.title,
        'description': 'fma dataset',
        'type': 'MUSIC',
        'artist': obj.artist,
        'album': obj.album,
    }
    for obj in metadata
]

# Save the list of dictionaries as a JSON file
output_file = 'data/metadata.json'
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(filenames_dict_list, json_file, indent=4, ensure_ascii=False)
print('File written: ',output_file)


## Select 1000 random tracks from the dataset

Sample 1000 random objects from the metadata and write them to `data/metadata1000.json`.

In [None]:
import random

# Load the complete metadata list from disk.
metadata8000 = 'data/metadata.json'
with open(metadata8000, encoding='utf-8') as json_file:
    data = json.load(json_file)

# Ask for 1000 entries; fall back to everything when fewer are available.
num_objects_to_select = 1000
if num_objects_to_select > len(data):
    print(f"Warning: The file contains only {len(data)} objects. Adjusting to read all available.")
    num_objects_to_select = len(data)

# Draw the sample without replacement (unseeded, so it differs between runs).
random_objects = random.sample(data, k=num_objects_to_select)

# Persist the sample next to the full metadata file.
output_file = 'data/metadata1000.json'
with open(output_file, mode='w', encoding='utf-8') as json_file:
    json.dump(random_objects, json_file, indent=4, ensure_ascii=False)

print(f"Successfully wrote {len(random_objects)} random objects to '{output_file}'.")


## Create simulated broadcasts

Concatenate the audio files in groups of five to create simulated 'broadcasts'.

In [None]:
import os
import json
from pydub import AudioSegment
# Output directory for the generated broadcasts, and the directory the
# individual source tracks are read from.
broadcastDIR = 'data/broadcasts'
sourceDIR = 'data/audiofiles'

# exist_ok=True creates the directory if needed and is a no-op otherwise,
# avoiding the check-then-create race of os.path.exists() + os.makedirs().
os.makedirs(broadcastDIR, exist_ok=True)

# Function to generate group name
# Function to generate group name
def get_group_name(counter):
    """Return an uppercase-letter name for the group with index ``counter``.

    The name is ``counter`` written in base 26 with digits ``A``-``Z``,
    left-padded with ``A`` to at least two characters: 0 -> 'AA',
    25 -> 'AZ', 26 -> 'BA', 675 -> 'ZZ'. From 676 onward the name grows to
    three or more letters (676 -> 'BAA') instead of spilling into
    non-letter characters such as '[' or the path separator '\\'.

    Raises:
        ValueError: if ``counter`` is negative.
    """
    if counter < 0:
        raise ValueError(f"counter must be non-negative, got {counter}")

    letters = []
    remaining = counter
    # Emit base-26 digits least-significant first; keep going until the
    # value is exhausted and the two-letter minimum width is reached.
    while remaining or len(letters) < 2:
        remaining, digit = divmod(remaining, 26)
        letters.append(chr(ord('A') + digit))
    return ''.join(reversed(letters))

# Function to concatenate audio files for a group
# Function to concatenate audio files for a group
def concatenate_audio_files(file_paths, output_name):
    """Join the given audio files end to end and export the result as mp3.

    Args:
        file_paths: iterable of paths, loaded in order with
            ``AudioSegment.from_file``.
        output_name: destination path handed to ``AudioSegment.export``.
    """
    segments = (AudioSegment.from_file(path) for path in file_paths)
    # Start from an explicit empty segment so an empty input still exports.
    merged = sum(segments, AudioSegment.empty())
    merged.export(output_name, format="mp3")

# Function to group and concatenate audio files
def process_files(objects):
    group_counter = 0
    current_group = []

    for i, obj in enumerate(objects):
        current_group.append(obj)  # Add current object to group
        
        # If group has 5 elements or it's the last element
        if len(current_group) == 5 or i == len(objects) - 1:
            # Create group name
            group_name = get_group_name(group_counter)
            
            # Collect file paths for concatenation
            # file_paths = [os.path.join('audiofiles', obj['file']) for obj in current_group]
            file_paths = [sourceDIR + '/' + os.path.basename(obj['file']) for obj in current_group]

            output_file = f"{broadcastDIR}/broadcast_{group_name}.mp3"
            
            # Concatenate files for this group
            concatenate_audio_files(file_paths, output_file)         
            
            # Move to the next group
            group_counter += 1
            current_group = []  # Reset group for the next batch

# Load the sampled metadata list from disk.
metadata1000 = 'data/metadata1000.json'
with open(metadata1000, encoding='utf-8') as json_file:
    data = json.load(json_file)

# Group the sampled tracks and export one broadcast file per group.
process_files(data)
