## Dividing song samples from the Free Music Archive into top-level genre directories

In [1]:
from notebook_functions import *
from pathlib import Path
from config import *

import eyed3, os
import pandas as pd

# ignore 'uncommon genre' warning: raise eyed3's log threshold to ERROR so
# lower-severity messages are not printed while tags are being read
eyed3.log.setLevel("ERROR")

### import data from local directory

In [16]:
# make the metadata folder the working directory so the CSV files below
# can be opened by bare filename
metadata = f"{z_path}\\fma_metadata"
os.chdir(metadata)

# per-track table and genre table
tracks_df = pd.read_csv('raw_tracks.csv')
genres_df = pd.read_csv('genres.csv')

### build dataframes and transform data

In [17]:
import ast

# drop rows missing the 'genre' value; take an explicit copy so the column
# assignments below write to an independent frame instead of a filtered
# slice (which pandas flags with SettingWithCopyWarning)
tracks_df = tracks_df[tracks_df['track_genres'].notna()].copy()

# string conversion to list of genre dictionaries; literal_eval accepts only
# Python literals, so text stored in the CSV cannot run arbitrary code the
# way a bare eval() could
tracks_df['track_genres'] = tracks_df['track_genres'].apply(ast.literal_eval)

# assign primary genre classification id (get_genre_id comes from
# notebook_functions)
tracks_df['genre_ids'] = tracks_df['track_genres'].apply(get_genre_id)

# convert IDs to ints for comparisons
tracks_df['genre_ids'] = tracks_df['genre_ids'].apply(id_to_int)
tracks_df['track_id'] = tracks_df['track_id'].apply(id_to_int)

### dictionaries for lookups

In [18]:
# lookup dictionaries built from the genre table:
#   genre_lookup  : genre_id -> title
#   parent_lookup : genre_id -> top_level
#   id_lookup     : title    -> genre_id
genre_lookup = genres_df.set_index('genre_id')['title'].to_dict()
parent_lookup = genres_df.set_index('genre_id')['top_level'].to_dict()
id_lookup = genres_df.set_index('title')['genre_id'].to_dict()

# titles of the genres whose 'parent' is 0, used to drive the directory loop
top_genres = genres_df.loc[genres_df['parent'] == 0, 'title'].tolist()

### create directories to sort music files into

In [None]:
# parent folder that holds one sub-folder per top-level genre
gp_dir = f"{z_path}\\genres"
Path(gp_dir).mkdir(exist_ok=True)

# one folder per top-level genre; already-existing folders are left alone
for genre in top_genres:
    # '/' would be read as a path separator, so swap it for '&'
    genre = genre.replace('/', '&')
    genre_dir = f"{gp_dir}\\{genre}"
    Path(genre_dir).mkdir(exist_ok=True)

### get current song paths then move to new folders

In [19]:
# gather the current song paths (via make_file_list) for relocation
root_dir = f"{z_path}\\fma_medium"
song_paths = make_file_list(root_dir)

In [None]:
moved = 0
skipped = 0
# read each song's genre tag and move the file into its top-level genre folder
for song_path in song_paths:
    # load audio file; eyed3 can hand back None for a file it cannot handle,
    # and the tag or its genre can be missing, so leave such files in place
    audiofile = eyed3.load(song_path)
    if audiofile is None or audiofile.tag is None or audiofile.tag.genre is None:
        skipped += 1
        continue

    # get genre and strip genre ID
    song_genre = str(audiofile.tag.genre)
    # will either be 0 (no ')' present) or the index just after "(id)"
    end = song_genre.find(')') + 1
    # for Romany(gypsy) genre: the ')' closes the name itself, not an id prefix
    if end == len(song_genre):
        end = 0
    song_genre = song_genre[end:]

    # change to top-level genre; a genre name missing from the lookup tables
    # is counted and skipped instead of aborting the run part-way through
    try:
        song_genre = genre_lookup[parent_lookup[id_lookup[song_genre]]]
    except KeyError:
        skipped += 1
        continue

    # fix for directory moving: mirror the '/' -> '&' folder naming
    song_genre = song_genre.replace('/', '&')

    # rename and move files; the last 10 characters of the path are kept as
    # the filename (presumably 'NNNNNN.mp3' - verify against make_file_list)
    new_path = z_path + "\\genres\\" + song_genre + "\\" + song_path[-10:]
    os.replace(song_path, new_path)
    moved += 1
print('moved ' + str(moved) + ' songs.')
if skipped:
    print('skipped ' + str(skipped) + ' songs with no usable genre tag.')