# exploring genre and sub-genre data from the Free Music Archive

In [65]:
import ast
import os
from pathlib import Path

import eyed3
import pandas as pd

from config import *

# raise eyed3's logger threshold to "ERROR" so lower-severity messages are not shown
eyed3.log.setLevel("ERROR")

### notebook functions

In [68]:
def make_file_list(root_dir):
    '''Recursively collect the path of every file found beneath ``root_dir``.

    Each entry is the containing directory joined with the file name, in the
    order ``os.walk`` visits them. Directories themselves are not listed.
    '''
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(root_dir)
        for name in names
    ]

def get_genre_id(genre_dict_list):
    '''Return the ``'genre_id'`` value of the first dictionary in the list.'''
    primary_genre = genre_dict_list[0]
    return primary_genre['genre_id']

def id_to_int(val):
    '''Convert ``val`` to an ``int`` using the built-in constructor.'''
    converted = int(val)
    return converted

### import data from targeted directory

In [67]:
# make the FMA metadata folder the working directory
metadata = f"{z_path}\\fma_metadata"
os.chdir(metadata)

# both CSV files are read relative to the working directory set above
tracks_df = pd.read_csv('raw_tracks.csv')
genres_df = pd.read_csv('genres.csv')

### make dataframes and adjustments

In [69]:
# drop rows missing the 'genre' value; .copy() gives an independent frame so
# the column assignments below do not raise pandas' SettingWithCopyWarning
tracks_df = tracks_df[tracks_df['track_genres'].notna()].copy()

# string conversion to list of genre dictionaries.
# ast.literal_eval only accepts Python literals, so unlike eval() it cannot
# execute code that happens to be embedded in the CSV text.
tracks_df['track_genres'] = tracks_df['track_genres'].apply(ast.literal_eval)

# assign primary genre classification id (the first genre listed for the track)
tracks_df['genre_ids'] = tracks_df['track_genres'].apply(get_genre_id)

# convert IDs to ints for comparisons
tracks_df['genre_ids'] = tracks_df['genre_ids'].apply(id_to_int)
tracks_df['track_id'] = tracks_df['track_id'].apply(id_to_int)

### dictionaries for lookups

In [70]:
# lookups between genre IDs and genre titles, in both directions
# genre_lookup: genre ID -> title
genre_lookup = genres_df.set_index('genre_id')['title'].to_dict()
# id_lookup: title -> genre ID
id_lookup = genres_df.set_index('title')['genre_id'].to_dict()

# lets us collapse everything down to only a few genres
# key = genre ID, value = that row's 'top_level' entry
parent_lookup = genres_df.set_index('genre_id')['top_level'].to_dict()

# a genre whose 'parent' is 0 has no parent, i.e. it is top-level
top_genres = genres_df.loc[genres_df['parent'] == 0, 'title'].tolist()

In [74]:
# collect the path of every file under the fma_medium folder
root_dir = f"{z_path}\\fma_medium"
song_paths = make_file_list(root_dir)

### create directories to sort music files into

In [75]:
# parent directory that holds one sub-directory per top-level genre
gp_dir = f"{z_path}\\genres"
Path(gp_dir).mkdir(exist_ok=True)

# one directory per top-level genre
for genre in top_genres:
    # '/' would be read as a path separator, so swap it for '&'
    # (replace() leaves names without a '/' untouched)
    genre = genre.replace('/', '&')
    genre_dir = "\\".join((gp_dir, genre))
    Path(genre_dir).mkdir(exist_ok=True)

In [76]:
moved = 0
# files left where they are because no top-level genre could be worked out
skipped = []
# sort every collected file into the directory of its top-level genre
for song_path in song_paths:
    # load audio file; eyed3.load returns None for files it does not
    # recognise as audio, and .tag is None when the file carries no tag
    audiofile = eyed3.load(song_path)
    if audiofile is None or audiofile.tag is None:
        skipped.append(song_path)
        continue

    # str(genre) may be prefixed with "(id)"; the Genre object's .name is the
    # bare genre name, so no string slicing is needed (and names that contain
    # parentheses themselves, e.g. "Romany (Gypsy)", need no special case)
    genre_tag = audiofile.tag.genre
    genre_name = genre_tag.name if genre_tag is not None else None

    # change to top-level genre: title -> id -> top-level id -> title
    try:
        song_genre = genre_lookup[parent_lookup[id_lookup[genre_name]]]
    except KeyError:
        # missing or unknown genre: leave the file in place rather than
        # aborting a partially completed move
        skipped.append(song_path)
        continue

    # fix for directory moving: genre directories use '&' in place of '/'
    song_genre = song_genre.replace('/', '&')

    # keep the original file name whatever its length
    filename = os.path.basename(song_path)

    # move the file into its genre directory (overwrites an existing file)
    new_path = z_path + "\\genres\\" + song_genre + "\\" + filename
    os.replace(song_path, new_path)
    moved += 1
print('moved ' + str(moved) + ' songs.')
if skipped:
    print('skipped ' + str(len(skipped)) + ' files without a usable genre tag.')

moved 25000 songs.
