# exploring genre and sub-genre data from the Free Music Archive

In [45]:
import pandas as pd
from pathlib import Path
import eyed3
import os
from config import *

eyed3.log.setLevel("ERROR")

### notebook functions

In [37]:
def make_file_list(root_dir):
    '''given a directory, loops through folders and files and return list of files paths
        and file names'''
    path_list = []
    for subdir, dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith(('.mp3', '.wav')):
                path_list.append(os.path.join(subdir, file))
            else:
                print('does not')
        #path_list = [os.path.join(subdir, file) for file in files if file.endswith(('.mp3', '.wav'))]
    return path_list

def get_genre_id(genre_dict_list):
    return genre_dict_list[0]['genre_id']

def id_to_int(val):
    return int(val)

### import data from targeted directory

In [3]:
metadata = a_path + "/fma_metadata"
os.chdir(metadata)

genres_df = pd.read_csv('genres.csv')
tracks_df = pd.read_csv('raw_tracks.csv')

### make dataframes and adjustments

In [4]:
# drop rows missing the 'genre' value
tracks_df = tracks_df[tracks_df['track_genres'].notna()]

# string conversion to list of genre dictionaries
tracks_df['track_genres'] = tracks_df['track_genres'].apply(eval)

# assign primary genre classification id, then make an int
tracks_df['genre_ids'] = tracks_df['track_genres'].apply(get_genre_id)

# convert IDs to ints for comparisons
tracks_df['genre_ids'] = tracks_df['genre_ids'].apply(id_to_int)
tracks_df['track_id'] = tracks_df['track_id'].apply(id_to_int)

### dictionaries for lookups

In [5]:
# dictionaries for top-level genre lookup from ID values
genre_lookup = genres_df.set_index('genre_id').to_dict()['title']
id_lookup = genres_df.set_index('title').to_dict()['genre_id']

# if we ever want everything under only a few genres
# key = current genre, value = parent genre
parent_lookup = genres_df.set_index('genre_id').to_dict()['top_level']

# when a genre has no parent, it is top-level
top_genres = genres_df[genres_df['parent'] == 0]['title'].to_list()

In [38]:
# get songs to count genres
root_dir = a_path + "/fma_small"

song_paths = make_file_list(root_dir)

does not
does not
does not


In [39]:
print(root_dir)
print(song_paths)

ll/069/069767.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069765.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069201.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069567.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069822.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069188.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069823.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069200.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069764.mp3', '/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/069/069002.mp3', '/Users/

### create directories to sort music files into

In [40]:
# genre parent directory
gp_dir = a_path + "/genres"
print(gp_dir)

/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/genres


In [41]:
try:
    Path(gp_dir).mkdir(mode=0o777, parents=False, exist_ok=True)
except ValueError:
    print('did not make genre dir')

In [43]:
# create directories
for genre in top_genres:
    if "/" in genre:
        genre = genre.replace('/', '&')
    genre_dir = gp_dir + "/" + genre
    # print(genre_dir)
    Path(genre_dir).mkdir(mode=0o777, parents=False, exist_ok=True)

In [44]:
moved = 0
# start counting genres
print(song_paths)
for i in range(len(song_paths)):
    # load audio file
    audiofile = eyed3.load(song_paths[i])
    filename = song_paths[i][-10:]

    # get genre and strip genre ID
    song_genre = str(audiofile.tag.genre)
    # will either be 0 or idx after (id)
    end = song_genre.find(')') + 1
    # for Romany(gypsy) genre
    if end == len(song_genre):
        end = 0
    song_genre = song_genre[end:]

    # change to top-level genre
    song_genre = genre_lookup[parent_lookup[id_lookup[song_genre]]]

    # fix for directory moving
    if "/" in song_genre:
        song_genre = song_genre.replace('/', '&')
    
    # rename and move files
    new_path = a_path + "/genres/" + song_genre + "/" + filename
    print(song_paths[i], new_path)
    os.replace(song_paths[i], new_path)
    moved += 1
print('moved ' + str(moved) + ' songs.')

box/_Classes/21_Winter/CS_467/Project_Folder/fma/data/genres/Folk/051333.mp3
/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/051/051118.mp3 /Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/genres/Experimental/051118.mp3
/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/051/051291.mp3 /Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/genres/Pop/051291.mp3
/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/067/067470.mp3 /Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/genres/Electronic/067470.mp3
/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/fma_small/067/067673.mp3 /Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/data/genres/Rock/067673.mp3
/Users/aprilcastaneda/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/fma/dat