In [1]:
import os
import IPython.display as ipd

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import librosa
from librosa import display, beat
import glob 

import mutagen
from mutagen.mp3 import MP3, EasyMP3
from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3, TCON

## Mutagen ID3 tags

[ID3](http://id3.org/id3v2.3.0) tags are a data container that standardizes the format the data is stored as.  This typically includes Title, Artist, Genre, etc.  ID3 tags can be difficult to see or understand at first glance but the Mutagen package has a convenient module called EasyID3 which outputs a more human readable format, eg "genre" instead of "TCON."

Note about EDA: After some initial exploration of the ID3 tags, I realized the genre tags were missing or incorrect.  This led to me finding that a lot of the songs that were categorized into a genre were not in any way related to that genre.  Because of this, most of the data cleaning was done in the folder on the computer because I had to listen to samples of as many songs as I could to ensure that they all were correct.  As a result I had to delete almost two-thirds of my songs and search the internet for more royalty-free music to download (and also make sure those were properly sorted).   

In [None]:
# # Changing all the genre tags to match their specific genres

# def id3_change(folder_path, ID3='TCON', text=None, file_type=".mp3"):
    
#     path = (folder_path + '/*' + file_type)
    
#     for track in glob.glob(path):
#         track = mutagen.File(track)
#         if track.tags == None:
#             add_details(track, text)
#         else:
#             track.tags.add(TCON(text=[text]))
#             track.save()

        
# #     return "All tags updated!"

In [None]:
# # Changing all the genre tags to match their specific genres

# def tag_change(folder_path, easyID3_tag='genre', text=None, file_type=".mp3"):
    
#     path = (folder_path + '/*' + file_type) 
    
#     for track in glob.glob(path):
#         track = EasyID3(track)
#         track[easyID3_tag] = text
#         audio.save()
        
#     return "All tags updated!"

In [None]:
def add_details(file_name, tag='genre', text):
    '''
    Adds the details to song
    '''

    tags = EasyMP3(file_name)
    tags[tag] = text
    tags.save()

    tags = ID3(file_name)

    tags.save(file_name)

In [None]:
# Making sure all the labels for tracks are set

path1 = "D:/DnB/*.mp3"
for track in glob.glob(path1):
    add_details(track, 'DnB')
    
path2 = "D:/Dubstep/*.mp3"
for track in glob.glob(path2):
    add_details(track, 'Dubstep')
    
path3 = "D:/Garage/*.mp3"
for track in glob.glob(path3):
    add_details(track, 'Garage Rock')
    
path4 = "D:/House/*.mp3"
for track in glob.glob(path4):
    add_details(track, 'House')
    
path5 = "D:/Indie-Rock/*.mp3"
for track in glob.glob(path5):
    add_details(track, 'Indie Rock')
    
path6 = "D:/Techno/*.mp3"
for track in glob.glob(path6):
    add_details(track, 'Techno')

In [2]:
tracks = [tracks for tracks in glob.glob("D:\MusicSubset\*\*.mp3")]     # Get list of all the tracks
song_list = [dict(EasyID3(tracks).items()) for tracks in tracks]  # Create a list of dictionaries for ID3 tags to create a data frame from

In [3]:
track_df = pd.DataFrame(song_list)
track_df = track_df[['title', 'genre']]  # Only need Title and Genre 

In [4]:
track_df.fillna('', inplace=True)                     # Title not necessary since I'll be adding file paths as well
track_df = track_df.astype(str)                       # All columns were lists of length 1 so converting to strings
track_df['title'] = track_df['title'].str.strip("[]") 
track_df['genre'] = track_df['genre'].str.strip("[]")
track_df.head()

Unnamed: 0,title,genre
0,'test','DnB'
1,'one04 866609','DnB'
2,'Dead or Alive','DnB'
3,'*DnB* Panacea 1Hit Free DL!!!','DnB'
4,'*Jungle* Sensi |#1Hit Free DL!!!','DnB'


In [5]:
track_df['file_path'] = tracks   # Adding column for file paths 
track_df.head()

Unnamed: 0,title,genre,file_path
0,'test','DnB',D:\MusicSubset\DnB\614music+dancingwithspirits...
1,'one04 866609','DnB',D:\MusicSubset\DnB\axaschaller+one04.mp3
2,'Dead or Alive','DnB',D:\MusicSubset\DnB\beatcomplot+deadoralive.mp3
3,'*DnB* Panacea 1Hit Free DL!!!','DnB',D:\MusicSubset\DnB\beatjunkierato+dnbpanacea1h...
4,'*Jungle* Sensi |#1Hit Free DL!!!','DnB',D:\MusicSubset\DnB\beatjunkierato+junglesensi1...


In [6]:
track_df.isna().sum()  # Checking to make sure all nulls are dealt with

title        0
genre        0
file_path    0
dtype: int64

In [7]:
track_df.to_csv('subset_df.csv', index=False)  # Only need to add MFCCs for each track

In [9]:
df = pd.read_csv('subset_df.csv')
df.head()

Unnamed: 0,title,genre,file_path
0,'test','DnB',D:\MusicSubset\DnB\614music+dancingwithspirits...
1,'one04 866609','DnB',D:\MusicSubset\DnB\axaschaller+one04.mp3
2,'Dead or Alive','DnB',D:\MusicSubset\DnB\beatcomplot+deadoralive.mp3
3,'*DnB* Panacea 1Hit Free DL!!!','DnB',D:\MusicSubset\DnB\beatjunkierato+dnbpanacea1h...
4,'*Jungle* Sensi |#1Hit Free DL!!!','DnB',D:\MusicSubset\DnB\beatjunkierato+junglesensi1...


In [10]:
df.isnull().sum()

title        3
genre        0
file_path    0
dtype: int64

In [11]:
df[df.title.isnull()]

Unnamed: 0,title,genre,file_path
212,,'Techno',D:\MusicSubset\Techno\01_Sector4_Distance.mp3
229,,'Techno',D:\MusicSubset\Techno\02_Sector4_Lighttube.mp3
238,,'Techno',D:\MusicSubset\Techno\03_Sector4_Tempelhof.mp3


In [12]:
df.fillna(' ', inplace=True) # Again the title isn't as important since the file name is also there

In [13]:
df.isna().sum()

title        0
genre        0
file_path    0
dtype: int64

In [14]:
df.to_csv('subset_df.csv', index=False)

In [15]:
test = pd.read_csv('subset_df.csv')

In [16]:
test.isna().sum()  # Make sure null values were removed

title        0
genre        0
file_path    0
dtype: int64

Unnamed: 0,title,genre,file_path
0,'test','DnB',D:\MusicSubset\DnB\614music+dancingwithspirits...
1,'one04 866609','DnB',D:\MusicSubset\DnB\axaschaller+one04.mp3
2,'Dead or Alive','DnB',D:\MusicSubset\DnB\beatcomplot+deadoralive.mp3
3,'*DnB* Panacea 1Hit Free DL!!!','DnB',D:\MusicSubset\DnB\beatjunkierato+dnbpanacea1h...
4,'*Jungle* Sensi |#1Hit Free DL!!!','DnB',D:\MusicSubset\DnB\beatjunkierato+junglesensi1...
5,'War Paint','DnB',D:\MusicSubset\DnB\blackheartthesamurai+warpai...
6,'Bloctronic - get gone (Final)','DnB',D:\MusicSubset\DnB\bloctronic+bloctronicgetgon...
7,'Splashback','DnB',D:\MusicSubset\DnB\breakbeatpimp+splashback.mp3
8,'cspaceSlowGrooveVersion 408620','DnB',D:\MusicSubset\DnB\chemicalmike+cspaceslowgroo...
9,'ArpeggioHeaven','DnB',D:\MusicSubset\DnB\chromophore+arpeggioheaven.mp3
