In [1]:
import os
import IPython.display as ipd

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import librosa
from librosa import display, beat
import glob 

import mutagen
from mutagen.mp3 import MP3, EasyMP3
from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3, TCON

## Quick look at Librosa

In [None]:
data, sampling_rate = librosa.load('./data/Chad_Crouch_-_06_-_Pacing.mp3')

In [None]:
plt.figure(figsize=(12, 4))
librosa.display.waveplot(data, sr=sampling_rate);   # Audio Time series that will be used to create spectrograms

In [None]:
# Estimate a static tempo, not really necessary but why not?
y, sr = librosa.load("./data/Chad_Crouch_-_06_-_Pacing.mp3")
onset_env = librosa.onset.onset_strength(y, sr=sr)
tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
tempo                                           

## Mutagen ID3 tags

In [None]:
# # Changing all the genre tags to match their specific genres

# def id3_change(folder_path, ID3='TCON', text=None, file_type=".mp3"):
    
#     path = (folder_path + '/*' + file_type)
    
#     for track in glob.glob(path):
#         track = mutagen.File(track)
#         if track.tags == None:
#             add_details(track, text)
#         else:
#             track.tags.add(TCON(text=[text]))
#             track.save()

        
# #     return "All tags updated!"

In [None]:
# # Changing all the genre tags to match their specific genres

# def tag_change(folder_path, easyID3_tag='genre', text=None, file_type=".mp3"):
    
#     path = (folder_path + '/*' + file_type) 
    
#     for track in glob.glob(path):
#         track = EasyID3(track)
#         track[easyID3_tag] = text
#         audio.save()
        
#     return "All tags updated!"

In [None]:
def add_details(file_name, tag='genre', text):
    '''
    Adds the details to song
    '''

    tags = EasyMP3(file_name)
    tags[tag] = text
    tags.save()

    tags = ID3(file_name)

    tags.save(file_name)

In [None]:
# Making sure all the labels for tracks are set

path1 = "D:/DnB/*.mp3"
for track in glob.glob(path1):
    add_details(track, 'DnB')
    
path2 = "D:/Dubstep/*.mp3"
for track in glob.glob(path2):
    add_details(track, 'Dubstep')
    
path3 = "D:/Garage/*.mp3"
for track in glob.glob(path3):
    add_details(track, 'Garage Rock')
    
path4 = "D:/House/*.mp3"
for track in glob.glob(path4):
    add_details(track, 'House')
    
path5 = "D:/Indie-Rock/*.mp3"
for track in glob.glob(path5):
    add_details(track, 'Indie Rock')
    
path6 = "D:/Techno/*.mp3"
for track in glob.glob(path6):
    add_details(track, 'Techno')

In [140]:
tracks = [tracks for tracks in glob.glob("D:\Music\*\*.mp3")]     # Get list of all the tracks
song_list = [dict(EasyID3(tracks).items()) for tracks in tracks]  # Create a list of dictionaries for ID3 tags to create a data frame from

In [232]:
track_df = pd.DataFrame(song_list)
track_df = track_df[['title', 'genre']]  # Only need Title and Genre 

In [233]:
track_df.fillna('', inplace=True)                     # Title not necessary since I'll be adding file paths as well
track_df = track_df.astype(str)                       # All columns were lists of length 1 so converting to strings
track_df['title'] = track_df['title'].str.strip("[]") 
track_df['genre'] = track_df['genre'].str.strip("[]")
track_df.head()

Unnamed: 0,title,genre
0,'002 Donald Trump','DnB'
1,'003 Traffic Jam','DnB'
2,'004 Circles Squares','DnB'
3,'005 I Want To Lick The Moon','DnB'
4,'006 Lentil Breakdown','DnB'


In [234]:
track_df['file_path'] = tracks   # Adding column for file paths 
track_df.head()

Unnamed: 0,title,genre,file_path
0,'002 Donald Trump','DnB',D:\Music\DnB\002_Donald_Trump.mp3
1,'003 Traffic Jam','DnB',D:\Music\DnB\003_Traffic_Jam.mp3
2,'004 Circles Squares','DnB',D:\Music\DnB\004_Circles_Squares.mp3
3,'005 I Want To Lick The Moon','DnB',D:\Music\DnB\005_I_Want_To_Lick_The_Moon.mp3
4,'006 Lentil Breakdown','DnB',D:\Music\DnB\006_Lentil_Breakdown.mp3


In [236]:
track_df.isna().sum()  # Checking to make sure all nulls are dealt with

title        0
genre        0
file_path    0
dtype: int64

In [237]:
track_df.to_csv('track_df.csv', index=False)  # Only need to add MFCCs for each track