In [1]:
import pandas as pd
from pathlib import Path
import eyed3
import os
from config import *
import librosa
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

eyed3.log.setLevel("ERROR")

In [2]:
# Global Variables
# Column names used for the per-genre CSVs: the filename column, six summary
# features, twenty MFCC columns (mfcc1..mfcc20), then the genre label.
csv_header = (
    ['filename', 'chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate']
    + [f'mfcc{n}' for n in range(1, 21)]
    + ['genre']
)

# Genres to process, kept in alphabetical order
genre_list = sorted([
    'Hip-Hop', 'Trip-Hop', 'Ambient Electronic', 'Pop', 'Metal', 'Reggae',
    'Electronic', 'Classical', 'Indie-Rock', 'Rock', 'Chiptune', 'Folk',
    'Punk', 'Post-Rock', 'Country', 'Techno', 'Jazz', 'Psych-Rock',
])

filename = csv_header[0]            # Name of the filename column
features_list = csv_header[1:-1]    # Everything between filename and genre
header = ['Genre', *features_list]  # Header row of the cv table (independent list)

# Placeholders that the visible cells never read; kept so any other code
# relying on these module-level names keeps working.
filenames_idx = feature_idx = genre_idx = 0

feature_count = 26
num_decimals = 4                    # Precision used when rounding sd / mean / cv
sd = cv = mean = 0

# Labels describing which range a coefficient of variation (cv) falls in
ideal = "IDEAL (0 <= cv < 1)"
not_ideal_neg = "NOT_IDEAL (cv < 0)"
not_ideal_large = "NOT_IDEAL (cv >= 1)"
cv_ideal = graph_path = ""
cv_genres = "cv_genres"             # Base name of the output csv

cv_all = []                         # Rows of the cv table, filled by later cells

print(genre_list)
print(features_list)

['Ambient Electronic', 'Chiptune', 'Classical', 'Country', 'Electronic', 'Folk', 'Hip-Hop', 'Indie-Rock', 'Jazz', 'Metal', 'Pop', 'Post-Rock', 'Psych-Rock', 'Punk', 'Reggae', 'Rock', 'Techno', 'Trip-Hop']
['chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18', 'mfcc19', 'mfcc20']


In [3]:
# Functions
def get_cv_ideal(cv):
    '''Classify a coefficient of variation.

    Returns the module-level label `ideal` when 0 <= cv < 1,
    `not_ideal_neg` when cv < 0, and `not_ideal_large` for anything else
    (cv >= 1, and also values such as NaN that fail both comparisons).'''
    if 0 <= cv < 1:
        return ideal
    return not_ideal_neg if cv < 0 else not_ideal_large

def set_graph_paths(genre, feature):
    '''Create the genre's graph directory and return the graph file path.

    The directory is <graph_path_parent>/<genre>; `graph_path_parent` is not
    defined in this notebook (presumably supplied by the `config` star
    import -- confirm). Missing parent directories are created as well, so
    the first run works even when the output root does not exist yet.

    Returns the path of the PDF to write, as a string,
    e.g. <graph_path_parent>/Electronic/Electronic_mfcc1.pdf'''
    genre_dir = os.path.join(graph_path_parent, f'{genre}')
    # parents=True: build the whole chain instead of raising FileNotFoundError
    # when graph_path_parent itself is missing; exist_ok keeps re-runs quiet.
    Path(genre_dir).mkdir(mode=0o755, parents=True, exist_ok=True)
    return os.path.join(genre_dir, f'{genre}_{feature}.pdf')

def graph_feature(x, y, genre, feature, cv, sd, mean, cv_ideal):
    '''Plot one feature for one genre and save the figure to disk.

    x / y are the plotted series, the genre is the title and the feature
    name is the legend entry. The x-axis label carries the cv, sd and mean
    values and the y-axis label carries the cv_ideal string. The figure is
    written to the path returned by set_graph_paths and then cleared.'''
    stats_caption = f'CV: {cv} | SD: {sd} | Mean: {mean} '

    # Draw on the current pyplot figure
    plt.title(f'{genre}')
    plt.plot(x, y, label=f'{feature}')
    plt.xlabel(stats_caption)
    plt.ylabel(f'{cv_ideal}')
    plt.legend()

    # Persist the figure, then wipe it so the next feature starts clean
    plt.savefig(set_graph_paths(genre, feature))
    plt.clf()

def feature_calculations(y):
    '''Compute summary statistics for one feature column.

    Returns the tuple (sd, mean, cv, cv_ideal), where cv = sd / mean and
    cv_ideal is the label from get_cv_ideal for the unrounded cv. The three
    numbers are rounded to `num_decimals` places.'''
    average = y.mean()
    spread = y.std()
    ratio = spread / average

    # Classify before rounding so the label reflects the exact value
    verdict = get_cv_ideal(ratio)

    rounded = tuple(round(value, num_decimals) for value in (spread, average, ratio))
    return rounded + (verdict,)

def loop_thru_features(df, x, cv_genre, genre):
    '''Process every feature in features_list for a single genre.

    For each feature: take its column from df, compute its statistics,
    append the cv to cv_genre (in place) and save the feature graph.
    Returns the same cv_genre list that was passed in.'''
    for name in features_list:
        column = df[name]
        spread, average, coeff, verdict = feature_calculations(column)
        cv_genre.append(coeff)  # Record the cv before graphing, one entry per feature
        graph_feature(x, column, genre, name, coeff, spread, average, verdict)
    return cv_genre

def loop_thru_genres(cv_all):
    '''Build one cv row per genre and append it to cv_all.

    For each genre in genre_list, reads ../Genre_CSVs/<genre>.csv, sorts it
    by the filename column, and lets loop_thru_features compute the cvs and
    save the graphs. Each row starts with the genre name. cv_all is
    modified in place and also returned.'''
    for genre in genre_list:
        # Load the genre's csv, ordered by filename
        genre_frame = pd.read_csv(f'../Genre_CSVs/{genre}.csv').sort_values(by=[filename])
        track_names = genre_frame[filename]  # Used as the x values of every graph

        # Row is seeded with the genre name; the feature loop adds the cvs
        row = loop_thru_features(genre_frame, track_names, [genre], genre)
        cv_all.append(row)
    return cv_all

In [4]:
# Start the cv table with the features header row.
# Rebinding (rather than appending) keeps this cell idempotent: re-running it
# cannot stack duplicate header rows onto an already populated list.
cv_all = [header]

[['Genre', 'chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18', 'mfcc19', 'mfcc20']]


In [None]:
# Loop through all genre csvs to create feature graphs and calculate cvs.
# loop_thru_genres appends to the list it is given, so pass a fresh list that
# holds only the first (header) row; re-running this cell then rebuilds the
# table instead of appending every genre row a second time.
cv_all = loop_thru_genres(cv_all[:1])

In [None]:
# Write the cv table (a list of rows: header first, then one row per genre) to csv
cv_genres = 'cv_genres'
# newline='' lets the csv writer control line endings, as the csv module
# requires; without it, blank rows appear between records on Windows.
# The file is only written, so plain 'w' is enough.
with open(f'{cv_genres}.csv', 'w', newline='') as cv_csv:
    csv_writer = csv.writer(cv_csv, delimiter=',')
    csv_writer.writerows(cv_all)