In [None]:
import pandas as pd
import numpy as np
import os 
import librosa           
import librosa.display   
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold 
from sklearn.preprocessing import StandardScaler, LabelEncoder 
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC                   
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix 
import matplotlib.pyplot as plt 
import seaborn as sns    
import warnings
warnings.filterwarnings('ignore')       

# Root folder that holds one sub-folder per genre, each containing audio clips.
# The MUSIC_GENRES_PATH environment variable overrides the original author's
# default location so the notebook can run on other machines unchanged.
base_path = os.environ.get(
    "MUSIC_GENRES_PATH",
    "C:/Users/anish/OneDrive/Desktop/ML/MusicClassifier/Data/genres_original",
)


def _mean_and_std(values):
    """Return ``(mean, standard deviation)`` taken over every element of *values*."""
    return np.mean(values), np.std(values)


def _extract_features(song_path, genre):
    """Build one feature row (a dict) for the audio file at *song_path*.

    The row holds the genre label, the mean/std of the zero-crossing rate and
    of the spectral centroid, and the mean/std of each of the 40 MFCC rows.
    Any exception raised while loading or analysing the file propagates to
    the caller.
    """
    # sr=None asks librosa to keep the file's own sampling rate;
    # mono=True ensures a single 1-D signal is produced.
    y, sr = librosa.load(song_path, sr=None, mono=True)

    zcr_mean, zcr_sd = _mean_and_std(librosa.feature.zero_crossing_rate(y))
    sc_mean, sc_sd = _mean_and_std(librosa.feature.spectral_centroid(y=y, sr=sr))
    row = {
        'Genre': genre,
        'ZCR Mean': zcr_mean,
        'ZCR STD': zcr_sd,
        'Spectral Centroid Mean': sc_mean,
        'Spectral Centroid STD': sc_sd,
    }

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40, n_fft=256, n_mels=40)
    # Column names are numbered from 1, one MEAN/STD pair per MFCC row.
    for number, coefficients in enumerate(mfccs, start=1):
        mfcc_mean, mfcc_sd = _mean_and_std(coefficients)
        row[f'MFCC {number} MEAN'] = mfcc_mean
        row[f'MFCC {number} STD'] = mfcc_sd
    return row


# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the exported table reproducible across runs and platforms.
all_files = sorted(os.listdir(base_path))
list_of_rows = []
for genre_folder in all_files:
    # os.path.join ensures a correct path on both Windows and macOS.
    genre_path = os.path.join(base_path, genre_folder)
    # Skip stray files (e.g. OS metadata files) sitting next to the genre
    # folders; calling os.listdir on them would abort the whole run.
    if not os.path.isdir(genre_path):
        continue
    for song in sorted(os.listdir(genre_path)):
        song_path = os.path.join(genre_path, song)
        try:
            list_of_rows.append(_extract_features(song_path, genre_folder))
        except Exception as e:
            # Best effort: an unreadable or corrupt clip is reported (with
            # its path, so it can be located) and skipped.
            print(f"Skipping {song_path}: {e}")
            continue

# One row per successfully processed clip, written as a tab-separated table.
music_analytics = pd.DataFrame(list_of_rows)
music_analytics.to_csv('features.txt', sep='\t', index=False)





     Genre  ZCR Mean   ZCR STD  Spectral Centroid Mean  Spectral Centroid STD  \
0    blues  0.083045  0.027694             1784.122641             360.202005   
1    blues  0.056040  0.038046             1530.261767             613.119490   
2    blues  0.076291  0.031731             1552.832481             395.564168   
3    blues  0.033309  0.020561             1070.153418             429.378632   
4    blues  0.101461  0.044205             1835.128513             585.874983   
..     ...       ...       ...                     ...                    ...   
994   rock  0.089227  0.036121             2008.174521             531.126678   
995   rock  0.097664  0.029454             2006.827265             426.773755   
996   rock  0.121823  0.041192             2077.565822             481.369184   
997   rock  0.048724  0.028417             1398.649504             490.312357   
998   rock  0.076305  0.046597             1609.785181             649.865532   

     MFCC 1 MEAN  MFCC 1 S