In [8]:
import os
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [17]:
def _extract_audio_features(path, n_mfcc=13, hop_length=512):
    """Compute the per-file summary features for a single audio file.

    Parameters
    ----------
    path : str
        Path of the audio file to load with ``librosa.load``.
    n_mfcc : int, default 13
        Number of MFCC coefficients to compute.
    hop_length : int, default 512
        Hop length passed to ``librosa.feature.mfcc``.

    Returns
    -------
    dict
        Keys ``zero_crossing_rate``, ``spectral_centroid``,
        ``spectral_rolloff`` and ``mfcc_1`` .. ``mfcc_<n_mfcc>``, each mapped
        to the mean of that feature over all frames of the file.
    """
    # Loading in the audio file
    y, sr = librosa.load(path)

    # The signal and sample rate are passed by keyword: recent librosa
    # releases reject positional arguments for the spectral features.
    features = {
        'zero_crossing_rate': np.mean(librosa.feature.zero_crossing_rate(y=y)),
        'spectral_centroid': np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
        'spectral_rolloff': np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),
    }

    # Calculating the mfcc coefficients and averaging each one over time
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)
    mfcc_means = np.mean(mfcc.T, axis=0)
    for position, value in enumerate(mfcc_means, start=1):
        features['mfcc_{}'.format(position)] = value

    return features


def new_data_prep(data_dir="../Data/New_test_data",
                  train_csv='../csv/genre_clean.csv',
                  song_names_csv='../csv/song_name.csv',
                  output_csv='../csv/new_test_transformed.csv'):
    """Extract, scale and persist audio features for a folder of new songs.

    Every regular, non-hidden file in ``data_dir`` is loaded with librosa and
    summarised by the mean zero-crossing rate, spectral centroid, spectral
    rolloff and the first 13 MFCC coefficients. The features are then scaled
    with a ``MinMaxScaler`` fitted on the data in ``train_csv`` (after its
    ``y`` column is dropped).

    Parameters
    ----------
    data_dir : str
        Directory containing the audio files to process.
    train_csv : str
        CSV file holding the reference data the scaler is fitted on; it must
        contain a ``y`` column, which is dropped before fitting.
    song_names_csv : str
        Destination CSV for the song names (file names without extension).
    output_csv : str
        Destination CSV for the scaled features (written without the index).

    Returns
    -------
    pandas.DataFrame
        The scaled features, one row per audio file in file-name order, with
        integer column labels.

    Raises
    ------
    ValueError
        If ``data_dir`` contains no files to process.
    """
    feature_columns = (
        ['zero_crossing_rate', 'spectral_centroid', 'spectral_rolloff']
        + ['mfcc_{}'.format(position) for position in range(1, 14)]
    )

    # Collecting the audio files. The directory iterator is closed as soon as
    # it is consumed, sub-directories and hidden files (for example notebook
    # checkpoints) are skipped, and entries are sorted by name because
    # os.scandir yields them in arbitrary order.
    with os.scandir(data_dir) as entries:
        audio_files = sorted(
            (entry for entry in entries
             if entry.is_file() and not entry.name.startswith('.')),
            key=lambda entry: entry.name,
        )

    if not audio_files:
        raise ValueError("No audio files found in {!r}".format(data_dir))

    # Looping through each file in the directory
    labels = []
    records = []
    for entry in audio_files:
        # The label is the file name without its extension; splitext only
        # strips the final suffix, so dots inside the song title are kept.
        labels.append(os.path.splitext(entry.name)[0])
        records.append(_extract_audio_features(entry.path))

    # Creating a data frame with the values we collected
    df = pd.DataFrame(records, columns=feature_columns)
    df['labels'] = labels

    # Saving the song names so rows of the scaled data can be matched to songs
    songs = df['labels']
    songs.to_csv(song_names_csv)

    new_features = df.drop(columns=['labels'])
    train_features = pd.read_csv(train_csv).drop(columns=['y'])

    # Scaling the New Data with the ranges learned from the reference data
    scalar = MinMaxScaler()
    scalar.fit(train_features)
    trans = scalar.transform(new_features)
    X = pd.DataFrame(trans)
    X.to_csv(output_csv, index=False)

    return X

In [18]:
# Running the full preparation pipeline; the scaled feature frame it returns
# is displayed as this cell's output.
new_data_prep()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.308168,0.564491,0.662179,0.869273,0.407445,0.723863,0.210356,0.723302,0.519147,0.47419,0.574486,0.441521,0.475728,0.615114,0.580782,0.779685
1,0.336732,0.484284,0.561395,0.97645,0.470244,0.588738,0.413311,0.617333,0.46076,0.60597,0.433778,0.623499,0.533345,0.506835,0.45799,0.628718
2,0.248228,0.45688,0.575797,0.873063,0.464724,0.683919,0.474371,0.638834,0.596702,0.708882,0.407134,0.50881,0.510615,0.578155,0.544224,0.56122
3,0.415904,0.51177,0.57493,0.92606,0.459016,0.480736,0.18088,0.425885,0.430513,0.481201,0.338393,0.428693,0.420279,0.558797,0.455358,0.519666
4,0.465018,0.606618,0.661545,1.001113,0.347377,0.559447,0.537925,0.500585,0.46139,0.591729,0.51097,0.699198,0.602554,0.619419,0.64031,0.51566
