# Data visualisation and exploration

In [None]:
import pandas as pd
import numpy as np

import os
import sys

# librosa is a Python library for analysing audio and music; it is used further down to load the audio files and extract features from them.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# to play the audio files
from IPython.display import Audio

import warnings

from tensorflow.keras.preprocessing.sequence import pad_sequences

from tqdm import tqdm, tqdm_pandas

In [None]:
# Folder holding the CREMA wav files, relative to the notebook.
CREMA = '../raw_data/wav_files/'
# Alphabetically ordered listing of every entry in that folder.
dir_list = sorted(os.listdir(CREMA))
print(dir_list[:10])

In [None]:
# Build one row per wav file: emotion label, speaker gender and file path,
# all parsed from the file name (underscore-separated fields: the first is
# the actor id, the third is the emotion code).

# Actor ids of the female speakers; every other actor id is labelled male.
female = [1002,1003,1004,1006,1007,1008,1009,1010,1012,1013,1018,1020,1021,1024,1025,1028,1029,1030,1037,1043,1046,1047,1049,
          1052,1053,1054,1055,1056,1058,1060,1061,1063,1072,1073,1074,1075,1076,1078,1079,1082,1084,1089,1091]
female_ids = set(female)

# Emotion code found in the file name -> readable label.
EMOTION_LABELS = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

gender = []
emotion = []
path = []

for wav_name in dir_list:
    # Skip stray non-audio entries (hidden OS files, checkpoints, ...) whose
    # names would make the int() / field parsing below raise.
    if not wav_name.lower().endswith('.wav'):
        continue
    part = wav_name.split('_')
    gender.append('female' if int(part[0]) in female_ids else 'male')
    # Codes missing from the table are kept, labelled 'unknown'.
    emotion.append(EMOTION_LABELS.get(part[2], 'unknown'))
    path.append(CREMA + wav_name)

# Column order (emotion, gender, path) is kept as before.
CREMA_df = pd.DataFrame({'emotion': emotion, 'gender': gender, 'path': path})

In [None]:
def combo(df):
    """Build the combined ``<gender>_<emotion>`` label for one row.

    Parameters
    ----------
    df : pandas.Series or mapping
        A single row exposing ``'gender'`` and ``'emotion'`` entries, which is
        what ``DataFrame.apply(combo, axis=1)`` passes in.

    Returns
    -------
    str
        Gender and emotion joined by an underscore, e.g. ``'male_sad'``.
    """
    # Look the fields up by label. Integer keys on a string-labelled Series
    # only work through a deprecated positional fallback, and they pick the
    # wrong fields as soon as the column order changes.
    return f"{df['gender']}_{df['emotion']}"

def sad(x):
    """Return 1 when ``x`` is the label 'sad' (case-insensitive), otherwise 0."""
    return int(x.lower() == 'sad')

def angry(x):
    """Flag the 'angry' label: 1 on a case-insensitive match, 0 for anything else."""
    if x.lower() == 'angry':
        return 1
    return 0

def disgust(x):
    """Return 0 unless ``x`` equals 'disgust' ignoring case, in which case return 1."""
    return 0 if x.lower() != 'disgust' else 1

def fear(x):
    """Indicator for the 'fear' label (case-insensitive): 1 if it matches, else 0."""
    lowered = x.lower()
    is_fear = lowered == 'fear'
    return int(is_fear)

def happy(x):
    """Return 1 for the label 'happy' in any letter case, and 0 for every other string."""
    # A bool indexes the pair as 0 / 1, selecting the matching flag value.
    return (0, 1)[x.lower() == 'happy']

def neutral(x):
    """Case-insensitive test for the 'neutral' label, reported as the integer 1 or 0."""
    matches = 'neutral' == x.lower()
    if not matches:
        return 0
    return 1

In [None]:
# Combined "<gender>_<emotion>" label for every clip.
CREMA_df['gender_emotion'] = CREMA_df.apply(combo, axis=1)

# One 0/1 indicator column per emotion; each column is named after the
# helper function that computes it.
for flag_fn in (sad, angry, disgust, fear, happy, neutral):
    CREMA_df[flag_fn.__name__] = CREMA_df['emotion'].apply(flag_fn)

In [None]:
CREMA_df

In [None]:
# Number of clips per emotion label.
plt.figure(figsize=(8, 5))
sns.histplot(CREMA_df['emotion']);

In [None]:
# Number of clips per speaker gender.
plt.figure(figsize=(4, 5))
sns.histplot(CREMA_df['gender']);

In [None]:
# Number of clips per combined gender/emotion label.
plt.figure(figsize=(18, 5))
sns.histplot(CREMA_df['gender_emotion']);

In [None]:
# use the well known Librosa library for this task 
fname = CREMA + '1029_IWW_HAP_XX.wav'  
data, sampling_rate = librosa.load(
    fname,
    sr=44100,
    mono=True,
    offset=0.0,
    duration=None,
    res_type='kaiser_best')

plt.figure(figsize=(15, 5))
librosa.display.waveshow(data, sr=sampling_rate)

# Lets play the audio 
Audio(fname)

In [None]:
# Sample rate shared by the feature-extraction cells that follow.
SAMPLE_RATE = 44100  
y, sr = librosa.load(fname, sr=SAMPLE_RATE, duration = 5) # Chop audio at 5 secs... 
# Eight MFCCs for the (at most) 5-second excerpt.
mfcc = librosa.feature.mfcc(y=y,
                            sr=SAMPLE_RATE,
                            S=None,
                            n_mfcc=8,
                            dct_type=2,
                            norm='ortho',
                            lifter=0,)

# NOTE(review): this bare expression is not the last statement of the cell, so
# nothing is displayed here; the next cell shows the shape.
mfcc.shape
plt.figure(figsize=(12, 6))
# NOTE(review): only the first slot of a 3-row grid is used - confirm the
# remaining two rows are meant to stay empty.
plt.subplot(3,1,1)
librosa.display.specshow(mfcc)
plt.ylabel('MFCC')
plt.colorbar();

In [None]:
mfcc.shape

In [None]:
# Mel-scaled power spectrogram of the first 5 seconds of the example clip.
y, sr = librosa.load(fname, sr=SAMPLE_RATE, duration=5)  # chop audio at 5 s
# The signal is passed by keyword: librosa's feature functions reject a
# positional `y` in current releases.
melspec = librosa.feature.melspectrogram(y=y,
                                         sr=sr,
                                         n_mels=128,
                                         n_fft=2048,
                                         hop_length=512,
                                         win_length=None,
                                         window='hann',
                                         center=True,
                                         pad_mode='constant',
                                         power=2.0)

# Convert to log scale (dB), using the peak power (max) as reference.
# melspec holds power values (power=2.0), so power_to_db is the matching
# conversion; amplitude_to_db would square the values a second time.
log_S = librosa.power_to_db(melspec, ref=np.max)

# Display the log mel spectrogram
plt.figure(figsize=(12, 4))
librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')
plt.colorbar(format='%+02.0f dB')
plt.tight_layout();

In [None]:
log_S.shape

In [None]:
# Reload the first 5 seconds of the example clip and split the signal with
# librosa's harmonic/percussive source separation.
y, sr = librosa.load(fname, sr=SAMPLE_RATE, duration = 5) 
y_harmonic, y_percussive = librosa.effects.hpss(y)
# Audio player for the harmonic component (the percussive one is played in
# the next cell).
Audio(y_harmonic, rate=sr)

In [None]:
Audio(y_percussive, rate=sr)

In [None]:
# Log-mel spectrograms of the harmonic and percussive components.
# The signals are passed by keyword (`y=`) because librosa's feature
# functions reject a positional signal in current releases.
# melspectrogram returns power values by default, hence power_to_db; both
# components are expressed in dB relative to their own peak.

# harmonic
melspec = librosa.feature.melspectrogram(y=y_harmonic, sr=sr, n_mels=128)
log_h = librosa.power_to_db(melspec, ref=np.max)

# percussive
melspec = librosa.feature.melspectrogram(y=y_percussive, sr=sr, n_mels=128)
log_p = librosa.power_to_db(melspec, ref=np.max)

# Display the log mel spectrogram of both harmonic and percussive
plt.figure(figsize=(12, 6))

plt.subplot(2, 1, 1)
librosa.display.specshow(log_h, sr=sr, x_axis='time', y_axis='mel')
plt.title('harmonic')
plt.colorbar(format='%+02.0f dB')

plt.subplot(2, 1, 2)
librosa.display.specshow(log_p, sr=sr, x_axis='time', y_axis='mel')
plt.title('percussive')
plt.colorbar(format='%+02.0f dB')

# Keep the two stacked panels from overlapping.
plt.tight_layout();

In [None]:
# STFT-based chromagram (8 chroma bins) of the first 5 seconds of the clip.
y, sr = librosa.load(fname, sr=SAMPLE_RATE, duration=5)
C = librosa.feature.chroma_stft(y=y,
                                sr=sr,
                                S=None,
                                n_fft=2048,
                                hop_length=512,
                                win_length=None,
                                window='hann',
                                center=True,
                                pad_mode='constant',
                                tuning=None,
                                n_chroma=8)

# Make a new figure
plt.figure(figsize=(12, 4))
# Pin the colour scale to [0, 1] so that the colours span the full range of
# chroma values (chroma_stft normalises every frame to a maximum of 1 by
# default).
librosa.display.specshow(C, sr=sr, x_axis='time', y_axis='chroma', vmin=0, vmax=1)
plt.title('Chromagram')
plt.colorbar()
plt.tight_layout()

In [None]:
C.shape

In [None]:
# Constant-Q chromagram (12 chroma bins) of the clip currently held in `y`.
C = librosa.feature.chroma_cqt(y=y,
                               sr=sr,
                               C=None,
                               hop_length=512,
                               fmin=None,
                               threshold=0.0,
                               tuning=None,
                               n_chroma=12,
                               n_octaves=7,
                               window=None,
                               bins_per_octave=36,
                               cqt_mode='full')
# Make a new figure
plt.figure(figsize=(12, 4))
# Pin the colour scale to [0, 1] so that the colours span the full range of
# chroma values (chroma_cqt normalises every frame to a maximum of 1 by
# default).
librosa.display.specshow(C, sr=sr, x_axis='time', y_axis='chroma', vmin=0, vmax=1)
plt.title('Chromagram')
plt.colorbar()
plt.tight_layout()

In [None]:
C.shape

In [None]:
# CENS chromagram (12 chroma bins) computed from the `y` / `sr` left by the
# earlier loading cell.
C = librosa.feature.chroma_cens(y=y,
                               sr=sr,
                                C=None,
                                hop_length=512,
                                fmin=None,
                                tuning=None,
                                n_chroma=12,
                                n_octaves=7,
                                bins_per_octave=36,
                                cqt_mode='full',
                                window=None,
                                norm=2,
                                win_len_smooth=41,
                                smoothing_window='hann')

# Make a new figure
plt.figure(figsize=(12,4))
# NOTE(review): vmin / vmax are NOT passed below, so the colour scale is
# chosen automatically from the data - pass them explicitly if a fixed range
# across chromagrams is wanted.
librosa.display.specshow(C, sr=sr, x_axis='time', y_axis='chroma')
plt.title('Chromagram')
plt.colorbar()
plt.tight_layout()

In [None]:
C.shape

In [None]:
# Polynomial features of order 0, 1 and 2 for the clip held in `y`; the three
# calls differ only in `order`, so they are produced by one comprehension.
p0, p1, p2 = [
    librosa.feature.poly_features(
        y=y,
        sr=sr,
        S=None,
        n_fft=2048,
        hop_length=512,
        win_length=None,
        window='hann',
        center=True,
        pad_mode='constant',
        order=poly_order,
        freq=None,
    )
    for poly_order in (0, 1, 2)
]
p0.shape

In [None]:
p2.shape

In [None]:
# One panel per polynomial term, all sharing the time axis.
fig, ax = plt.subplots(nrows=3, sharex=True, figsize=(8, 8))
times = librosa.times_like(p0)

# (y-axis label, ((coefficient row, legend label), ...)) for each panel, top
# to bottom. The rows picked for a given term differ per fit order.
panels = (
    ('Constant term ', ((p0[0], 'order=0'), (p1[1], 'order=1'), (p2[2], 'order=2'))),
    ('Linear term', ((p1[0], 'order=1'), (p2[1], 'order=2'))),
    ('Quadratic term', ((p2[0], 'order=2'),)),
)
for panel_ax, (ylabel, curves) in zip(ax, panels):
    for coeffs, curve_label in curves:
        panel_ax.plot(times, coeffs, label=curve_label, alpha=0.8)
    panel_ax.set(ylabel=ylabel)

# label_outer() is applied to the two upper panels only; the bottom panel is
# left as is.
for panel_ax in ax[:2]:
    panel_ax.legend()
    panel_ax.label_outer()
ax[2].legend()

# MFCC Inspection

In [None]:
# Inspect the MFCC matrix of the example clip.
# The clip is loaded here so the cell runs on a fresh kernel; it previously
# relied on a `wav` variable that only a later cell creates.
wav = librosa.load(fname, sr=SAMPLE_RATE, duration=5)  # (samples, sample_rate)
# Pass the signal by keyword and use the rate the audio was actually loaded
# with (the hard-coded 44000 did not match SAMPLE_RATE).
mfcc = librosa.feature.mfcc(y=wav[0], sr=wav[1], n_mfcc=5)
# Transpose so that each row is one frame and each column one coefficient.
mfcc = mfcc.T
print(mfcc)
print(mfcc.shape)
# Keep one matrix per clip in a plain list. np.append would flatten the list
# into a single 1-D array and duplicate the values.
mfccs = [mfcc]



In [None]:
mfccs = np.array(mfccs)

In [None]:
mfccs.shape

In [None]:
# Toy sequence data set: three sequences of four daily observations, each
# observation being [Temp, speed, pollution], plus one target per sequence.

# --- SEQUENCE A
day_1 = [10, 25, 50]  # observation 1
day_2 = [13, 10, 70]  # observation 2
day_3 = [9, 5, 90]
day_4 = [7, 0, 95]
sequence_a = [day_1, day_2, day_3, day_4]
y_a = 110  # pollution on day 5

# --- SEQUENCE B (Berlin)
sequence_b = [
    [25, 20, 30],
    [26, 24, 50],
    [28, 20, 80],
    [22, 3, 110],
]
y_b = 125

# --- SEQUENCE C (London)
sequence_c = [
    [15, 10, 60],
    [25, 20, 65],
    [35, 10, 75],
    [36, 15, 70],
]
y_c = 30

# Stack everything into float32 arrays: X holds the sequences, y the targets.
X = np.asarray([sequence_a, sequence_b, sequence_c], dtype=np.float32)
y = np.asarray([y_a, y_b, y_c], dtype=np.float32)

print(X.shape, y.shape, sep='\n')

# Padding All Clips

In [None]:
SAMPLE_RATE

In [None]:
# Padding experiment on the first `n` clips: per clip, stack 5 MFCC rows on
# top of the order-4 polynomial features, transpose to one row per frame, and
# pad every clip to the length of the longest one.
features = []
n = 50  # number of clips used for this quick experiment
for clip_path in CREMA_df['path'][:n]:
    wav = librosa.load(clip_path, sr=SAMPLE_RATE, duration=5)  # (samples, sample_rate)
    # Signal passed by keyword, and analysed at the rate it was loaded with
    # (the hard-coded 44000 did not match SAMPLE_RATE).
    mfcc = librosa.feature.mfcc(y=wav[0], sr=SAMPLE_RATE, n_mfcc=5)
    poly = librosa.feature.poly_features(y=wav[0], sr=SAMPLE_RATE, order=4)
    # Report shapes only; dumping both full arrays for every clip floods the
    # cell output.
    print(mfcc.shape, poly.shape)
    # Rows are features at this point, so vstack concatenates the two feature
    # sets; the transpose then gives one row per frame.
    mfcc = np.vstack((mfcc, poly)).T
    features.append(mfcc)

# Shorter clips are zero-padded at the end.
X_pad = pad_sequences(features, dtype='float32', padding='post', value=0)
X_pad.shape

In [None]:
X_pad.shape

In [None]:
# Load the label table and keep at most the first 50 rows of every
# gender_emotion group.
# NOTE(review): no visible cell writes 'targets.csv'; presumably it is an
# export of CREMA_df (the following cells read its 'path' and 'emotion'
# columns) - confirm where it comes from.
targets = pd.read_csv('targets.csv')
group = targets.groupby('gender_emotion')
sample = group.head(50)
sample

In [None]:
# MFCC sequences for every clip of the balanced sample, padded to a common
# length.
features = []
for clip_path in sample['path']:
    wav = librosa.load(clip_path, sr=SAMPLE_RATE, duration=5)  # (samples, sample_rate)
    # Signal passed by keyword, and analysed at the rate it was loaded with
    # (the hard-coded 44000 did not match SAMPLE_RATE).
    mfcc = librosa.feature.mfcc(y=wav[0], sr=SAMPLE_RATE, n_mfcc=5)
    # One row per frame, one column per coefficient.
    features.append(mfcc.T)

# Pad at the end with -1000, the same value the Masking layer of the LSTM
# model further down is configured to ignore.
X_pad = pad_sequences(features, dtype='float32', padding='post', value=-1000)
X_pad.shape

In [None]:
import keras
from keras import layers
from keras import regularizers
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from tensorflow.keras import utils
from keras.callbacks import (EarlyStopping, LearningRateScheduler,
                             ModelCheckpoint, TensorBoard, ReduceLROnPlateau)
from tensorflow.keras import losses, models, optimizers
from keras.activations import relu, softmax
from keras.layers import (Convolution2D, GlobalAveragePooling2D, BatchNormalization, Flatten, Dropout,
                          GlobalMaxPool2D, MaxPool2D, concatenate, Activation, Input, Dense)

# 1-D convolutional classifier: two Conv1D -> ReLU -> MaxPooling -> Dropout
# stages, then a dense head ending in a 6-way softmax.
# NOTE(review): input_shape=(274, 20) is hard-coded - confirm it matches the
# feature array this model is meant to be trained on.
model = Sequential()
model.add(layers.Conv1D(256, 5, padding='same',
                        input_shape=(274, 20)))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling1D(pool_size=4))
model.add(layers.Dropout(0.2))
model.add(layers.Conv1D(128, 5, padding='same'))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling1D(pool_size=4))
model.add(layers.Dropout(0.1))
model.add(layers.Flatten())
model.add(layers.Dense(64))
model.add(layers.Dense(6))
model.add(layers.Activation('softmax'))

# `metrics` is given as a list: a bare string is rejected by recent Keras
# releases.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

In [None]:
#y = CREMA_df['emotion'][:50]
y = sample['emotion']

In [None]:
X_pad.shape

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [None]:
# Encode the emotion labels held in `y` as integer class ids; `le` keeps the
# fitted encoder.
le = LabelEncoder()
y = le.fit_transform(y)
y.shape

In [None]:
y

In [None]:
# One-hot encode the integer class ids for the categorical cross-entropy loss
# used by the models in this notebook.
y_cat = to_categorical(y)
y_cat.shape

In [None]:
y_cat

In [None]:
# Hold out 20% of the padded sequences for validation; random_state fixes the
# shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(X_pad, y_cat, test_size=0.20, random_state=42)

In [None]:
print(X_train.shape)
print(y_train.shape)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.math import confusion_matrix

In [None]:
# Recurrent classifier: masked input -> two stacked LSTMs -> dense head with a
# 6-way softmax.
model = Sequential()
# mask_value=-1000. is the value the padded timesteps were filled with when
# the feature sequences were padded.
# The input shape is taken from the training array instead of a hard-coded
# (431, 5), so the model always matches the padded feature length.
model.add(layers.Masking(mask_value=-1000., input_shape=X_train.shape[1:]))
model.add(layers.LSTM(units=5, return_sequences=True, activation='tanh'))
model.add(layers.LSTM(64, return_sequences=False, activation='tanh'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(6, activation="softmax"))

# The compilation
optimiser = Adam(learning_rate=0.01)  # recommended optimizer for RNNs
# `metrics` is given as a list: a bare string is rejected by recent Keras
# releases.
model.compile(loss='categorical_crossentropy',
              optimizer=optimiser,
              metrics=['acc'])

model.summary()

In [None]:
# The fit
# Train for 5 epochs in batches of 32, reporting loss / accuracy on the
# held-out validation split after every epoch.
model.fit(X_train, y_train, validation_data=(X_val,y_val),
         batch_size=32,
         epochs=5, verbose=1)

In [None]:
# Loss and accuracy on the held-out validation split. evaluate() has to be
# given data; calling it with no arguments raises.
model.evaluate(X_val, y_val)

In [None]:
pred = model.predict(X_val)

In [None]:
pred

In [None]:
pred[0].max()

In [None]:
y_val

In [None]:
# confusion_matrix expects 1-D class ids with the true labels first, so both
# the one-hot targets and the predicted probabilities are reduced to their
# arg-max class before the call.
confusion_matrix(y_val.argmax(axis=1), pred.argmax(axis=1))