In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#       print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import Modules

In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings('ignore')

In [None]:
def label_from_filename(filename):
    """Return the lower-cased label encoded in an audio file name.

    The label is the text after the last underscore, cut at the first dot,
    e.g. ``'OAF_back_angry.wav'`` -> ``'angry'``.
    """
    return filename.split('_')[-1].split('.')[0].lower()


# Collect every WAV file under the dataset directory together with the label
# parsed from its name. Both lists are rebuilt from scratch on each run, so
# the cell can be re-executed safely.
paths = []
labels = []
for dirname, _, filenames in os.walk('/kaggle/input/tess-dataset'):
    for filename in filenames:
        # Skip anything that is not a WAV file so a stray non-audio file
        # cannot end up in the dataset and break audio loading later on.
        if not filename.lower().endswith('.wav'):
            continue
        paths.append(os.path.join(dirname, filename))
        labels.append(label_from_filename(filename))
print('Dataset is loaded')
    

In [None]:
# Peek at the first five collected file paths.
paths[:5]

In [None]:
# Peek at the first five parsed labels (same order as `paths`).
labels[:5]

In [None]:
# Build the dataset table: one row per audio file, holding its path
# ('speech') and its label ('label').
df = pd.DataFrame({'speech': paths, 'label': labels})
df.head()

In [None]:
# Number of rows per label, most frequent first.
df['label'].value_counts()

# Exploratory Data Analysis

In [None]:
# Bar chart of how many samples each label has. Title and axis labels make
# the figure readable on its own; plt.show() keeps the Axes repr out of the
# cell output.
ax = sns.countplot(data=df, x='label')
ax.set(title='Samples per label', xlabel='label', ylabel='count')
plt.show()

In [None]:
def waveplot(data, sr, emotion):
    """Plot the waveform of an audio signal against time.

    Parameters
    ----------
    data : array-like
        One-dimensional sequence of audio samples.
    sr : int or float
        Sampling rate of ``data`` in Hz; used to convert sample positions
        to seconds on the x-axis.
    emotion : str
        Text shown as the figure title.
    """
    # Express the x-axis in seconds instead of raw sample indices so plots
    # are comparable regardless of the sampling rate.
    times = np.arange(len(data)) / sr
    plt.figure(figsize=(10, 4))
    plt.title(emotion, size=20)
    plt.plot(times, data)
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.show()

def spectogram(data, sr, emotion):
    """Plot the dB-scaled magnitude spectrogram of an audio signal.

    Parameters
    ----------
    data : np.ndarray
        Audio samples passed to ``librosa.stft``.
    sr : int or float
        Sampling rate in Hz, forwarded to ``librosa.display.specshow`` for
        the time and frequency axes.
    emotion : str
        Text shown as the figure title.
    """
    stft = librosa.stft(data)
    # Magnitude of the complex STFT, converted to decibels for display.
    magnitude_db = librosa.amplitude_to_db(np.abs(stft))
    plt.figure(figsize=(11, 4))
    plt.title(emotion, size=20)
    librosa.display.specshow(magnitude_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()
    # Render immediately, as waveplot does, so the figure appears where the
    # function is called rather than at the end of the cell.
    plt.show()

In [None]:
# Take the first recording labelled 'fear', plot its waveform and
# spectrogram, then embed an audio player for it.
emotion = 'fear'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Take the first recording labelled 'angry', plot its waveform and
# spectrogram, then embed an audio player for it.
emotion = 'angry'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Take the first recording labelled 'disgust', plot its waveform and
# spectrogram, then embed an audio player for it.
emotion = 'disgust'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Take the first recording labelled 'neutral', plot its waveform and
# spectrogram, then embed an audio player for it.
emotion = 'neutral'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Take the first recording labelled 'sad', plot its waveform and
# spectrogram, then embed an audio player for it.
emotion = 'sad'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Take the first recording labelled 'ps', plot its waveform and
# spectrogram, then embed an audio player for it.
# NOTE(review): 'ps' presumably stands for "pleasant surprise" — confirm
# against the dataset description.
emotion = 'ps'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Take the first recording labelled 'happy', plot its waveform and
# spectrogram, then embed an audio player for it.
emotion = 'happy'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

# Feature Extraction

In [None]:
# Window functions live in scipy.signal.windows; importing them from the
# top-level scipy.signal namespace fails on recent SciPy releases.
from scipy.signal.windows import hamming

In [None]:
import librosa
import numpy as np

def extract_mfcc_stft(file_path, n_mfcc=40, duration=3):
    """Return the time-averaged MFCC vector of an audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file, passed to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to keep (default 40).
    duration : float, optional
        Maximum number of seconds to read from the file (default 3).

    Returns
    -------
    np.ndarray
        One-dimensional array of length ``n_mfcc``: each coefficient
        averaged over all time frames.
    """
    y, sr = librosa.load(file_path, duration=duration)

    # Power spectrogram from the short-time Fourier transform.
    power_spec = np.abs(librosa.stft(y)) ** 2

    # librosa.feature.mfcc expects `S` to be a log-power *Mel* spectrogram.
    # Feeding it the log of the linear-frequency STFT magnitude would yield a
    # plain cepstrum rather than MFCCs, so project onto Mel bands and convert
    # to decibels first.
    mel_spec = librosa.feature.melspectrogram(S=power_spec, sr=sr)
    log_mel = librosa.power_to_db(mel_spec)

    mfcc = librosa.feature.mfcc(S=log_mel, sr=sr, n_mfcc=n_mfcc)

    # Collapse the time axis so every file yields a fixed-length vector.
    return np.mean(mfcc, axis=1)

In [None]:
# Sanity check: extract the feature vector for the row labelled 0.
extract_mfcc_stft(df.loc[0, 'speech'])

In [None]:
# Extract one feature vector per audio file (reads every file, so this is
# the slow step of the notebook).
X_mfcc = df['speech'].apply(extract_mfcc_stft)

In [None]:
# Display the Series of per-file feature vectors.
X_mfcc

In [None]:
# Stack the per-file feature vectors into a single NumPy array
# (one row per file).
X = np.array(X_mfcc.tolist())
X.shape

## input split


In [None]:
# Add a trailing channel axis so each sample is (features, 1), as Conv1D
# expects. Reshaping to an explicit (samples, features, 1) layout keeps the
# cell idempotent: re-running it does not keep appending axes the way
# np.expand_dims(X, -1) would.
X = X.reshape(X.shape[0], X.shape[1], 1)
X.shape

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the label column. The fitted encoder stays available as
# `enc`; `y` holds the encoded labels as returned by the encoder.
enc = OneHotEncoder()
enc.fit(df[['label']])
y = enc.transform(df[['label']])

In [None]:
# Dense one-hot label matrix. It is derived from the fitted encoder rather
# than from `y` itself, so the cell can be re-run: `y = y.toarray()` fails the
# second time because an ndarray has no `toarray` method.
y = enc.transform(df[['label']]).toarray()

In [None]:
# Shape of the label matrix.
y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples; the fixed random_state makes the split
# repeatable between runs.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split


In [None]:
# All layers come from the same `keras` package; mixing `keras` and
# `tensorflow.keras` imports can pull in two different Keras builds.
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Dense
from keras.layers import GlobalAveragePooling1D
from keras.layers import Dropout
from keras import regularizers
from keras.utils import set_random_seed

# Seed Python, NumPy and the backend so weight initialisation, dropout and
# batch shuffling are repeatable across "Restart & Run All".
set_random_seed(42)

EPOCHS = 100
BATCH_SIZE = 32

# Small 1-D CNN: two convolutions with heavy dropout, global average pooling,
# then a softmax over the label classes.
model = Sequential()
model.add(Conv1D(16, 3, padding='same', activation='relu'))
model.add(Dropout(0.5))
# NOTE(review): this convolution has no activation (linear output) — confirm
# that is intentional.
model.add(Conv1D(32, 3, kernel_regularizer=regularizers.l2(0.01)))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.5))
model.add(GlobalAveragePooling1D())
# One output unit per one-hot column instead of a hard-coded class count.
model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# NOTE(review): the held-out test split doubles as validation data here, so
# the per-epoch validation curve is not an independent estimate.
history = model.fit(X_train, y_train,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    validation_data=(X_test, y_test))

# Accuracy reported by Keras for the final training epoch.
train_acc = history.history['accuracy'][-1]
print('Train accuracy:', train_acc)

In [None]:
import sklearn.metrics as metrics

# Loss and accuracy as computed by Keras on the training split.
train_loss, train_acc = model.evaluate(X_train, y_train)

# Class indices: predicted (arg-max of the model output) and true
# (arg-max of the one-hot rows).
y_train_pred = np.argmax(model.predict(X_train), axis=1)
y_train_true = np.argmax(y_train, axis=-1)

# Support-weighted averages over all classes.
train_precision = metrics.precision_score(y_train_true, y_train_pred, average='weighted')
train_recall = metrics.recall_score(y_train_true, y_train_pred, average='weighted')
train_f1 = metrics.f1_score(y_train_true, y_train_pred, average='weighted')

# Report
print("Training Accuracy: {:.4f}".format(train_acc))
print("Training Precision: {:.4f}".format(train_precision))
print("Training Recall: {:.4f}".format(train_recall))
print("Training F1-score: {:.4f}".format(train_f1))
print("Training Confusion Matrix: ")
print(metrics.confusion_matrix(y_train_true, y_train_pred))

In [None]:
import sklearn.metrics as metrics

# Loss and accuracy as computed by Keras on the held-out split.
test_loss, test_acc = model.evaluate(X_test, y_test)

# Class indices: predicted (arg-max of the model output) and true
# (arg-max of the one-hot rows).
y_test_pred = np.argmax(model.predict(X_test), axis=1)
y_test_true = np.argmax(y_test, axis=-1)

# Support-weighted averages over all classes.
test_precision = metrics.precision_score(y_test_true, y_test_pred, average='weighted')
test_recall = metrics.recall_score(y_test_true, y_test_pred, average='weighted')
test_f1 = metrics.f1_score(y_test_true, y_test_pred, average='weighted')

# Report
print("Test Accuracy: {:.4f}".format(test_acc))
print("Test Precision: {:.4f}".format(test_precision))
print("Test Recall: {:.4f}".format(test_recall))
print("Test F1-score: {:.4f}".format(test_f1))
print("Test Confusion Matrix: ")
print(metrics.confusion_matrix(y_test_true, y_test_pred))