**TODO:** Explain where the audio and text features in `audio_features.csv` and `text_features.csv` came from (source files and how they were extracted).
 

In [None]:
import pandas as pd
import numpy as np
import pickle

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Embedding, GRU, Bidirectional, concatenate, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score

# Read the pre-extracted feature tables from the working directory
audio_features = pd.read_csv('audio_features.csv')
text_features = pd.read_csv('text_features.csv')

# Put the two tables side by side (aligned on the row index), then keep only
# the first occurrence of every repeated column name -- per the original note
# this removes the duplicated labels column.
combined_features = (
    pd.concat([audio_features, text_features], axis=1)
    .loc[:, lambda frame: ~frame.columns.duplicated()]
)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# Everything except 'label' is treated as model input.
X_train, X_test, y_train, y_test = train_test_split(
    combined_features.drop(columns='label'),
    combined_features['label'],
    test_size=0.2,
    random_state=42,
)


GRU text model and dense audio model

In [None]:
# Train two independent classifiers on the train/test split built earlier in
# the notebook (X_train, X_test, y_train, y_test):
#   * audio_model -- a small dense network fed every column except 'transcription'
#   * text_model  -- a stacked bidirectional GRU fed the tokenized transcriptions
# Each model is compiled and fitted exactly once: calling compile() again
# creates a fresh optimizer, and a second fit() would silently add more epochs.

# Shared hyper-parameters (kept in one place instead of repeated literals)
NUM_CLASSES = 6        # width of the softmax layers and of the one-hot labels
EPOCHS = 25
BATCH_SIZE = 8
LEARNING_RATE = 0.0001

# One-hot encode the labels once and reuse them for both models.
# assumes labels are integer-encoded in 0..NUM_CLASSES-1 -- TODO confirm
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

# Tokenize the text data (vocabulary is fitted on the training split only)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train['transcription'])
vocab_size = len(tokenizer.word_index) + 1  # +1: index 0 is reserved for padding

# Convert text data to fixed-length integer sequences
maxlen = 200
x_train_text_seq = tokenizer.texts_to_sequences(X_train['transcription'])
x_train_text_seq = pad_sequences(x_train_text_seq, padding='post', maxlen=maxlen)
x_test_text_seq = tokenizer.texts_to_sequences(X_test['transcription'])
x_test_text_seq = pad_sequences(x_test_text_seq, padding='post', maxlen=maxlen)

# Audio inputs: everything in the feature frame except the raw transcription.
# presumably all remaining columns are numeric audio features -- verify
x_train_audio = X_train.drop('transcription', axis=1)
x_test_audio = X_test.drop('transcription', axis=1)

# Define the audio model architecture; the input width is taken from the frame
# that is actually fed to fit(), so it cannot drift from a hand-computed count.
audio_model = Sequential()
audio_model.add(Input(shape=(x_train_audio.shape[1],)))
audio_model.add(Dense(64, activation='relu'))
audio_model.add(Dense(32, activation='relu'))
audio_model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile the audio model
audio_model.compile(loss='categorical_crossentropy',
                    optimizer=Adam(learning_rate=LEARNING_RATE),
                    metrics=['accuracy'])

# Train the audio model
# NOTE(review): the test split doubles as validation data for both models, so
# it should not also be used for model selection.
audio_history = audio_model.fit(x_train_audio, y_train_onehot,
                                epochs=EPOCHS, batch_size=BATCH_SIZE,
                                validation_data=(x_test_audio, y_test_onehot))

# Define the text model architecture
text_input = Input(shape=(maxlen,))
# The sequence length is already fixed by the Input layer, so the deprecated
# `input_length` argument is not passed to Embedding.
embedding_layer = Embedding(vocab_size, 100)(text_input)
text_gru_1 = Bidirectional(GRU(64, return_sequences=True))(embedding_layer)
text_drop_1 = Dropout(0.5)(text_gru_1)
text_gru_2 = Bidirectional(GRU(32))(text_drop_1)
text_drop_2 = Dropout(0.5)(text_gru_2)
text_dense = Dense(64, activation='relu')(text_drop_2)
text_output = Dense(NUM_CLASSES, activation='softmax')(text_dense)
text_model = Model(inputs=text_input, outputs=text_output)

# Compile the text model
text_model.compile(loss='categorical_crossentropy',
                   optimizer=Adam(learning_rate=LEARNING_RATE),
                   metrics=['accuracy'])

# Train the text model (single fit, with validation metrics kept in `history`)
history = text_model.fit(x_train_text_seq, y_train_onehot,
                         epochs=EPOCHS, batch_size=BATCH_SIZE,
                         validation_data=(x_test_text_seq, y_test_onehot))

# Evaluate the text model performance
_, train_acc = text_model.evaluate(x_train_text_seq, y_train_onehot, verbose=0)
_, test_acc = text_model.evaluate(x_test_text_seq, y_test_onehot, verbose=0)
print('Train Accuracy: %.3f, Test Accuracy: %.3f' % (train_acc, test_acc))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Train Accuracy: 0.8215  Test Accuracy: 0.7413
