In [1]:
import pandas as pd
import re
from pathlib import Path
import pyarabic.araby as ar
from tqdm.notebook import tqdm
from sklearn.utils import shuffle
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
import pickle as pkl

In [3]:
from tensorflow.keras.models import load_model

In [4]:
class Config:
    """Namespace of file names and filesystem locations used by this notebook.

    Every path is relative, so it resolves against the working directory
    the notebook is run from.
    """

    # Bare file names of the tweet dataset (raw and processed share a name).
    raw_data_file = 'tweets.csv'
    processed_data_file = 'tweets.csv'

    # Directories.
    RAW_DATA_PATH = Path('../../data/raw')
    PROCESSED_DATA_PATH = Path('../../data/processed')
    LYRICS_MODEL_PATH = Path('../../model/lyrics')
    PLOT_PATH = Path('../../plots')

    # Dataset files, derived from the directories and file names above.
    RAW_DATA_FILE = RAW_DATA_PATH.joinpath(raw_data_file)
    PROCESSED_DATA_FILE = PROCESSED_DATA_PATH.joinpath(processed_data_file)

    # Pickled text-vectorizer state, looked up next to the notebook.
    TEXT_VECTORIZER_PATH = Path('text_vectorizer.pkl')

    # Saved Keras models (.h5) for the two music sub-models.
    AROUSAL_MODEL_PATH = Path('../../model/music/arousal_model.h5')
    VALENCE_MODEL_PATH = Path('../../model/music/valence_model.h5')

    # Destination of the classification-report heatmap image.
    PLOT_HEAT_MAP_FILE = PLOT_PATH.joinpath('heatmap.png')

In [5]:
# Restore the saved arousal model from its .h5 file; the path is passed as a
# plain string, matching how other Config paths are handed to libraries here.
arousal_model = load_model(str(Config.AROUSAL_MODEL_PATH))

In [6]:
# Print the arousal model's layers, output shapes and parameter counts.
arousal_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 6373)]            0         
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 50992     
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 36        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 5         
Total params: 51,033
Trainable params: 51,033
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Restore the saved valence model from its .h5 file; the path is passed as a
# plain string, matching how other Config paths are handed to libraries here.
valence_model = load_model(str(Config.VALENCE_MODEL_PATH))

In [8]:
# Print the valence model's layers, output shapes and parameter counts.
valence_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 6373)]            0         
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 50992     
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 36        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 5         
Total params: 51,033
Trainable params: 51,033
Non-trainable params: 0
_________________________________________________________________


In [9]:
# construct final model

In [10]:
# Number of target classes; passed to create_lyrics_model below, where it
# sizes the final Dense layer and the F1 metric.
class_num = 3

In [11]:
# Input tensor of each music model's first layer (the InputLayer listed in
# the summaries above).
arousal_input, valence_input = (
    net.layers[0].input for net in (arousal_model, valence_model)
)

In [12]:
# Output tensor of each music model's second-to-last layer (dense_2 with 4
# units in the summaries above), i.e. the activations that feed the final
# single-unit layer rather than the final prediction itself.
arousal_output, valence_output = (
    net.layers[-2].output for net in (arousal_model, valence_model)
)

In [13]:
# Read the pickled vectorizer state (a dict with 'config' and 'weights').
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only use a text_vectorizer.pkl that comes from a trusted source.
with Config.TEXT_VECTORIZER_PATH.open('rb') as file:
    from_disk = pkl.load(file)

# Rebuild the TextVectorization layer from its saved config, then restore
# its weights. The file is already closed at this point.
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization.from_config(
    from_disk['config']
)
encoder.set_weights(from_disk['weights'])

In [14]:
def create_lyrics_model(class_num, encoder, vocab_size):
    """Assemble the lyrics text classifier and restore its saved weights.

    The network is a ``tf.keras.Sequential`` stack, in order: the supplied
    ``encoder`` layer, a 64-dimensional embedding with zero-masking, a
    bidirectional LSTM (64 units per direction), a 64-unit ReLU dense layer
    and a final dense layer producing ``class_num`` raw logits.

    Args:
        class_num: Number of target classes; sizes the logits layer and is
            passed to the F1 metric.
        encoder: Layer placed first in the stack (the text-vectorization
            layer in this notebook).
        vocab_size: Vocabulary size; the embedding table is created with
            ``vocab_size + 1`` rows.

    Returns:
        The compiled model with weights loaded from
        ``Config.LYRICS_MODEL_PATH / 'lyrics_model'``.
    """
    keras_layers = tf.keras.layers

    embedding = keras_layers.Embedding(
        input_dim=vocab_size + 1,
        output_dim=64,
        # Masking lets downstream layers ignore padded (zero) positions in
        # variable-length sequences.
        mask_zero=True,
    )
    recurrent = keras_layers.Bidirectional(keras_layers.LSTM(64))
    hidden = keras_layers.Dense(64, activation='relu')
    # No activation: the loss below is configured with from_logits=True.
    logits = keras_layers.Dense(class_num)

    model = tf.keras.Sequential([encoder, embedding, recurrent, hidden, logits])

    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(1e-4),
        metrics=[tfa.metrics.F1Score(class_num)],
    )

    # Restore the previously saved weights into the freshly built stack.
    weights_location = Config.LYRICS_MODEL_PATH / 'lyrics_model'
    model.load_weights(str(weights_location))

    return model

In [15]:
# Build the lyrics classifier around the restored text encoder and load its
# saved weights.
# NOTE(review): vocab_size presumably has to match the vocabulary size the
# saved vectorizer and weights were created with -- confirm.
lyrics_model = create_lyrics_model(class_num, encoder, vocab_size=10000)

In [16]:
def evaluate(model, X=None, y=None):
    """Plot, save and show a model's classification report as a heatmap.

    Predictions and targets are both reduced to class indices with
    ``argmax(axis=1)``, turned into a scikit-learn classification report,
    and rendered as an annotated seaborn heatmap that is written to
    ``Config.PLOT_HEAT_MAP_FILE`` and then displayed.

    Args:
        model: Object exposing ``predict``; its output must support
            ``argmax(axis=1)``.
        X: Inputs to predict on. When omitted, the notebook-level
            ``X_test`` is used (the original behaviour).
        y: Targets supporting ``argmax(axis=1)`` (e.g. one-hot rows). When
            omitted, the notebook-level ``y_test`` is used.

    Returns:
        None. The figure is saved to disk and shown as a side effect.

    Raises:
        NameError: If ``X`` or ``y`` is omitted and the corresponding
            global (``X_test`` / ``y_test``) has not been defined.
    """
    # Fall back to the notebook globals only when the caller did not pass
    # data explicitly, so evaluate(model) keeps working unchanged.
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    Config.PLOT_PATH.mkdir(parents=True, exist_ok=True)

    y_pred = model.predict(X).argmax(axis=1)
    y_true = y.argmax(axis=1)

    report = classification_report(y_true, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()

    # Draw on a dedicated figure/axes instead of whatever figure happens to
    # be current. The former plt.legend([]) call ran before anything was
    # plotted and only added an empty legend, so it is dropped.
    # NOTE(review): the report's 'support' column holds sample counts, so
    # with vmax=1 those cells will likely all saturate -- confirm whether
    # that column should be kept in the heatmap.
    fig, ax = plt.subplots()
    sns.heatmap(report_df, annot=True, vmin=0, vmax=1, linewidths=.5, ax=ax)
    ax.set_title('Classification Report')

    fig.savefig(str(Config.PLOT_HEAT_MAP_FILE))
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)