# 1. Import Packages and Libraries

In [288]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import Embedding
import keras.backend as K
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

from sklearn.feature_extraction.text import CountVectorizer

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix

import scipy
import pandas as pd
import numpy as np
import gensim

import nltk
from nltk.data import find
import matplotlib.pyplot as plt
import shap

import pickle
import random
import multiprocessing

# 2. Read in Dataset + Create Train/Test Set

In [218]:
sample_dataset = pd.read_csv('Language_Detection/Train_Test_Data/train.csv')[['Lyric','language label']].iloc[:5000]
print('Before Remapping')
display(sample_dataset['language label'].value_counts())
print('After Remapping')
sample_dataset['language label'] = sample_dataset['language label'].apply(lambda x: x if x in ['English','Portuguese','Spanish'] else 'Other')
display(sample_dataset['language label'].value_counts())
train_set = sample_dataset.iloc[:4000]
val_set = sample_dataset.iloc[4000:4500]
test_set = sample_dataset.iloc[4500:]

Before Remapping


English        2693
Portuguese     2073
Spanish         148
Italian          25
Kinyarwanda      22
Other            14
German           14
French           11
Name: language label, dtype: int64

After Remapping


English       2693
Portuguese    2073
Spanish        148
Other           86
Name: language label, dtype: int64

# 3. Resample (Oversample on Minority Classes) Training Set to Deal with Class Imbalance

In [219]:
random.seed(50)
max_class_counts = train_set['language label'].value_counts().iloc[0]
resampled_train_set = pd.DataFrame()
for lang in train_set['language label'].unique():
    subset = train_set[train_set['language label'] == lang].copy()
    if len(subset) == max_class_counts:
        resampled_train_set = pd.concat([resampled_train_set,subset],ignore_index=True)
    else:
        added_subset = subset.iloc[random.choices(np.arange(0,len(subset)),k=max_class_counts - len(subset))]
        resampled_train_set = pd.concat([resampled_train_set,subset,added_subset],ignore_index=True)
        
display(resampled_train_set)

Unnamed: 0,Lyric,language label
0,Uma menina me ensinou\nQuase tudo que eu sei\n...,Portuguese
1,Foram lá fora buscar\nComo atração singular\nD...,Portuguese
2,"Sei que vou morrer,\nNão sei o dia,\nLevarei s...",Portuguese
3,Facção central\nO poder que eu não quero\n\n\n...,Portuguese
4,Ele não me esquece / Sabe o meu nome!\nEle é o...,Portuguese
...,...,...
8651,"La lluvia de tu corazón, desaparecerá,\nla llu...",Spanish
8652,"Hoy, lo pude ver\nmurió este amor, lo vi en tu...",Spanish
8653,"Comenzó la fiesta\nTe enciende, te enciende\nT...",Spanish
8654,"Pon tu lengua bajo la mía,\nEl silencio dice m...",Spanish


# 4. Term Density Transformation of Text Data

In [220]:
resampled_train_set['language label'].value_counts()

Portuguese    2164
English       2164
Other         2164
Spanish       2164
Name: language label, dtype: int64

In [221]:
def preprocess_text(text):
    text = text.lower()
    text = text.replace('\n', ' ')
    text = text.replace('  ',' ')
    return text

vectorizer = CountVectorizer(preprocessor=preprocess_text)

In [222]:
#Vectorize Train Lyrics
train_lyrics = vectorizer.fit_transform(resampled_train_set['Lyric'])
train_lyrics = pd.DataFrame(train_lyrics.todense(),columns = vectorizer.get_feature_names())
train_lyrics_token_count = train_lyrics.sum(axis=1)
train_lyrics = train_lyrics/np.array(train_lyrics_token_count.repeat(len(train_lyrics.columns))).reshape(train_lyrics.shape)

#Vectorize Val Lyrics
val_lyrics = vectorizer.transform(val_set['Lyric'])
val_lyrics = pd.DataFrame(val_lyrics.todense(),columns = vectorizer.get_feature_names())
val_lyrics_token_count = val_lyrics.sum(axis=1)
val_lyrics = val_lyrics/np.array(val_lyrics_token_count.repeat(len(val_lyrics.columns))).reshape(val_lyrics.shape)

#Vectorize Test Lyrics
test_lyrics = vectorizer.transform(test_set['Lyric'])
test_lyrics = pd.DataFrame(test_lyrics.todense(),columns = vectorizer.get_feature_names())
test_lyrics_token_count = test_lyrics.sum(axis=1)
test_lyrics = test_lyrics/np.array(test_lyrics_token_count.repeat(len(test_lyrics.columns))).reshape(test_lyrics.shape)

train_labels = resampled_train_set['language label']
val_labels = val_set['language label']
test_labels = test_set['language label']

In [223]:
test_labels.value_counts()

English       272
Portuguese    211
Spanish        13
Other           4
Name: language label, dtype: int64

#### Mapping to map text labels to numeric labels

In [224]:
mapping = {}
count = 0
for label in train_labels.unique():
    mapping[label] = count
    count = count + 1

# 5. Quick Evaluation of Classical ML Models

In [455]:
def optimal_model_id(xtrain,xval,xtest,ytrain,yval,ytest,estimator,param_grid,metric='accuracy'):
    
    #Concatenate training and validation data
    train_val_feats = pd.concat([xtrain,xval],ignore_index=True)
    train_val_labels = pd.concat([ytrain,yval],ignore_index=True)
    #Instantiate Grid Search with model and param grid to ID which hyperparameter combo enables the model to generalize
    #best on the validation set
    grid = GridSearchCV(estimator = estimator, param_grid= param_grid,
                        scoring=metric,cv=[(np.arange(0,len(xtrain)),np.arange(len(xtrain),len(train_val_feats)))])

    grid.fit(train_val_feats,train_val_labels.map(mapping))
    
    #Store Best Performing Model Output
    best_estimator = grid.best_estimator_
    best_val_score = grid.best_score_
    
    #Predictions on test set with optimal model
    test_preds = best_estimator.predict(xtest)
    #performance on test set
    oos_score = accuracy_score(ytest.map(mapping),test_preds)
    label_options = list(ytest.unique())
    
    #Confustion matrix of true for predicted values on the test set
    confuse = pd.DataFrame(confusion_matrix(ytest.map(mapping),test_preds),index = label_options,columns = label_options)
    
    #return optimal model results
    return {'best_estimator':best_estimator,
           'best_val_score':best_val_score,
           'best_test_score':oos_score,
           'metric':metric,
           'test_set_confusion_matrix':confuse}

#### KNN Test

In [226]:
test = optimal_model_id(train_lyrics,val_lyrics,test_lyrics,train_labels,val_labels,test_labels,
                KNeighborsClassifier(),{'n_neighbors':[1,3,5,7,9]},'accuracy')
display(test)
display(test['test_set_confusion_matrix'])

{'best_estimator': KNeighborsClassifier(),
 'best_val_score': 0.988,
 'best_test_score': 0.996,
 'metric': 'accuracy',
 'test_set_confusion_matrix':             Portuguese  English  Spanish  Other
 Portuguese         211        0        0      0
 English              1      271        0      0
 Spanish              0        0        4      0
 Other                0        1        0     12}

Unnamed: 0,Portuguese,English,Spanish,Other
Portuguese,211,0,0,0
English,1,271,0,0
Spanish,0,0,4,0
Other,0,1,0,12


#### XGBoost Classifier Test

In [227]:
test = optimal_model_id(train_lyrics,val_lyrics,test_lyrics,train_labels,val_labels,test_labels,
                XGBClassifier(),{'max_depth':[2,3,4],'max_features':['auto'],'n_estimators':[10]},'accuracy')
display(test)
display(test['test_set_confusion_matrix'])

Parameters: { "max_features" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "max_features" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "max_features" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "max_features" } might not be

{'best_estimator': XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
               colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
               early_stopping_rounds=None, enable_categorical=False,
               eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
               importance_type=None, interaction_constraints='',
               learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
               max_delta_step=0, max_depth=4, max_features='auto', max_leaves=0,
               min_child_weight=1, missing=nan, monotone_constraints='()',
               n_estimators=10, n_jobs=0, num_parallel_tree=1,
               objective='multi:softprob', predictor='auto', random_state=0, ...),
 'best_val_score': 0.988,
 'best_test_score': 0.974,
 'metric': 'accuracy',
 'test_set_confusion_matrix':             Portuguese  English  Spanish  Other
 Portuguese         206        0        5      0
 English              0      266        5      1

Unnamed: 0,Portuguese,English,Spanish,Other
Portuguese,206,0,5,0
English,0,266,5,1
Spanish,0,0,4,0
Other,0,1,1,11


# 6. Basic Feedforward NN w/ Keras Sequential API

#### Input goes sequentially from one hidden layer to the next "left to right"

In [456]:
#Define Model Architecture Sequentially
model = keras.Sequential([
    keras.layers.Dense(100,activation='relu'),
    keras.layers.Dense(100,activation='relu'),
    keras.layers.Dense(4,activation='softmax')
])

#Compile the model, specifying loss function, optimizer, and performance metric
model.compile(loss = keras.losses.SparseCategoricalCrossentropy(),
             optimizer = keras.optimizers.Adam(learning_rate=0.01),
             metrics=['accuracy'],
             )

#Fit model and validate on val set between epochs, set multiprocessing
model.fit(x = np.array(train_lyrics),y = train_labels.map(mapping),batch_size=8,epochs=2,
         validation_data=(np.array(val_lyrics),val_labels.map(mapping)),
         use_multiprocessing=True,workers=multiprocessing.cpu_count() - 1)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f8fcc621df0>

In [457]:
preds = model.predict(np.array(test_lyrics))



In [458]:
accuracy_score(test_labels.map(mapping),[x.argmax() for x in preds])

0.996

In [459]:
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_27 (Dense)            (None, 100)               3958200   
                                                                 
 dense_28 (Dense)            (None, 100)               10100     
                                                                 
 dense_29 (Dense)            (None, 4)                 404       
                                                                 
Total params: 3,968,704
Trainable params: 3,968,704
Non-trainable params: 0
_________________________________________________________________


# 7. Word Embedding Based Models That Build Vector Representation of Input, Captures General Meaning Before Pass into Feed Forward NN

#### Build Embedding Matrix

In [460]:
word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)

#construct embedding matrix w/ prebuilt embedding
vocab_dict = model.key_to_index.copy()
embedding_matrix = np.zeros((43982,300))
for word,index in model.key_to_index.items():
    embedding_matrix[index] = model[word]

#Construct custom embedding matrix for this task
vocab_dict_custom = {}
count = 0
for word in vectorizer.get_feature_names():
    vocab_dict_custom[word] = count
    count = count + 1
embedding_matrix_custom = np.random.random((len(vectorizer.get_feature_names()) + 1,300))
embedding_matrix_custom[-1] = 0

#### Map tokens in train, val, test set to row in embedding matrices for both word2vec and custom embedding matrix

In [461]:
def text_to_index(text_data,mapping,max_size):
    return_data = []
    for text in text_data:
        new_text = text.lower()
        new_text = text.replace('\n',' ')
        new_text = text.replace('  ',' ')
        new_text = new_text.split()
        mapped_text = []
        for token in new_text:
            try:
                mapped_text.append(mapping[token])
            except:
                mapped_text.append(len(mapping))
        
        if len(mapped_text) > max_size:
            mapped_text = mapped_text[:max_size]
        else:
            while len(mapped_text) < max_size:
                mapped_text.append(len(mapping))
                
        return_data.append(mapped_text)
    
    return return_data

In [462]:
train_tokens_prebuilt = text_to_index(resampled_train_set['Lyric'],vocab_dict,1000)
train_tokens_custom = text_to_index(resampled_train_set['Lyric'],vocab_dict_custom,1000)

val_tokens_prebuilt = text_to_index(val_set['Lyric'],vocab_dict,1000)
val_tokens_custom = text_to_index(val_set['Lyric'],vocab_dict_custom,1000)

test_tokens_prebuilt = text_to_index(test_set['Lyric'],vocab_dict,1000)
test_tokens_custom = text_to_index(test_set['Lyric'],vocab_dict_custom,1000)

### Deep Averaging Network (DAN) w/ Functional Keras API and Custom Embedding Matrix

In [478]:
def create_dan_model(retrain_embeddings=False, 
                     max_sequence_length=1000,
                     embedding_matrix=embedding_matrix_custom, 
                     hidden_dim=[100,100,100],
                     dropout_rate=0.3,
                     hidden_layer_activation = 'relu',
                     output_layer_size = 4,
                     output_activation = 'softmax',
                     learning_rate=0.001):
    """
    Construct the DAN model including the compilation and return it. Parametrize it using the arguments.
    retrain_embeddings: bool, indicates whether embeddings are retrainable
    max_sequence_length: Number of token IDs to expect in a given input
    embedding_matrix: initialize embedding layer with embedding matrix, specifying weights
    hidden_dim = number of neurons in hidden layers
    dropout = dropout rate
    output_layer_size = # of neurons in output layer corresponding to # of classes, each neuron predicts P(class K | x)
    output_activation = activation function for output layer
    learning_rate = learning rate for gradient descent for finding model params to optimize loss
    """
    
    #Specify Embedding Layer, including shape, intialize with weights, expected input length, and whether it is trainable
    dan_embedding_layer = Embedding(embedding_matrix.shape[0],
                                  embedding_matrix.shape[1],
                                  weights = [embedding_matrix],
                                  input_length=max_sequence_length,
                                  trainable=retrain_embeddings,
                                   name = 'embedding_layer')
    
    
    #Input Layer, sequence of max_sequence_length tokens
    dan_input_layer = tf.keras.layers.Input(shape=(max_sequence_length,), dtype='int64',name='input')
    #Inputs go into embedding layer, form max_sequence_length x embedding dim matrix
    dan_embeddings = dan_embedding_layer(dan_input_layer)
    #Embeddings are averaged, forming single vector represenation of size embedding matrix
    dan_avg_input_embeddings = tf.keras.layers.Lambda(lambda x: K.mean(x, axis=1), name='averaging')(dan_embeddings)
    
    #input into hidden layers
    x = dan_avg_input_embeddings #hidden layer initial input
    count = 1
    for layer in hidden_dim:
        hidden = tf.keras.layers.Dense(layer,activation = hidden_layer_activation,name='hidden_' + str(count))(x)
        dropout = tf.keras.layers.Dropout(dropout_rate,name='dropout_' + str(count))(hidden)
        count = count + 1
        x = dropout
        
    #dan_hidden_out_1 = tf.keras.layers.Dense(hidden_dim, activation='relu', name='hidden_1')(dan_avg_input_embeddings)
    #dan_hidden_out_1 = tf.keras.layers.Dropout(dropout)(dan_hidden_out_1)
    dan_classification = tf.keras.layers.Dense(output_layer_size, activation='softmax', name='dan_classification')(x)
    dan_model = tf.keras.models.Model(inputs=dan_input_layer, outputs=[dan_classification])
    dan_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                                beta_1=0.9,
                                                beta_2=0.999,
                                                epsilon=1e-07,
                                                amsgrad=False,
                                                name='Adam'),
                 metrics='accuracy')
    
    print(dan_model.summary())

    return dan_model

In [479]:
dan_model_sorted = create_dan_model(retrain_embeddings=True,embedding_matrix=embedding_matrix_custom)
dan_sorted_history = dan_model_sorted.fit(np.array(train_tokens_custom),
                        np.array(train_labels.map(mapping)),
                        validation_data=(np.array(val_tokens_custom), np.array(val_labels.map(mapping))),
                        batch_size=8,
                        epochs=2,
                        shuffle=True,
                        use_multiprocessing=True,workers=multiprocessing.cpu_count() - 1)

Model: "model_39"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 1000)]            0         
                                                                 
 embedding_layer (Embedding)  (None, 1000, 300)        11874600  
                                                                 
 averaging (Lambda)          (None, 300)               0         
                                                                 
 hidden_1 (Dense)            (None, 100)               30100     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 hidden_2 (Dense)            (None, 100)               10100     
                                                                 
 dropout_2 (Dropout)         (None, 100)               0  

In [480]:
accuracy_score(test_labels.map(mapping),[x.argmax() for x in dan_model_sorted.predict(test_tokens_custom)])



0.97

In [481]:
dan_model_sorted.weights[0].shape

TensorShape([39582, 300])

### Weighted Attention Network (WAN) with Custom Embeddings, allows for computation of multiple attention based representations of input before a final attention layer learns how to balance attention vectors from prior layer

In [557]:
def create_wan_model(retrain_embeddings=False, 
                     max_sequence_length=1000,
                     embedding_matrix=embedding_matrix_custom,
                     num_attention = 1,
                     hidden_dim=[100,100,100],
                     dropout_rate=0.3,
                     hidden_layer_activation = 'relu',
                     output_layer_size = 4,
                     output_activation = 'softmax',
                     learning_rate=0.001):
    """
    Construct the WAN model including the compilation and return it. Parametrize it using the arguments.
    retrain_embeddings: bool, indicates whether embeddings are retrainable
    max_sequence_length: Number of token IDs to expect in a given input
    embedding_matrix: initialize embedding layer with embedding matrix, specifying weights
    num_attention = number of parallel attention computations that learn how to balance embeddings into a single
    vector representation, final attention layer weights prior attention based representations
    hidden_dim = number of neurons in hidden layers
    dropout = dropout rate
    output_layer_size = # of neurons in output layer corresponding to # of classes, each neuron predicts P(class K | x)
    output_activation = activation function for output layer
    learning_rate = learning rate for gradient descent for finding model params to optimize loss
    """
    
    #Specify Embedding Layer, including shape, intialize with weights, expected input length, and whether it is trainable
    wan_embedding_layer = Embedding(embedding_matrix.shape[0],
                                  embedding_matrix.shape[1],
                                  weights = [embedding_matrix],
                                  input_length=max_sequence_length,
                                  trainable=retrain_embeddings,
                                   name = 'embedding_layer')
    
    
    #Input Layer, sequence of max_sequence_length tokens
    wan_input_layer = tf.keras.layers.Input(shape=(max_sequence_length,), dtype='int64',name='input')
    #Inputs go into embedding layer, form max_sequence_length x embedding dim matrix
    wan_embeddings = wan_embedding_layer(wan_input_layer)
    
    #Create attention based single vector representations of words according to alternative query vectors
    attention_embeddings = []
    for num in range(num_attention):
        #Apply Query Vector to words in embeddings, returning a max_sequence_length x 1 tensor
        l1_query = tf.keras.layers.Dense(1,activation='linear',use_bias=False,name='attention_query' + str(num+1))(wan_embeddings)
        #reshape to 1 x max_sequence_length
        l1_reshape_query = tf.keras.layers.Reshape((1,max_sequence_length))(l1_query)
        #Softmax over query * key (words) to obtain weights
        l1_weights = tf.keras.layers.Lambda(lambda x:tf.keras.activations.softmax(x),
                                            name='attention_weights' + str(num+1))(l1_reshape_query)
        #weight embeddings according to weights
        l1_attention = tf.keras.layers.Flatten()(tf.keras.layers.Dot((1,2))((wan_embeddings,l1_weights)))
        attention_embeddings.append(l1_attention)
    
    concat_attention = tf.keras.layers.Concatenate()(attention_embeddings)
    concat_attention = tf.keras.layers.Reshape((num_attention,embedding_matrix.shape[1]))(concat_attention)
    
    #Apply Query Vector to attention based representations, returning a num_attention x 1 tensor
    wan_query = tf.keras.layers.Dense(1,activation='linear',use_bias=False,name='attention_query')(concat_attention)
    #reshape to 1 x num_attention
    reshaped_query = tf.keras.layers.Reshape((1,num_attention))(wan_query)
    #Softmax over query * key (words) to obtain weights
    wan_weights = tf.keras.layers.Lambda(lambda x:tf.keras.activations.softmax(x),
                                        name='attention_weights')(reshaped_query)
    #weight attention embeddings according to weights, learning how to balance attention based vector representations 
    #from prior layer
    wan_attention = tf.keras.layers.Flatten()(tf.keras.layers.Dot((1,2))((concat_attention,wan_weights)))
    
    #input into hidden layers
    x = wan_attention #hidden layer initial input
    count = 1
    for layer in hidden_dim:
        hidden = tf.keras.layers.Dense(layer,activation = hidden_layer_activation,name='hidden_' + str(count))(x)
        dropout = tf.keras.layers.Dropout(dropout_rate,name='dropout_' + str(count))(hidden)
        count = count + 1
        x = dropout
        
    #wan_hidden_out_1 = tf.keras.layers.Dense(hidden_dim, activation='relu', name='hidden_1')(wan_avg_input_embeddings)
    #wan_hidden_out_1 = tf.keras.layers.Dropout(dropout)(wan_hidden_out_1)
    wan_classification = tf.keras.layers.Dense(output_layer_size, activation='softmax', name='wan_classification')(x)
    wan_model = tf.keras.models.Model(inputs=wan_input_layer, outputs=[wan_classification])
    wan_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                                beta_1=0.9,
                                                beta_2=0.999,
                                                epsilon=1e-07,
                                                amsgrad=False,
                                                name='Adam'),
                 metrics='accuracy')
    
    print(wan_model.summary())

    return wan_model


In [556]:
wan_model_sorted = create_wan_model(retrain_embeddings=True,embedding_matrix=embedding_matrix_custom,
                                   num_attention=5)
wan_sorted_history = wan_model_sorted.fit(np.array(train_tokens_custom),
                        np.array(train_labels.map(mapping)),
                        validation_data=(np.array(val_tokens_custom), np.array(val_labels.map(mapping))),
                        batch_size=8,
                        epochs=2,
                        shuffle=True)

Model: "model_64"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 1000)]       0           []                               
                                                                                                  
 embedding_layer (Embedding)    (None, 1000, 300)    11874600    ['input[0][0]']                  
                                                                                                  
 attention_query1 (Dense)       (None, 1000, 1)      300         ['embedding_layer[0][0]']        
                                                                                                  
 attention_query2 (Dense)       (None, 1000, 1)      300         ['embedding_layer[0][0]']        
                                                                                           

 dropout_1 (Dropout)            (None, 100)          0           ['hidden_1[0][0]']               
                                                                                                  
 hidden_2 (Dense)               (None, 100)          10100       ['dropout_1[0][0]']              
                                                                                                  
 dropout_2 (Dropout)            (None, 100)          0           ['hidden_2[0][0]']               
                                                                                                  
 hidden_3 (Dense)               (None, 100)          10100       ['dropout_2[0][0]']              
                                                                                                  
 dropout_3 (Dropout)            (None, 100)          0           ['hidden_3[0][0]']               
                                                                                                  
 wan_class

In [559]:
accuracy_score(test_labels.map(mapping),[x.argmax() for x in wan_model_sorted.predict(test_tokens_custom)])



0.98