In [41]:
import numpy as np
import string
import nltk
import typing

from tqdm.notebook import tqdm
from tensorflow.keras.preprocessing.text import Tokenizer, tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Embedding, LSTM, Dense, SpatialDropout1D, Conv1D, MaxPool1D, SimpleRNN, Bidirectional, Reshape, Conv2D, MaxPooling2D, TimeDistributed
from gensim.models import Word2Vec
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers

In [37]:
from gensim.models import KeyedVectors
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk
import json

In [55]:
# Sequence length fed to the networks (used for padding and as the model input
# length) and the width of one embedding vector.
# NOTE(review): both presumably have to match the values used when the saved
# tokenizer / embedding / weights were produced -- confirm against training.
max_len = 200
embedding_dim = 200

# Loading pretrained tokenizer
with open('tokenizer.json') as f:
    data = json.load(f)
# tokenizer_from_json expects JSON *text*. The file holds either a JSON-encoded
# string (written with json.dump(tokenizer.to_json(), f)) or the raw JSON object
# (written with f.write(tokenizer.to_json())); re-serialise in the latter case
# so both save conventions load.
tokenizer = tokenizer_from_json(data if isinstance(data, str) else json.dumps(data))
word_index = tokenizer.word_index

# Loading embedding matrix
embedding_matrix = np.load('embedding.in.npy')

# Fail fast if the artifacts disagree: the Embedding layers built later are
# declared with input_dim=len(word_index) + 1 and output_dim=embedding_dim and
# are initialised from this matrix.
expected_shape = (len(word_index) + 1, embedding_dim)
if embedding_matrix.shape != expected_shape:
    raise ValueError(
        f'embedding.in.npy has shape {embedding_matrix.shape}, '
        f'expected {expected_shape} to match tokenizer.json and embedding_dim'
    )

In [126]:
# Initializing model
def _build_cnn_lstm(output_units, output_activation):
    """Return an uncompiled Embedding -> 2x(Conv1D, MaxPool1D, Dropout) -> LSTM -> Dense network.

    The classifier and the regressor share every layer except the output one,
    so the architecture is defined once here.

    Parameters
    ----------
    output_units : int
        Number of units in the final Dense layer.
    output_activation : str
        Activation of the final Dense layer.

    Returns
    -------
    Sequential
        The assembled, not yet compiled, model. The embedding layer is
        initialised from ``embedding_matrix`` and is not trainable.
    """
    def conv_stage(filters):
        # One convolutional stage: same-padded conv, pooling with stride 2,
        # then dropout with a fixed seed.
        return [
            Conv1D(filters=filters, kernel_size=3, activation="relu", padding='same',
                   kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4),
                   bias_regularizer=regularizers.L2(1e-4)),
            MaxPool1D(strides=2),
            Dropout(rate=0.2, seed=42),
        ]

    return Sequential([
        Embedding(input_dim=len(word_index) + 1,
                  output_dim=embedding_dim,
                  weights=[embedding_matrix],
                  input_length=max_len,
                  trainable=False),
        *conv_stage(20),
        *conv_stage(30),
        LSTM(40, dropout=0.2, recurrent_dropout=0.2,
             kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4),
             bias_regularizer=regularizers.L2(1e-4)),
        Dense(output_units, activation=output_activation),
    ])


# Classifier: 5-way softmax over the label classes.
model = _build_cnn_lstm(5, 'softmax')
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Build with max_len (not a literal) so the input length follows the config value.
model.build(input_shape=(None, max_len))
model.summary()

# Regressor: same body with a single linear output.
regr_model = _build_cnn_lstm(1, 'linear')
regr_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
regr_model.build(input_shape=(None, max_len))
regr_model.summary()

In [127]:
# Restore the trained parameters of both networks from their weight files
# (classifier first, then regressor).
for net, weights_path in (
    (model, 'cnn_lstm.weights.h5'),
    (regr_model, 'cnn_lstm_regr.weights.h5'),
):
    net.load_weights(weights_path)

In [130]:
def main(text=None):
    """Print the label predicted for one text by the classifier and by the regressor.

    Parameters
    ----------
    text : str, optional
        Text to score. When omitted, the text is read interactively with
        ``input`` (the original behaviour).

    Returns
    -------
    None
        Results are printed: one line for the classification model and one for
        the regression model, both as 1-based labels in the range 1..5.
    """
    if text is None:
        text = input("Input your text: ")

    text_seq = tokenizer.texts_to_sequences([text])
    if not text_seq[0]:
        # No token of the text maps to a vocabulary index (e.g. an empty string).
        # Padding would produce an all-zero row, and a "prediction" for it would
        # say nothing about the text, so report that instead.
        print('No known words in the input text; nothing to predict.')
        return
    text_pad = pad_sequences(text_seq, maxlen=max_len)

    # A single row is predicted, so argmax over the whole array is the class
    # index; +1 turns the 0-based index into a 1-based label.
    class_label = model.predict(text_pad, verbose=0).argmax() + 1

    # Round the regression output to the nearest class index, clamp it to the
    # valid index range 0..4, then shift to a 1-based label like above.
    regr_raw = regr_model.predict(text_pad, verbose=0)[0][0]
    regr_label = max(0, min(4, int(np.round(regr_raw)))) + 1

    print(f'Predicted label (classification): {class_label}')
    print(f'Predicted label (regression): {regr_label}')

In [133]:
# Run the demo once: asks for a text on stdin and prints both predicted labels.
main()

Input your text: I bought mine used but payed for the premium and its still came with a few scratches but still a good phone!
Predicted label (classification): 4
Predicted label (regression): 3
