In [64]:
# import data processing tools
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')
import pandas as pd
# import neural network tools
# tensorflow version: 2.10.1
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
# import mathematical tools
import numpy as np
# import illustration tools
import matplotlib.pyplot as plt
# import other python modules
import random
import os
from itertools import chain
from functools import partial
import ast

[nltk_data] Downloading package punkt to D:\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [65]:
# initial settings
# Seed every random number generator available in this notebook, not only
# TensorFlow: the demo example further down is drawn with the standard
# `random` module, so without `random.seed` a re-run picks a different row.
seed = 100
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [66]:
# define functions
def convolution_predict(token, model, window, mode=0, label_offset=3):
    """Find the sliding window of tokens that best explains the model's prediction.

    The full text (tokens joined by spaces) and every contiguous window of
    ``window`` tokens are scored with ``model.predict``; the window whose
    scores best match the full-text scores (according to ``mode``) is returned.

    Parameters
    ----------
    token : sequence of str
        Tokens of the text. Must not be empty.
    model : object
        Anything exposing ``predict(list_of_str)`` that returns a 2-D array
        with one row of class scores per input string.
    window : int
        Number of tokens per window. Clamped to the range ``[1, len(token)]``
        so that at least one window always exists.
    mode : int, default 0
        0 / 2: window whose scores are closest (L1 distance) to the full-text
               scores (binary label / multi label; computed identically).
        1:     window with the largest dot product with the full-text scores.
        3:     window with the highest score for the class predicted for the
               full text.
    label_offset : int, default 3
        Value subtracted from the predicted class index to obtain the label.

    Returns
    -------
    tuple
        ``(cause, label)``: the selected window as a space-joined string and
        ``argmax(full-text scores) - label_offset``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of 0, 1, 2, 3 or ``token`` is empty.
    """
    # Fail early with a clear message instead of an UnboundLocalError later on.
    if mode not in (0, 1, 2, 3):
        raise ValueError('mode must be 0, 1, 2 or 3, got ' + repr(mode))
    if len(token) == 0:
        raise ValueError('token must contain at least one token')

    # A window longer than the text would yield no windows at all.
    window = max(1, min(window, len(token)))

    text = ' '.join(token)
    text_windows = [' '.join(token[start:start + window])
                    for start in range(len(token) - window + 1)]

    prediction = model.predict([text])[0]
    window_prediction = model.predict(text_windows)

    if mode in (0, 2):
        # smallest L1 distance between window scores and full-text scores
        result = np.argmin(np.sum(np.absolute(window_prediction - prediction), axis=1))
    elif mode == 1:
        # largest dot product with the full-text scores
        result = np.argmax(np.sum(window_prediction * prediction, axis=1))
    else:
        # mode 3: highest score for the class predicted for the full text
        result = np.argmax(window_prediction[:, np.argmax(prediction)])

    # get window of subtext of best similarity to original text
    cause = text_windows[result]
    return cause, (np.argmax(prediction) - label_offset)

def get_formality(label):
    """Translate a signed label into a formality name.

    Positive labels give 'formal', negative labels give 'informal', and
    anything else (including zero) gives 'natural'.
    """
    if label > 0:
        return 'formal'
    return 'informal' if label < 0 else 'natural'

In [67]:
# get data
# Keep only the three columns used below; the 'token' cells are parsed from
# their Python-literal text form with ast.literal_eval.
example_data = (
    pd.read_excel('tokenized_answers.xlsx')[['text', 'token', 'score']]
    .assign(token=lambda frame: frame['token'].map(ast.literal_eval))
)

In [68]:
# get model
# Build the paths with os.path.join rather than string concatenation so the
# separators are correct for the host platform.
model_dir = os.path.join(os.getcwd(), "model")
# Load the saved model first, then load the separately stored weights into it.
model = tf.keras.models.load_model(os.path.join(model_dir, "structure"))
model.load_weights(os.path.join(model_dir, "weights"))

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x21c93f19de0>

In [69]:
# predict formality for a piece of text
# Pick one row at random and read it by position (.iloc), so the lookup does
# not depend on how the DataFrame index happens to be labelled.
sample_index = random.randrange(len(example_data))
sample_text = example_data['text'].iloc[sample_index]
sample_token = example_data['token'].iloc[sample_index]
sample_score = example_data['score'].iloc[sample_index]
# Run inference once and reuse the result for both the label and the printout.
probabilities = model.predict([sample_text])
# NOTE(review): the -3 presumably shifts the class index onto a -3..3 score
# scale (the model outputs 7 values) - confirm against the training setup.
prediction = np.argmax(probabilities) - 3
print(probabilities)
print('The formality predicted for text "' + sample_text + '" is: ' + str(prediction))
print('The actual score is: ' + str(sample_score))

[[8.9285839e-01 3.9258625e-02 6.1685503e-02 4.3453872e-03 1.7473558e-03
  9.8729790e-05 6.0565035e-06]]
The formality predicted for text "3 ) You ' re complaining about your stupid mistake" is: -3
The actual score is: -2.6


In [70]:
# Explain the sampled text with a window of half its tokens (at least one token).
half_window = max(len(sample_token) // 2, 1)
interpret = convolution_predict(sample_token, model, half_window, mode=3)
print(''.join(['The text "', sample_text, '" is ', get_formality(int(interpret[1])), ' as it contains: ']))
print(interpret[0])

The text "3 ) You ' re complaining about your stupid mistake" is informal as it contains: 
) You ' re complaining


In [85]:
def trying(user_input=None):
    """Classify the formality of a piece of text and print the window that explains it.

    Parameters
    ----------
    user_input : str, optional
        Text to analyse. When omitted (None), the text is read interactively
        with ``input``.

    The text is tokenized with ``word_tokenize`` and passed to
    ``convolution_predict`` (mode 3) with a window of half the token count
    (at least one token). The result is printed; nothing is returned.
    """
    # `is None` is the correct identity test; `== None` can be overridden.
    if user_input is None:
        user_input = input('Please input a piece of text, to check formality: ')
    tokens = word_tokenize(user_input)
    # Empty or whitespace-only input yields no tokens and therefore no window
    # to score, so report that instead of failing inside the prediction step.
    if not tokens:
        print('''The text "''' + user_input + '''" contains no tokens to analyse.''')
        return
    window = max(len(tokens) // 2, 1)
    cause, label = convolution_predict(tokens, model, window, mode=3)
    label = int(label)
    print('''The text "''' + user_input + '''" is ''' + get_formality(label) + ' (' + str(label) + ') as it contains: ')
    print(cause)

In [86]:
# the model classifies this informal sentence well
trying(user_input="yeah no big deal w u")

The text "yeah no big deal w u" is informal (-2) as it contains: 
big deal w


In [87]:
# the model classifies this "natural" (label 0) sentence well
trying(user_input="It is not a big deal.")

The text "It is not a big deal." is natural (0) as it contains: 
It is not


In [88]:
# the model performs poorly on formal text: this formal sentence is labelled "natural" (0)
trying(user_input="From previous lemma in section 3.1, the authors can come to the conclusion that such a founding should not be a worrying sign. ")

The text "From previous lemma in section 3.1, the authors can come to the conclusion that such a founding should not be a worrying sign. " is natural (0) as it contains: 
to the conclusion that such a founding should not be a worrying


In [89]:
# It's your turn: with no text supplied, trying() prompts for input interactively
trying(user_input=None)

Please input a piece of text, to check formality: test
The text "test" is informal (-3) as it contains: 
test
