In [1]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
import random

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from transformers import T5Config
from transformers import T5ForConditionalGeneration
from transformers import T5Tokenizer

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score



# 1.1 LSTM Model

In [5]:
# Read in model
model = tf.keras.models.load_model('lstm_2')

In [6]:
# Read in data, clean data
train_data = pd.read_csv('train_data.csv')
train_data['title'] = train_data['title'].apply(lambda x: x.replace(u'\xa0',u' '))
train_data['title'] = train_data['title'].apply(lambda x: x.replace('\u200a',' '))

# Set tokenizer for words not in vocab
tokenizer = Tokenizer(oov_token='<oov>')
tokenizer.fit_on_texts(train_data['title'])
total_words = len(tokenizer.word_index) + 1

# Create list of sequences
input_sequences = []
for line in train_data['title']:
    token_list = tokenizer.texts_to_sequences([line])[0]    
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# Convert labels to binary predictions
xs, labels = input_sequences[:,:-1],input_sequences[:,-1]
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

# Print number of sequences
num_sequences = len(input_sequences)
print("Total input sequences: ", num_sequences)

Total input sequences:  41193


# 1.2 Predict Top Word for LSTM

In [7]:
def lstm_predict(input_string):
    
    token_list = tokenizer.texts_to_sequences([input_string])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    predicted=model.predict(token_list) 
    predicted=np.argmax(predicted,axis=1)
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted:
            output_word = word
            break
    input_string += " " + output_word
        
    return output_word

In [8]:
lstm_predict("Computational cognitive modeling is")

'like'

# 1.3 Predict Top K Words for LSTM

In [9]:
def predict_top_k(input_string, k):
    
    # Subtract 1 from k (account for not double counting first word)
    k -= 1
    
    # Calculate probabilities for top k words
    token_list = tokenizer.texts_to_sequences([input_string])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    prob = model.predict(token_list)
    predicted = np.argmax(prob,axis=1)    
    top_k_preds = np.argpartition(prob, k)
    top_k_preds = top_k_preds[0][:k]
    
    # Predict most likely word
    for word, index in tokenizer.word_index.items():
        if index == predicted:
            top_word = word
    
    # Predict next k-1 words
    top_k_words = []
    for i in range(k):
        pred_i = top_k_preds[i]
        for word, index in tokenizer.word_index.items():
            if index == pred_i:
                top_k_words.append(word)
    
    # Print results
    #print(f'Best prediction: {top_word}')
    #print(f'Next {k} best predictions: {top_k_words}')
    
    # Add best result to list of words
    top_k_words.insert(0, top_word)
    return top_k_words

In [10]:
predict_top_k("Computational cognitive modeling is", 10)

['like',
 'overthinking',
 'ensure',
 'eat',
 'keyword',
 'surveillance',
 'responses',
 'decide',
 'teach',
 'medical']

# 2.1 Transformer Model

In [11]:
model_name = "allenai/t5-small-next-word-generator-qoogle"
t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
t5_model = T5ForConditionalGeneration.from_pretrained(model_name)

# 2.2 Predict Top Word for Transformer Model

In [12]:
def transformer_prediction(input_string, **generator_args):
    input_ids = t5_tokenizer.encode(input_string, return_tensors="pt")
    res = t5_model.generate(input_ids, **generator_args)
    output = t5_tokenizer.batch_decode(res, skip_special_tokens=True)
    return output[0]

In [13]:
transformer_prediction("Computational cognitive modeling projects are")

'typically'

# 3.1 Evaluate Models on All Test Data

In [25]:
# Read in test data
test_data = pd.read_csv('test_data.csv')
test_titles = test_data['title']

# Collect list of sequences and predicted words
x_list = []
y_list = []
for i in range(len(test_titles)):    
    text_i = test_titles.iloc[i].split(' ')
    seq_i = text_i[:-1]
    
    x_i = ''    
    for j in seq_i:
        x_i += j + ' '
    x_list.append(x_i)
    
    y_i = text_i[-1]
    y_list.append(y_i)
    
# Collect model predictions
lstm_preds = []
trans_preds = []
lstm_counter = 0
trans_counter = 0
for i in range(len(x_list)):

    # Make lowercase
    x_i = x_list[i].lower()
    y_i = y_list[i].lower()
    
    # Get number of correct LSTM predictions
    lstm_pred = lstm_predict(x_i)
    lstm_preds.append(lstm_pred)
    if lstm_pred == y_i:
        lstm_counter += 1
    
    # Get number of correct Transformer predictions
    trans_pred = transformer_prediction(x_i)
    trans_preds.append(trans_pred)
    if trans_pred == y_i:
        trans_counter += 1

In [26]:
# Calculate accuracy, precision, recall, and categorical cross entropy
num_decs = 5
avg_setting = 'weighted'
lstm_accuracy = round((lstm_counter / len(x_list)) * 100, num_decs)
trans_accuracy = round((trans_counter / len(x_list)) * 100, num_decs)
lstm_precision = round(precision_score(y_list, lstm_preds, average=avg_setting), num_decs)
trans_precision = round(precision_score(y_list, trans_preds, average=avg_setting), num_decs)
lstm_recall = round(recall_score(y_list, lstm_preds, average=avg_setting), num_decs)
trans_recall = round(recall_score(y_list, trans_preds, average=avg_setting), num_decs)

# Print results
print(f'LSTM Accuracy: {lstm_accuracy}%')
print(f'Transformer Accuracy: {trans_accuracy}%\n')
print(f'LSTM Precision: {lstm_precision}')
print(f'Transformer Precision: {trans_precision}\n')
print(f'LSTM Recall: {lstm_recall}')
print(f'Transformer Recall: {trans_recall}')

LSTM Accuracy: 5.63525%
Transformer Accuracy: 8.60656%

LSTM Precision: 0.00891
Transformer Precision: 0.01976

LSTM Recall: 0.0123
Transformer Recall: 0.02254


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
###########################
from collections import Counter

In [31]:
lstm_counter = Counter(lstm_preds)
lstm_counter.most_common(10)

[('the', 42),
 ('and', 24),
 ('a', 20),
 ('in', 18),
 ('you', 14),
 ('your', 14),
 ('strong', 12),
 ('with', 10),
 ('to', 9),
 ('learning', 9)]

In [32]:
trans_counter = Counter(trans_preds)
trans_counter.most_common(10)

[('a', 120),
 ('is', 94),
 ('the', 34),
 ('of', 16),
 ('learning', 15),
 ('and', 15),
 ('how', 14),
 ('are', 13),
 ('work', 10),
 ('to', 10)]

# 3.2 Create Subset of Human Sequences

In [16]:
# Pick subset of 10 articles
num_human_sequences = 10
subset = test_data.sample(num_human_sequences)
subset_titles = subset['title']

# Print each title
for title in subset_titles:
    print(title)

In [33]:
subset_titles = ['How Small beat Big',
                 'How to foster a culture of customer obsession',
                 'PVANET: Deep but Lightweight Neural Networks for Real-time Object Detection',
                 'UX candidates are asking the wrong questions',
                 'Smart Farming In Agriculture Sector',
                 'Rosenblatt’s perceptron, the very first neural network',
                 'Adaptive Normalization and Fuzzy Targets — Time Series Forecasting tricks',
                 'Startup India and Job Creation',
                 'New Software: When and How to Implement into Your Business',
                 'My Analysis from 50+ papers on the Application of ML in Credit Lending']

In [34]:
# Collect list of sequences and predicted words
x_list = []
y_list = []
for i in subset_titles:
#for i in range(len(subset_titles)):  
    text_i = i.split(' ')
    #text_i = subset_titles.iloc[i].split(' ')
    seq_i = text_i[:-1]

    x_i = ''    
    for j in seq_i:
        x_i += j + ' '
    x_list.append(x_i)
    
    y_i = text_i[-1]
    y_list.append(y_i)

In [36]:
x_list

['How Small beat ',
 'How to foster a culture of customer ',
 'PVANET: Deep but Lightweight Neural Networks for Real-time Object ',
 'UX candidates are asking the wrong ',
 'Smart Farming In Agriculture ',
 'Rosenblatt’s perceptron, the very first neural ',
 'Adaptive Normalization and Fuzzy Targets — Time Series Forecasting ',
 'Startup India and Job ',
 'New Software: When and How to Implement into Your ',
 'My Analysis from 50+ papers on the Application of ML in Credit ']

In [19]:
# Gather model predictions for subset
lstm_counter = 0
trans_counter = 0
lstm_preds = []
trans_preds = []
for i in range(len(x_list)):
    
    print(f'-----Question {i+1}-----')
    
    # Collect correct targets for given sequence
    x_i = x_list[i].lower()
    y_i = y_list[i].lower()
    
    print(f'Sequence: {x_i}')
    print(f'Target: {y_i}')
    
    # Make LSTM prediction
    lstm_pred = lstm_predict(x_i)
    lstm_preds.append(lstm_pred)
    if lstm_pred == y_i:
        lstm_counter += 1
        
    print(f'LSTM prediction: {lstm_pred}')
    
    # Make Transfromer prediction
    trans_pred = transformer_prediction(x_i)
    trans_preds.append(trans_pred)
    if trans_pred == y_i:
        trans_counter += 1
        
    print(f'Transformer prediction: {trans_pred}\n')

-----Question 1-----
Sequence: how small beat 
Target: big
LSTM prediction: big
Transformer prediction: a

-----Question 2-----
Sequence: how to foster a culture of customer 
Target: obsession
LSTM prediction: obsession
Transformer prediction: service

-----Question 3-----
Sequence: pvanet: deep but lightweight neural networks for real-time object 
Target: detection
LSTM prediction: detection
Transformer prediction: oriented

-----Question 4-----
Sequence: ux candidates are asking the wrong 
Target: questions
LSTM prediction: questions
Transformer prediction: answer

-----Question 5-----
Sequence: smart farming in agriculture 
Target: sector
LSTM prediction: sector
Transformer prediction: is

-----Question 6-----
Sequence: rosenblatt’s perceptron, the very first neural 
Target: network
LSTM prediction: network
Transformer prediction: network

-----Question 7-----
Sequence: adaptive normalization and fuzzy targets — time series forecasting 
Target: tricks
LSTM prediction: money
Transfor

In [20]:
# Calculate accuracy, precision, recall, and categorical cross entropy
num_decs = 5
avg_setting = 'weighted'
lstm_accuracy = round((lstm_counter / len(x_list)) * 100, num_decs)
trans_accuracy = round((trans_counter / len(x_list)) * 100, num_decs)
lstm_precision = round(precision_score(y_list, lstm_preds, average=avg_setting), num_decs)
trans_precision = round(precision_score(y_list, trans_preds, average=avg_setting), num_decs)
lstm_recall = round(recall_score(y_list, lstm_preds, average=avg_setting), num_decs)
trans_recall = round(recall_score(y_list, trans_preds, average=avg_setting), num_decs)

# Print results
print(f'LSTM Accuracy: {lstm_accuracy}%')
print(f'Transformer Accuracy: {trans_accuracy}%\n')
print(f'LSTM Precision: {lstm_precision}')
print(f'Transformer Precision: {trans_precision}\n')
print(f'LSTM Recall: {lstm_recall}')
print(f'Transformer Recall: {trans_recall}')

LSTM Accuracy: 90.0%
Transformer Accuracy: 10.0%

LSTM Precision: 0.3
Transformer Precision: 0.1

LSTM Recall: 0.3
Transformer Recall: 0.1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# 3.3 Human Sequence Options

In [21]:
# Create list of options for human participants to choose from
k = 10
subset_options = []
for i in range(len(subset_titles)):
    x_i = x_list[i]
    y_i = y_list[i]
    lstm_top_10 = predict_top_k(x_i, k)
    
    # Correct prediction
    if y_i in lstm_top_10:
        random.shuffle(lstm_top_10)            
        subset_options.append(lstm_top_10)
    
    # Incorrect prediction
    else:
        lstm_top_10 = predict_top_k(x_i, k-1)
        lstm_top_10.append(y_i)
        random.shuffle(lstm_top_10)
        subset_options.append(lstm_top_10)

In [22]:
# Save as dataframe
df_choices = pd.DataFrame(subset_titles)
df_choices['sequence'] = x_list
df_choices['target'] = y_list
df_choices['choices'] = subset_options
df_choices.to_csv('human_sequence_options.csv')
df_choices

Unnamed: 0,0,sequence,target,choices
0,How Small beat Big,How Small beat,Big,"[big, spice, managing, glorified, unclog, put,..."
1,How to foster a culture of customer obsession,How to foster a culture of customer,obsession,"[answer, reinventing, breaking, inspire, obses..."
2,PVANET: Deep but Lightweight Neural Networks f...,PVANET: Deep but Lightweight Neural Networks f...,Detection,"[compete, sustainability, inflation, rescue, c..."
3,UX candidates are asking the wrong questions,UX candidates are asking the wrong,questions,"[quitting, features, questions, comparison, im..."
4,Smart Farming In Agriculture Sector,Smart Farming In Agriculture,Sector,"[accomplish, staff, Sector, dirty, hadoop, fak..."
5,"Rosenblatt’s perceptron, the very first neural...","Rosenblatt’s perceptron, the very first neural",network,"[quantitative, hadoop, spring, bottom, buy, ef..."
6,Adaptive Normalization and Fuzzy Targets — Tim...,Adaptive Normalization and Fuzzy Targets — Tim...,tricks,"[meetings, compete, surveillance, mvp, nearest..."
7,Startup India and Job Creation,Startup India and Job,Creation,"[supply, storytelling, breath, perks, japan, c..."
8,New Software: When and How to Implement into Y...,New Software: When and How to Implement into Y...,Business,"[business, controversy, damn, Business, key, s..."
9,My Analysis from 50+ papers on the Application...,My Analysis from 50+ papers on the Application...,Lending,"[founders, fashion, those, lending, body, alte..."


In [23]:
# Make sure that all options include the correct answer
for i in range(len(df_choices)):
    target_i = df_choices['target'].iloc[i]
    choices = df_choices['choices'].iloc[i]
    print(f'\nTarget {i}: {target_i}')
    print('Choices:')
    for j in choices:
        print(j)


Target 0: Big
Choices:
big
spice
managing
glorified
unclog
put
difference
promotion
Big
finish

Target 1: obsession
Choices:
answer
reinventing
breaking
inspire
obsession
managing
extraordinary
nginx
radically
keras

Target 2: Detection
Choices:
compete
sustainability
inflation
rescue
childhood
catch
stuck
medical
Detection
detection

Target 3: questions
Choices:
quitting
features
questions
comparison
improving
leaving
action
node
i’m
ask

Target 4: Sector
Choices:
accomplish
staff
Sector
dirty
hadoop
fake
said
sector
exploratory
studying

Target 5: network
Choices:
quantitative
hadoop
spring
bottom
buy
efficient
12
morning
higher
network

Target 6: tricks
Choices:
meetings
compete
surveillance
mvp
nearest
money
biggest
share
tricks
level

Target 7: Creation
Choices:
supply
storytelling
breath
perks
japan
creation
improving
launching
shit
Creation

Target 8: Business
Choices:
business
controversy
damn
Business
key
shouldn’t
supply
sure
walt
overview

Target 9: Lending
Choices:
founder

# 3.4: Human Evaluation as Gold Standard

(In Report)