## Dependencies

In [1]:
import warnings
from tensorflow_hub import KerasLayer
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling1D, SpatialDropout1D
from googleqa_utilityscript import *
from googleqa_map_utilityscript import *
import bert_tokenization as tokenization


# Seed value handed to `seed_everything` below.
SEED = 0
# `seed_everything` is not defined in this notebook; it presumably comes from one of the
# star-imported utility scripts and seeds the random number generators — TODO confirm.
seed_everything(SEED)
# Install a global "ignore" filter so warnings raised by later cells are not shown.
warnings.filterwarnings("ignore")

## Load data

In [2]:
# Locations of the BERT module, its vocabulary file and the saved model weights.
BERT_PATH = '/kaggle/input/tf-hub-bert-base/bert_base_uncased'
TOKENIZER_PATH = BERT_PATH+'/assets/vocab.txt'
model_path = '/kaggle/input/109-googleq-a-train-bert-base-no-dropout/model.h5'

# Test set that will be scored by the model.
test = pd.read_csv('/kaggle/input/google-quest-challenge/test.csv')

# Report the number of rows, then preview the first few of them.
n_test_samples = test.shape[0]
print('Test samples: %s' % n_test_samples)
display(test.head())

Test samples: 476


Unnamed: 0,qa_id,question_title,question_body,question_user_name,question_user_page,answer,answer_user_name,answer_user_page,url,category,host
0,39,Will leaving corpses lying around upset my pri...,I see questions/information online about how t...,Dylan,https://gaming.stackexchange.com/users/64471,There is no consequence for leaving corpses an...,Nelson868,https://gaming.stackexchange.com/users/97324,http://gaming.stackexchange.com/questions/1979...,CULTURE,gaming.stackexchange.com
1,46,Url link to feature image in the portfolio,I am new to Wordpress. i have issue with Featu...,Anu,https://wordpress.stackexchange.com/users/72927,I think it is possible with custom fields.\n\n...,Irina,https://wordpress.stackexchange.com/users/27233,http://wordpress.stackexchange.com/questions/1...,TECHNOLOGY,wordpress.stackexchange.com
2,70,"Is accuracy, recoil or bullet spread affected ...","To experiment I started a bot game, toggled in...",Konsta,https://gaming.stackexchange.com/users/37545,You do not have armour in the screenshots. Thi...,Damon Smithies,https://gaming.stackexchange.com/users/70641,http://gaming.stackexchange.com/questions/2154...,CULTURE,gaming.stackexchange.com
3,132,Suddenly got an I/O error from my external HDD,I have used my Raspberry Pi as a torrent-serve...,robbannn,https://raspberrypi.stackexchange.com/users/17341,Your Western Digital hard drive is disappearin...,HeatfanJohn,https://raspberrypi.stackexchange.com/users/1311,http://raspberrypi.stackexchange.com/questions...,TECHNOLOGY,raspberrypi.stackexchange.com
4,200,Passenger Name - Flight Booking Passenger only...,I have bought Delhi-London return flights for ...,Amit,https://travel.stackexchange.com/users/29089,I called two persons who work for Saudia (tick...,Nean Der Thal,https://travel.stackexchange.com/users/10051,http://travel.stackexchange.com/questions/4704...,CULTURE,travel.stackexchange.com


In [3]:
# Target columns that describe the question.
question_target_cols = [
    'question_asker_intent_understanding',
    'question_body_critical',
    'question_conversational',
    'question_expect_short_answer',
    'question_fact_seeking',
    'question_has_commonly_accepted_answer',
    'question_interestingness_others',
    'question_interestingness_self',
    'question_multi_intent',
    'question_not_really_a_question',
    'question_opinion_seeking',
    'question_type_choice',
    'question_type_compare',
    'question_type_consequence',
    'question_type_definition',
    'question_type_entity',
    'question_type_instructions',
    'question_type_procedure',
    'question_type_reason_explanation',
    'question_type_spelling',
    'question_well_written',
]

# Target columns that describe the answer.
answer_target_cols = [
    'answer_helpful',
    'answer_level_of_information',
    'answer_plausible',
    'answer_relevance',
    'answer_satisfaction',
    'answer_type_instructions',
    'answer_type_procedure',
    'answer_type_reason_explanation',
    'answer_well_written',
]

# Full ordered list of targets: question columns first, then answer columns.
target_cols = [*question_target_cols, *answer_target_cols]

## Pre-process data

In [4]:
# Text columns of `test` that are handed to `compute_input_arays` to build the model inputs.
text_features = ['question_title', 'question_body', 'answer']
    
# NOTE(review): the text clean-up below is disabled, so the raw text is tokenized as-is.
# Presumably this mirrors the pre-processing used when the loaded weights were trained —
# confirm before re-enabling. `map_misspellings` / `map_contraction` are not defined in
# this notebook (presumably provided by the star-imported utility scripts).
# for feature in text_features:
#     # Lower
#     test[feature] = test[feature].apply(lambda x: x.lower())
#     # Map misspellings
#     test[feature] = test[feature].apply(lambda x: map_misspellings(x))
#     # Map contractions
#     test[feature] = test[feature].apply(lambda x: map_contraction(x))
#     # Trim text
#     test[feature] = test[feature].apply(lambda x: x.strip())

## Model parameters

In [5]:
# Sequence length passed to `compute_input_arays` and used as the width of the model's input layers.
MAX_SEQUENCE_LENGTH = 512
# Number of units in the model's output layer: one per target column.
N_CLASS = len(target_cols)

## Test set

In [6]:
# Build the tokenizer from the vocabulary file that lives under BERT_PATH.
# NOTE(review): the second positional argument is presumably `do_lower_case` — confirm
# against the `bert_tokenization` module, which is not visible from this notebook.
tokenizer = tokenization.FullTokenizer(TOKENIZER_PATH, True)

# Test features
# `compute_input_arays` is not defined in this notebook (presumably provided by the
# star-imported utility scripts); its result is fed directly to `model.predict`.
X_test = compute_input_arays(test, text_features, tokenizer, MAX_SEQUENCE_LENGTH)

## Model

In [7]:
# The three int32 inputs the BERT layer is called with, each MAX_SEQUENCE_LENGTH wide.
bert_inputs = [
    Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32, name=input_name)
    for input_name in ('input_word_ids', 'input_masks', 'segment_ids')
]
input_word_ids, input_masks, segment_ids = bert_inputs

# NOTE(review): `trainable=True` is kept as-is even though this notebook only predicts;
# presumably it has to match how the saved model was built — confirm before changing.
bert_layer = KerasLayer(BERT_PATH, trainable=True)
# Only `pooled_output` is used below; `sequence_output` is unpacked but left unused.
pooled_output, sequence_output = bert_layer(bert_inputs)

# Single sigmoid head on the pooled output: one score per target column.
output = Dense(
    N_CLASS,
    kernel_initializer='glorot_uniform',
    activation="sigmoid",
    name="output",
)(pooled_output)

model = Model(inputs=bert_inputs, outputs=output)
model.summary()

# Load the weights stored at `model_path` into the freshly built model.
model.load_weights(model_path)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_word_ids (InputLayer)     [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_masks (InputLayer)        [(None, 512)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 512)]        0                                            
__________________________________________________________________________________________________
keras_layer (KerasLayer)        [(None, 768), (None, 109482241   input_word_ids[0][0]             
                                                                 input_masks[0][0]            

## Make predictions

In [8]:
# Run inference on the test inputs; the model's output layer yields N_CLASS sigmoid scores per row.
Y_test = model.predict(X_test)

In [9]:
# Start from the competition's sample submission, which supplies the `qa_id` column.
submission = pd.read_csv('/kaggle/input/google-quest-challenge/sample_submission.csv')

# The predictions are written into the template purely by position, so fail loudly if
# they could end up attached to the wrong rows or columns:
#   * one row per template row and one column per target, and
#   * template rows listed in the same `qa_id` order as the `test` rows that were scored.
assert Y_test.shape == (len(submission), len(target_cols)), (
    'Prediction shape %s does not match the submission template (%d rows, %d targets)'
    % (Y_test.shape, len(submission), len(target_cols)))
assert list(submission['qa_id']) == list(test['qa_id']), (
    'sample_submission.csv rows are not in the same qa_id order as test.csv')

# Fill in the target columns and write the file to be submitted.
submission[target_cols] = Y_test
submission.to_csv("submission.csv", index=False)

# Quick sanity check of the written values: first rows and per-column summary statistics.
display(submission.head())
display(submission.describe())

Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
0,39,0.925527,0.650222,0.212266,0.549956,0.488472,0.586418,0.571368,0.554063,0.365808,...,0.890713,0.95076,0.514618,0.978575,0.964225,0.850373,0.039192,0.031806,0.830216,0.943318
1,46,0.892924,0.545844,0.004178,0.772152,0.851531,0.943992,0.570252,0.444946,0.074565,...,0.725663,0.93866,0.649687,0.972292,0.980534,0.870171,0.974351,0.100763,0.028424,0.901227
2,70,0.940134,0.672681,0.013148,0.828573,0.880855,0.941084,0.581943,0.515157,0.140637,...,0.900161,0.882805,0.570849,0.975193,0.934081,0.792235,0.075611,0.045666,0.839274,0.939305
3,132,0.889306,0.522096,0.005682,0.730002,0.779454,0.927177,0.569717,0.461584,0.050861,...,0.740513,0.92629,0.660398,0.974157,0.981653,0.869878,0.863828,0.167725,0.455575,0.918637
4,200,0.936457,0.484016,0.018168,0.844024,0.794776,0.870851,0.555045,0.584598,0.096488,...,0.764763,0.914303,0.547327,0.979802,0.960244,0.792483,0.113451,0.068887,0.494801,0.919453


Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
count,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,...,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0
mean,5029.186975,0.893524,0.601852,0.028718,0.755897,0.830432,0.878825,0.565754,0.461496,0.186802,...,0.80633,0.919845,0.634516,0.969109,0.964357,0.853024,0.547293,0.120006,0.426428,0.911849
std,2812.67006,0.049319,0.123379,0.053693,0.099873,0.091284,0.122605,0.048183,0.084663,0.172693,...,0.086667,0.025867,0.047076,0.009566,0.01661,0.038345,0.358966,0.060774,0.295262,0.025973
min,39.0,0.70134,0.309119,0.002532,0.162865,0.442144,0.250301,0.430216,0.305564,0.015611,...,0.600191,0.801696,0.426768,0.917544,0.901563,0.704841,0.007871,0.01417,0.012445,0.820902
25%,2572.0,0.863781,0.498783,0.005107,0.706491,0.793593,0.865585,0.533183,0.403225,0.061028,...,0.732367,0.906067,0.606809,0.964385,0.956781,0.82956,0.139006,0.076864,0.149551,0.895911
50%,5093.0,0.899012,0.596935,0.008526,0.761229,0.840099,0.926991,0.56157,0.441895,0.120294,...,0.808241,0.924807,0.639485,0.971001,0.969045,0.861157,0.665647,0.115625,0.39548,0.915523
75%,7482.0,0.93371,0.69706,0.022633,0.824142,0.891695,0.950779,0.593838,0.502748,0.254761,...,0.884725,0.939274,0.66729,0.975357,0.976301,0.878843,0.892498,0.156664,0.687256,0.930344
max,9640.0,0.973238,0.867744,0.429782,0.963994,0.982841,0.981071,0.762047,0.750559,0.834768,...,0.95911,0.964593,0.743019,0.986042,0.988034,0.941433,0.977364,0.364924,0.980277,0.961782
