## Dependencies

In [1]:
import glob
import warnings
from tensorflow_hub import KerasLayer
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling1D, SpatialDropout1D, Concatenate
from googleqa_utilityscript import *
from googleqa_map_utilityscript import *
import bert_tokenization as tokenization


# Fix the random seed before anything else runs so repeated executions start from the same state.
# NOTE(review): seed_everything is provided by one of the star imports above; which RNGs it
# actually seeds is not visible in this notebook — confirm in the utility script.
SEED = 0
seed_everything(SEED)
# Suppress every Python warning from here on. This shortens cell output but also hides
# deprecation notices, so re-enable warnings when debugging.
warnings.filterwarnings("ignore")

## Load data

In [2]:
# Pretrained BERT module and the vocabulary file stored inside it.
BERT_PATH = '/kaggle/input/tf-hub-bert-base/bert_base_uncased'
VOCAB_PATH = BERT_PATH + '/assets/vocab.txt'

# One .h5 checkpoint per training fold; sorted so the folds are always visited in the same order.
model_path_list = sorted(glob.glob('/kaggle/input/118-googleq-a-train-3fold-bert-base-unc-raw/' + '*.h5'))
print('Models to predict:', model_path_list)

# Competition test set that predictions are generated for.
test = pd.read_csv('/kaggle/input/google-quest-challenge/test.csv')
print('Test samples: %s' % len(test))

display(test.head())

Models to predict: ['/kaggle/input/118-googleq-a-train-3fold-bert-base-unc-raw/model_fold_1.h5', '/kaggle/input/118-googleq-a-train-3fold-bert-base-unc-raw/model_fold_2.h5', '/kaggle/input/118-googleq-a-train-3fold-bert-base-unc-raw/model_fold_3.h5']
Test samples: 476


Unnamed: 0,qa_id,question_title,question_body,question_user_name,question_user_page,answer,answer_user_name,answer_user_page,url,category,host
0,39,Will leaving corpses lying around upset my pri...,I see questions/information online about how t...,Dylan,https://gaming.stackexchange.com/users/64471,There is no consequence for leaving corpses an...,Nelson868,https://gaming.stackexchange.com/users/97324,http://gaming.stackexchange.com/questions/1979...,CULTURE,gaming.stackexchange.com
1,46,Url link to feature image in the portfolio,I am new to Wordpress. i have issue with Featu...,Anu,https://wordpress.stackexchange.com/users/72927,I think it is possible with custom fields.\n\n...,Irina,https://wordpress.stackexchange.com/users/27233,http://wordpress.stackexchange.com/questions/1...,TECHNOLOGY,wordpress.stackexchange.com
2,70,"Is accuracy, recoil or bullet spread affected ...","To experiment I started a bot game, toggled in...",Konsta,https://gaming.stackexchange.com/users/37545,You do not have armour in the screenshots. Thi...,Damon Smithies,https://gaming.stackexchange.com/users/70641,http://gaming.stackexchange.com/questions/2154...,CULTURE,gaming.stackexchange.com
3,132,Suddenly got an I/O error from my external HDD,I have used my Raspberry Pi as a torrent-serve...,robbannn,https://raspberrypi.stackexchange.com/users/17341,Your Western Digital hard drive is disappearin...,HeatfanJohn,https://raspberrypi.stackexchange.com/users/1311,http://raspberrypi.stackexchange.com/questions...,TECHNOLOGY,raspberrypi.stackexchange.com
4,200,Passenger Name - Flight Booking Passenger only...,I have bought Delhi-London return flights for ...,Amit,https://travel.stackexchange.com/users/29089,I called two persons who work for Saudia (tick...,Nean Der Thal,https://travel.stackexchange.com/users/10051,http://travel.stackexchange.com/questions/4704...,CULTURE,travel.stackexchange.com


In [3]:
# Target columns that describe the question.
question_target_cols = [
    'question_asker_intent_understanding',
    'question_body_critical',
    'question_conversational',
    'question_expect_short_answer',
    'question_fact_seeking',
    'question_has_commonly_accepted_answer',
    'question_interestingness_others',
    'question_interestingness_self',
    'question_multi_intent',
    'question_not_really_a_question',
    'question_opinion_seeking',
    'question_type_choice',
    'question_type_compare',
    'question_type_consequence',
    'question_type_definition',
    'question_type_entity',
    'question_type_instructions',
    'question_type_procedure',
    'question_type_reason_explanation',
    'question_type_spelling',
    'question_well_written',
]

# Target columns that describe the answer.
answer_target_cols = [
    'answer_helpful',
    'answer_level_of_information',
    'answer_plausible',
    'answer_relevance',
    'answer_satisfaction',
    'answer_type_instructions',
    'answer_type_procedure',
    'answer_type_reason_explanation',
    'answer_well_written',
]

# Full list of targets: question columns first, then answer columns.
target_cols = [*question_target_cols, *answer_target_cols]

## Pre-process data

In [4]:
# Text columns of `test` that are passed on to the feature builder in the "Test set" section.
text_features = ['question_title', 'question_body', 'answer']

# The normalisation steps below are commented out, so this cell leaves the text columns unmodified.
# NOTE(review): presumably disabled to match how the "...-unc-raw" checkpoints were trained —
# confirm against the training notebook before re-enabling any of them.
# for feature in text_features:
#     # Lower
#     test[feature] = test[feature].apply(lambda x: x.lower())
#     # Map misspellings
#     test[feature] = test[feature].apply(lambda x: map_misspellings(x))
#     # Map contractions
#     test[feature] = test[feature].apply(lambda x: map_contraction(x))
#     # Trim text
#     test[feature] = test[feature].apply(lambda x: x.strip())

## Model parameters

In [5]:
# Number of model outputs: one per entry in target_cols.
N_CLASS = len(target_cols)
# Fixed length of every token-level model input (see the Input layers in model_fn).
# NOTE(review): presumably must equal the sequence length used when the checkpoints were trained — confirm.
MAX_SEQUENCE_LENGTH = 512

## Test set

In [6]:
# Tokenizer built from the vocabulary file that lives under BERT_PATH.
# NOTE(review): do_lower_case=True presumably matches the uncased BERT checkpoint — confirm.
tokenizer = tokenization.FullTokenizer(VOCAB_PATH, do_lower_case=True)

# Test features
# NOTE(review): compute_input_arays comes from a star-imported utility script; it presumably returns
# the word-id / mask / segment-id arrays in the order model_fn's inputs expect — confirm there.
X_test = compute_input_arays(test, text_features, tokenizer, MAX_SEQUENCE_LENGTH)

## Model

In [7]:
def model_fn():
    """Build the BERT-based network whose weights are loaded for inference.

    The network takes three int32 inputs of length MAX_SEQUENCE_LENGTH
    (named 'input_word_ids', 'input_masks' and 'segment_ids'), feeds them
    through the hub BERT layer, averages the per-token output over the
    sequence axis, applies dropout and ends in a sigmoid Dense layer with
    N_CLASS units.

    Returns:
        An uncompiled Keras ``Model`` mapping the three inputs to an
        ``N_CLASS``-wide sigmoid output named 'output'.
    """
    def _token_input(name):
        # All three BERT inputs share the same shape and dtype; only the name differs.
        return Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32, name=name)

    bert_inputs = [_token_input(name) for name in ('input_word_ids', 'input_masks', 'segment_ids')]

    # NOTE(review): trainable=True is kept even though this notebook only predicts; presumably the
    # layer must be configured as it was at training time so the .h5 weights line up — confirm
    # before changing it.
    # The first (pooled) output of the BERT layer is not used by this head.
    _, token_embeddings = KerasLayer(BERT_PATH, trainable=True)(bert_inputs)

    averaged = GlobalAveragePooling1D()(token_embeddings)
    averaged = Dropout(0.2)(averaged)
    scores = Dense(N_CLASS, activation="sigmoid", name="output")(averaged)

    return Model(inputs=bert_inputs, outputs=scores)

## Make predictions

In [8]:
# Ensemble prediction: Y_test is the unweighted mean of the predictions of every fold checkpoint.

# Guard against an empty checkpoint list: without it the loop body never runs and an
# all-zero Y_test would be written to the submission file without any error.
if not model_path_list:
    raise FileNotFoundError('No model checkpoints (*.h5) were found; cannot make predictions.')

n_models = len(model_path_list)
Y_test = np.zeros((len(test), N_CLASS))

for model_path in model_path_list:
    # Drop the Keras global state left behind by the previous fold's model so memory use
    # does not grow with the number of folds. Done before building (not after predicting)
    # so the last fold's `model` is still intact once the loop finishes.
    tf.keras.backend.clear_session()

    model = model_fn()
    model.load_weights(model_path)
    # Each fold contributes an equal share, so the running sum ends up as the mean.
    Y_test += model.predict(X_test) / n_models

In [9]:
# Start from the competition's sample submission so the id column and column order are kept,
# then overwrite the target columns with the ensemble predictions.
SAMPLE_SUBMISSION_PATH = '/kaggle/input/google-quest-challenge/sample_submission.csv'
submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)
submission[target_cols] = Y_test
submission.to_csv("submission.csv", index=False)

# Quick visual sanity check: first rows, then per-column summary statistics.
display(submission.head(), submission.describe())

Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
0,39,0.953656,0.631775,0.181473,0.449496,0.604781,0.586564,0.708592,0.656836,0.581526,...,0.889877,0.879266,0.539395,0.94754,0.938988,0.787083,0.078648,0.046334,0.840231,0.90892
1,46,0.889303,0.513355,0.00913,0.736814,0.728279,0.90937,0.540616,0.467624,0.059331,...,0.670739,0.941794,0.657085,0.963981,0.975416,0.859653,0.937128,0.110911,0.063073,0.856111
2,70,0.923475,0.661458,0.027083,0.788726,0.868312,0.911792,0.611414,0.499462,0.115271,...,0.866503,0.928277,0.626453,0.966124,0.969526,0.865211,0.088797,0.054572,0.845162,0.907314
3,132,0.861676,0.382333,0.009054,0.682236,0.734468,0.89273,0.563313,0.457017,0.12684,...,0.654511,0.940102,0.66089,0.963267,0.979788,0.873095,0.865975,0.155797,0.427999,0.898456
4,200,0.910561,0.345543,0.052445,0.774019,0.656023,0.779819,0.69973,0.54167,0.066997,...,0.648551,0.876082,0.583778,0.954265,0.955765,0.801171,0.15984,0.115648,0.664822,0.9049


Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
count,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,...,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0
mean,5029.186975,0.887127,0.57507,0.035059,0.700299,0.791448,0.84141,0.589861,0.482596,0.24019,...,0.770427,0.921485,0.656814,0.955427,0.966149,0.849099,0.52693,0.132233,0.511485,0.900671
std,2812.67006,0.050855,0.144643,0.060548,0.115765,0.104322,0.129142,0.054111,0.091795,0.206406,...,0.096293,0.027359,0.052311,0.014098,0.013177,0.043693,0.33154,0.065705,0.28705,0.02249
min,39.0,0.739752,0.271995,0.003148,0.188771,0.324489,0.224356,0.474875,0.319859,0.006664,...,0.529876,0.774688,0.526974,0.880654,0.89645,0.714919,0.007978,0.007485,0.023606,0.816569
25%,2572.0,0.851849,0.44527,0.009315,0.631579,0.739579,0.818377,0.550498,0.415348,0.090578,...,0.688202,0.906254,0.619857,0.946778,0.95999,0.822733,0.171103,0.083671,0.255474,0.885747
50%,5093.0,0.890981,0.566683,0.014561,0.705099,0.795236,0.888949,0.57932,0.454102,0.159249,...,0.772839,0.926204,0.654,0.956992,0.968596,0.854318,0.606883,0.13185,0.512548,0.90407
75%,7482.0,0.929217,0.692719,0.027818,0.77657,0.86274,0.921498,0.626371,0.530364,0.332786,...,0.859046,0.941471,0.688759,0.966012,0.975538,0.881438,0.831543,0.175741,0.770744,0.917098
max,9640.0,0.978794,0.903242,0.549691,0.960277,0.973536,0.977834,0.749786,0.758571,0.827065,...,0.955024,0.974526,0.824346,0.98294,0.989965,0.952214,0.967734,0.325958,0.986306,0.953985
