## Dependencies

In [1]:
import glob
import warnings
from tensorflow_hub import KerasLayer
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling1D, SpatialDropout1D, Concatenate
from googleqa_utilityscript import *
from googleqa_map_utilityscript import *
import bert_tokenization as tokenization


# Seed used for this run.
SEED = 0
# NOTE(review): `seed_everything` comes from the star-imported utility script;
# presumably it seeds the Python / NumPy / TensorFlow RNGs — confirm there.
seed_everything(SEED)

# Suppress every warning (any category, any module) for the rest of the run.
warnings.simplefilter("ignore")

## Load data

In [2]:
# Location of the TF-Hub BERT export and of its WordPiece vocabulary file.
BERT_PATH = '/kaggle/input/tf-hub-bert-base/bert_base_uncased'
VOCAB_PATH = f'{BERT_PATH}/assets/vocab.txt'

# Every `.h5` weight file in the training-output dataset, in lexicographic order.
model_path_list = sorted(
    glob.glob('/kaggle/input/116-googleq-a-train-bert-base-uncased-3f-lrwarmup1/' + '*.h5')
)
print('Models to predict:', model_path_list)

# Competition test set.
test = pd.read_csv('/kaggle/input/google-quest-challenge/test.csv')

print('Test samples: %s' % test.shape[0])
display(test.head())

Models to predict: ['/kaggle/input/116-googleq-a-train-bert-base-uncased-3f-lrwarmup1/model_fold_1.h5', '/kaggle/input/116-googleq-a-train-bert-base-uncased-3f-lrwarmup1/model_fold_2.h5', '/kaggle/input/116-googleq-a-train-bert-base-uncased-3f-lrwarmup1/model_fold_3.h5']
Test samples: 476


Unnamed: 0,qa_id,question_title,question_body,question_user_name,question_user_page,answer,answer_user_name,answer_user_page,url,category,host
0,39,Will leaving corpses lying around upset my pri...,I see questions/information online about how t...,Dylan,https://gaming.stackexchange.com/users/64471,There is no consequence for leaving corpses an...,Nelson868,https://gaming.stackexchange.com/users/97324,http://gaming.stackexchange.com/questions/1979...,CULTURE,gaming.stackexchange.com
1,46,Url link to feature image in the portfolio,I am new to Wordpress. i have issue with Featu...,Anu,https://wordpress.stackexchange.com/users/72927,I think it is possible with custom fields.\n\n...,Irina,https://wordpress.stackexchange.com/users/27233,http://wordpress.stackexchange.com/questions/1...,TECHNOLOGY,wordpress.stackexchange.com
2,70,"Is accuracy, recoil or bullet spread affected ...","To experiment I started a bot game, toggled in...",Konsta,https://gaming.stackexchange.com/users/37545,You do not have armour in the screenshots. Thi...,Damon Smithies,https://gaming.stackexchange.com/users/70641,http://gaming.stackexchange.com/questions/2154...,CULTURE,gaming.stackexchange.com
3,132,Suddenly got an I/O error from my external HDD,I have used my Raspberry Pi as a torrent-serve...,robbannn,https://raspberrypi.stackexchange.com/users/17341,Your Western Digital hard drive is disappearin...,HeatfanJohn,https://raspberrypi.stackexchange.com/users/1311,http://raspberrypi.stackexchange.com/questions...,TECHNOLOGY,raspberrypi.stackexchange.com
4,200,Passenger Name - Flight Booking Passenger only...,I have bought Delhi-London return flights for ...,Amit,https://travel.stackexchange.com/users/29089,I called two persons who work for Saudia (tick...,Nean Der Thal,https://travel.stackexchange.com/users/10051,http://travel.stackexchange.com/questions/4704...,CULTURE,travel.stackexchange.com


In [3]:
# Target columns that describe the question.
question_target_cols = [
    'question_asker_intent_understanding',
    'question_body_critical',
    'question_conversational',
    'question_expect_short_answer',
    'question_fact_seeking',
    'question_has_commonly_accepted_answer',
    'question_interestingness_others',
    'question_interestingness_self',
    'question_multi_intent',
    'question_not_really_a_question',
    'question_opinion_seeking',
    'question_type_choice',
    'question_type_compare',
    'question_type_consequence',
    'question_type_definition',
    'question_type_entity',
    'question_type_instructions',
    'question_type_procedure',
    'question_type_reason_explanation',
    'question_type_spelling',
    'question_well_written',
]

# Target columns that describe the answer.
answer_target_cols = [
    'answer_helpful',
    'answer_level_of_information',
    'answer_plausible',
    'answer_relevance',
    'answer_satisfaction',
    'answer_type_instructions',
    'answer_type_procedure',
    'answer_type_reason_explanation',
    'answer_well_written',
]

# Full target list: question targets first, answer targets after.
target_cols = [*question_target_cols, *answer_target_cols]

## Pre-process data

In [4]:
text_features = ['question_title', 'question_body', 'answer']

# Element-wise cleaning steps, listed in the order they are applied.
cleaning_steps = [
    lambda text: text.lower(),   # Lower
    map_misspellings,            # Map misspellings
    map_contraction,             # Map contractions
    lambda text: text.strip(),   # Trim text
]

for feature in text_features:
    for clean in cleaning_steps:
        # Each step rewrites the column in place, exactly one pass per step.
        test[feature] = test[feature].apply(clean)

## Model parameters

In [5]:
# Length of every input sequence handed to the model.
MAX_SEQUENCE_LENGTH = 512
# One model output per target column.
N_CLASS = len(target_cols)

## Test set

In [6]:
# Tokenizer built from the vocabulary file at VOCAB_PATH, with lower-casing enabled.
tokenizer = tokenization.FullTokenizer(VOCAB_PATH, do_lower_case=True)

# Test features
# Encodes the `text_features` columns of `test` with the tokenizer, passing
# MAX_SEQUENCE_LENGTH to the helper.
# NOTE(review): `compute_input_arays` lives in a utility script not shown here;
# its result is presumably the [word ids, masks, segment ids] arrays expected by
# the model's three inputs — confirm against the helper.
X_test = compute_input_arays(test, text_features, tokenizer, MAX_SEQUENCE_LENGTH)

## Model

In [7]:
def model_fn():
    """Build the BERT-based multi-output model used for prediction.

    The network takes three int32 inputs of length MAX_SEQUENCE_LENGTH
    (`input_word_ids`, `input_masks`, `segment_ids`), feeds them to the hub
    layer loaded from BERT_PATH, averages the second output of that layer over
    the sequence axis, applies dropout (rate 0.2) and ends in a sigmoid Dense
    layer with N_CLASS units.

    Returns:
        An uncompiled `tensorflow.keras.Model`; weights are expected to be
        loaded by the caller.
    """
    bert_inputs = [
        Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32, name=input_name)
        for input_name in ('input_word_ids', 'input_masks', 'segment_ids')
    ]

    # The hub layer returns a pair; only the second (per-token) output is used.
    encoder = KerasLayer(BERT_PATH, trainable=True)
    _, token_outputs = encoder(bert_inputs)

    pooled = GlobalAveragePooling1D()(token_outputs)
    pooled = Dropout(0.2)(pooled)
    predictions = Dense(N_CLASS, activation="sigmoid", name="output")(pooled)

    return Model(inputs=bert_inputs, outputs=predictions)

## Make predictions

In [8]:
# Average the test predictions of every fold model listed in `model_path_list`.
n_models = len(model_path_list)
if n_models == 0:
    # Without this guard the loop below is skipped and an all-zero Y_test
    # would be written to the submission without any error.
    raise ValueError('No model weights found in model_path_list; nothing to predict with.')

# Running mean of the fold predictions, one row per test sample.
Y_test = np.zeros((len(test), N_CLASS))

for model_path in model_path_list:
    # Reset Keras' global state before building the next fold's model, so the
    # graphs/layers of previous folds do not pile up in memory.
    tf.keras.backend.clear_session()
    model = model_fn()
    model.load_weights(model_path)
    # Each fold contributes an equal share to the ensemble average.
    Y_test += model.predict(X_test) / n_models

In [9]:
# Start from the competition's sample file so the column layout stays as provided.
sample_submission_path = '/kaggle/input/google-quest-challenge/sample_submission.csv'
submission = pd.read_csv(sample_submission_path)

# Overwrite the target columns with the averaged fold predictions.
# NOTE(review): this relies on the sample file listing rows in the same order
# as `test` — confirm.
submission[target_cols] = Y_test
submission.to_csv("submission.csv", index=False)

# Preview of the first rows, followed by per-column summary statistics.
display(submission.head(), submission.describe())

Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
0,39,0.946808,0.660082,0.180511,0.481685,0.649398,0.606196,0.706394,0.637561,0.641616,...,0.893254,0.912336,0.548579,0.951351,0.943412,0.790967,0.052567,0.045745,0.866182,0.89735
1,46,0.884756,0.552721,0.009072,0.739277,0.776764,0.916536,0.547012,0.453805,0.090536,...,0.69957,0.941872,0.628371,0.960797,0.971749,0.832499,0.881013,0.104898,0.102126,0.85631
2,70,0.918011,0.650012,0.036492,0.752541,0.857688,0.859361,0.620838,0.487699,0.175769,...,0.841656,0.912367,0.615545,0.952951,0.957105,0.845114,0.123044,0.072523,0.845904,0.889726
3,132,0.863552,0.417271,0.008037,0.651062,0.771823,0.895462,0.581458,0.430573,0.139321,...,0.659642,0.935014,0.633353,0.964886,0.974243,0.851162,0.871716,0.163355,0.367244,0.87849
4,200,0.915387,0.436234,0.063452,0.762277,0.763937,0.78746,0.673719,0.568029,0.110726,...,0.717439,0.887848,0.63368,0.951414,0.947219,0.799662,0.153022,0.112768,0.652318,0.871411


Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
count,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,...,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0
mean,5029.186975,0.885605,0.584368,0.037577,0.695829,0.803822,0.835079,0.59246,0.478151,0.253597,...,0.776357,0.922633,0.644113,0.954308,0.962545,0.841287,0.506434,0.136453,0.519778,0.887705
std,2812.67006,0.050344,0.135321,0.060517,0.107649,0.095227,0.126648,0.052235,0.087346,0.197092,...,0.09245,0.025098,0.049501,0.013577,0.013024,0.041673,0.317639,0.066547,0.27512,0.024477
min,39.0,0.727152,0.299356,0.003951,0.229134,0.370096,0.26352,0.477307,0.338498,0.010307,...,0.550532,0.791391,0.542906,0.890988,0.893204,0.710896,0.006018,0.00555,0.033426,0.795347
25%,2572.0,0.849573,0.467723,0.010051,0.632115,0.762323,0.807507,0.555544,0.417422,0.104428,...,0.698059,0.908531,0.610841,0.945944,0.955881,0.810604,0.164837,0.087006,0.285485,0.871698
50%,5093.0,0.887105,0.575348,0.014719,0.697221,0.80913,0.88127,0.583792,0.451296,0.180317,...,0.784243,0.926894,0.638914,0.955962,0.964661,0.846292,0.585271,0.140481,0.523788,0.890164
75%,7482.0,0.925468,0.696059,0.032305,0.769062,0.866863,0.914269,0.626861,0.520942,0.358556,...,0.857254,0.9398,0.674072,0.964332,0.971566,0.870185,0.799833,0.181985,0.740309,0.9058
max,9640.0,0.972271,0.874943,0.560838,0.967299,0.971867,0.970532,0.748581,0.761556,0.853496,...,0.941044,0.974526,0.801767,0.982876,0.988214,0.943501,0.958775,0.31233,0.987501,0.941195
