## Dependencies

In [1]:
import warnings
import tensorflow_hub as hub
from tensorflow.keras import Model
from tensorflow.keras.layers import Lambda, Input, Dense, Dropout, Concatenate
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from googleqa_utilityscript import *
from googleqa_map_utilityscript import *


# Fixed seed handed to seed_everything(); that helper is not defined in this
# notebook — presumably it comes from one of the googleqa utility-script star imports.
SEED = 0
seed_everything(SEED)
# Blanket filter: suppresses every Python warning from this point on.
warnings.filterwarnings("ignore")

## Load data

In [2]:
# Weight files (.h5) of the trained folds; each path is later passed to model.load_weights().
# NOTE(review): the dataset directory name says "5fold" but only folds 1-3 are listed — confirm this is intentional.
model_path_list = ['/kaggle/input/108-googleq-a-train-use-qa-text-pre-process2-5fold/model_fold_1.h5', 
                   '/kaggle/input/108-googleq-a-train-use-qa-text-pre-process2-5fold/model_fold_2.h5',
                   '/kaggle/input/108-googleq-a-train-use-qa-text-pre-process2-5fold/model_fold_3.h5']
# Local path of the sentence-encoder module that is later loaded with hub.load().
module_url = '/kaggle/input/universalsentenceencodermodels/universal-sentence-encoder-models/use-qa'

# Competition test set; the text columns and qa_id used below come from this frame.
test = pd.read_csv('/kaggle/input/google-quest-challenge/test.csv')

# Quick look at what was loaded: row count and the first rows.
print('Test samples: %s' % len(test))
display(test.head())

Test samples: 476


Unnamed: 0,qa_id,question_title,question_body,question_user_name,question_user_page,answer,answer_user_name,answer_user_page,url,category,host
0,39,Will leaving corpses lying around upset my pri...,I see questions/information online about how t...,Dylan,https://gaming.stackexchange.com/users/64471,There is no consequence for leaving corpses an...,Nelson868,https://gaming.stackexchange.com/users/97324,http://gaming.stackexchange.com/questions/1979...,CULTURE,gaming.stackexchange.com
1,46,Url link to feature image in the portfolio,I am new to Wordpress. i have issue with Featu...,Anu,https://wordpress.stackexchange.com/users/72927,I think it is possible with custom fields.\n\n...,Irina,https://wordpress.stackexchange.com/users/27233,http://wordpress.stackexchange.com/questions/1...,TECHNOLOGY,wordpress.stackexchange.com
2,70,"Is accuracy, recoil or bullet spread affected ...","To experiment I started a bot game, toggled in...",Konsta,https://gaming.stackexchange.com/users/37545,You do not have armour in the screenshots. Thi...,Damon Smithies,https://gaming.stackexchange.com/users/70641,http://gaming.stackexchange.com/questions/2154...,CULTURE,gaming.stackexchange.com
3,132,Suddenly got an I/O error from my external HDD,I have used my Raspberry Pi as a torrent-serve...,robbannn,https://raspberrypi.stackexchange.com/users/17341,Your Western Digital hard drive is disappearin...,HeatfanJohn,https://raspberrypi.stackexchange.com/users/1311,http://raspberrypi.stackexchange.com/questions...,TECHNOLOGY,raspberrypi.stackexchange.com
4,200,Passenger Name - Flight Booking Passenger only...,I have bought Delhi-London return flights for ...,Amit,https://travel.stackexchange.com/users/29089,I called two persons who work for Saudia (tick...,Nean Der Thal,https://travel.stackexchange.com/users/10051,http://travel.stackexchange.com/questions/4704...,CULTURE,travel.stackexchange.com


In [3]:
# Labels describing the question (21 columns).
question_target_cols = [
    'question_asker_intent_understanding',
    'question_body_critical',
    'question_conversational',
    'question_expect_short_answer',
    'question_fact_seeking',
    'question_has_commonly_accepted_answer',
    'question_interestingness_others',
    'question_interestingness_self',
    'question_multi_intent',
    'question_not_really_a_question',
    'question_opinion_seeking',
    'question_type_choice',
    'question_type_compare',
    'question_type_consequence',
    'question_type_definition',
    'question_type_entity',
    'question_type_instructions',
    'question_type_procedure',
    'question_type_reason_explanation',
    'question_type_spelling',
    'question_well_written',
]

# Labels describing the answer (9 columns).
answer_target_cols = [
    'answer_helpful',
    'answer_level_of_information',
    'answer_plausible',
    'answer_relevance',
    'answer_satisfaction',
    'answer_type_instructions',
    'answer_type_procedure',
    'answer_type_reason_explanation',
    'answer_well_written',
]

# Full label list: question labels first, then answer labels.
target_cols = [*question_target_cols, *answer_target_cols]

## Pre-process data

In [4]:
text_features = ['question_title', 'question_body', 'answer']


def _normalize_text(text):
    """Apply the text clean-up steps to one string, in a fixed order.

    Order: lower-case, map misspellings, map contractions, trim surrounding
    whitespace. map_misspellings / map_contraction are helpers that are not
    defined in this notebook.
    """
    lowered = text.lower()
    respelled = map_misspellings(lowered)
    expanded = map_contraction(respelled)
    return expanded.strip()


# Overwrite each text column of the test frame with its cleaned version.
for feature in text_features:
    test[feature] = test[feature].apply(_normalize_text)

## Model parameters

In [5]:
# Number of prediction targets; used as the width of the model's output layer.
N_CLASS = len(target_cols)
# Width declared as the output shape of each sentence-embedding Lambda layer.
EMBEDDING_SIZE = 512
# Backward-compatible alias: the original, misspelled name is still the one
# read by model_fn, so it must keep existing.
EMBEDDDING_SIZE = EMBEDDING_SIZE

## Test set

In [6]:
# Test features
X_test_title = test['question_title']
X_test_body = test['question_body']
X_test_answer = test['answer']

X_test = [X_test_title, X_test_body, X_test_answer]

## Model

In [7]:
# Load the sentence-encoder module from the local path in `module_url`.
use_embed = hub.load(module_url)

def USEEmbedding(x):
    """Embed a batch of strings with the loaded encoder module.

    Args:
        x: string tensor. The Keras inputs in this notebook are declared with
            shape (1,), so x is expected to arrive as (batch, 1).

    Returns:
        Whatever `use_embed` returns for the 1-D batch of strings.
    """
    # Remove only the trailing singleton axis. A bare tf.squeeze() drops every
    # size-1 axis, so a batch holding a single sample would lose its batch
    # dimension and reach the encoder as a scalar.
    return use_embed(tf.squeeze(tf.cast(x, tf.string), axis=-1))

In [8]:
def model_fn():
    """Build the three-input sentence-embedding model.

    Title, body and answer each get their own string Input of shape (1,) and a
    Lambda layer wrapping USEEmbedding. The three embeddings are concatenated
    and passed through Dropout(0.5) -> Dense(512, relu) -> Dropout(0.5) ->
    Dense(N_CLASS, sigmoid).

    Returns:
        An uncompiled keras Model whose inputs are ordered title, body, answer
        and whose single output layer is named 'output'.
    """
    text_inputs = []
    text_embeddings = []
    # Create Input and Lambda per field, in title -> body -> answer order.
    for input_name in ('input_title', 'input_body', 'input_answer'):
        text_input = Input(shape=(1,), dtype=tf.string, name=input_name)
        text_inputs.append(text_input)
        text_embeddings.append(
            Lambda(USEEmbedding, output_shape=(EMBEDDDING_SIZE,))(text_input)
        )

    hidden = Concatenate()(text_embeddings)
    hidden = Dropout(0.5)(hidden)
    hidden = Dense(512, activation='relu')(hidden)
    hidden = Dropout(0.5)(hidden)
    output = Dense(N_CLASS, activation='sigmoid', name='output')(hidden)

    return Model(inputs=text_inputs, outputs=[output])

## Make predictions

In [9]:
n_fold_models = len(model_path_list)

# Accumulator for the fold-averaged predictions: one row per test sample,
# one column per target.
Y_test = np.zeros((len(test), N_CLASS))

for model_path in model_path_list:
    # Rebuild the architecture, then restore this fold's weights into it.
    model = model_fn()
    model.load_weights(model_path)
    fold_predictions = model.predict(X_test)
    # Every fold adds an equal share, so the running sum ends as the mean.
    Y_test += fold_predictions / n_fold_models

In [10]:
# Start from the competition's template so the column layout is the expected one.
submission = pd.read_csv('/kaggle/input/google-quest-challenge/sample_submission.csv')

# Y_test is written positionally, so the template rows must be in the same
# order as the rows of `test` that produced the predictions. Fail loudly rather
# than write a silently misaligned file.
if not np.array_equal(submission['qa_id'].values, test['qa_id'].values):
    raise ValueError('sample_submission.csv and test.csv do not list the same qa_id values in the same order')

submission[target_cols] = Y_test
submission.to_csv("submission.csv", index=False)

# Preview the first rows and the per-column summary statistics.
display(submission.head())
display(submission.describe())

Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
0,39,0.910614,0.673725,0.29809,0.671754,0.51091,0.603409,0.62999,0.589875,0.256867,...,0.887495,0.880041,0.596263,0.941593,0.94627,0.791292,0.077243,0.095164,0.765104,0.90436
1,46,0.86871,0.573019,0.003475,0.62728,0.821513,0.898056,0.564431,0.47534,0.11439,...,0.746719,0.930346,0.612953,0.961178,0.969824,0.845507,0.930524,0.142213,0.080856,0.881314
2,70,0.886112,0.654396,0.015503,0.738917,0.875409,0.904433,0.598094,0.5002,0.32488,...,0.852946,0.904491,0.603241,0.953166,0.95215,0.815679,0.248098,0.152711,0.721918,0.882381
3,132,0.847237,0.436642,0.015665,0.68416,0.816434,0.872413,0.557292,0.491851,0.344344,...,0.698702,0.928111,0.68495,0.957787,0.974929,0.881536,0.668214,0.132874,0.72883,0.882062
4,200,0.946048,0.563502,0.010523,0.845188,0.839436,0.933179,0.628287,0.575341,0.276206,...,0.808109,0.938418,0.648557,0.972805,0.975696,0.862888,0.487025,0.202017,0.414709,0.9111


Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
count,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,...,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0
mean,5029.186975,0.87408,0.572547,0.038903,0.694686,0.787945,0.842125,0.57163,0.487153,0.237998,...,0.773684,0.922684,0.646319,0.957462,0.967315,0.851831,0.541425,0.146417,0.509499,0.897771
std,2812.67006,0.049233,0.121132,0.068636,0.096191,0.094813,0.122491,0.044292,0.080344,0.125055,...,0.085412,0.028596,0.038265,0.016703,0.014891,0.039504,0.304141,0.06348,0.252219,0.023202
min,39.0,0.72025,0.30969,0.001357,0.346126,0.383854,0.259231,0.493225,0.363135,0.042322,...,0.584077,0.813012,0.562018,0.888149,0.898018,0.721708,0.00602,0.011235,0.037647,0.802346
25%,2572.0,0.840499,0.477556,0.00557,0.638141,0.74089,0.80599,0.53591,0.426648,0.136758,...,0.705779,0.906008,0.619374,0.947752,0.959895,0.828412,0.237511,0.099688,0.294536,0.883384
50%,5093.0,0.874068,0.56841,0.011361,0.700434,0.797843,0.882864,0.55826,0.463914,0.208049,...,0.768509,0.928947,0.643171,0.960372,0.970832,0.857375,0.644957,0.148946,0.516405,0.898968
75%,7482.0,0.911318,0.664578,0.036593,0.758152,0.851409,0.923568,0.607584,0.526887,0.32491,...,0.850948,0.943629,0.672581,0.969796,0.977953,0.881149,0.795637,0.189235,0.725209,0.913335
max,9640.0,0.967555,0.834854,0.521709,0.927814,0.979907,0.978871,0.698425,0.748301,0.656336,...,0.933262,0.981936,0.759958,0.989129,0.993152,0.95525,0.9402,0.33853,0.980819,0.955978
