In [1]:
import os
# Print the full path of every file found beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

/kaggle/input/kaggle-watson-bert-baseline/__results__.html
/kaggle/input/kaggle-watson-bert-baseline/watson-bert-base-multilingual-cased.h5
/kaggle/input/kaggle-watson-bert-baseline/__notebook__.ipynb
/kaggle/input/kaggle-watson-bert-baseline/__output__.json
/kaggle/input/kaggle-watson-bert-baseline/custom.css
/kaggle/input/contradictory-my-dear-watson/sample_submission.csv
/kaggle/input/contradictory-my-dear-watson/train.csv
/kaggle/input/contradictory-my-dear-watson/test.csv


In [2]:
# Import libraries
import pandas as pd
import numpy as np

from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf

In [3]:
# Hyperparameters used by the rest of the notebook
model_name = "bert-base-multilingual-cased"
max_length = 120
batch_size = 16

# Locations of the trained weights and of the competition test set
weights_path = "/kaggle/input/kaggle-watson-bert-baseline/watson-bert-base-multilingual-cased.h5"
test_csv_path = "/kaggle/input/contradictory-my-dear-watson/test.csv"

# Initialise the tokenizer that matches the pretrained checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the 3-label sequence classifier, then load the weights of the trained model
model = TFAutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
)
model.load_weights(weights_path)

# Load the test data
test = pd.read_csv(test_csv_path)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/972k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.87M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.01G [00:00<?, ?B/s]

2022-03-21 05:07:55.513951: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-21 05:07:55.514967: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-21 05:07:55.515610: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-21 05:07:55.516412: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [4]:
# Extract only the premise and hypothesis texts as a list of [premise, hypothesis] pairs
test_text = test[['premise', 'hypothesis']].values.tolist()

# Tokenize + index.
# Use the shared `max_length` hyperparameter instead of a hard-coded 120 so that
# changing the configuration cell actually changes the truncation length here.
test_encoded = tokenizer.batch_encode_plus(
    test_text,
    padding=True,
    max_length=max_length,
    truncation=True,
)

# Convert to a TensorFlow Dataset, batched with the configured batch size
test_dataset = (
    tf.data.Dataset.from_tensor_slices(dict(test_encoded)).batch(batch_size)
)

# Run inference
test_preds = model.predict(test_dataset)

# Inspect the predictions; `logits` is the output of the model's final layer
test_preds

2022-03-21 05:08:22.642375: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


TFSequenceClassifierOutput(loss=None, logits=array([[-1.2926717 , -1.1302493 ,  2.1753988 ],
       [-1.6144578 ,  1.7945433 , -0.05341987],
       [ 2.5566943 , -1.3768957 , -0.8350606 ],
       ...,
       [-0.6557856 ,  0.96328455, -0.13153726],
       [ 2.4181893 , -1.7348192 , -0.36748156],
       [-0.74102587, -1.5265067 ,  2.120946  ]], dtype=float32), hidden_states=None, attentions=None)

In [5]:
# Create the submission file.
# Read the sample submission through the same absolute /kaggle/input path used
# for the other inputs, so this cell does not depend on the current working directory.
submission = pd.read_csv("/kaggle/input/contradictory-my-dear-watson/sample_submission.csv")

# The predicted class is the index of the largest logit in each row.
# NOTE(review): assumes sample_submission.csv rows are in the same order as
# test.csv — TODO confirm, since predictions are assigned positionally.
submission['prediction'] = test_preds["logits"].argmax(axis=1)

# Write the file without the DataFrame index, then preview the first rows
submission.to_csv('submission.csv', index=False)
submission.head()

Unnamed: 0,id,prediction
0,c6d58c3f69,2
1,cefcc82292,1
2,e98005252c,0
3,58518c10ba,1
4,c32b0d16df,2
