In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# THIS FILE HAS BEEN MODIFIED

import os
import shutil

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import pandas as pd
import numpy as np
from official.nlp import optimization  # to create AdamW optimizer

import matplotlib.pyplot as plt

# Raise the TensorFlow logger threshold so that only messages at ERROR level
# or above are emitted for the rest of the notebook.
tf.get_logger().setLevel('ERROR')

In [2]:
# Load the previously exported SavedModel back into memory.
# The directory name is derived from the dataset name, with any '/' replaced
# by '_' so that it forms a single path component.
dataset_name = 'imdb'
saved_model_path = f"./{dataset_name.replace('/', '_')}_bert"
reloaded_model = tf.saved_model.load(saved_model_path)

In [4]:
#get the RTP sentences in
# Only column 1 of the header-less CSV is read; it is exposed as 'sentences'.
# dtype=str together with keep_default_na=False keeps every cell a Python
# string: with pandas' default NA parsing, empty or "NA"-like cells would
# become float NaN and tf.constant would then reject the mixed-type list.
with open("complete_data.csv", "r", encoding="utf-8") as f:
    df_sentences = pd.read_csv(
        f,
        usecols=[1],
        names=['sentences'],
        dtype=str,
        keep_default_na=False,
    )

sentences = df_sentences['sentences'].values.tolist()

# Run the model in fixed-size batches instead of pushing the whole corpus
# through in a single call, so memory use does not grow with the dataset size.
# The per-batch outputs are concatenated along the batch axis, so `results`
# is still one tensor with one row per sentence, in the original order.
BATCH_SIZE = 32
logit_batches = [
    reloaded_model(tf.constant(sentences[start:start + BATCH_SIZE]))
    for start in range(0, len(sentences), BATCH_SIZE)
]
results = tf.sigmoid(tf.concat(logit_batches, axis=0))

In [5]:
# Convert the score tensor to a NumPy array and print two sample rows.
np_results = results.numpy()
print("examples:")
# Each entry pairs a row index with the separator string printed between the
# sentence and its score; the separators differ only in their padding.
for idx, sep in ((2, "       : score:"), (17, "   : score:")):
    print("sentence:", sentences[idx], sep, format(np_results[idx][0], ".6f"))

examples:
sentence: Thus, I now propose that we look to science to solve the question of abortion.        : score: 0.696205
sentence: Many voters believe that this election hinged on each candidate’s abortion stance.    : score: 0.195648


In [6]:
# Turn the score array into a plain Python list of strings, taking the first
# element of every row and formatting it with six decimal places.
list_results = ["{:.6f}".format(row[0]) for row in np_results]

In [7]:
# Load every column of complete_data.csv into the frame that will become the
# final dataset. header=None makes pandas treat the first line as data and
# label the columns with integers.
with open("complete_data.csv", mode="r", encoding="utf-8") as csv_file:
    df_final = pd.read_csv(csv_file, header=None)

df_final.head()

Unnamed: 0,0,1,2,3
0,0,Call Inter—Campus programs 1800-327-6013 S...,1989-1990,abortion
1,1,"The Rlng-tum Phl, October 19, 1989 Abortion: ...",1989-1990,abortion
2,2,"Thus, I now propose that we look to science to...",1989-1990,abortion
3,3,"So trust me, I’ll not talk of a new utopia, ju...",1989-1990,abortion
4,4,Now the answer to abortion is easy.,1989-1990,abortion


In [8]:
#create a final DataFrame with all data (including the results)
# DataFrame.insert mutates the frame in place and raises ValueError when the
# column already exists, so re-running this cell used to fail. Dropping any
# stale results column first makes the cell safe to execute repeatedly.
RESULTS_COLUMN = 'results-inserted'
if RESULTS_COLUMN in df_final.columns:
    df_final = df_final.drop(columns=RESULTS_COLUMN)

# Place the formatted scores at position 1 of the frame.
df_final.insert(loc=1, column=RESULTS_COLUMN, value=list_results)
df_final.head()

Unnamed: 0,0,results-inserted,1,2,3
0,0,0.998409,Call Inter—Campus programs 1800-327-6013 S...,1989-1990,abortion
1,1,0.471447,"The Rlng-tum Phl, October 19, 1989 Abortion: ...",1989-1990,abortion
2,2,0.696205,"Thus, I now propose that we look to science to...",1989-1990,abortion
3,3,0.147789,"So trust me, I’ll not talk of a new utopia, ju...",1989-1990,abortion
4,4,0.985572,Now the answer to abortion is easy.,1989-1990,abortion


In [9]:
# Write the combined frame to final_data.csv, omitting both the row index
# and the column header line.
df_final.to_csv(
    "final_data.csv",
    header=False,
    index=False,
)