In [1]:
# Importing the necessary libraries
import pandas as pd
import tensorflow as tf
from transformers import TFRobertaForSequenceClassification, RobertaTokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [3]:
# Read the held-out examples to be scored from the working directory.
test_data = pd.read_csv(filepath_or_buffer="test.csv")

In [4]:
# Both text columns are handed to the tokenizer, which only accepts Python
# strings. Replace missing values with empty strings and coerce anything
# non-string to str in *both* columns, so a NaN in 'Claim' cannot make
# tokenization fail.
for text_column in ('Claim', 'Evidence'):
    test_data[text_column] = test_data[text_column].fillna("").astype(str)

In [5]:
# Fetch the RoBERTa tokenizer files for the named checkpoint.
tokenizer_checkpoint = "roberta-base"
tokenizer = RobertaTokenizer.from_pretrained(tokenizer_checkpoint)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [6]:
# Encode each (claim, evidence) pair as one sequence pair. Padding and
# truncation are enabled and the result is returned as TensorFlow tensors.
claim_texts = test_data['Claim'].tolist()
evidence_texts = test_data['Evidence'].tolist()
test_encodings = tokenizer(
    claim_texts,
    evidence_texts,
    padding=True,
    truncation=True,
    return_tensors='tf',
)

# Keep only the two inputs fed to the model, slice them per example and
# group the examples into batches of 16 for prediction.
model_input_keys = ('input_ids', 'attention_mask')
model_inputs = {key: test_encodings[key] for key in model_input_keys}
test_dataset = tf.data.Dataset.from_tensor_slices(model_inputs)
test_dataset = test_dataset.batch(16)


In [11]:
# Load the fine-tuned model from Google Drive and predict a label for every
# example in `test_dataset`.
from google.colab import drive

# Mount Google Drive so the saved-model directory is reachable
drive.mount('/content/drive')
# Path to the saved model inside Google Drive
model_path = "/content/drive/MyDrive/roberta_saved_model"

# Rebuild the Keras model; `custom_objects` lets Keras resolve the
# Hugging Face model class by name.
loaded_model = tf.keras.models.load_model(
    model_path,
    custom_objects={"TFRobertaForSequenceClassification": TFRobertaForSequenceClassification}
)

# Read the logits by key when predict() returns a mapping (this also covers
# Hugging Face ModelOutput objects, which are dict subclasses); otherwise fall
# back to attribute access as before.
raw_outputs = loaded_model.predict(test_dataset)
if isinstance(raw_outputs, dict):
    predicted_logits = raw_outputs["logits"]
else:
    predicted_logits = raw_outputs.logits

# Turn logits into exactly one label per example. Sigmoid + 0.5 threshold is
# only meaningful for a single-logit head; applied to a multi-logit head it
# would emit one "prediction" per logit instead of one per row.
if predicted_logits.shape[-1] == 1:
    # Single-logit head: sigmoid gives the positive-class probability
    predicted_probabilities = tf.nn.sigmoid(predicted_logits)
    # Convert probabilities to binary predictions (threshold at 0.5)
    binary_predictions = (predicted_probabilities > 0.5).numpy().astype(int)
else:
    # Multi-logit head (e.g. num_labels=2): softmax for class probabilities,
    # and the predicted label is the index of the largest logit
    predicted_probabilities = tf.nn.softmax(predicted_logits, axis=-1)
    binary_predictions = tf.argmax(predicted_logits, axis=-1).numpy().astype(int)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# Collapse the predictions into a single 1-D column of labels
flat_predictions = binary_predictions.flatten()

# Fail loudly instead of writing a misaligned file: there must be exactly one
# prediction per test row (flattening a 2-D array with several values per row
# would otherwise silently produce extra rows).
if len(flat_predictions) != len(test_data):
    raise ValueError(
        f"Expected {len(test_data)} predictions (one per test row), "
        f"got {len(flat_predictions)}"
    )

# Create a DataFrame with the predictions
prediction_df = pd.DataFrame({"prediction": flat_predictions})

# Save the DataFrame to a CSV file without the index column
prediction_df.to_csv("predictions.csv", index=False)
