In [7]:
import numpy as np
import pandas as pd
import os
import random
os.environ["TF_USE_LEGACY_KERAS"] = "True"


def set_seed(seed: int) -> None:
    """Seed the random number generators this notebook relies on.

    Args:
        seed: Integer used to seed Python's ``random`` module and NumPy's
            legacy global generator; its string form is also written to the
            ``PYTHONHASHSEED`` environment variable.
    """
    # NOTE(review): hash randomization is normally fixed when the interpreter
    # starts, so this likely only influences child processes -- confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)
    # NumPy's global generator (the original note says sklearn draws from it).
    np.random.seed(seed)
    # Python's built-in generator.
    random.seed(seed)


set_seed(25)
import ktrain
from ktrain import text

In [8]:
import tensorflow as tf
from tensorflow import keras  # Optional, but good for structured access

# Restore the saved ktrain predictor from its directory on disk.
predictor_dir = '../code/nn_tensorflow/bertao'
predictor = ktrain.load_predictor(predictor_dir)
  



In [9]:
# Read the semicolon-delimited competition inputs and report their layout.
input_csv = 'dataset3_inputs.csv'
competition_input = pd.read_csv(input_csv, sep=';')
for summary_line in (
    f"Loaded competition input data with shape: {competition_input.shape}",
    f"Columns: {competition_input.columns}",
):
    print(summary_line)

Loaded competition input data with shape: (100, 2)
Columns: Index(['ID', 'Text'], dtype='object')


In [11]:
# Normalise a predicted class name to the capitalisation used in the output file.
def format_prediction(pred):
    """Return ``pred`` with canonical capitalisation.

    Args:
        pred: Predicted class name as a string.

    Returns:
        ``'AI'`` or ``'Human'`` when ``pred`` equals one of them ignoring
        case; otherwise ``pred`` itself, unchanged.
    """
    canonical_labels = {'ai': 'AI', 'human': 'Human'}
    # Unknown labels fall through untouched rather than raising.
    return canonical_labels.get(pred.lower(), pred)

# Make predictions: classify each input text individually, then normalise the
# capitalisation of the returned class name with format_prediction.
# The iteration variable is deliberately not called `text`, so it does not
# overwrite the `text` module imported from ktrain at the top of the notebook.
# NOTE(review): predictor.predict is expected to return the class name as a
# string for a single input (format_prediction calls .lower() on it) -- confirm.
predictions = [
    format_prediction(predictor.predict(input_text))
    for input_text in competition_input['Text']
]


In [12]:
# Build the submission table: one row per competition ID with its predicted label.
output_path = 'submissao2-grupo1-s1.csv'
output_df = pd.DataFrame({
    'ID': competition_input['ID'],
    'Label': predictions
})

print("\nSample predictions:")
print(output_df.head())

# Save predictions. The file is written tab-separated, without the index column.
output_df.to_csv(output_path, sep='\t', index=False)
# Report the file that was actually written (the previous message named a
# different, non-existent file).
print(f"\nPredictions saved to {output_path}")


Sample predictions:
     ID  Label
0  D3-1     AI
1  D3-2     AI
2  D3-3     AI
3  D3-4  Human
4  D3-5  Human

Predictions saved to competition_predictions.csv


In [6]:
# Optional: verify the predictions against a disclosed ground-truth file.
# This is best-effort: any failure (file missing, columns missing, no
# overlapping IDs, sklearn unavailable) is reported and the notebook continues.
ground_truth_path = 'dataset2_disclosed_outputs.csv'
try:
    ground_truth = pd.read_csv(ground_truth_path, sep='\t')
    # Inner join on ID; the clashing 'Label' columns become Label_pred / Label_true.
    merged = output_df.merge(ground_truth, on='ID', suffixes=('_pred', '_true'))
    if merged.empty:
        # Without this check the accuracy below would silently print as nan.
        raise ValueError(
            f"no prediction IDs match the IDs in {ground_truth_path}"
        )
    accuracy = (merged['Label_pred'] == merged['Label_true']).mean()
    # Name the file that was actually compared and how many rows were matched.
    print(f"\nAccuracy on {ground_truth_path} ({len(merged)} matched rows): {accuracy:.4f}")

    # Print confusion matrix (rows: true label, columns: predicted label,
    # both in the order Human, AI).
    # Imported here so that sklearn stays optional for the rest of the notebook.
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(merged['Label_true'], merged['Label_pred'], labels=['Human', 'AI'])
    print("\nConfusion Matrix:")
    print("              Predicted")
    print("             Human    AI")
    print(f"True Human:  {cm[0][0]:5d}  {cm[0][1]:5d}")
    print(f"     AI:     {cm[1][0]:5d}  {cm[1][1]:5d}")

except Exception as e:
    print(f"Could not verify against ground truth: {e}")


Accuracy on dataset1: 0.6800

Confusion Matrix:
              Predicted
             Human    AI
True Human:     12     14
     AI:         2     22
