In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import numpy as np
import evaluate
import torch
import json
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split

In [None]:
def predict_whole_text(text, tokenizer, model):
    """
    Predicts if a text is human (label = 0) or AI (label = 1)
    Parameters:
        text: text to predict
        tokenizer: callable that converts `text` into the keyword inputs of
            `model`; it is called with return_tensors="pt" and truncation=True
        model: classification model; must provide eval() and, when called
            with the tokenizer output, return an object with a `logits`
            attribute whose first row holds the class scores
    Returns:
        dict: with the keys
            "prediction": index of the class with the highest probability
            "confidence": the larger of the human and AI probabilities
            "probabilities": {"human": probability of class 0,
                              "ai": probability of class 1}
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Keep the inputs on the same device as the model so the forward pass does
    # not fail for a model that was moved to an accelerator. Skipped when the
    # model does not expose `device` or the tokenizer output has no `to()`.
    device = getattr(model, "device", None)
    if device is not None and hasattr(inputs, "to"):
        inputs = inputs.to(device)

    model.eval()

    with torch.no_grad():
        logits = model(**inputs).logits

        # torch.softmax replaces the former F.softmax call: `F` was never
        # imported, so the old line raised NameError on every call.
        probabilities = torch.softmax(logits, dim=-1)

        # .item() requires a single prediction, i.e. exactly one input text.
        predicted_class = torch.argmax(probabilities, dim=-1).item()

        human_prob = probabilities[0][0].item()
        ai_prob = probabilities[0][1].item()

    return {
        "prediction": predicted_class,
        "confidence": max(human_prob, ai_prob),
        "probabilities": {
            "human": human_prob,
            "ai": ai_prob,
        },
    }

In [None]:
def predict_paragraph(text, tokenizer, model):
    """
    Placeholder for paragraph-level prediction (not implemented yet).
    Parameters:
        text: currently unused
        tokenizer: currently unused
        model: currently unused
    Returns:
        None: the function performs no work
    """
    # NOTE(review): aggregation_strategy1_prediction indexes this function's
    # result with ["prediction"], so it presumably should return a dict shaped
    # like the one from predict_whole_text once implemented - confirm.
    return None

In [None]:
def predict_sentence(text, tokenizer, model):
    """
    Placeholder for sentence-level prediction (not implemented yet).
    Parameters:
        text: currently unused
        tokenizer: currently unused
        model: currently unused
    Returns:
        None: the function performs no work
    """
    # NOTE(review): aggregation_strategy1_prediction indexes this function's
    # result with ["prediction"], so it presumably should return a dict shaped
    # like the one from predict_whole_text once implemented - confirm.
    return None

In [None]:
def aggregation_strategy1_prediction(
    input,
    whole_text_classification_model,
    whole_text_classification_tokenizer,
    paragraph_classification_model,
    paragraph_classification_tokenizer,
    sentence_classification_model,
    sentence_classification_tokenizer,
):
    """
    Classifies the input string with all three models and combines their
    predictions by majority vote.
        Parameters:
            - input: input text to be classified
            - whole_text_classification_model: model for whole text classification
            - whole_text_classification_tokenizer: tokenizer for whole text model
            - paragraph_classification_model: model for paragraph classification
            - paragraph_classification_tokenizer: tokenizer for paragraph model
            - sentence_classification_model: model for sentence classification
            - sentence_classification_tokenizer: tokenizer for sentence model
        Returns:
            - prediction: 1 (AI) when the "prediction" values of the three
              results sum to at least 2, otherwise 0 (human)
    """
    # Run every predictor first; each result is read through its
    # "prediction" entry afterwards.
    whole_text_result = predict_whole_text(
        input, whole_text_classification_tokenizer, whole_text_classification_model
    )
    paragraph_result = predict_paragraph(
        input, paragraph_classification_tokenizer, paragraph_classification_model
    )
    sentence_result = predict_sentence(
        input, sentence_classification_tokenizer, sentence_classification_model
    )

    ai_votes = sum(
        result["prediction"]
        for result in (whole_text_result, paragraph_result, sentence_result)
    )

    # With labels 0/1, a sum of 2 or more means at least two models voted AI.
    if ai_votes >= 2:
        return 1
    return 0