In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import SGDClassifier

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
train_prompts = pd.read_csv('/content/drive/MyDrive/dataset/train_prompts.csv')
train_essays = pd.read_csv('/content/drive/MyDrive/dataset/train_essays.csv')
test_essays = pd.read_csv('/content/drive/MyDrive/dataset/test_essays.csv')
df_train_extra = pd.read_csv('/content/drive/MyDrive/dataset/train_v4_drcat_01.csv')

In [4]:
df_train_extra.rename(columns = {"label":"generated"}, inplace=True)
df_train_essays_final = pd.concat([df_train_extra[["text", "generated"]], train_essays[["text", "generated"]]])

df_train_essays_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 74951 entries, 0 to 1377
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       74951 non-null  object
 1   generated  74951 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.7+ MB


In [5]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.svm import OneClassSVM
from joblib import dump
from tqdm.auto import tqdm

In [6]:
class TextDataset(Dataset):
    def __init__(self, texts):
        self.texts = texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return self.texts[idx]

In [7]:
def compute_entropy(logits):
    logits = torch.log_softmax(logits.float(), dim=-1)
    p_log_p = logits.exp() * logits
    entD = -p_log_p.sum(dim=-1)
    return entD

In [8]:
def extract_features(model, tokenizer, texts, batch_size, max_length):
    dataset = TextDataset(texts)
    loader = DataLoader(dataset, batch_size=batch_size)

    features = []
    with torch.no_grad():
        for batch in tqdm(loader, desc="Processing texts"):
            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
            inputs = {k: v.to(model.device) for k, v in inputs.items()}
            outputs = model(**inputs)
            logits = outputs.logits
            entD = compute_entropy(logits)
            features.append(entD.cpu().numpy())

    return np.concatenate(features, axis=0)

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
data = df_train_essays_final
# Split data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
texts = data['text'].tolist()



In [29]:
# Filter to get 5000 human-written samples for training if available
human_train_data = train_data[train_data['generated'] == 0]
if len(human_train_data) > 10000:
    human_train_data = human_train_data.sample(n=10000, random_state=42)

# Initialize model and tokenizer
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# Extract features for the selected training and full testing datasets
batch_size = 16
max_length = 512
train_features = extract_features(model, tokenizer, human_train_data['text'].tolist(), batch_size, max_length)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Processing texts:   0%|          | 0/625 [00:00<?, ?it/s]

In [30]:
# prompt: write code to save the train features

import joblib
joblib.dump(train_features, '/content/drive/MyDrive/dataset/train_features.joblib')


['/content/drive/MyDrive/dataset/train_features.joblib']

In [31]:
# Train the classifier on human-written training data
classifier = OneClassSVM(kernel='rbf', gamma='auto', nu=0.05)
classifier.fit(train_features)


In [32]:
# test_features = extract_features(model, tokenizer, test_data['text'].tolist(), batch_size, max_length)

In [33]:
 test_features.shape

(14991, 512)

In [21]:
joblib.dump(test_features, '/content/drive/MyDrive/dataset/test_features.joblib')

['/content/drive/MyDrive/dataset/test_features.joblib']

In [34]:
# zscore
z_mean = np.mean(train_features, 0, keepdims=True)
z_std  = np.maximum(np.std(train_features, 0, keepdims=True), 1e-4)
train_feats = (train_features - z_mean)/z_std
np.savez('zscore.npz', z_std=z_std, z_mean=z_mean)

In [35]:
test_features = (test_features - z_mean)/z_std

In [36]:

# Predict on testing data and evaluate the model
y_test = test_data['generated'].values
y_pred = -1.0*classifier.decision_function(test_features)
y_pred = np.where(y_pred > 0.5, 1, 0)

print(classification_report(y_test, y_pred))

# Save the model
dump(classifier, 'oneClassSVM.joblib')

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      5727
           1       0.62      1.00      0.76      9264

    accuracy                           0.62     14991
   macro avg       0.31      0.50      0.38     14991
weighted avg       0.38      0.62      0.47     14991



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


['oneClassSVM.joblib']

In [27]:
y_pred

array([0.64049216, 0.60607023, 0.63766939, ..., 0.6400126 , 0.63738088,
       0.64050589])

In [37]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, log_loss, matthews_corrcoef
)

# Assuming y_test and y_pred are already defined
# For One-Class SVM, we need to use decision_function or predict_proba to get the scores
# However, One-Class SVM does not provide probability estimates directly, so we use decision_function
y_scores = classifier.decision_function(test_features)
y_pred_proba = (y_scores - y_scores.min()) / (y_scores.max() - y_scores.min())  # Scale to [0, 1]

# Calculate metrics
model_metrics = {
    'accuracy': accuracy_score(y_test, y_pred),
    'precision': precision_score(y_test, y_pred),
    'recall': recall_score(y_test, y_pred),
    'f1': f1_score(y_test, y_pred),
    'roc_auc': roc_auc_score(y_test, y_pred_proba),
    'log_loss': log_loss(y_test, y_pred_proba),
    'mcc': matthews_corrcoef(y_test, y_pred)
}

print(model_metrics)

{'accuracy': 0.6179707824694817, 'precision': 0.6179707824694817, 'recall': 1.0, 'f1': 0.7638837353123067, 'roc_auc': 0.08289944338441096, 'log_loss': 6.5735374599389536, 'mcc': 0.0}


In [38]:
# Save the metrics and probabilities
joblib.dump(model_metrics, '/content/drive/MyDrive/dataset/modelentropy_metrics.pkl')
joblib.dump(y_pred_proba, '/content/drive/MyDrive/dataset/modelentropy_y_pred_proba.pkl')

['/content/drive/MyDrive/dataset/modelentropy_y_pred_proba.pkl']