<a href="https://colab.research.google.com/github/jeffreylowzg/LLM_homework6/blob/jeffrey-commits/data_clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U "huggingface_hub[cli]"
!pip install torch transformers[torch] numpy tqdm datasets peft accelerate

Download the dataset and save a 5% sample

In [None]:
from datasets import load_dataset
import pandas as pd
import os

# Make sure the output folder is there before anything is written to it
os.makedirs("data", exist_ok=True)

# Pull the full training split from the Hugging Face Hub
dataset = load_dataset("dmitva/human_ai_generated_text", split="train")

# Number of rows that make up 5% of the split (rounded down)
sample_size = int(0.05 * len(dataset))

# Shuffle with a fixed seed, then keep the first `sample_size` rows
shuffled_dataset = dataset.shuffle(seed=42)
sampled_dataset = shuffled_dataset.select(range(sample_size))

# Materialise the sample as a DataFrame and persist it as CSV (no index column)
df = pd.DataFrame(sampled_dataset)
df.to_csv("data/sample_5_percent.csv", index=False)

print("5% of the dataset has been saved to 'data/sample_5_percent.csv'")


Read the saved sample and expand each row into two labelled records: label 0 for the human text and label 1 for the AI text

In [None]:
import json

# Read the sampled CSV file
df = pd.read_csv("data/sample_5_percent.csv")

# A missing text cell comes back from the CSV as NaN. json would write it as the bare
# token NaN, which is not valid JSON and is useless as training text, so rows that lack
# either text are dropped before any records are built.
df = df.dropna(subset=["human_text", "ai_text"])

# Each source row yields two records: the human text (label 0) and the AI text (label 1).
data = []
for row in df.itertuples(index=False):
    # A missing instruction is written as JSON null rather than NaN.
    instructions = None if pd.isna(row.instructions) else row.instructions

    # Append the human text with label 0
    data.append({
        "text": row.human_text,
        "instructions": instructions,
        "label": 0
    })

    # Append the AI text with label 1
    data.append({
        "text": row.ai_text,
        "instructions": instructions,
        "label": 1
    })

# Save the processed data as JSON Lines (one JSON object per line)
outfile = "data/sample_5_percent.jsonl"
with open(outfile, "w", encoding="utf-8") as f:
    for d in data:
        f.write(json.dumps(d) + "\n")

print(f"The dataset has been saved to {outfile} with the specified format.")

Split the dataset into train and test sets

In [None]:
import json
from sklearn.model_selection import train_test_split

# Paths
# Input: the labelled JSONL file that gets split below.
original_data_path = "data/sample_5_percent.jsonl"
# Outputs: where the train and test portions are written.
train_data_path = "data/train.jsonl"
test_data_path = "data/test.jsonl"

# Function to split JSONL file
def split_jsonl_file(input_path, train_path, test_path, test_size=0.2, random_state=42):
    """Split a JSON Lines file into a train file and a test file.

    Args:
        input_path: Path of the JSONL file to read (one JSON object per line).
        train_path: Destination path for the training portion.
        test_path: Destination path for the test portion.
        test_size: Share of records assigned to the test file, passed straight to
            ``train_test_split``.
        random_state: Seed passed to ``train_test_split`` so the split is repeatable.
            Defaults to 42, the value that used to be hard-coded.

    Blank lines in the input (for example a trailing empty line) are skipped instead of
    raising ``json.JSONDecodeError``.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]

    train_lines, test_lines = train_test_split(
        records, test_size=test_size, random_state=random_state
    )

    # Write each portion back out, one JSON object per line.
    for path, subset in ((train_path, train_lines), (test_path, test_lines)):
        with open(path, "w", encoding="utf-8") as out_file:
            out_file.writelines(json.dumps(record) + "\n" for record in subset)

if __name__ == "__main__":
    # Write the train/test files next to the source JSONL, then report where they went
    split_jsonl_file(
        input_path=original_data_path,
        train_path=train_data_path,
        test_path=test_data_path,
    )
    print(f"Data split completed. Train: {train_data_path}, Test: {test_data_path}")


In [None]:
!mkdir -p models/pythia-160m
!huggingface-cli download EleutherAI/pythia-160m --local-dir ./models/pythia-160m
!cd ../..

In [None]:
# SECURITY: an API key must never be committed in a notebook. The key that used to be
# hardcoded on this line has been published with the repository and should be revoked
# in the Weights & Biases account settings.
import os
from getpass import getpass

# wandb picks up WANDB_API_KEY from the environment, so the key is taken from there and
# only prompted for (without echoing) when it is not already set.
if not os.environ.get("WANDB_API_KEY"):
    os.environ["WANDB_API_KEY"] = getpass("Weights & Biases API key: ")

Train and evaluate

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from sklearn.metrics import accuracy_score
import numpy as np

# Builds the LoRA-wrapped sequence classifier and loads the unsplit sample file.
# Specify the local directory where the model was downloaded
model_path = "./models/pythia-160m"

# Load the tokenizer and model for sequence classification
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)  # Binary classification

# Add padding token if it doesn't exist and set it as the pad token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))  # Resize model embeddings to match the new pad token

# Explicitly set pad_token_id in model configuration
model.config.pad_token_id = tokenizer.pad_token_id

# LoRA Configuration
lora_config = LoraConfig(
    task_type="SEQ_CLS",   # Sequence classification
    inference_mode=False,
    r=16,                  # LoRA rank
    lora_alpha=32,         # Scaling factor
    lora_dropout=0.1       # Regularization
)

# Wrap the model with LoRA
model = get_peft_model(model, lora_config)

# Freeze the first few layers of GPT-NeoX
num_layers_to_freeze = 6  # Adjust based on model depth and dataset size

# For GPT-NeoX, transformer layers are in model.base_model.gpt_neox.layers
# NOTE(review): this loop runs after get_peft_model and turns off requires_grad for every
# parameter under these layers, so it presumably also freezes any LoRA adapter weights
# PEFT placed inside them - confirm that is intended.
for layer in model.base_model.gpt_neox.layers[:num_layers_to_freeze]:
    for param in layer.parameters():
        param.requires_grad = False

# Report the trainable-parameter count. This call only reports; nothing in this cell
# re-enables gradients for the classification head or the LoRA layers.
model.print_trainable_parameters()  # Check trainable parameters

# Load the dataset
# This cell reads the full, unsplit sample file; the train/eval split is made further down.
data_path = "data/sample_5_percent.jsonl"
dataset = load_dataset("json", data_files=data_path)

# Preprocessing function for tokenization and label mapping
def preprocess_function(examples):
    """Tokenize a batch of examples and attach their class labels.

    Reads the "text" and "label" columns of ``examples``. Every text is truncated or
    padded to exactly 128 tokens, and the labels are copied under the "labels" key.
    """
    encoded = tokenizer(
        examples["text"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )
    encoded["labels"] = examples["label"]
    return encoded

# Tokenize the dataset
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Hold out 20% of the records for evaluation. The seed pins the shuffle, so restarting
# the kernel reproduces the same train/eval partition and the metrics stay comparable.
split_datasets = tokenized_dataset["train"].train_test_split(test_size=0.2, seed=42)
train_dataset = split_datasets["train"]
eval_dataset = split_datasets["test"]

# Define a function to compute accuracy
def compute_metrics(eval_pred):
    """Return the accuracy for a ``(logits, labels)`` pair as ``{"accuracy": value}``.

    The predicted class of each example is the index of its largest logit.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return {"accuracy": accuracy_score(gold, predicted)}

# Set up training arguments
import inspect

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers releases.
# The install cell does not pin a version, so use whichever keyword this install accepts.
eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./models/pythia-160m-finetuned-classifier-lora",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    save_strategy="epoch",     # Save the model at the end of each epoch
    save_total_limit=2,
    logging_dir='./logs',
    logging_steps=100,
    load_best_model_at_end=True,
    learning_rate=1e-4,        # Adjusted for PEFT
    # Mixed precision needs a CUDA device; fall back to full precision without one
    # instead of failing when the arguments are built.
    fp16=torch.cuda.is_available(),
    **{eval_strategy_key: "epoch"},  # Evaluate at the end of each epoch (matches save_strategy)
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Save the final model (LoRA adapter) and tokenizer
model.save_pretrained("./models/pythia-160m-finetuned-classifier-lora")
tokenizer.save_pretrained("./models/pythia-160m-finetuned-classifier-lora")

print("Model fine-tuning completed and saved to './models/pythia-160m-finetuned-classifier-lora'")

# Evaluate the model on the held-out split
eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from sklearn.metrics import accuracy_score
import numpy as np

# Builds the LoRA-wrapped sequence classifier and loads the pre-split train/test files.
# Paths for train and test data
train_data_path = "data/train.jsonl"
test_data_path = "data/test.jsonl"

# Specify the local directory where the model was downloaded
model_path = "./models/pythia-160m"

# Load the tokenizer and model for sequence classification
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)  # Binary classification

# Add padding token if it doesn't exist and set it as the pad token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))  # Resize model embeddings to match the new pad token

# Explicitly set pad_token_id in model configuration
model.config.pad_token_id = tokenizer.pad_token_id

# LoRA Configuration
lora_config = LoraConfig(
    task_type="SEQ_CLS",   # Sequence classification
    inference_mode=False,
    r=16,                  # LoRA rank
    lora_alpha=32,         # Scaling factor
    lora_dropout=0.1       # Regularization
)

# Wrap the model with LoRA
model = get_peft_model(model, lora_config)

# Freeze the first few layers of GPT-NeoX
num_layers_to_freeze = 6  # Adjust based on model depth and dataset size

# For GPT-NeoX, transformer layers are in model.base_model.gpt_neox.layers
# NOTE(review): this loop runs after get_peft_model and turns off requires_grad for every
# parameter under these layers, so it presumably also freezes any LoRA adapter weights
# PEFT placed inside them - confirm that is intended.
for layer in model.base_model.gpt_neox.layers[:num_layers_to_freeze]:
    for param in layer.parameters():
        param.requires_grad = False

# Report the trainable-parameter count. This call only reports; nothing in this cell
# re-enables gradients for the classification head or the LoRA layers.
model.print_trainable_parameters()  # Check trainable parameters

# Load the split datasets
# Both results are indexed with ["train"], including the one read from the test file.
train_dataset = load_dataset("json", data_files=train_data_path)["train"]
test_dataset = load_dataset("json", data_files=test_data_path)["train"]

# Preprocessing function for tokenization and label mapping
def preprocess_function(examples):
    """Turn a batch of raw examples into fixed-length model inputs.

    The "text" column is tokenized with truncation and padding to 128 tokens; the
    "label" column is carried over under the "labels" key.
    """
    tokenizer_kwargs = {"padding": "max_length", "truncation": True, "max_length": 128}
    batch = tokenizer(examples["text"], **tokenizer_kwargs)
    batch["labels"] = examples["label"]
    return batch

# Run the same batched tokenization over the train split, then the test split
tokenized_train_dataset, tokenized_test_dataset = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset)
)

# Define a function to compute accuracy
def compute_metrics(eval_pred):
    """Compute classification accuracy from a ``(logits, labels)`` pair.

    Returns a dict with the single key "accuracy".
    """
    logits, labels = eval_pred
    # The class with the largest logit is the prediction for each example
    predicted_classes = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predicted_classes)}

# Set up training arguments
import inspect

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers releases.
# The install cell does not pin a version, so use whichever keyword this install accepts.
eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./models/pythia-160m-finetuned-classifier-lora",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    save_strategy="epoch",     # Save the model at the end of each epoch
    save_total_limit=2,
    logging_dir='./logs',
    logging_steps=100,
    load_best_model_at_end=True,
    learning_rate=1e-4,        # Adjusted for PEFT
    # Mixed precision needs a CUDA device; fall back to full precision without one
    # instead of failing when the arguments are built.
    fp16=torch.cuda.is_available(),
    **{eval_strategy_key: "epoch"},  # Evaluate at the end of each epoch (matches save_strategy)
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Save the final model (LoRA adapter) and tokenizer
model.save_pretrained("./models/pythia-160m-finetuned-classifier-lora")
tokenizer.save_pretrained("./models/pythia-160m-finetuned-classifier-lora")

print("Model fine-tuning completed and saved to './models/pythia-160m-finetuned-classifier-lora'")

# Evaluate the model on the test file
eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")


Evaluation on untrained model

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from sklearn.metrics import accuracy_score
import numpy as np

# Builds the same LoRA-wrapped classifier as the training cells, with no layer freezing.
# Specify the paths for the train and test datasets
train_data_path = "data/train.jsonl"
test_data_path = "data/test.jsonl"

# Specify the local directory where the model was downloaded
model_path = "./models/pythia-160m"

# Load the tokenizer and model for sequence classification
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)  # Binary classification

# Add padding token if it doesn't exist and set it as the pad token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))  # Resize model embeddings to match the new pad token

# Explicitly set pad_token_id in model configuration
model.config.pad_token_id = tokenizer.pad_token_id

# LoRA Configuration
lora_config = LoraConfig(
    task_type="SEQ_CLS",   # Sequence classification
    inference_mode=False,
    r=16,                  # LoRA rank
    lora_alpha=32,         # Scaling factor
    lora_dropout=0.1       # Regularization
)

# Wrap the model with LoRA
# The adapter is created fresh here from the base checkpoint; no fine-tuned weights are loaded.
model = get_peft_model(model, lora_config)

# Load the train and test datasets
# Both results are indexed with ["train"], including the one read from the test file.
train_dataset = load_dataset("json", data_files=train_data_path)["train"]
test_dataset = load_dataset("json", data_files=test_data_path)["train"]

# Preprocessing function for tokenization and label mapping
def preprocess_function(examples):
    """Tokenize the "text" column of a batch and copy "label" into "labels".

    Sequences are truncated or padded to a fixed length of 128 tokens.
    """
    features = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    features["labels"] = examples["label"]
    return features

# Apply the batched tokenization to the train split first, then to the test split
tokenized_train_dataset, tokenized_test_dataset = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset)
)

# Define a function to compute accuracy
def compute_metrics(eval_pred):
    """Score predictions by accuracy.

    ``eval_pred`` unpacks into ``(logits, labels)``; the argmax over the last logits axis
    is compared with the labels and returned as ``{"accuracy": value}``.
    """
    raw_logits, true_labels = eval_pred
    chosen = np.argmax(raw_logits, axis=-1)
    score = accuracy_score(true_labels, chosen)
    return {"accuracy": score}

# Set up evaluation arguments
evaluation_args = TrainingArguments(
    output_dir="./models/pythia-160m-eval",
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    # Mixed precision needs a CUDA device; fall back to full precision without one
    # instead of failing when the arguments are built.
    fp16=torch.cuda.is_available(),
)

# Initialize the Trainer for evaluation only
trainer = Trainer(
    model=model,
    args=evaluation_args,
    train_dataset=tokenized_train_dataset,  # Not used below: only evaluate() is called
    eval_dataset=tokenized_test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Evaluate the untrained model
eval_results = trainer.evaluate()
print(f"Evaluation Results (Untrained Model): {eval_results}")


Extract and save the hidden-state embeddings produced just before the classification head.

In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import numpy as np
from datasets import load_dataset
from peft import PeftModel

# Rebuilds the fine-tuned classifier (base model + saved LoRA adapter) and tokenizes the
# first 10 test texts so their hidden states can be extracted.
# Path to the fine-tuned model and test data
model_path = "./models/pythia-160m-finetuned-classifier-lora"
base_model_path = "./models/pythia-160m"  # Base pre-trained model path
test_data_path = "data/test.jsonl"

# Load the tokenizer from the fine-tuned directory
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Add padding token if not already defined
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load the base model
base_model = AutoModelForSequenceClassification.from_pretrained(
    base_model_path,
    num_labels=2
)

# Resize the base model's embedding layer to match the tokenizer
# Done before the adapter is loaded so the base model's vocabulary size matches the tokenizer.
base_model.resize_token_embeddings(len(tokenizer))

# Set the padding token ID in the model configuration
base_model.config.pad_token_id = tokenizer.pad_token_id

# Load the LoRA adapters into the resized base model
model = PeftModel.from_pretrained(base_model, model_path)

# Ensure the model is in evaluation mode
model.eval()

# Load the test dataset
test_dataset = load_dataset("json", data_files=test_data_path)["train"]

# Extract the text prompts from the dataset
test_prompts = test_dataset["text"][:10]  # Select only the first 10 inputs

# Tokenize the test prompts
# padding=True pads to the longest prompt in this batch, capped at 128 tokens by truncation.
inputs = tokenizer(
    test_prompts,
    padding=True,  # Enable padding
    truncation=True,
    max_length=128,
    return_tensors="pt"
)

# Move tensors to the appropriate device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
inputs = {key: value.to(device) for key, value in inputs.items()}

# Pass the inputs through the model to get hidden states
with torch.no_grad():
    outputs = model.base_model(**inputs, output_hidden_states=True)
    # Last layer's hidden states: one vector per token position
    hidden_states = outputs.hidden_states[-1]

    # GPT-NeoX is a causal decoder and has no CLS token: position 0 only ever sees the
    # first token. The classification head reads the last non-padding token, so that is
    # the position pooled here. Multiplying positions by the attention mask and taking
    # the argmax finds that index for both right- and left-padded batches.
    attention_mask = inputs["attention_mask"]
    positions = torch.arange(attention_mask.shape[1], device=attention_mask.device)
    last_token_idx = (positions * attention_mask).argmax(dim=1)
    batch_idx = torch.arange(hidden_states.shape[0], device=hidden_states.device)
    pooled_embeddings = hidden_states[batch_idx, last_token_idx, :]  # One vector per prompt

# Convert embeddings to a numpy array for saving
pooled_embeddings_np = pooled_embeddings.cpu().numpy()

# Bundle the prompts with their embeddings (as nested lists) and write them out as JSON
output_file = "data/test_prompt_embeddings.json"
output_data = dict(
    prompts=test_prompts,
    embeddings=pooled_embeddings_np.tolist(),
)

with open(output_file, "w") as f:
    f.write(json.dumps(output_data, indent=4))

print(f"Embeddings saved to {output_file}")


Some weights of GPTNeoXForSequenceClassification were not initialized from the model checkpoint at ./models/pythia-160m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embeddings saved to data/test_prompt_embeddings.json
