In [1]:
# Uninstall potentially conflicting packages
!pip uninstall -y transformers accelerate unsloth torch torchvision torchaudio

# Install base packages
!pip install unsloth

# Install dependencies
!pip install -q transformers accelerate peft
!pip install -q datasets evaluate bitsandbytes trl
!pip install -q torch torchvision torchaudio

# Install Colab-optimized unsloth
!pip uninstall unsloth -y
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Install other tools
!pip install pandas scikit-learn
!pip install -q ipywidgets

Found existing installation: transformers 4.46.3
Uninstalling transformers-4.46.3:
  Successfully uninstalled transformers-4.46.3
Found existing installation: accelerate 1.1.1
Uninstalling accelerate-1.1.1:
  Successfully uninstalled accelerate-1.1.1
[0mFound existing installation: torch 2.5.1+cu121
Uninstalling torch-2.5.1+cu121:
  Successfully uninstalled torch-2.5.1+cu121
Found existing installation: torchvision 0.20.1+cu121
Uninstalling torchvision-0.20.1+cu121:
  Successfully uninstalled torchvision-0.20.1+cu121
Found existing installation: torchaudio 2.5.1+cu121
Uninstalling torchaudio-2.5.1+cu121:
  Successfully uninstalled torchaudio-2.5.1+cu121
Collecting unsloth
  Downloading unsloth-2024.12.4-py3-none-any.whl.metadata (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2024.11.8 (from unsloth)
  Downloading unsloth_zoo-2024.12.1-py3-none-any.whl.metadata (16 kB)
Collect

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m77.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m104.9 MB/s[0m eta [36m0:00:00[0m
[?25hFound existing installation: unsloth 2024.12.4
Uninstalling unsloth-2024.12.4:
  Successfully uninstalled unsloth-2024.12.4
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-h7t18g5n/unsloth_b8ddca38ab0e4adaa723e8d0d9cd3709
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-h7t18g5n/unsloth_b8ddca38ab0e4adaa723e8d0d9cd3709
  Resolved https://github.com/unslothai/unsloth.git to commit 85f1fa096afde5efe2fb8521d8ceec8d13

Preparations
------

In [2]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Environment setup
import os
import warnings
import random
import numpy as np
import torch
from datasets import load_dataset, Dataset
from sklearn.model_selection import train_test_split
import pandas as pd
import gc
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments, TrainerCallback
import transformers
import accelerate
import json

# Print versions
print(f"PyTorch version: {torch.__version__}")
print(f"Transformers version: {transformers.__version__}")
print(f"Accelerate version: {accelerate.__version__}")

# Configure environment
os.environ["CUDA_VISIBLE_DEVICES"]="0"
warnings.filterwarnings('ignore')
torch.set_float32_matmul_precision('high')

# Set random seeds
def set_seeds(seed=3407):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# Memory management utilities
def clear_memory():
    gc.collect()
    torch.cuda.empty_cache()

def print_gpu_utilization():
    print("\nGPU Memory Usage:")
    !nvidia-smi | grep -E "Memory|Volatile"

def print_detailed_gpu_info():
    print("\nDetailed GPU Memory Info:")
    print(f"Allocated: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
    print(f"Cached: {torch.cuda.memory_reserved()/1024**2:.2f} MB")
    print(f"Max Allocated: {torch.cuda.max_memory_allocated()/1024**2:.2f} MB")

Mounted at /content/drive
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
PyTorch version: 2.5.1+cu124
Transformers version: 4.46.3
Accelerate version: 1.2.1


set up wandb

In [3]:
!pip install wandb

import wandb
wandb.login()



[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

define the sweep

In [4]:
sweep_config = {
    'method': 'random'
    }

metric = {
    'name': 'loss',
    'goal': 'minimize'
    }

sweep_config['metric'] = metric

parameters_dict = {
    'learning_rate': {
        'distribution': 'log_uniform_values',
        'min': 1e-5,
        'max': 1e-3
        },
    'warmup_ratio': {
        'values': [0.05, 0.1, 0.2]
        },
    'weight_decay': {
        'values': [0.01, 0.03, 0.05]
        },
    'per_device_train_batch_size': {
        'values': [2, 4]
        },
    'gradient_accumulation_steps': {
        'values': [2, 4, 8]
        },
    'epochs': {
        'value': 1
        }
}

sweep_config['parameters'] = parameters_dict
import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'epochs': {'value': 1},
                'gradient_accumulation_steps': {'values': [2, 4, 8]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.001,
                                  'min': 1e-05},
                'per_device_train_batch_size': {'values': [2, 4]},
                'warmup_ratio': {'values': [0.05, 0.1, 0.2]},
                'weight_decay': {'values': [0.01, 0.03, 0.05]}}}


In [5]:
import pprint

pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'epochs': {'value': 1},
                'gradient_accumulation_steps': {'values': [2, 4, 8]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.001,
                                  'min': 1e-05},
                'per_device_train_batch_size': {'values': [2, 4]},
                'warmup_ratio': {'values': [0.05, 0.1, 0.2]},
                'weight_decay': {'values': [0.01, 0.03, 0.05]}}}


Define class
---

In [6]:
class MemoryCallback(TrainerCallback):
    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % 50 == 0:  # 每50步清理一次
            clear_memory()
            print_detailed_gpu_info()

class AIGenerationDetector:
    def __init__(self, max_seq_length=2048, save_dir='/content/drive/MyDrive/ai_detection_model', data_dir='/content/drive/MyDrive/ai_dataset'):
        """
        Initializes the AIGenerationDetector class.
        Args:
            max_seq_length (int): Maximum sequence length for the model.
            save_dir (str): Directory to save the model and checkpoints.
            data_dir (str): Directory to save the dataset.
        """
        self.max_seq_length = max_seq_length
        self.save_dir = save_dir
        self.data_dir = data_dir
        self.model = None
        self.tokenizer = None
        self.train_dataset = None
        self.eval_dataset = None
        self.test_dataset = None
        os.makedirs(self.save_dir, exist_ok=True)
        os.makedirs(self.data_dir, exist_ok=True)


    def download_data(self):
        """
        Downloads the dataset from GitHub and saves to Google Drive.
        """

        github_base_url = "https://raw.githubusercontent.com/botianzhe/CHEAT/main/data/"
        file_names = [
            "ieee-chatgpt-fusion.jsonl",
            "ieee-chatgpt-generation.jsonl",
            "ieee-init.jsonl",
            "ieee-chatgpt-polish.jsonl",
            "ieee-chatgpt-fusion.xlsx",
            "ieee-chatgpt-generation.xlsx",
            "ieee-init.xlsx",
            "ieee-chatgpt-polish.xlsx"
        ]
        print("Downloading data from Github...")
        for file_name in file_names:
              file_url = github_base_url + file_name
              output_path = os.path.join(self.data_dir, file_name) # saving location

              if not os.path.exists(output_path): # download only when the data does not exist.
                print(f"Downloading {file_name}...")
                !wget "{file_url}" -O "{output_path}" # using wget for direct download
              else:
                 print(f"File {file_name} already exists. Skipping...")
        print("All data downloaded!")

    def setup_model(self):
        """
        Loads the pre-trained model and tokenizer and configures the PEFT model.
        """
        clear_memory()
        print("Loading model...")

        try:
            model, tokenizer = FastLanguageModel.from_pretrained(
                model_name="unsloth/Meta-Llama-3.1-8B",
                max_seq_length=self.max_seq_length,
                load_in_4bit=True,
            )

            model = FastLanguageModel.get_peft_model(
                model,
                r=16,
                target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
                lora_alpha=16,
                lora_dropout=0.1,
                bias="none",
                use_gradient_checkpointing=True,
                random_state=3407,
                use_rslora=True,
            )

            self.model = model
            self.tokenizer = tokenizer
            print("Model loaded successfully!")

        except Exception as e:
            print(f"Error loading model: {str(e)}")
            raise

    def prepare_datasets(self, max_samples=1000):
        """
        Prepares the training and evaluation datasets.
        Args:
            max_samples (int): Maximum number of training samples to use.
        """
        clear_memory()
        print("Preparing datasets...")
        try:
            all_data = []  # Initialize a list to hold all training examples.

            # Data holders for each jsonl file
            init_data = {}
            generation_data = {}
            polish_data = {}
            fusion_data = []

            # Load data from each file
            for filename in os.listdir(self.data_dir):
                if filename.endswith("init.jsonl"):
                    filepath = os.path.join(self.data_dir, filename)
                    with open(filepath, 'r', encoding='utf-8') as f:
                        for line in f:
                            try:
                                data_item = json.loads(line)
                                init_data[data_item['id']] = data_item['abstract']  # use id as key
                            except json.JSONDecodeError:
                                print(f"Warning: Skipping invalid JSON line in {filename}")

                elif filename.endswith("generation.jsonl"):
                    filepath = os.path.join(self.data_dir, filename)
                    with open(filepath, 'r', encoding='utf-8') as f:
                        for line in f:
                            try:
                                data_item = json.loads(line)
                                generation_data[data_item['id']] = data_item['abstract']
                            except json.JSONDecodeError:
                                print(f"Warning: Skipping invalid JSON line in {filename}")


                elif filename.endswith("polish.jsonl"):
                    filepath = os.path.join(self.data_dir, filename)
                    with open(filepath, 'r', encoding='utf-8') as f:
                        for line in f:
                            try:
                                data_item = json.loads(line)
                                polish_data[data_item['id']] = data_item['abstract']
                            except json.JSONDecodeError:
                                print(f"Warning: Skipping invalid JSON line in {filename}")

                elif filename.endswith("fusion.jsonl"):
                    filepath = os.path.join(self.data_dir, filename)
                    with open(filepath, 'r', encoding='utf-8') as f:
                        for line in f:
                            try:
                                data_item = json.loads(line)
                                fusion_data.append(data_item)
                            except json.JSONDecodeError:
                                print(f"Warning: Skipping invalid JSON line in {filename}")


            # Create training data by pairing the init, generation, and polish abstracts
            common_ids = set(init_data.keys()) & set(generation_data.keys()) & set(polish_data.keys())
            for id in common_ids:
                all_data.append({'text': init_data[id], 'is_ai_generated': "False"}) # human written data
                all_data.append({'text': generation_data[id], 'is_ai_generated': "True"}) # ai generated data
                all_data.append({'text': polish_data[id], 'is_ai_generated': "True"}) # ai generated data

            # Create a Dataset from list of dicts.
            dataset = Dataset.from_list(all_data)

            # Take required number of samples, if specified
            if max_samples > 0:
                dataset = dataset.shuffle(seed=3407).select(range(max_samples))

            # Split train and val set
            train_idx, val_idx = train_test_split(
                range(len(dataset)),
                test_size=0.1,
                random_state=3407
            )

            train_examples = [self.process_training_example(dataset[i]) for i in train_idx]
            eval_examples = [self.process_training_example(dataset[i]) for i in val_idx]

            self.train_dataset = Dataset.from_list(train_examples)
            self.eval_dataset = Dataset.from_list(eval_examples)

            del train_examples, eval_examples, dataset
            clear_memory()
            print(f"Datasets prepared! Train size: {len(self.train_dataset)}, Eval size: {len(self.eval_dataset)}")


        except Exception as e:
            print(f"Error preparing datasets: {str(e)}")
            raise

    def create_test_dataset(self):
        """
        Prepares the test dataset from fusion data.
        """
        clear_memory()
        print("Preparing test dataset...")
        try:
            fusion_data = []

            for filename in os.listdir(self.data_dir):
                if filename.endswith("fusion.jsonl"):
                    filepath = os.path.join(self.data_dir, filename)
                    with open(filepath, 'r', encoding='utf-8') as f:
                        for line in f:
                            try:
                                data_item = json.loads(line)
                                if 'abstract' in data_item:
                                    fusion_data.append({'text': data_item['abstract']})
                                else:
                                    print(f"Warning: Skipping entry with no 'abstract' in {filename}")
                            except json.JSONDecodeError:
                                print(f"Warning: Skipping invalid JSON line in {filename}")

            self.test_dataset = Dataset.from_list(fusion_data)
            print(f"Test dataset prepared! Size: {len(self.test_dataset)}")

        except Exception as e:
            print(f"Error preparing test dataset: {str(e)}")
            raise


    def process_training_example(self, example):
         """
         Processes a single training example to create the prompt.
         Args:
            example (dict): A dictionary containing the 'text' and 'is_ai_generated' fields.
         Returns:
            dict: A dictionary containing the processed text.
         """
         text = example['text']
         is_ai_generated = example['is_ai_generated']  # Get the field from the dataset

         prompt = (
            "You are an expert in distinguishing between text written by humans and text generated by AI.\n\n"
            f"Given Text: {text}\n\n"
            "Based on careful analysis, is the text generated by an AI? Respond with EXACTLY 'True' or 'False'.\n"
            f"Answer: {str(is_ai_generated)}"
         ) + self.tokenizer.eos_token

         return {"text": prompt}


    def process_test_example(self, example):
        """
        Processes a single test example to create the prompt.
        Args:
            example (dict): A dictionary containing the 'text' field.
        Returns:
            str: The generated prompt for testing.
        """
        text = example['text']

        prompt = (
           "You are an expert in distinguishing between text written by humans and text generated by AI.\n\n"
           f"Given Text: {text}\n\n"
           "Based on careful analysis, is the text generated by an AI? Respond with EXACTLY 'True' or 'False'.\n"
        )
        return prompt

    def setup_training_args(self, config=None):
        """
        Sets up training arguments, either default or for hyperparameter sweeping.
        Args:
           config (dict, optional): Configuration for hyperparameter sweep. Defaults to None.
        Returns:
           TrainingArguments: Training arguments based on the given configuration.
        """
        if config is None:
            # Default training arguments
            return TrainingArguments(
                output_dir=os.path.join(self.save_dir, "checkpoints"),
                per_device_train_batch_size=2,
                gradient_accumulation_steps=8,
                warmup_ratio=0.1,
                num_train_epochs=3,
                learning_rate=0.0006026,
                fp16=True,
                logging_steps=10,
                optim="adamw_torch",
                weight_decay=0.05,
                lr_scheduler_type="cosine",
                seed=3407,
                evaluation_strategy="steps",
                eval_steps=50,
                save_strategy="steps",
                save_steps=50,
                load_best_model_at_end=True,
                metric_for_best_model="eval_loss",
                gradient_checkpointing=True,
                max_grad_norm=0.3,
                report_to="none",
                remove_unused_columns=True,
                dataloader_pin_memory=False
            )
        else:
            # Training arguments for hyperparameter sweep
            return TrainingArguments(
                output_dir=os.path.join(self.save_dir, "checkpoints"),
                per_device_train_batch_size=config.per_device_train_batch_size,
                gradient_accumulation_steps=config.gradient_accumulation_steps,
                warmup_ratio=config.warmup_ratio,
                num_train_epochs=config.epochs,
                learning_rate=config.learning_rate,
                fp16=True,
                logging_steps=10,
                optim="adamw_torch",
                weight_decay=config.weight_decay,
                lr_scheduler_type="cosine",
                seed=3407,
                evaluation_strategy="steps",
                eval_steps=50,
                save_strategy="steps",
                save_steps=50,
                load_best_model_at_end=True,
                metric_for_best_model="eval_loss",
                gradient_checkpointing=True,
                max_grad_norm=0.3,
                report_to="wandb",
                remove_unused_columns=True,
                dataloader_pin_memory=False,
            )

    def train(self):
        """
        Trains the model using the SFTTrainer.
        """
        clear_memory()
        print("Starting training...")

        try:
            trainer = SFTTrainer(
                model=self.model,
                tokenizer=self.tokenizer,
                train_dataset=self.train_dataset,
                eval_dataset=self.eval_dataset,
                dataset_text_field="text",
                max_seq_length=self.max_seq_length,
                dataset_num_proc=2,
                packing=False,
                args=self.setup_training_args(),
                callbacks=[MemoryCallback()]
            )

            trainer.train()

            final_save_path = os.path.join(self.save_dir, "final_model")
            self.model.save_pretrained(final_save_path)
            self.tokenizer.save_pretrained(final_save_path)
            print(f"Training completed! Model saved to {final_save_path}")

        except Exception as e:
            print(f"Error during training: {str(e)}")
            raise


    def sweep(self):
       """
        Conducts a hyperparameter sweep using Weights & Biases.
        """
       print("Starting sweeping...")

       with wandb.init():
            config = wandb.config
            training_args = self.setup_training_args(config)

            try:
                trainer = SFTTrainer(
                    model=self.model,
                    tokenizer=self.tokenizer,
                    train_dataset=self.train_dataset,
                    eval_dataset=self.eval_dataset,
                    dataset_text_field="text",
                    max_seq_length=self.max_seq_length,
                    dataset_num_proc=2,
                    packing=False,
                    args=training_args
                    )
                trainer.train()

                final_save_path = os.path.join(self.save_dir, "final_model")
                self.model.save_pretrained(final_save_path)
                self.tokenizer.save_pretrained(final_save_path)
                print(f"Training completed! Model saved to {final_save_path}")

            except Exception as e:
                  print(f"Error during training: {str(e)}")
                  raise


    def generate_predictions(self, batch_size=16):
         """
        Generates predictions on the test dataset.
        Args:
           batch_size (int): Batch size for generating predictions.
        Returns:
           list: List of prediction values.
        """
         clear_memory()
         print("Generating predictions...")

         try:
            FastLanguageModel.for_inference(self.model)
            predictions = []
            # Convert test data to a list to support batch processing
            test_examples = list(self.test_dataset)
            total_batches = (len(test_examples) + batch_size - 1) // batch_size
            all_predictions = []

            # Process in batches
            for i in range(0, len(test_examples), batch_size):
                if i % (batch_size * 10) == 0:
                    print(f"Processing batch {i//batch_size}/{total_batches}")

                # Get current batch samples
                batch = test_examples[i:i + batch_size]
                prompts = [self.process_test_example(example) for example in batch]

                # Batch encoding
                inputs = self.tokenizer(
                    prompts,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=self.max_seq_length
                ).to("cuda")

                # Batch generation
                with torch.inference_mode():
                    outputs = self.model.generate(
                        **inputs,
                        max_new_tokens=8,  # Reduce the number of generated tokens as we only need True/False
                        temperature=0.1,
                        top_p=0.9,
                        do_sample=False,    # Disable sampling for faster generation
                        use_cache=True,
                        pad_token_id=self.tokenizer.pad_token_id,
                    )

                input_length = inputs['input_ids'].shape[1]
                responses = self.tokenizer.batch_decode(
                    [output[input_length:] for output in outputs],
                    skip_special_tokens=True
                )

                # Batch processing prediction results
                batch_predictions = ["true" in response.lower() for response in responses]
                all_predictions.extend(batch_predictions)

                # Periodically clear memory
                if i % (batch_size * 50) == 0:
                    clear_memory()

            print(f"Total predictions: {len(all_predictions)}")
            assert len(all_predictions) == len(test_examples)

            return all_predictions

         except Exception as e:
            print(f"Error generating predictions: {str(e)}")
            raise


    def create_submission(self):
        """
        Creates the submission file in CSV format using the generated predictions.
        """
        print("Creating submission file...")
        try:
            predictions = self.generate_predictions(batch_size=16)
            print(f"Generated predictions: {len(predictions)}")

            assert len(predictions) == len(self.test_dataset), \
                f"Prediction count mismatch! Expected {len(self.test_dataset)}, got {len(predictions)}"

            submission_df = pd.DataFrame({
                'ID': range(len(predictions)),
                'is_ai_generated': predictions
            })

            print(f"Submission DataFrame shape: {submission_df.shape}")

            submission_path = os.path.join(self.save_dir, 'submission.csv')
            submission_df.to_csv(submission_path, index=False)
            print(f"Submission saved to {submission_path}")

            saved_df = pd.read_csv(submission_path)
            print(f"Saved file shape: {saved_df.shape}")

        except Exception as e:
            print(f"Error creating submission: {str(e)}")
            raise

# Hyper Parameters sweeping

## 1. Initial the sweep

In [7]:
sweep_id = wandb.sweep(sweep_config, project="ai_detection")

Create sweep with ID: xr6gzwrx
Sweep URL: https://wandb.ai/bw2676-new-york-university/ai_detection/sweeps/xr6gzwrx


## 2. Run sweep agent

In [8]:
def run_sweep(data_dir):
    """
    Runs the hyperparameter sweep.
    Args:
      data_dir(str): Path to the data directory.
    """
    trainer = AIGenerationDetector(data_dir = data_dir) # set data directory path
    trainer.download_data()
    trainer.setup_model()
    trainer.prepare_datasets(max_samples=1000)
    trainer.sweep()

In [None]:
wandb.agent(sweep_id, lambda:run_sweep('/content/drive/MyDrive/ai_dataset'), count = 50)

[34m[1mwandb[0m: Agent Starting Run: pge4yv0w with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.0004176073291840377
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2024.12.4 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Model loaded successfully!
Preparing datasets...
Datasets prepared! Train size: 900, Eval size: 100
Starting sweeping...


[34m[1mwandb[0m: Currently logged in as: [33mbw2676[0m ([33mbw2676-new-york-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 2
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.6384,1.652759
100,1.701,1.634398


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,▃▃▂▂█▁▁▁▁▁▁
train/learning_rate,▇██▇▆▅▄▃▂▁▁
train/loss,█▃▂▂▁▃▂▂▂▂▁

0,1
eval/loss,1.6344
eval/runtime,32.2259
eval/samples_per_second,3.103
eval/steps_per_second,0.403
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,0.63568
train/learning_rate,0.0
train/loss,1.6693


[34m[1mwandb[0m: Agent Starting Run: dje5zkcl with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 0.00015982948634405152
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7568,1.662521


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▄▆▂▁
train/learning_rate,▇█▆▃▁
train/loss,█▂▁▁▂

0,1
eval/loss,1.66252
eval/runtime,32.2752
eval/samples_per_second,3.098
eval/steps_per_second,0.403
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.46202
train/learning_rate,1e-05
train/loss,1.7568


[34m[1mwandb[0m: Agent Starting Run: 2m1viabu with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.0007755405694524066
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 225
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7892,1.746292
100,1.6907,1.72754
150,1.7345,1.714156
200,1.7817,1.667445


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▆▅▁
eval/runtime,▅█▇▁
eval/samples_per_second,▄▁▂█
eval/steps_per_second,▁▁▁█
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▃█▁▂▄▅▅▅▄▂▃█▄▂▄▃▂▁▂▁▁▂
train/learning_rate,▃▄▆▇███▇▇▇▆▅▅▄▄▃▃▂▂▁▁▁
train/loss,█▄▂▂▂▂▂▃▂▁▄▃▂▂▂▂▃▂▁▂▂▁

0,1
eval/loss,1.66744
eval/runtime,32.1617
eval/samples_per_second,3.109
eval/steps_per_second,0.404
total_flos,1.0707175827456e+16
train/epoch,1.0
train/global_step,225.0
train/grad_norm,1.26811
train/learning_rate,0.0
train/loss,1.6988


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2t8v91h8 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.00026075339388766875
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 2
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.6388,1.660524
100,1.6982,1.631351


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,█▂▂▂▁▁▁▁▁▁▁
train/learning_rate,▇██▇▆▅▄▃▂▁▁
train/loss,█▃▂▂▁▃▁▁▂▂▁

0,1
eval/loss,1.63135
eval/runtime,32.3199
eval/samples_per_second,3.094
eval/steps_per_second,0.402
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,0.60714
train/learning_rate,0.0
train/loss,1.6676


[34m[1mwandb[0m: Agent Starting Run: jw5z354b with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 7.808626474665544e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 225
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7455,1.684569
100,1.6198,1.66486
150,1.6811,1.654027
200,1.7547,1.649545


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▄▂▁
eval/runtime,▆█▁▆
eval/samples_per_second,▃▁█▄
eval/steps_per_second,▃▁█▅
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▃█▄▄▃▂▂▄▃▂▂▂▂▃▂▂▃▁▃▂▁▂
train/learning_rate,▄▇███▇▇▇▆▆▅▅▄▄▃▃▂▂▁▁▁▁
train/loss,█▇▃▂▂▂▁▂▁▁▃▂▂▂▂▂▂▂▁▂▂▂

0,1
eval/loss,1.64955
eval/runtime,32.2926
eval/samples_per_second,3.097
eval/steps_per_second,0.403
total_flos,1.0707175827456e+16
train/epoch,1.0
train/global_step,225.0
train/grad_norm,1.07455
train/learning_rate,0.0
train/loss,1.6801


[34m[1mwandb[0m: Agent Starting Run: hw9r6kax with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.0006962584093021807
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 2
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.6518,1.678705
100,1.7091,1.644284


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,█▄▁▂▂▂▁▁▁▁▁
train/learning_rate,██▇▆▅▄▃▂▂▁▁
train/loss,█▃▃▂▁▄▂▂▃▂▁

0,1
eval/loss,1.64428
eval/runtime,32.1092
eval/samples_per_second,3.114
eval/steps_per_second,0.405
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,0.76862
train/learning_rate,0.0
train/loss,1.6771


[34m[1mwandb[0m: Agent Starting Run: euxv3688 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 0.00026737887916873106
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7483,1.655112


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▂▁▁▁
train/learning_rate,█▇▅▂▁
train/loss,█▂▁▁▂

0,1
eval/loss,1.65511
eval/runtime,31.8969
eval/samples_per_second,3.135
eval/steps_per_second,0.408
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.42758
train/learning_rate,1e-05
train/loss,1.7483


[34m[1mwandb[0m: Agent Starting Run: rjmkrkm4 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 2.263519141385965e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.773,1.769902
100,1.7539,1.690941


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,▄▅█▆▃▅▃▂▂▁▄
train/learning_rate,▄▇█▇▇▅▄▃▂▁▁
train/loss,█▇▅▃▁▂▁▁▂▁▁

0,1
eval/loss,1.69094
eval/runtime,31.9223
eval/samples_per_second,3.133
eval/steps_per_second,0.407
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,1.20348
train/learning_rate,0.0
train/loss,1.7266


[34m[1mwandb[0m: Agent Starting Run: e71pkd98 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 4.7296306921428745e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 8
\        /    Total batch size = 32 | Total steps = 28
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
train/epoch,▁▅█
train/global_step,▁▅█
train/grad_norm,█▁
train/learning_rate,█▁
train/loss,█▁

0,1
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,28.0
train/grad_norm,0.94553
train/learning_rate,1e-05
train/loss,1.9673
train_loss,2.07135
train_runtime,953.028
train_samples_per_second,0.944
train_steps_per_second,0.029


[34m[1mwandb[0m: Agent Starting Run: wx5z8etw with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 2.219291171314665e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7572,1.759955
100,1.7572,1.694422


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,▄▄█▄▃▃▃▂▁▁▄
train/learning_rate,▇██▇▆▅▄▃▂▁▁
train/loss,█▇▄▂▁▂▁▁▂▁▁

0,1
eval/loss,1.69442
eval/runtime,32.4509
eval/samples_per_second,3.082
eval/steps_per_second,0.401
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,1.20879
train/learning_rate,0.0
train/loss,1.7298


[34m[1mwandb[0m: Agent Starting Run: 3vul4iax with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 1.8877709574182864e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.9387,1.852063


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,▁▅█▅▁
train/learning_rate,█▇▅▂▁
train/loss,█▆▃▁▁

0,1
eval/loss,1.85206
eval/runtime,32.3584
eval/samples_per_second,3.09
eval/steps_per_second,0.402
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.99287
train/learning_rate,0.0
train/loss,1.9387


[34m[1mwandb[0m: Agent Starting Run: rdkq2ib1 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 5.424459712923015e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7772,1.686852


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▆▄▁▁
train/learning_rate,█▇▄▂▁
train/loss,█▃▂▁▂

0,1
eval/loss,1.68685
eval/runtime,31.8792
eval/samples_per_second,3.137
eval/steps_per_second,0.408
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.5712
train/learning_rate,0.0
train/loss,1.7772


[34m[1mwandb[0m: Agent Starting Run: 448qwqdq with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.00020330285529291783
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 2
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.643,1.665468
100,1.7013,1.632483


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,▃█▃▂▁▁▁▁▁▁▁
train/learning_rate,▄▇█▇▇▅▄▃▂▁▁
train/loss,█▄▂▂▁▂▁▁▂▂▁

0,1
eval/loss,1.63248
eval/runtime,32.0171
eval/samples_per_second,3.123
eval/steps_per_second,0.406
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,0.64767
train/learning_rate,0.0
train/loss,1.6702


[34m[1mwandb[0m: Agent Starting Run: qd7fvnds with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 3.1491473028256675e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 8
\        /    Total batch size = 32 | Total steps = 28
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
train/epoch,▁▅█
train/global_step,▁▅█
train/grad_norm,▁█
train/learning_rate,█▁
train/loss,█▁

0,1
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,28.0
train/grad_norm,1.2255
train/learning_rate,1e-05
train/loss,2.1222
train_loss,2.18367
train_runtime,953.0345
train_samples_per_second,0.944
train_steps_per_second,0.029


[34m[1mwandb[0m: Agent Starting Run: sucq9ojt with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 2.6848547971023467e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 8
\        /    Total batch size = 32 | Total steps = 28
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
train/epoch,▁▅█
train/global_step,▁▅█
train/grad_norm,▁█
train/learning_rate,█▁
train/loss,█▁

0,1
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,28.0
train/grad_norm,1.07596
train/learning_rate,1e-05
train/loss,2.1899
train_loss,2.239
train_runtime,953.1132
train_samples_per_second,0.944
train_steps_per_second,0.029


[34m[1mwandb[0m: Agent Starting Run: l44hpsz5 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 4.779993120295755e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.6668,1.687358
100,1.7338,1.668503


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,▁▁
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,▆█▆▆▆▂▅▁▁▁▅
train/learning_rate,▄▇█▇▇▅▄▃▂▁▁
train/loss,█▇▃▂▁▂▁▁▂▂▁

0,1
eval/loss,1.6685
eval/runtime,32.3756
eval/samples_per_second,3.089
eval/steps_per_second,0.402
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,1.09142
train/learning_rate,0.0
train/loss,1.7057


[34m[1mwandb[0m: Agent Starting Run: v34oo2c4 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.00016713951562914522
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 225
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7452,1.686805
100,1.6131,1.651304
150,1.6666,1.637525
200,1.7364,1.628857


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▄▂▁
eval/runtime,▁█▆▇
eval/samples_per_second,█▁▃▂
eval/steps_per_second,█▁▂▂
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▃█▅▃▅▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂
train/learning_rate,▃▄▆▇███▇▇▇▆▅▅▄▄▃▃▂▂▁▁▁
train/loss,█▇▃▂▂▂▁▂▁▁▃▂▂▁▂▂▂▂▁▂▂▁

0,1
eval/loss,1.62886
eval/runtime,32.2924
eval/samples_per_second,3.097
eval/steps_per_second,0.403
total_flos,1.0707175827456e+16
train/epoch,1.0
train/global_step,225.0
train/grad_norm,0.90923
train/learning_rate,0.0
train/loss,1.6591


[34m[1mwandb[0m: Agent Starting Run: lynsyw17 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 1.2731175498249936e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.8753,1.864651
100,1.8433,1.783426


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,▃▃▄█▄▃▃▂▁▁▂
train/learning_rate,██▇▆▅▄▃▂▂▁▁
train/loss,█▇▅▄▂▃▁▁▂▁▁

0,1
eval/loss,1.78343
eval/runtime,32.3346
eval/samples_per_second,3.093
eval/steps_per_second,0.402
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,1.16601
train/learning_rate,0.0
train/loss,1.8176


[34m[1mwandb[0m: Agent Starting Run: 9gist310 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 0.0009099232922597412
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7494,1.647545


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,▃▁█▁▁
train/learning_rate,▇█▆▃▁
train/loss,█▂▂▁▂

0,1
eval/loss,1.64755
eval/runtime,32.1281
eval/samples_per_second,3.113
eval/steps_per_second,0.405
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.52215
train/learning_rate,4e-05
train/loss,1.7494


[34m[1mwandb[0m: Agent Starting Run: bs1f6ps4 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 0.0005177367912021509
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7326,1.635375


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▇▂▂▁
train/learning_rate,█▇▅▂▁
train/loss,█▂▂▁▂

0,1
eval/loss,1.63538
eval/runtime,32.2211
eval/samples_per_second,3.104
eval/steps_per_second,0.403
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.44531
train/learning_rate,2e-05
train/loss,1.7326


[34m[1mwandb[0m: Agent Starting Run: xe4kok2s with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 1.3052928447444756e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 225
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,2.0335,1.916908
100,1.6859,1.724355
150,1.7102,1.691367
200,1.7868,1.685981


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▂▁▁
eval/runtime,▂█▁▂
eval/samples_per_second,▆▁█▆
eval/steps_per_second,▅▁█▅
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▁▄▃▆▇▄▃▇▆▅▂▂▃▅▄▁▅▁▅▁▄█
train/learning_rate,▄▇███▇▇▇▆▆▅▅▄▄▃▃▂▂▂▁▁▁
train/loss,▇█▇▅▄▃▂▂▂▁▃▂▁▁▁▁▂▂▁▂▁▁

0,1
eval/loss,1.68598
eval/runtime,32.1415
eval/samples_per_second,3.111
eval/steps_per_second,0.404
total_flos,1.0707175827456e+16
train/epoch,1.0
train/global_step,225.0
train/grad_norm,1.92505
train/learning_rate,0.0
train/loss,1.714


[34m[1mwandb[0m: Agent Starting Run: s44r8rh5 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 0.0002413193093676099
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7486,1.654258


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▂▂▁▁
train/learning_rate,█▇▄▂▁
train/loss,█▂▂▁▂

0,1
eval/loss,1.65426
eval/runtime,32.2614
eval/samples_per_second,3.1
eval/steps_per_second,0.403
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.42293
train/learning_rate,1e-05
train/loss,1.7486


[34m[1mwandb[0m: Agent Starting Run: np5v9xua with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.0001441298210406579
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 2
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.643,1.661774
100,1.7163,1.649782


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,█▂▂▂▁▁▂▁▁▁▁
train/learning_rate,██▇▆▅▄▃▂▂▁▁
train/loss,█▃▂▂▁▃▂▂▂▂▂

0,1
eval/loss,1.64978
eval/runtime,32.22
eval/samples_per_second,3.104
eval/steps_per_second,0.403
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,0.65881
train/learning_rate,0.0
train/loss,1.6878


[34m[1mwandb[0m: Agent Starting Run: 9wd9q7bj with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 0.00012078522030909396
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.759,1.666523


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▃▃▂▁
train/learning_rate,█▇▄▂▁
train/loss,█▂▂▁▂

0,1
eval/loss,1.66652
eval/runtime,32.232
eval/samples_per_second,3.103
eval/steps_per_second,0.403
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.48323
train/learning_rate,0.0
train/loss,1.759


[34m[1mwandb[0m: Agent Starting Run: tztf9g4h with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 0.00013193374492586924
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7586,1.665226


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,▁█▂▁▁
train/learning_rate,▇█▆▃▁
train/loss,█▂▁▁▂

0,1
eval/loss,1.66523
eval/runtime,32.3328
eval/samples_per_second,3.093
eval/steps_per_second,0.402
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.47233
train/learning_rate,1e-05
train/loss,1.7586


[34m[1mwandb[0m: Agent Starting Run: 0rg3k0hd with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 1.6187464961567345e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 225
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.8994,1.827409
100,1.6544,1.699555
150,1.7028,1.6827
200,1.7813,1.679242


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▂▁▁
eval/runtime,▃█▁▇
eval/samples_per_second,▆▁█▂
eval/steps_per_second,▁▁█▁
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▁▅▅█▆▃▁▅▆▆▂▂▄▇▃▁▆▁▄▁▂▇
train/learning_rate,▇███▇▇▇▆▆▅▅▄▄▃▃▂▂▂▁▁▁▁
train/loss,▇█▆▄▃▃▂▂▁▁▃▂▂▁▁▂▂▂▁▂▂▁

0,1
eval/loss,1.67924
eval/runtime,32.354
eval/samples_per_second,3.091
eval/steps_per_second,0.402
total_flos,1.0707175827456e+16
train/epoch,1.0
train/global_step,225.0
train/grad_norm,1.76289
train/learning_rate,0.0
train/loss,1.7069


[34m[1mwandb[0m: Agent Starting Run: w5hbnzct with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 9.388078354364212e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7616,1.66901


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▃▃▂▁
train/learning_rate,█▇▄▂▁
train/loss,█▂▁▁▂

0,1
eval/loss,1.66901
eval/runtime,32.2896
eval/samples_per_second,3.097
eval/steps_per_second,0.403
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.52553
train/learning_rate,0.0
train/loss,1.7616


[34m[1mwandb[0m: Agent Starting Run: 9g865y9x with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 0.00034238068260598095
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.6315,1.648586
100,1.6974,1.629391


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,█▂▁▁▁▁▁▁▁▁▁
train/learning_rate,██▇▆▆▅▃▃▂▁▁
train/loss,█▃▂▂▁▃▂▂▃▂▁

0,1
eval/loss,1.62939
eval/runtime,32.3315
eval/samples_per_second,3.093
eval/steps_per_second,0.402
total_flos,1.065358355914752e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,0.6209
train/learning_rate,0.0
train/loss,1.6662


[34m[1mwandb[0m: Agent Starting Run: 4qc4t0at with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 8
[34m[1mwandb[0m: 	learning_rate: 2.052197346611538e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 8
\        /    Total batch size = 32 | Total steps = 28
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
train/epoch,▁▅█
train/global_step,▁▅█
train/grad_norm,▁█
train/learning_rate,█▁
train/loss,█▁

0,1
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,28.0
train/grad_norm,0.94557
train/learning_rate,0.0
train/loss,2.2321
train_loss,2.27602
train_runtime,956.8152
train_samples_per_second,0.941
train_steps_per_second,0.029


[34m[1mwandb[0m: Agent Starting Run: smas9xfx with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 2.0948303178464037e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 2
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7873,1.786882
100,1.7591,1.696197


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,▅▅█▇▄▅▄▂▂▁▅
train/learning_rate,▄▇█▇▇▅▄▃▂▁▁
train/loss,█▇▆▃▂▃▁▁▂▁▁

0,1
eval/loss,1.6962
eval/runtime,32.1806
eval/samples_per_second,3.107
eval/steps_per_second,0.404
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,1.23881
train/learning_rate,0.0
train/loss,1.732


[34m[1mwandb[0m: Agent Starting Run: ui0ul95u with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.0004981291600886722
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 2
\        /    Total batch size = 8 | Total steps = 112
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.6424,1.668437
100,1.7002,1.635431


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁▁
train/epoch,▁▂▂▃▄▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▄▄▄▅▆▆▇▇██
train/grad_norm,█▅▁▂▁▁▁▁▁▁▁
train/learning_rate,▄▇█▇▇▅▄▃▂▁▁
train/loss,█▃▂▂▁▃▂▂▂▂▁

0,1
eval/loss,1.63543
eval/runtime,32.1627
eval/samples_per_second,3.109
eval/steps_per_second,0.404
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,112.0
train/grad_norm,0.70505
train/learning_rate,0.0
train/loss,1.6708


[34m[1mwandb[0m: Agent Starting Run: izzevfr8 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 4.2452574373786935e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 225
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.8313,1.7395
100,1.6265,1.673602
150,1.6891,1.664126
200,1.7637,1.660199


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,█▂▁▁
eval/runtime,▂▁█▇
eval/samples_per_second,▇█▁▃
eval/steps_per_second,▄█▁▄
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▃▆▆█▄▆▅▆▄▄▃▃▄▆▄▂▅▁▄▂▂▄
train/learning_rate,▃▄▆▇███▇▇▇▆▅▅▄▄▃▂▂▂▁▁▁
train/loss,▇█▆▄▃▂▁▂▁▁▃▂▂▁▂▂▂▂▁▂▂▂

0,1
eval/loss,1.6602
eval/runtime,32.2841
eval/samples_per_second,3.098
eval/steps_per_second,0.403
total_flos,1.0707175827456e+16
train/epoch,1.0
train/global_step,225.0
train/grad_norm,1.38976
train/learning_rate,0.0
train/loss,1.691


[34m[1mwandb[0m: Agent Starting Run: opzvy8mw with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 0.0003517177852785735
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.03


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7414,1.646601


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,█▂▁▁▁
train/learning_rate,█▇▄▂▁
train/loss,█▂▂▁▂

0,1
eval/loss,1.6466
eval/runtime,32.2677
eval/samples_per_second,3.099
eval/steps_per_second,0.403
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.42034
train/learning_rate,1e-05
train/loss,1.7414


[34m[1mwandb[0m: Agent Starting Run: yphdzao2 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 4
[34m[1mwandb[0m: 	learning_rate: 3.268808094709937e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 4
[34m[1mwandb[0m: 	warmup_ratio: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 56
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.8313,1.741461


Training completed! Model saved to /content/drive/MyDrive/ai_detection_model/final_model


0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▃▄▆▇▇█
train/global_step,▁▃▄▆▇▇█
train/grad_norm,▃█▃▂▁
train/learning_rate,█▇▄▂▁
train/loss,█▄▂▁▁

0,1
eval/loss,1.74146
eval/runtime,32.2952
eval/samples_per_second,3.096
eval/steps_per_second,0.403
total_flos,1.190055113539584e+16
train/epoch,0.99556
train/global_step,56.0
train/grad_norm,0.70405
train/learning_rate,0.0
train/loss,1.8313


[34m[1mwandb[0m: Agent Starting Run: uz3g82bd with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	gradient_accumulation_steps: 2
[34m[1mwandb[0m: 	learning_rate: 0.0003850601694885458
[34m[1mwandb[0m: 	per_device_train_batch_size: 2
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


Downloading data from Github...
File ieee-chatgpt-fusion.jsonl already exists. Skipping...
File ieee-chatgpt-generation.jsonl already exists. Skipping...
File ieee-init.jsonl already exists. Skipping...
File ieee-chatgpt-polish.jsonl already exists. Skipping...
File ieee-chatgpt-fusion.xlsx already exists. Skipping...
File ieee-chatgpt-generation.xlsx already exists. Skipping...
File ieee-init.xlsx already exists. Skipping...
File ieee-chatgpt-polish.xlsx already exists. Skipping...
All data downloaded!
Loading model...
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded success

Map (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 900 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 225
 "-____-"     Number of trainable parameters = 13,631,488


Step,Training Loss,Validation Loss
50,1.7522,1.686088
100,1.6252,1.665632


Step,Training Loss,Validation Loss
50,1.7522,1.686088
100,1.6252,1.665632
150,1.674,1.646945


# Get final result

Before running the main function, hyper parameter in trainer.train() should be changed.

In [None]:
def main(data_dir):
    """
    Main training and evaluation pipeline.
    Args:
       data_dir (str): Path to data directory.
    """
    try:
        set_seeds()
        print("Starting training pipeline...")

        # Initialize and run trainer
        trainer = AIGenerationDetector(data_dir=data_dir)  # set data directory path
        trainer.download_data()
        trainer.setup_model()
        trainer.prepare_datasets(max_samples=5000)
        trainer.create_test_dataset()
        trainer.train()
        trainer.create_submission()

        print("Training pipeline completed successfully!")

    except Exception as e:
        print(f"Fatal error in main: {str(e)}")
        raise
    finally:
        clear_memory()
        print_gpu_utilization()
        print_detailed_gpu_info()

# Main execution block
if __name__ == "__main__":
    data_dir = "/content/drive/MyDrive/ai_dataset"  # Set the Google Drive path to save data

    # Run the final training using main()
    main(data_dir)