install library

In [1]:
%pip install transformers
%pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

### Prune, calculate perplexity, and restore pruned layers

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import os
import pickle
import wandb
from transformers import AdamW, GPT2LMHeadModel, GPT2Tokenizer
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import torch
import torch.nn.utils.prune as prune
from datasets import load_dataset
import numpy as np

# Path to save the sampled dataset
dataset_path = "/content/drive/MyDrive/HPML/Course_Project/sampled_dataset.pkl"

# Load or create the dataset
if os.path.exists(dataset_path):
    print("Loading existing sampled dataset...")
    with open(dataset_path, "rb") as f:
        full_subset = pickle.load(f)
else:
    print("Creating and saving a new sampled dataset...")
    dataset = load_dataset("wikitext", "wikitext-103-raw-v1", split="train")
    subset_ratio = 0.0025
    subset_size = int(len(dataset) * subset_ratio)
    full_subset = [dataset[i] for i in range(subset_size)]

    # Save the sampled dataset
    with open(dataset_path, "wb") as f:
        pickle.dump(full_subset, f)

from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

full_subset = CustomDataset(full_subset)

# Split into train and test sets
train_size = int(0.8 * len(full_subset))
test_size = len(full_subset) - train_size
train_data, test_data = random_split(full_subset, [train_size, test_size])

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Path to save the fine-tuned model
fine_tuned_model_path = "/content/drive/MyDrive/HPML/Course_Project/fine_tuned_model"

# Fine-tune the original model if not already saved
if not os.path.exists(fine_tuned_model_path):
    print("Fine-tuning original model...")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.train()

    train_dataloader = DataLoader(train_data, batch_size=4, shuffle=True)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    for epoch in range(5):  # Fine-tune for a few epochs
        epoch_loss = 0
        for batch in tqdm(train_dataloader):
            inputs = tokenizer(batch['text'], padding=True, truncation=True, return_tensors="pt")
            input_ids = inputs['input_ids']
            attention_mask = inputs['attention_mask']

            if input_ids.size(0) == 0 or input_ids.size(1) == 0:
                continue

            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}: Loss {epoch_loss / len(train_dataloader)}")

    model.save_pretrained(fine_tuned_model_path)
    tokenizer.save_pretrained(fine_tuned_model_path)
else:
    print("Fine-tuned model already exists. Skipping fine-tuning...")

# Define the W&B sweep configuration
sweep_config = {
    "method": "random",
    "metric": {
        "name": "perplexity",
        "goal": "minimize"
    },
    "parameters": {
        "first_ratio": {"values": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9]},
        "middle_ratio": {"values": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9]},
        "last_ratio": {"values": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9]},
    }
}

def prune_mlp_layers(model, layer_count, ratios):
    first_end = layer_count // 3
    middle_start = first_end
    middle_end = 2 * layer_count // 3
    last_start = middle_end

    # Prune the first third of the layers
    for i in range(0, first_end):
        if ratios[0] > 0:
            prune.l1_unstructured(model.transformer.h[i].mlp.c_fc, name="weight", amount=ratios[0])
            prune.l1_unstructured(model.transformer.h[i].mlp.c_proj, name="weight", amount=ratios[0])

    # Prune the middle third of the layers
    for i in range(middle_start, middle_end):
        if ratios[1] > 0:
            prune.l1_unstructured(model.transformer.h[i].mlp.c_fc, name="weight", amount=ratios[1])
            prune.l1_unstructured(model.transformer.h[i].mlp.c_proj, name="weight", amount=ratios[1])

    # Prune the last third of the layers
    for i in range(last_start, layer_count):
        if ratios[2] > 0:
            prune.l1_unstructured(model.transformer.h[i].mlp.c_fc, name="weight", amount=ratios[2])
            prune.l1_unstructured(model.transformer.h[i].mlp.c_proj, name="weight", amount=ratios[2])

    # Remove pruning masks to finalize pruning
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            try:
                prune.remove(module, 'weight')
            except ValueError:
                pass

def evaluate_model(model, tokenizer, dataset, device):
    model.eval()
    dataloader = DataLoader(dataset, batch_size=8, shuffle=False)
    total_loss = 0.0
    total_tokens = 0
    total_correct = 0

    with torch.no_grad():
        for batch in dataloader:
            inputs = tokenizer(batch['text'], padding=True, truncation=True, return_tensors="pt")
            input_ids = inputs['input_ids'].to(device)
            attention_mask = inputs['attention_mask'].to(device)

            if input_ids.size(0) == 0 or input_ids.size(1) == 0:
                continue

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss
            total_loss += loss.item() * input_ids.numel()

            predictions = torch.argmax(outputs.logits, dim=-1)
            correct = (predictions == input_ids).float().sum().item()
            total_correct += correct
            total_tokens += input_ids.numel()

    perplexity = np.exp(total_loss / total_tokens) if total_tokens > 0 else float('inf')
    accuracy = total_correct / total_tokens if total_tokens > 0 else 0
    return perplexity, accuracy

def sweep_function():
    wandb.init()
    config = wandb.config

    # Load the fine-tuned model
    model = GPT2LMHeadModel.from_pretrained(fine_tuned_model_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Prune the MLP layers
    layer_count = len(model.transformer.h)
    prune_mlp_layers(model, layer_count, [config.first_ratio, config.middle_ratio, config.last_ratio])

    # Fine-tune the pruned model for a short time
    model.train()
    train_dataloader = DataLoader(train_data, batch_size=4, shuffle=True)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    for epoch in range(1):
        epoch_loss = 0
        for batch in tqdm(train_dataloader, desc="Re-training after pruning"):
            inputs = tokenizer(batch['text'], padding=True, truncation=True, return_tensors="pt")
            input_ids = inputs['input_ids']
            attention_mask = inputs['attention_mask']

            if input_ids.size(0) == 0 or input_ids.size(1) == 0:
                continue

            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
        wandb.log({"train_loss": epoch_loss / len(train_dataloader)})

    if len(test_data) > 0:
        first_test_sample = test_data[0]['text']
        print("First test sample text:\n", first_test_sample)
        # Tokenize it
        sample_inputs = tokenizer(first_test_sample, return_tensors="pt")
        print("First test sample tokenized input_ids:", sample_inputs['input_ids'])
        print("Decoded text from tokenized input:", tokenizer.decode(sample_inputs['input_ids'][0]))

    # Evaluate on test data
    perplexity, accuracy = evaluate_model(model, tokenizer, test_data, device)
    wandb.log({"perplexity": perplexity, "accuracy": accuracy})
    print(f"Test Perplexity: {perplexity:.4f}, Test Accuracy: {accuracy:.4f}")


# Initialize and run the sweep
sweep_id = wandb.sweep(sweep_config, project="gpt2-pruning-optimization")
wandb.agent(sweep_id, function=sweep_function)


Loading existing sampled dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fine-tuned model already exists. Skipping fine-tuning...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: wd1vpw3g
Sweep URL: https://wandb.ai/tokamak01823-new-york-university/gpt2-pruning-optimization/sweeps/wd1vpw3g


[34m[1mwandb[0m: Agent Starting Run: oayqa2js with config:
[34m[1mwandb[0m: 	first_ratio: 0
[34m[1mwandb[0m: 	last_ratio: 0.1
[34m[1mwandb[0m: 	middle_ratio: 0.3
[34m[1mwandb[0m: Currently logged in as: [33mtokamak01823[0m ([33mtokamak01823-new-york-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


Re-training after pruning: 100%|██████████| 901/901 [03:28<00:00,  4.32it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72608
perplexity,1.07686
train_loss,0.19493


[34m[1mwandb[0m: Agent Starting Run: 1w8dk2u7 with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:20<00:00,  4.49it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72709
perplexity,1.99054
train_loss,1.28543


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7qreyfeh with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.7


Re-training after pruning: 100%|██████████| 901/901 [03:21<00:00,  4.47it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72643
perplexity,1.59102
train_loss,0.96533


[34m[1mwandb[0m: Agent Starting Run: uvdky0mm with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:21<00:00,  4.48it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72674
perplexity,1.85562
train_loss,1.62847


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kk30jav7 with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.7


Re-training after pruning: 100%|██████████| 901/901 [03:21<00:00,  4.48it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7262
perplexity,1.32948
train_loss,0.59526


[34m[1mwandb[0m: Agent Starting Run: o3hgtkx6 with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.41it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72611
perplexity,1.11507
train_loss,0.27971


[34m[1mwandb[0m: Agent Starting Run: 5d9kjatg with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.3


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.40it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72628
perplexity,1.30354
train_loss,0.53133


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: luz78e9k with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.40it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72624
perplexity,1.27949
train_loss,0.50768


[34m[1mwandb[0m: Agent Starting Run: ga28qphs with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.1
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.42it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72603
perplexity,1.0726
train_loss,0.19242


[34m[1mwandb[0m: Agent Starting Run: eewkf3cu with config:
[34m[1mwandb[0m: 	first_ratio: 0
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.5


Re-training after pruning: 100%|██████████| 901/901 [03:21<00:00,  4.47it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72612
perplexity,1.17725
train_loss,0.39768


[34m[1mwandb[0m: Agent Starting Run: ieszm7q8 with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.5


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.41it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72627
perplexity,1.39755
train_loss,0.64351


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mr8t92mm with config:
[34m[1mwandb[0m: 	first_ratio: 0
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.5


Re-training after pruning: 100%|██████████| 901/901 [03:22<00:00,  4.44it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72629
perplexity,1.39534
train_loss,0.63946


[34m[1mwandb[0m: Agent Starting Run: m74rpr8n with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.9
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72685
perplexity,1.71587
train_loss,0.89543


[34m[1mwandb[0m: Agent Starting Run: hy7pfak9 with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.1
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:21<00:00,  4.46it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72609
perplexity,1.07421
train_loss,0.19377


[34m[1mwandb[0m: Agent Starting Run: fhf09gpg with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.3


Re-training after pruning: 100%|██████████| 901/901 [03:22<00:00,  4.45it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72648
perplexity,1.50135
train_loss,0.86197


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7td52nyl with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.9
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7269
perplexity,1.725
train_loss,0.9029


[34m[1mwandb[0m: Agent Starting Run: dk533lx5 with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.40it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72604
perplexity,1.47035
train_loss,0.80513


[34m[1mwandb[0m: Agent Starting Run: gnedx50p with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:22<00:00,  4.46it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72903
perplexity,3.17183
train_loss,2.04383


[34m[1mwandb[0m: Agent Starting Run: 2i9jg4i9 with config:
[34m[1mwandb[0m: 	first_ratio: 0
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7264
perplexity,1.61833
train_loss,0.8591


[34m[1mwandb[0m: Agent Starting Run: 6hqioj4m with config:
[34m[1mwandb[0m: 	first_ratio: 0
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:20<00:00,  4.49it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72632
perplexity,1.61691
train_loss,0.88298


[34m[1mwandb[0m: Agent Starting Run: 7nhqiv8a with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.1
[34m[1mwandb[0m: 	middle_ratio: 0.5


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.41it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72707
perplexity,2.07248
train_loss,1.71841


[34m[1mwandb[0m: Agent Starting Run: wl1ikdlc with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:22<00:00,  4.44it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72668
perplexity,1.8784
train_loss,1.64902


[34m[1mwandb[0m: Agent Starting Run: s7vlee2v with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.3


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.44it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72683
perplexity,2.01001
train_loss,1.62781


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: olubkwn5 with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.5


Re-training after pruning: 100%|██████████| 901/901 [03:25<00:00,  4.39it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72615
perplexity,1.21988
train_loss,0.47941


[34m[1mwandb[0m: Agent Starting Run: jt6e5csn with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0
[34m[1mwandb[0m: 	middle_ratio: 0.7


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.42it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72741
perplexity,2.42647
train_loss,1.82052


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cwaxj9b5 with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.42it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7261
perplexity,1.09255
train_loss,0.25689


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 66rh8ciz with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.3


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.41it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72628
perplexity,1.35642
train_loss,0.62016


[34m[1mwandb[0m: Agent Starting Run: hq9lq566 with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.44it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72636
perplexity,1.46536
train_loss,0.80741


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: np6fzq6d with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0.1
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:25<00:00,  4.38it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72593
perplexity,1.561
train_loss,0.85826


[34m[1mwandb[0m: Agent Starting Run: ucpxa7zo with config:
[34m[1mwandb[0m: 	first_ratio: 0
[34m[1mwandb[0m: 	last_ratio: 0
[34m[1mwandb[0m: 	middle_ratio: 0.5


Re-training after pruning: 100%|██████████| 901/901 [03:21<00:00,  4.46it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.726
perplexity,1.10578
train_loss,0.26995


[34m[1mwandb[0m: Agent Starting Run: 5dunagyo with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0.5


Re-training after pruning: 100%|██████████| 901/901 [03:25<00:00,  4.39it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72622
perplexity,1.24541
train_loss,0.60413


[34m[1mwandb[0m: Agent Starting Run: a4s3w0wo with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:22<00:00,  4.45it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7261
perplexity,1.11864
train_loss,0.29439


[34m[1mwandb[0m: Agent Starting Run: 1p9gke0y with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:22<00:00,  4.45it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72594
perplexity,1.56431
train_loss,0.85823


[34m[1mwandb[0m: Agent Starting Run: f95uacyz with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0
[34m[1mwandb[0m: 	middle_ratio: 0.7


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.40it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72619
perplexity,1.43004
train_loss,0.82429


[34m[1mwandb[0m: Agent Starting Run: xr4g2v3d with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.42it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72623
perplexity,1.2794
train_loss,0.5115


[34m[1mwandb[0m: Agent Starting Run: bqx04gdq with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:25<00:00,  4.39it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72624
perplexity,1.28898
train_loss,0.52313


[34m[1mwandb[0m: Agent Starting Run: s34jn3eq with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72609
perplexity,1.09406
train_loss,0.25523


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7sqm7eq3 with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0
[34m[1mwandb[0m: 	middle_ratio: 0.3


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.42it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72671
perplexity,1.86981
train_loss,1.59214


[34m[1mwandb[0m: Agent Starting Run: lw2flhlk with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.1
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:26<00:00,  4.36it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72605
perplexity,1.07077
train_loss,0.17852


[34m[1mwandb[0m: Agent Starting Run: vo3xi334 with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.1
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:26<00:00,  4.36it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72609
perplexity,1.07236
train_loss,0.19265


[34m[1mwandb[0m: Agent Starting Run: qh4co3rn with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:26<00:00,  4.35it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7263
perplexity,1.33314
train_loss,0.59564


[34m[1mwandb[0m: Agent Starting Run: y2vckzlb with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.7


Re-training after pruning: 100%|██████████| 901/901 [03:25<00:00,  4.39it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72753
perplexity,2.57286
train_loss,1.86522


[34m[1mwandb[0m: Agent Starting Run: 2ppffdcu with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0


Re-training after pruning: 100%|██████████| 901/901 [03:26<00:00,  4.36it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72604
perplexity,1.08066
train_loss,0.20561


[34m[1mwandb[0m: Agent Starting Run: vzfs3hl5 with config:
[34m[1mwandb[0m: 	first_ratio: 0.5
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.3


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.44it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7263
perplexity,1.35961
train_loss,0.62593


[34m[1mwandb[0m: Agent Starting Run: ssa4232m with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112770011115774, max=1.0…

Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.42it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7257
perplexity,1.15127
train_loss,0.4634


[34m[1mwandb[0m: Agent Starting Run: 81afava3 with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:24<00:00,  4.40it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72695
perplexity,1.96168
train_loss,1.10662


[34m[1mwandb[0m: Agent Starting Run: docx3m2m with config:
[34m[1mwandb[0m: 	first_ratio: 0.1
[34m[1mwandb[0m: 	last_ratio: 0.5
[34m[1mwandb[0m: 	middle_ratio: 0.7


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.42it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7262
perplexity,1.33064
train_loss,0.59193


[34m[1mwandb[0m: Agent Starting Run: g6ig7nsl with config:
[34m[1mwandb[0m: 	first_ratio: 0
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0.7


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72601
perplexity,1.23353
train_loss,0.47749


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bqli596c with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.1


Re-training after pruning: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72743
perplexity,2.30458
train_loss,1.77387


[34m[1mwandb[0m: Agent Starting Run: iqqcas3c with config:
[34m[1mwandb[0m: 	first_ratio: 0.7
[34m[1mwandb[0m: 	last_ratio: 0
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:22<00:00,  4.44it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.72672
perplexity,1.80609
train_loss,1.15838


[34m[1mwandb[0m: Agent Starting Run: i53kisf0 with config:
[34m[1mwandb[0m: 	first_ratio: 0.9
[34m[1mwandb[0m: 	last_ratio: 0.7
[34m[1mwandb[0m: 	middle_ratio: 0.9


Re-training after pruning: 100%|██████████| 901/901 [03:21<00:00,  4.47it/s]


First test sample text:
  Hurricanes are an ever @-@ present threat during the summer and fall season , which puts Galveston in Coastal Windstorm Area . Galveston Island and the Bolivar Peninsula are generally at the greatest risk among the communities near the Galveston Bay . However , though the island and peninsula provide some shielding , the bay shoreline still faces significant danger from storm surge . 

First test sample tokenized input_ids: tensor([[39419,   389,   281,  1683,  2488,    12,    31,  1944,  2372,  1141,
           262,  3931,   290,  2121,  1622,   837,   543,  7584,  5027,  4223,
           261,   287, 43513,  3086, 12135,  9498,   764,  5027,  4223,   261,
          5451,   290,   262, 10797,   452,   283, 24078,   389,  4143,   379,
           262,  6000,  2526,  1871,   262,  5348,  1474,   262,  5027,  4223,
           261,  4696,   764,  2102,   837,   996,   262,  7022,   290, 28080,
          2148,   617, 43539,   837,   262, 15489, 15191,  1370,   991, 

VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁
perplexity,▁
train_loss,▁

0,1
accuracy,0.7317
perplexity,3.55781
train_loss,2.14224


[34m[1mwandb[0m: Agent Starting Run: yanqd423 with config:
[34m[1mwandb[0m: 	first_ratio: 0.3
[34m[1mwandb[0m: 	last_ratio: 0.3
[34m[1mwandb[0m: 	middle_ratio: 0.3


Re-training after pruning:  50%|█████     | 454/901 [01:43<01:38,  4.54it/s]

In [5]:
import os
import pickle
from transformers import AdamW, GPT2LMHeadModel, GPT2Tokenizer
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import torch
import torch.nn.utils.prune as prune
from datasets import load_dataset
import numpy as np
from torch.utils.data import Dataset

# Constants
FIXED_FIRST_RATIO = 0.1

# Path to save the sampled dataset
dataset_path = "/content/drive/MyDrive/HPML/Course_Project/sampled_dataset.pkl"

class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

def evaluate_model(model, tokenizer, dataset, device):
    model.eval()
    dataloader = DataLoader(dataset, batch_size=8, shuffle=False)
    total_loss = 0.0
    total_tokens = 0

    with torch.no_grad():
        for batch in dataloader:
            inputs = tokenizer(batch['text'], padding=True, truncation=True, return_tensors="pt")
            input_ids = inputs['input_ids'].to(device)
            attention_mask = inputs['attention_mask'].to(device)

            if input_ids.size(0) == 0 or input_ids.size(1) == 0:
                continue

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss
            total_loss += loss.item() * input_ids.numel()
            total_tokens += input_ids.numel()

    perplexity = np.exp(total_loss / total_tokens) if total_tokens > 0 else float('inf')
    return perplexity

def prune_mlp_layers(model, layer_count, ratios):
    first_end = layer_count // 3
    middle_start = first_end
    middle_end = 2 * layer_count // 3
    last_start = middle_end

    for i in range(0, first_end):
        if ratios[0] > 0:
            prune.l1_unstructured(model.transformer.h[i].mlp.c_fc, name="weight", amount=ratios[0])
            prune.l1_unstructured(model.transformer.h[i].mlp.c_proj, name="weight", amount=ratios[0])

    for i in range(middle_start, middle_end):
        if ratios[1] > 0:
            prune.l1_unstructured(model.transformer.h[i].mlp.c_fc, name="weight", amount=ratios[1])
            prune.l1_unstructured(model.transformer.h[i].mlp.c_proj, name="weight", amount=ratios[1])

    for i in range(last_start, layer_count):
        if ratios[2] > 0:
            prune.l1_unstructured(model.transformer.h[i].mlp.c_fc, name="weight", amount=ratios[2])
            prune.l1_unstructured(model.transformer.h[i].mlp.c_proj, name="weight", amount=ratios[2])

    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            try:
                prune.remove(module, 'weight')
            except ValueError:
                pass

def fine_tune_model(model, train_data, tokenizer, device, epochs=3):
    model.train()
    dataloader = DataLoader(train_data, batch_size=4, shuffle=True)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    for epoch in range(epochs):
        epoch_loss = 0
        for batch in tqdm(dataloader, desc=f"Fine-tuning Epoch {epoch + 1}/{epochs}"):
            inputs = tokenizer(batch['text'], padding=True, truncation=True, return_tensors="pt")
            input_ids = inputs['input_ids']
            attention_mask = inputs['attention_mask']

            if input_ids.size(0) == 0 or input_ids.size(1) == 0:
                continue

            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}/{epochs} Loss: {epoch_loss / len(dataloader):.4f}")

def test_ratios(ratio_combinations, num_trials):
    results = {}

    for ratios in ratio_combinations:
        print(f"Testing ratios: {ratios}")
        perplexities = []

        for trial in range(num_trials):
            print(f"Trial {trial + 1} for ratios {ratios}")

            # Load the fine-tuned model
            model = GPT2LMHeadModel.from_pretrained(fine_tuned_model_path)
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = model.to(device)

            # Prune the MLP layers
            layer_count = len(model.transformer.h)
            prune_mlp_layers(model, layer_count, ratios)

            # Fine-tune the model
            fine_tune_model(model, train_data, tokenizer, device, epochs=3)

            # Evaluate on test data
            perplexity = evaluate_model(model, tokenizer, test_data, device)
            perplexities.append(perplexity)

        # Calculate average perplexity
        avg_perplexity = sum(perplexities) / len(perplexities)
        results[tuple(ratios)] = avg_perplexity
        print(f"Average perplexity for ratios {ratios}: {avg_perplexity:.4f}")

    return results

# Main Execution
if __name__ == "__main__":
    # Define ratio combinations
    ratio_combinations = [
        [0.1, 0.2, 0.3],
        [0.1, 0.18, 0.32],
        [0.1, 0.3, 0.5],
        [0.1, 0.237, 0.563]
    ]

    # Number of trials per combination
    num_trials = 3

    # Load tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token

    # Load dataset
    if os.path.exists(dataset_path):
        print("Loading existing sampled dataset...")
        with open(dataset_path, "rb") as f:
            full_subset = pickle.load(f)
    else:
        print("Creating and saving a new sampled dataset...")
        dataset = load_dataset("wikitext", "wikitext-103-raw-v1", split="train")
        subset_ratio = 0.0025
        subset_size = int(len(dataset) * subset_ratio)
        full_subset = [dataset[i] for i in range(subset_size)]

        # Save the sampled dataset
        with open(dataset_path, "wb") as f:
            pickle.dump(full_subset, f)

    full_subset = CustomDataset(full_subset)

    # Split into train and test sets
    train_size = int(0.8 * len(full_subset))
    test_size = len(full_subset) - train_size
    train_data, test_data = random_split(full_subset, [train_size, test_size])

    # Fine-tuned model path
    fine_tuned_model_path = "/content/drive/MyDrive/HPML/Course_Project/fine_tuned_model"

    # Test ratio combinations
    results = test_ratios(ratio_combinations, num_trials)

    # Print final results
    print("\nFinal Results:")
    for ratios, avg_perplexity in results.items():
        print(f"Ratios {ratios}: Average Perplexity = {avg_perplexity:.4f}")


Loading existing sampled dataset...
Testing ratios: [0.1, 0.2, 0.3]
Trial 1 for ratios [0.1, 0.2, 0.3]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:12<00:00,  4.68it/s]


Epoch 1/3 Loss: 0.2044


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:20<00:00,  4.50it/s]


Epoch 2/3 Loss: 0.1742


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:20<00:00,  4.49it/s]


Epoch 3/3 Loss: 0.1623
Trial 2 for ratios [0.1, 0.2, 0.3]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:24<00:00,  4.41it/s]


Epoch 1/3 Loss: 0.2048


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:22<00:00,  4.46it/s]


Epoch 2/3 Loss: 0.1760


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:22<00:00,  4.46it/s]


Epoch 3/3 Loss: 0.1589
Trial 3 for ratios [0.1, 0.2, 0.3]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:22<00:00,  4.45it/s]


Epoch 1/3 Loss: 0.2036


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:22<00:00,  4.45it/s]


Epoch 2/3 Loss: 0.1738


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:21<00:00,  4.48it/s]


Epoch 3/3 Loss: 0.1607
Average perplexity for ratios [0.1, 0.2, 0.3]: 1.0972
Testing ratios: [0.1, 0.18, 0.32]
Trial 1 for ratios [0.1, 0.18, 0.32]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:20<00:00,  4.49it/s]


Epoch 1/3 Loss: 0.2070


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:19<00:00,  4.52it/s]


Epoch 2/3 Loss: 0.1795


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:19<00:00,  4.52it/s]


Epoch 3/3 Loss: 0.1667
Trial 2 for ratios [0.1, 0.18, 0.32]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


Epoch 1/3 Loss: 0.2056


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:23<00:00,  4.44it/s]


Epoch 2/3 Loss: 0.1805


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


Epoch 3/3 Loss: 0.1576
Trial 3 for ratios [0.1, 0.18, 0.32]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:22<00:00,  4.45it/s]


Epoch 1/3 Loss: 0.2033


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:20<00:00,  4.49it/s]


Epoch 2/3 Loss: 0.1784


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:23<00:00,  4.44it/s]


Epoch 3/3 Loss: 0.1577
Average perplexity for ratios [0.1, 0.18, 0.32]: 1.0983
Testing ratios: [0.1, 0.3, 0.5]
Trial 1 for ratios [0.1, 0.3, 0.5]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:21<00:00,  4.46it/s]


Epoch 1/3 Loss: 0.3005


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:21<00:00,  4.46it/s]


Epoch 2/3 Loss: 0.2381


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:20<00:00,  4.49it/s]


Epoch 3/3 Loss: 0.2096
Trial 2 for ratios [0.1, 0.3, 0.5]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:24<00:00,  4.40it/s]


Epoch 1/3 Loss: 0.2960


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:20<00:00,  4.50it/s]


Epoch 2/3 Loss: 0.2401


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:22<00:00,  4.46it/s]


Epoch 3/3 Loss: 0.2084
Trial 3 for ratios [0.1, 0.3, 0.5]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:22<00:00,  4.44it/s]


Epoch 1/3 Loss: 0.2987


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:20<00:00,  4.50it/s]


Epoch 2/3 Loss: 0.2354


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:21<00:00,  4.47it/s]


Epoch 3/3 Loss: 0.2052
Average perplexity for ratios [0.1, 0.3, 0.5]: 1.1427
Testing ratios: [0.1, 0.237, 0.563]
Trial 1 for ratios [0.1, 0.237, 0.563]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:19<00:00,  4.52it/s]


Epoch 1/3 Loss: 0.3441


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:22<00:00,  4.46it/s]


Epoch 2/3 Loss: 0.2638


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:19<00:00,  4.51it/s]


Epoch 3/3 Loss: 0.2328
Trial 2 for ratios [0.1, 0.237, 0.563]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:21<00:00,  4.46it/s]


Epoch 1/3 Loss: 0.3411


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:24<00:00,  4.41it/s]


Epoch 2/3 Loss: 0.2630


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:20<00:00,  4.50it/s]


Epoch 3/3 Loss: 0.2347
Trial 3 for ratios [0.1, 0.237, 0.563]


Fine-tuning Epoch 1/3: 100%|██████████| 901/901 [03:23<00:00,  4.43it/s]


Epoch 1/3 Loss: 0.3371


Fine-tuning Epoch 2/3: 100%|██████████| 901/901 [03:21<00:00,  4.48it/s]


Epoch 2/3 Loss: 0.2647


Fine-tuning Epoch 3/3: 100%|██████████| 901/901 [03:20<00:00,  4.49it/s]


Epoch 3/3 Loss: 0.2301
Average perplexity for ratios [0.1, 0.237, 0.563]: 1.1680

Final Results:
Ratios (0.1, 0.2, 0.3): Average Perplexity = 1.0972
Ratios (0.1, 0.18, 0.32): Average Perplexity = 1.0983
Ratios (0.1, 0.3, 0.5): Average Perplexity = 1.1427
Ratios (0.1, 0.237, 0.563): Average Perplexity = 1.1680
