In [10]:
!pip install --quiet datasets
!pip install --quiet evaluate
!pip install --quiet accelerate
!pip install --quiet bitsandbytes

In [None]:
import numpy as np
import torch
import evaluate
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from datasets import load_dataset
import accelerate
from sklearn.metrics import accuracy_score
import time
from torch.utils.data import DataLoader
from collections import defaultdict
import pandas as pd
import random

AttributeError: module 'pyarrow.lib' has no attribute 'ListViewType'

In [None]:
model_name = "SamLowe/roberta-base-go_emotions"

In [None]:
def make_model():
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True,
                                                           device_map=device)
  return model

In [None]:
# Load a pretrained model and tokenizer
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)
model = make_model()
# tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")

In [None]:
print(model)

In [None]:
def print_model_structure(model):
    # Number of layers and heads from the config
    num_layers = model.config.num_hidden_layers
    num_heads = model.config.num_attention_heads
    print(f"Number of layers: {num_layers}")
    print(f"Number of attention heads per layer: {num_heads}")

    # Check the encoder structure
    num_layers_in_encoder = len(model.roberta.encoder.layer)
    print(f"Number of layers in encoder: {num_layers_in_encoder}")

    # Loop through each layer and print the number of heads
    for i, layer in enumerate(model.roberta.encoder.layer):
        print(f"Layer {i+1}: {num_heads} attention heads")

# Example usage with a loaded model
# Make sure to replace 'model' with your actual model name if it's different
print_model_structure(model)


In [None]:
import datasets
split_name = "validation"

dataset_name, dataset_config_name = "go_emotions", "simplified"
dataset_dict = datasets.load_dataset(dataset_name, dataset_config_name)
dataset_dict[split_name][0]

In [None]:
print(len(dataset_dict))
dataset_dict['validation'][0:10]

In [None]:
from torch.nn.utils.rnn import pad_sequence

def pad_labels(labels, max_length):
    """Pad the list of labels to a fixed length."""
    padded_labels = []
    for label_list in labels:
        # Create a zero-filled tensor with max_length
        padded = np.zeros(max_length, dtype=int)
        padded[:len(label_list)] = label_list  # Fill in the actual labels
        padded_labels.append(padded)
    return np.array(padded_labels)

def preprocess_function(examples):
    tokenized_inputs = tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

    max_label_length = max(len(label_list) for label_list in examples['labels'])
    padded_labels = pad_labels(examples['labels'], max_label_length)

    tokenized_inputs['label'] = padded_labels.tolist()  # Convert numpy array to list
    return tokenized_inputs

encoded_dataset = dataset_dict.map(preprocess_function, batched=True)

print(encoded_dataset['validation'][0])

In [None]:
def collate_fn(batch):
    input_ids = pad_sequence([torch.tensor(item['input_ids']) for item in batch], batch_first=True)
    attention_mask = pad_sequence([torch.tensor(item['attention_mask']) for item in batch], batch_first=True)
    labels = pad_sequence([torch.tensor(item['label']) for item in batch], batch_first=True, padding_value=0)

    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'label': labels
    }

In [None]:
# Use DataLoader to create batches
encoded_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
print(encoded_dataset['validation'])

In [None]:
# Modify model based on chromosome
def modify_model(model, chromosome):
    num_heads = model.config.num_attention_heads
    num_blocks = model.config.num_hidden_layers
    print(num_blocks)
    # num_ffn = model.config.num_hidden_layers

    # Disable attention heads
    heads_to_prune = defaultdict(list)
    for i, gene in enumerate(chromosome):
        if gene == 0:
          block_num = i//num_heads
          head_num = i%num_heads
          heads_to_prune[block_num].append(head_num)

    print(heads_to_prune)
    if heads_to_prune:

        for block_num in heads_to_prune:
            # model.transformer.encoder.block[block_num].layer[0].SelfAttention.prune_heads(heads_to_prune[block_num])
            model.roberta.encoder.layer[block_num].attention.prune_heads(heads_to_prune[block_num])
            # if(bloack_num!=0):
            # model.transformer.decoder.block[block_num].layer[0].SelfAttention.prune_heads(heads_to_prune[block_num])
            # model.transformer.decoder.block[block_num].layer[1].EncDecAttention.prune_heads(heads_to_prune[block_num])

            # print(block_num, model.transformer.encoder.block[block_num].layer[0].SelfAttention.n_heads)
            # model.prune_heads(heads_to_prune)

            # model.transformer.encoder.block[block_num].layer[0].SelfAttention.q = prune_linear_layer(model.transformer.encoder.block[block_num].layer[0].SelfAttention.q, index)
            # model.transformer.encoder.block[block_num].layer[0].SelfAttention.k = prune_linear_layer(model.transformer.encoder.block[block_num].layer[0].SelfAttention.k, index)
            # model.transformer.encoder.block[block_num].layer[0].SelfAttention.v = prune_linear_layer(model.transformer.encoder.block[block_num].layer[0].SelfAttention.v, index)
            # model.transformer.encoder.block[block_num].layer[0].SelfAttention.o = prune_linear_layer(model.transformer.encoder.block[block_num].layer[0].SelfAttention.o, index, dim=1)

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import hamming_loss, accuracy_score

def evaluate_fitness(chromosome, model, encoded_dataset, metric='accuracy'):
    # Modify the model according to the chromosome
    if len(chromosome):
        model = make_model()
        modify_model(model, chromosome)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    start_time = time.time()
    predictions = []
    labels = []

    dataloader = DataLoader(encoded_dataset['test'], batch_size=16, collate_fn=collate_fn)

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)

            # Get predictions
            logits = outputs.logits
            preds = torch.argmax(logits, dim=-1)
            predictions.extend(preds.cpu().numpy())
            labels.extend(batch['label'].cpu().numpy().tolist())  # Convert to list here

    # Convert labels to binary format using MultiLabelBinarizer
    mlb = MultiLabelBinarizer()
    labels_binary = mlb.fit_transform(labels)  # Convert labels into binary format

    # Create binary predictions
    predictions_binary = np.zeros((len(predictions), len(mlb.classes_)), dtype=int)
    for idx, pred in enumerate(predictions):
        predictions_binary[idx, pred] = 1  # Set the predicted class to 1

    # Calculate Hamming Loss
    # fitness = 1 - hamming_loss(labels_binary, predictions_binary)  # Higher is better

    accuracy = accuracy_score(labels_binary, predictions_binary)

    # Evaluate the model
    print("For the chromosome", chromosome)
    print(f"Validation Hamming Loss: {accuracy:.4f}")
    end_time = time.time()
    print("Execution time = ", end_time - start_time)
    print()

    return fitness


In [None]:
# without modifying the model
print(evaluate_fitness([], model, encoded_dataset))

NameError: name 'model' is not defined

In [None]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


In [None]:
def create_random_binary_list(length, percentage_of_zeros):
    # Calculate the number of zeros and ones based on the percentage
    num_zeros = int(length * (percentage_of_zeros / 100))
    num_ones = length - num_zeros

    # Create the list with the required number of 0s and 1s
    binary_list = [0] * num_zeros + [1] * num_ones

    # Shuffle the list to randomize the order
    random.shuffle(binary_list)

    return binary_list

def initialize_chromosome(num_genes):
  # initialize chromosome with given sparsity percentage
  return create_random_binary_list(num_genes, 30)

In [None]:
# checking to see if decoder is there
print(model.config.is_decoder)

False


In [None]:
model  = make_model()
chromosome = initialize_chromosome(model.config.num_attention_heads*model.config.num_hidden_layers)
print("Chromosome:", chromosome)
fitness = evaluate_fitness(chromosome, model, encoded_dataset)
print("Fitness score (accuracy):", fitness)


Chromosome: [0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1]
12
defaultdict(<class 'list'>, {0: [0, 3, 4, 6, 8], 1: [2, 5, 6], 2: [2, 8], 3: [4, 7], 4: [3, 11], 5: [1, 3, 7, 9, 10, 11], 6: [0, 9, 10, 11], 7: [4, 5, 7, 8], 8: [1, 4, 5, 6, 8], 9: [0, 10], 10: [0, 3, 4, 7, 8, 11], 11: [6, 7]})
For the chromosome [0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0

In [None]:
def find_size(model):
  total_size_in_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
  total_size_in_megabytes = total_size_in_bytes / (1024 ** 2)
  print(f"Model size: {total_size_in_megabytes:.2f} MB")

find_size(model)

Model size: 475.57 MB


In [None]:
# m  = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True, device_map=device)
# print(find_size(m), find_size(model))

In [None]:
POPN_SIZE = 10
crossover_rate = 0.7
mutation_rate = 0.1

In [None]:
def initialize_population(chromosome_length):
  # initialize random population
  population = []
  for _ in range(POPN_SIZE):
    chromosome = initialize_chromosome(chromosome_length)
    population.append(chromosome)
  return population

In [None]:
# Selection (Tournament Selection)
'''
def select_parents(population, fitness_scores):
    parents = []
    for _ in range(POPN_SIZE):
        tournament = np.random.choice(POPN_SIZE, 2)
        print("tournament = ",tournament)
        winner = tournament[np.argmax(fitness_scores[tournament])]
        parents.append(population[winner])
    return np.array(parents)
    '''

In [None]:
# roulette wheel selection
def select_parents(population, fitness_scores, num_parents):
    # Normalize fitness scores to create a probability distribution
    total_fitness = np.sum(fitness_scores)
    probabilities = fitness_scores / total_fitness

    # Select parents based on their fitness proportion (roulette wheel selection)
    # selected_parents = np.random.choice(population, size=num_parents, p=probabilities, replace=True)
    selected_parents = random.choices(population, weights=probabilities, k=num_parents)


    return np.array(selected_parents)

In [None]:
# Crossover (Single-point crossover)
def crossover(parent1, parent2):
    if np.random.rand() < crossover_rate:
        point = np.random.randint(1, len(parent1) - 1)
        child1 = np.concatenate([parent1[:point], parent2[point:]])
        child2 = np.concatenate([parent2[:point], parent1[point:]])
    else:
        child1, child2 = parent1, parent2
    return child1, child2

In [None]:
# Mutation (Flip bit mutation)
def mutate(chromosome):
    for i in range(len(chromosome)):
        if np.random.rand() < mutation_rate:
            chromosome[i] = 1 - chromosome[i]
    return chromosome

In [None]:
def elitism_and_selection(population, fitness_scores, num_elites, num_parents):
    # Elitism: Keep the top num_elites individuals
    elite_indices = np.argsort(fitness_scores)[-num_elites:]  # Get indices of top individuals
    elites = [population[i] for i in elite_indices]

    # Perform roulette wheel selection for the rest of the parents
    remaining_population = np.delete(population, elite_indices, axis=0)
    remaining_fitness_scores = np.delete(fitness_scores, elite_indices)

    num_to_select = num_parents - num_elites
    selected_parents = select_parents(remaining_population, remaining_fitness_scores, num_to_select)

    # Combine elites and selected parents
    next_generation = np.vstack((elites, selected_parents))

    return next_generation

In [None]:
'''def genetic_algorithm(model, num_generations, desired_sparsity):
  population = initialize_population(model.config.num_heads*model.config.num_layers)
  for generation in range(num_generations):
      fitness_scores = np.array([evaluate_fitness(chrom, model, encoded_dataset, metric) for chrom in population])
      parents = select_parents(population, fitness_scores)
      new_population = []
      for i in range(0, POPN_SIZE, 2):
          parent1, parent2 = parents[i], parents[i + 1]
          child1, child2 = crossover(parent1, parent2)
          child1 = mutate(child1)
          child2 = mutate(child2)
          new_population.extend([child1, child2])
      population = np.array(new_population)

      # Check for desired sparsity level
      sparsity_levels = np.mean(population == 0, axis=1)
      print(sparsity_levels)
      if np.any(sparsity_levels >= desired_sparsity):
          best_chromosome = population[np.argmax(sparsity_levels)]
          break
  return best_chromosome'''

In [None]:
def genetic_algorithm(model, num_generations, desired_sparsity):
  population = initialize_population(model.config.num_attention_heads*model.config.num_hidden_layers)  # Initialize the population
  for generation in range(num_generations):
      fitness_scores = np.array([evaluate_fitness(chrom, model, encoded_dataset) for chrom in population])
      best_chromosome = population[np.argmax(fitness_scores)]
      print("new fitness scores:", fitness_scores)
      print(f"best chromosome in generation {generation} is {best_chromosome} with accuracy {fitness_scores[np.argmax(fitness_scores)]}")
      parents = elitism_and_selection(population, fitness_scores, 4, POPN_SIZE)
      # parents = select_parents(population, fitness_scores, POPN_SIZE)
      new_population = []
      for i in range(0, POPN_SIZE, 2):
          parent1, parent2 = parents[i], parents[i + 1]
          child1, child2 = crossover(parent1, parent2)
          child1 = mutate(child1)
          child2 = mutate(child2)
          new_population.extend([child1, child2])
      population = np.array(new_population)
  return best_chromosome

In [None]:
'''genetic_algorithm(model, 10, 0.3)'''

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [0, 1, 4], 1: [2, 5, 6, 7], 2: [1], 3: [4, 6], 4: [1, 7], 5: [1, 7]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]
Validation Accuracy: 0.3538
Execution time =  220.13971972465515



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [5], 1: [3, 4, 5, 6], 2: [4, 7], 4: [2, 3, 4, 5], 5: [1, 5, 7]})
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0]
Validation Accuracy: 0.3730
Execution time =  222.45740389823914



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [0], 1: [1, 5], 2: [3, 6], 3: [3, 4, 7], 4: [1, 3, 6], 5: [2, 6, 7]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0]
Validation Accuracy: 0.5360
Execution time =  219.88337445259094



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [1, 3, 5], 1: [2, 4], 2: [1], 3: [1, 3, 7], 4: [0, 2, 4, 6], 5: [4]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1]
Validation Accuracy: 0.6635
Execution time =  209.18546509742737



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [1], 1: [0, 5, 7], 2: [0, 2, 4], 3: [3], 4: [1, 5], 5: [0, 1, 2, 3]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1]
Validation Accuracy: 0.6625
Execution time =  212.0702304840088



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [0, 2, 3], 1: [1, 3, 5, 6], 3: [7], 4: [0, 2], 5: [0, 3, 4, 5]})
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1]
Validation Accuracy: 0.6088
Execution time =  210.9596984386444



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [0, 4, 6], 1: [4, 5], 2: [6], 3: [0, 1, 3, 4], 4: [0, 1], 5: [0, 6]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1]
Validation Accuracy: 0.3873
Execution time =  208.51832008361816



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [7], 1: [0, 4, 6, 7], 2: [0, 2, 4, 6], 4: [2], 5: [2, 3, 4, 5]})
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1]
Validation Accuracy: 0.6826
Execution time =  209.6526551246643



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [0, 3, 4, 5], 1: [3, 4], 2: [4, 5, 7], 4: [1, 3, 4, 7], 5: [0]})
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1]
Validation Accuracy: 0.3087
Execution time =  211.3256130218506



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [2, 4, 5], 1: [2, 5], 2: [3, 4, 6, 7], 3: [3, 4], 4: [3, 4], 5: [5]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1]
Validation Accuracy: 0.4497
Execution time =  210.25780868530273



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [6], 1: [1, 4, 5, 7], 2: [0, 5], 3: [1, 3], 4: [0, 6, 7], 5: [2, 4]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1]
Validation Accuracy: 0.3452
Execution time =  209.0882956981659



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [3, 4, 7], 2: [1, 4, 6, 7], 3: [5, 6], 4: [3, 6], 5: [0, 3, 6]})
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1]
Validation Accuracy: 0.3423
Execution time =  213.78472328186035



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [2, 5, 7], 1: [1, 2, 3, 4, 6, 7], 2: [2, 7], 3: [0, 3, 7]})
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Validation Accuracy: 0.6826
Execution time =  210.889000415802



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [5], 1: [2, 6, 7], 2: [2, 4], 3: [4], 4: [0, 1, 2, 6], 5: [0, 1, 3]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1]
Validation Accuracy: 0.3087
Execution time =  209.69819355010986



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [5, 7], 1: [1, 4], 2: [0, 6], 3: [0, 6, 7], 4: [1, 6], 5: [2, 4, 7]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0]
Validation Accuracy: 0.6913
Execution time =  209.63826298713684



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [0, 2, 4, 6, 7], 1: [4], 2: [0, 5], 3: [0, 2, 3], 4: [0, 7], 5: [5]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1]
Validation Accuracy: 0.3087
Execution time =  211.9301302433014



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [4, 5], 1: [2], 2: [0, 1, 2, 7], 3: [1, 2, 3, 5], 4: [5], 5: [3, 6]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1]
Validation Accuracy: 0.6779
Execution time =  211.87941765785217



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [1, 3], 1: [0, 3], 2: [5], 3: [2, 3, 4], 4: [0, 3, 4, 6], 5: [3, 7]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0]
Validation Accuracy: 0.6913
Execution time =  210.9225471019745



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [5, 6], 1: [3, 4], 2: [3, 5, 6], 3: [0, 5], 4: [1, 6, 7], 5: [3, 7]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0]
Validation Accuracy: 0.3087
Execution time =  211.41342115402222



Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


6
defaultdict(<class 'list'>, {0: [1, 7], 1: [7], 2: [2, 3, 7], 3: [1, 3, 5, 6], 4: [4], 5: [2, 3, 6]})
Pruning
Pruning
Pruning
Pruning
Pruning
Pruning
For the chromosome [1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1]
Validation Accuracy: 0.6913
Execution time =  212.1215159893036

tournament =  [5 5]
tournament =  [ 1 19]
tournament =  [13 19]
tournament =  [2 6]
tournament =  [8 7]
tournament =  [18 11]
tournament =  [16 16]
tournament =  [12  4]
tournament =  [10 15]
tournament =  [3 1]
tournament =  [2 5]
tournament =  [6 8]
tournament =  [14 14]
tournament =  [6 5]
tournament =  [ 2 15]
tournament =  [ 4 17]
tournament =  [15  7]
tournament =  [12 16]
tournament =  [15  5]
tournament =  [ 3 12]
[0.35416667 0.3125     0.33333333 0.22916667 0.33333333 0.41666667
 0.3125     0.39583333 0.25       0.35416667 0.3125     0.33333333
 0.35416667 0.33333333 0.375      0.41666667 0.29166667 0.2

array([0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 1])

In [None]:
# genetic_algorithm(model, num_generations, desired_sparsity)
genetic_algorithm(model, 2, 0.3)

12
defaultdict(<class 'list'>, {0: [0, 1, 3, 4, 5, 7, 9, 11], 1: [1, 8, 9, 11], 2: [0, 2, 3, 5, 7, 9], 4: [0, 5], 5: [1, 2, 3, 6, 7, 8, 9], 6: [1, 2, 5, 10], 7: [4, 6, 9], 8: [1, 8, 9], 9: [1], 10: [0, 8], 11: [3, 7, 8]})
For the chromosome [0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1]
Number of predictions: 5427, Number of labels: 5427
Validation Hamming Loss: 0.9358
Execution time =  34.42866921424866

12
defaultdict(<class 'list'>, {0: [4], 1: [4, 6, 10, 11], 2: [0, 1, 3, 7, 9], 3: [0, 3, 4], 4: [0, 5, 6, 7], 5: [2, 6, 7, 8, 11], 6: [0, 1, 4, 10], 7: [3, 4, 5, 8, 11], 8: [2, 3, 6], 9: [1, 3, 11], 10: [2