In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/gemma/transformers/2b/2/model.safetensors.index.json
/kaggle/input/gemma/transformers/2b/2/gemma-2b.gguf
/kaggle/input/gemma/transformers/2b/2/config.json
/kaggle/input/gemma/transformers/2b/2/model-00001-of-00002.safetensors
/kaggle/input/gemma/transformers/2b/2/model-00002-of-00002.safetensors
/kaggle/input/gemma/transformers/2b/2/tokenizer.json
/kaggle/input/gemma/transformers/2b/2/tokenizer_config.json
/kaggle/input/gemma/transformers/2b/2/special_tokens_map.json
/kaggle/input/gemma/transformers/2b/2/.gitattributes
/kaggle/input/gemma/transformers/2b/2/tokenizer.model
/kaggle/input/gemma/transformers/2b/2/generation_config.json


In [2]:
# !pip install bert-score
# !pip install rouge-score
# from nltk.translate.bleu_score import sentence_bleu
# from rouge_score import rouge_scorer
import nltk
# from transformers.modeling_utils import prune_linear_layer
from collections import defaultdict
from datasets import load_dataset
from tqdm import tqdm
from torch.utils.data.dataset import Dataset
import gc
import math
import time

In [3]:
class IndexDataset(Dataset):
    def __init__(self, tensors):
        self.tensors = tensors

    def __getitem__(self, index):
        return self.tensors[index]

    def __len__(self):
        return len(self.tensors)

def process_data(samples, tokenizer, seq_len, field_name):
    test_ids = tokenizer("\n\n".join(samples[field_name]), return_tensors='pt').input_ids[0]
    test_ids_batch = []
    nsamples = test_ids.numel() // seq_len

    for i in range(nsamples):
        batch = test_ids[(i * seq_len):((i + 1) * seq_len)]
        test_ids_batch.append(batch)
    test_ids_batch = torch.stack(test_ids_batch)
    return IndexDataset(tensors=test_ids_batch)
       

def get_loaders(tokenizer, seq_len=2048, batch_size = 8):
    test_data = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test')
#     test_dataset = process_data(test_data, tokenizer, seq_len, 'text')
    test_dataset = process_data(test_data[0:100], tokenizer, seq_len, 'text')

    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return test_loader

In [43]:
def make_model():
    for _ in range(10):
        # print("Emptying cache")
        torch.cuda.empty_cache()
        gc.collect()
    try:
        del model
        print("Deleted existing")
        for _ in range(10):
            print(torch.cuda.empty_cache())
            gc.collect()
    except:
        pass
    time.sleep(6)
    model = GemmaForCausalLM.from_pretrained(model_name)  # Use the appropriate model class
    model.to(device)
    return model

In [5]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoModel, GemmaForCausalLM 
import torch
model_name = '/kaggle/input/gemma/transformers/2b/2'  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = make_model()

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
test_loader = get_loaders(tokenizer, seq_len=128, batch_size = 4)
def PPLMetric(model, tokenizer, seq_len=128, batch_size = 4, device="cuda"):
    metric = {}
    metric = ppl_eval(model, test_loader, device)
    print(metric)
    return metric


def ppl_eval(model, test_loader, device):
    '''model.eval()
    total_log_prob = 0
    total_count = 0
    
    with torch.no_grad():
        for batch in tqdm(test_loader):
            inputs = tokenizer(batch['text'], return_tensors='pt', padding=True, truncation=True).to(model.device)
            labels = inputs['input_ids']
            
            outputs = model(**inputs, labels=labels)
            loss = outputs.loss
            log_prob = -loss.item() * labels.numel()  # Negative log likelihood
            total_log_prob += log_prob
            total_count += labels.numel()
    
    avg_log_prob = total_log_prob / total_count
    perplexity = math.exp(-avg_log_prob)
    return perplexity'''

    # nlls = []
    # n_samples = 0
    # with torch.no_grad():
    #     for batch in tqdm(test_loader):
    #         batch = batch.to(device)
    #         # CHANGE THIS:
    #         output = model(batch)
    #         lm_logits = output.logits

    #         shift_logits = lm_logits[:, :-1, :].contiguous()
    #         shift_labels = batch[:, 1:].contiguous()

    #         loss_fct = torch.nn.CrossEntropyLoss(reduction="none")
    #         loss = loss_fct(shift_logits.reshape(-1, shift_logits.size(-1)), shift_labels.view(-1))
    #         nlls.append(loss)
    #         del batch
    #         for _ in range(10):
    #             torch.cuda.empty_cache()
    #             gc.collect()
    # #print(torch.cat(nlls, dim=-1).mean())
    # ppl = np.exp(torch.cat(nlls, dim=-1).mean().item())
    # return ppl.item()
    
    model.eval()  # Ensure model is in evaluation mode
    total_loss = 0
    total_tokens = 0
    
    with torch.no_grad():
        for batch in tqdm(test_loader):
            batch = batch.to(device)
            
            outputs = model(
                input_ids=batch,
                labels=batch  # Let the model handle the label shifting
            )
            
            # Most models return loss directly
            loss = outputs.loss
            
            # Accumulate total loss
            total_loss += loss.item() * batch.size(1)  # Multiply by sequence length
            total_tokens += batch.size(1)
            
            # Clean up memory
            del batch, outputs
            torch.cuda.empty_cache()
    
    # Calculate perplexity
    avg_loss = total_loss / total_tokens
    ppl = math.exp(avg_loss)
    return ppl

In [7]:
import random
def create_random_binary_list(length, percentage_of_zeros):
    num_zeros = int(length * percentage_of_zeros)
    num_ones = length - num_zeros

    # Create the list with the required number of 0s and 1s
    binary_list = [0] * num_zeros + [1] * num_ones

    # Shuffle the list to randomize the order
    random.shuffle(binary_list)

    return binary_list

def initialize_chromosome(num_genes):
  # initialize chromosome with given sparsity percentage
  return create_random_binary_list(num_genes, SPARSITY_RATE)

In [50]:
def modify_model(model, chromosome):
    num_heads = num_attention_heads
    num_blocks = num_hidden_layers
    # num_ffn = model.config.num_hidden_layers

    # Disable attention heads
    heads_to_prune = defaultdict(list)
    for i, gene in enumerate(chromosome):
        if gene == 0:
          block_num = i//num_heads
          head_num = i%num_heads
          heads_to_prune[block_num].append(head_num)

    head_dim = model.config.head_dim
    if heads_to_prune:
        print("Pruning heads in model")
        with torch.no_grad():
            for block in range(block_num): 
                for head in heads_to_prune[block]:
                        # Zero-out the corresponding rows in the q_proj, k_proj, and v_proj
                        start_index = head * head_dim
                        end_index = (head + 1) * head_dim
                        model.model.layers[block].self_attn.q_proj.weight[start_index:end_index, :] = 0
                        model.model.layers[block].self_attn.k_proj.weight[start_index:end_index, :] = 0
                        model.model.layers[block].self_attn.v_proj.weight[start_index:end_index, :] = 0

    #                     model.model.layers[block_num].self_attn.q_proj.weight = model.model.layers[block_num].self_attn.q_proj.weight.to(torch.int8)
    #                     model.model.layers[block_num].self_attn.k_proj.weight = model.model.layers[block_num].self_attn.q_proj.weight.to(torch.int8)
    #                     model.model.layers[block_num].self_attn.c_proj.weight = model.model.layers[block_num].self_attn.q_proj.weight.to(torch.int8)


    return model

In [52]:
def evaluate_fitness(chromosome):
    model = make_model()
    model = modify_model(model,chromosome)
    metric = PPLMetric(model, tokenizer)
    print("Chromosome:", chromosome, "\nFitness:", metric)
    del model
    return (-1)*metric

In [11]:
def find_size(model):
  total_size_in_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
  total_size_in_megabytes = total_size_in_bytes / (1024 ** 2)
  print(f"Model size: {total_size_in_megabytes:.2f} MB")

In [22]:
# del model
model = make_model()
metric = PPLMetric(model, tokenizer)
find_size(model)
num_attention_heads = model.config.num_attention_heads
num_hidden_layers = model.config.num_hidden_layers

Emptying cache
Emptying cache
Emptying cache
Emptying cache
Emptying cache
Emptying cache
Emptying cache
Emptying cache
Emptying cache
Emptying cache


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 11/11 [00:05<00:00,  2.15it/s]

741.6232902902235
Model size: 9560.29 MB





In [23]:
chrom = initialize_chromosome(num_attention_heads*num_hidden_layers)
# model = modify_model(model, chrom)
# find_size(model)

In [24]:
POPN_SIZE = 8
SPARSITY_RATE = 0.3
crossover_rate = 0.7
mutation_rate = 0.08


In [25]:
def initialize_population(chromosome_length):
  # initialize random population
  population = []
  for _ in range(POPN_SIZE):
    chromosome = initialize_chromosome(chromosome_length)
    population.append(chromosome)
  return population

In [26]:
def select_parents(population, fitness_scores, num_parents):
    # Normalize fitness scores to create a probability distribution
    total_fitness = np.sum(fitness_scores)
    probabilities = fitness_scores / total_fitness

    # Select parents based on their fitness proportion (roulette wheel selection)
    # selected_parents = np.random.choice(population, size=num_parents, p=probabilities, replace=True)
    selected_parents = random.choices(population, weights=probabilities, k=num_parents)


    return np.array(selected_parents)

In [27]:
# Crossover (Single-point crossover)
def crossover(parent1, parent2):
    if np.random.rand() < crossover_rate:
        point = np.random.randint(1, len(parent1) - 1)
        child1 = np.concatenate([parent1[:point], parent2[point:]])
        child2 = np.concatenate([parent2[:point], parent1[point:]])
    else:
        child1, child2 = parent1, parent2
    return child1, child2

In [58]:
# Mutation (Flip bit mutation)
def mutate(chromosome):
    for i in range(len(chromosome)):
        if np.random.rand() < mutation_rate:
            chromosome[i] = 1 - chromosome[i]

    target_ones = int(len(chromosome) * (1-SPARSITY_RATE))

    for c in range(num_attention_heads-1, len(chromosome), num_attention_heads):
      # this part to ensure that each layer has at least one attention head
      start = c-num_attention_heads-1
      enc_part = chromosome[start:c]
      num_ones = np.sum(enc_part)  # Count the number of 1s in the chromosome
      if num_ones==0:
        chromosome[start] = 1

    for i in range(len(chromosome)):
        if np.random.rand() < mutation_rate:
            if chromosome[i] == 1 and num_ones > target_ones:
                chromosome[i] = 0  # Flip 1 to 0 only if there are too many 1s
                num_ones -= 1
            elif chromosome[i] == 0 and num_ones < target_ones:
                chromosome[i] = 1  # Flip 0 to 1 only if there are too few 1s
                num_ones += 1
    return chromosome


In [29]:
def elitism_and_selection(population, fitness_scores, num_elites, num_parents):
    # Elitism: Keep the top num_elites individuals
    elite_indices = np.argsort(fitness_scores)[-num_elites:]  # Get indices of top individuals
    elites = [population[i] for i in elite_indices]

    # Perform roulette wheel selection for the rest of the parents
    remaining_population = np.delete(population, elite_indices, axis=0)
    remaining_fitness_scores = np.delete(fitness_scores, elite_indices)

    num_to_select = num_parents - num_elites
    selected_parents = select_parents(remaining_population, remaining_fitness_scores, num_to_select)

    # Combine elites and selected parents
    next_generation = np.vstack((elites, selected_parents))

    return next_generation

In [56]:
import matplotlib.pyplot as plt

def genetic_algorithm(num_generations, desired_sparsity):
  population = initialize_population(num_attention_heads*num_hidden_layers)  # Initialize the population
  accuracy_per_generation = []  # List to store highest accuracy values for each generation
  for generation in range(num_generations):
      fitness_scores = np.array([evaluate_fitness(chrom) for chrom in population])
      best_chromosome = population[np.argmax(fitness_scores)]
      print("new fitness scores:", fitness_scores)
      print(f"best chromosome in generation {generation} is {best_chromosome} with accuracy {fitness_scores[np.argmax(fitness_scores)]}")
      accuracy_per_generation.append(fitness_scores[np.argmax(fitness_scores)])
      parents = elitism_and_selection(population, fitness_scores, 4, POPN_SIZE)
      # parents = select_parents(population, fitness_scores, POPN_SIZE)
      new_population = []
      for i in range(0, POPN_SIZE, 2):
          parent1, parent2 = parents[i], parents[i + 1]
          child1, child2 = crossover(parent1, parent2)
          child1 = mutate(child1)
          child2 = mutate(child2)
          new_population.extend([child1, child2])
      population = np.array(new_population)
  generations = list(range(1, len(accuracy_per_generation) + 1))


  return best_chromosome

In [57]:
genetic_algorithm(2, SPARSITY_RATE)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.56it/s]


681.567115914516
Chromosome: [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1] 
Fitness: 681.567115914516


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.57it/s]


7185.827098166401
Chromosome: [1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1] 
Fitness: 7185.827098166401


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.57it/s]


3.019709328804503e+50
Chromosome: [0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1] 
Fitness: 3.019709328804503e+50


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.57it/s]


2112.873610437727
Chromosome: [1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0] 
Fitness: 2112.873610437727


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.57it/s]


1.2917770633004654e+51
Chromosome: [0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Fitness: 1.2917770633004654e+51


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.55it/s]


961.3582048234236
Chromosome: [1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1] 
Fitness: 961.3582048234236


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.57it/s]


1.7072522510152955e+48
Chromosome: [0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1] 
Fitness: 1.7072522510152955e+48


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Pruning heads in model


100%|██████████| 11/11 [00:04<00:00,  2.56it/s]


1.3130824248665481e+45
Chromosome: [0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1] 
Fitness: 1.3130824248665481e+45
new fitness scores: [-6.81567116e+02 -7.18582710e+03 -3.01970933e+50 -2.11287361e+03
 -1.29177706e+51 -9.61358205e+02 -1.70725225e+48 -1.31308242e+45]
best chromosome in generation 0 is [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 

NameError: name 'target_ones' is not defined