In [1]:
!kill -9 2435744

/bin/bash: line 0: kill: (2435744) - No such process


In [2]:
!nvidia-smi

Fri Feb 23 01:42:36 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-PCIE-32GB           Off | 00000000:AF:00.0 Off |                    0 |
| N/A   39C    P0              26W / 250W |      4MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE-32GB           Off | 00000000:D8:0

In [3]:
from datetime import datetime
import os
import sys
import torch
import time
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq,BitsAndBytesConfig
from datasets import Dataset, DatasetDict
from math import ceil
import configparser
import logging,transformers
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    prepare_model_for_kbit_training,
    set_peft_model_state_dict,
)

  from .autonotebook import tqdm as notebook_tqdm
2024-02-23 01:42:39.576790: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-23 01:42:39.609025: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-23 01:42:39.609051: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-23 01:42:39.609069: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-23 01:42:3

In [4]:
exp_name="expDummy"
data_collator="DataCollatorForSeq2Seq"
model_name="codellama/CodeLlama-7b-Instruct-hf"
prompt_file_path="input/prompts/codellama_model.txt"
finetune_type="LoRA"
train_dataset ="input/datasets/spiderTrainSetNewContext.csv"

In [5]:
start_time = time.time()
config_filePath="./../config.ini"
config = configparser.ConfigParser()
config.read(config_filePath)
config.sections()
#User Config
logging_path = config['Default']['home_dir']+config['logs']['log_folder']+ exp_name

base_model = model_name
finetuningMethod = finetune_type
precision = int(config['Finetune']['precision'])
tokenizeMaxLength = int(config['Finetune']['tokenizeMaxLength'])



batch_size = int(config['Finetune']['batch_size'])
num_train_epochs = int(config['Finetune']['num_train_epochs'])
per_device_train_batch_size = int(config['Finetune']['per_device_train_batch_size'])
output_dir =  config['Default']['home_dir']+"output/model/"+ exp_name
prompt_file_path = config['Default']['home_dir']+prompt_file_path

LoRA_r = int(config['Finetune']['LoRA_r'])
LoRA_dropout = float(config['Finetune']['LoRA_dropout'])
LoRA_alpha = float(config['Finetune']['LoRA_alpha'])
target_modules = config['Finetune']['target_modules']
task_type = config['Finetune']['LoRA_taskType']

train_dataset = config['Default']['home_dir']+train_dataset

logging.basicConfig(filename=logging_path+".log", level=logging.INFO)

logging.info("EXPERIMENT :"+ exp_name)
logging.info(" Training Set : "+ train_dataset)
logging.info(" Base Model : "+ base_model)
logging.info(" Finetuning Method : "+finetuningMethod)
logging.info(" Precision : "+ str(precision))
logging.info(" Max length in tokenizer : "+ str(tokenizeMaxLength))
logging.info(" LoRA_r  : "+ str(LoRA_r))
logging.info(" LoRA_dropout  : "+ str(LoRA_dropout))
logging.info(" task_type  : "+ str(task_type))
logging.info(" LoRA_alpha  : "+ str(LoRA_alpha))
logging.info(" Batch Size  : "+ str(batch_size))
logging.info(" Number of train epochs  : "+ str(num_train_epochs))
logging.info(" per_device_train_batch_size  : "+ str(per_device_train_batch_size))
logging.info(" Output Directory : "+ output_dir)
logging.info(" Target Modules: "+ str(target_modules))

df = pd.read_csv(train_dataset)
data = Dataset.from_pandas(df)
num_samples = len(data)
val_set_size = ceil(0.9 * num_samples)
logging.info(" Number of samples for training: "+ str(num_samples))
logging.info(" Number of samples for validation: "+ str(val_set_size))

In [6]:
data

Dataset({
    features: ['Unnamed: 0', 'db_id', 'query', 'question', 'query_toks', 'query_toks_no_value', 'question_toks', 'context'],
    num_rows: 7000
})

In [7]:
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.add_eos_token = True
tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"


def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=tokenizeMaxLength,
        padding=False,
        return_tensors=None,
    )

    # "self-supervised learning" means the labels are also the inputs:
    result["labels"] = result["input_ids"].copy()
    return result

# def generate_and_tokenize_prompt(data_point):
#     prompt_file = open(prompt_file_path, "r")
#     full_prompt = prompt_file.read()
#     full_prompt = full_prompt.replace("{context}",data_point["context"])
#     full_prompt = full_prompt.replace("{question}",data_point["question"])
#     full_prompt = full_prompt.replace("{query}",data_point["query"])
#     print("**********************************",full_prompt)
#     return tokenize(full_prompt)
def generate_and_tokenize_prompt(data_point):
    full_prompt =f"""You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
{data_point["question"]}

### Context:
{data_point["context"]}

### Response:
{data_point["query"]}
"""
    return tokenize(full_prompt)

dataTrainTest = data.train_test_split(test_size=val_set_size, shuffle=True, seed=42)
tokenized_train_dataset = dataTrainTest["train"].shuffle().map(generate_and_tokenize_prompt)
tokenized_val_dataset = dataTrainTest["test"].shuffle().map(generate_and_tokenize_prompt)
dataTrainTest["test"].to_csv(config['Default']['home_dir']+"input/datasets/"+exp_name+"_validSet.csv")

if(finetune_type=="QLoRA"):
    print("In am in QLORA")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    ) # setup bits and bytes config

    model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=bnb_config, device_map={"":0})

else:
    if (precision==8):
        print("In am in 8bit")
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=True,
            device_map="auto",
        )
    if (precision==32):
        print("In am in 32bit")
        model = AutoModelForCausalLM.from_pretrained(
        base_model,
        device_map="auto",
        )
    if (precision==16):
        print("In am in 16bit")
        model = AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16,
        device_map="auto",
        )




model.train()
if (target_modules == "all_linear_layers"):
    print("In am in all_linear_layers")
    target_modules = ['gate_proj',
    'down_proj',
    'v_proj',
    'q_proj',
    'k_proj',
    'o_proj',
    'lm_head',
    'up_proj']
if (target_modules == "attention_linear_layers"):
    print("In am in attention_linear_layers")
    target_modules = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
    ]



lora_config = LoraConfig(
    r=LoRA_r, 
    lora_alpha=LoRA_alpha, 
    target_modules=target_modules, 
    lora_dropout=LoRA_dropout, 
    bias="none", 
    task_type=task_type
)
#model.gradient_checkpointing_enable()
#model = prepare_model_for_kbit_training(model) # prepares the whole model for kbit training
model = prepare_model_for_int8_training(model)
model = get_peft_model(model, lora_config) # Now you get a model ready for QLoRA training



if torch.cuda.device_count() > 1:
    # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
    model.is_parallelizable = True
    model.model_parallel = True


Map: 100%|██████████| 700/700 [00:00<00:00, 893.04 examples/s] 
Map: 100%|██████████| 6300/6300 [00:07<00:00, 808.52 examples/s]
Creating CSV from Arrow format: 100%|██████████| 7/7 [00:00<00:00, 23.73ba/s]


In am in 32bit


Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.84s/it]


In am in attention_linear_layers


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [8]:
#Training Arguments
gradient_accumulation_steps = batch_size // per_device_train_batch_size
training_args = TrainingArguments(
        per_device_train_batch_size=per_device_train_batch_size,
#         gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=3,
        max_steps=50,
        learning_rate=3e-4,
        logging_steps=50,
        optim="adamw_torch",
        evaluation_strategy="steps", # if val_set_size > 0 else "no",
        save_strategy="steps",
        eval_steps=10,
        save_steps=20,
        #num_train_epochs = num_train_epochs,
        output_dir=output_dir,
        load_best_model_at_end=True, #
        group_by_length=True, # group sequences of roughly the same length together to speed up training
        report_to="none", # if use_wandb else "none",
    )

if(data_collator == "DataCollatorForSeq2Seq"):
    print("I am in DataCollatorForSeq2Seq")
    trainer = Trainer(
        model=model,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_val_dataset,
        args=training_args,
        data_collator=DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )
elif(data_collator == "DataCollatorForLanguageModeling"):
    print("I am in DataCollatorForLanguageModeling")
    trainer = Trainer(
        model=model,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_val_dataset,
        args=training_args,
        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )

#pytorch-related optimisation (which just make training faster but don't affect accuracy):
model.config.use_cache = False

if torch.__version__ >= "2" and sys.platform != "win32":
    print("compiling the model")
    model = torch.compile(model)
with torch.autocast("cuda"):
    trainer.train()
    model.save_pretrained(output_dir)

end_time = time.time()
total_time = end_time - start_time
logging.info("Time taken to run in seconds: :"+ str(total_time))


I am in DataCollatorForLanguageModeling
compiling the model


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

IndexError: Invalid key: 20 is out of bounds for size 0

In [None]:
len(tokenized_train_dataset["input_ids"])