In [None]:
import os

import pandas as pd
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from datasets import Dataset
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

In [None]:
# FSDP plugin: both the model and the optimizer state-dict configs use the
# "full" variants with CPU offload enabled and rank0_only disabled.
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

# Accelerator instance that later wraps the PEFT model via `prepare_model`.
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

In [None]:
# Parallel corpus on disk; later cells read its 'English' and 'Gujarati' columns.
csv_file_path = 'eng_guj.csv'

# Load the CSV into a pandas DataFrame.
df = pd.read_csv(csv_file_path)

# Wrap the DataFrame in a Hugging Face `Dataset` so it can be mapped and fed to the Trainer.
custom_dataset = Dataset.from_pandas(df)

# Sanity check: show the first five rows.
print(custom_dataset[:5])

In [None]:
# The whole dataset is used for training; no validation/test split is made here.
train_dataset = custom_dataset

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# Hub identifier of the base checkpoint that gets fine-tuned.
base_model_id = "mistralai/Mistral-7B-v0.1"

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# 4-bit NF4 quantization with double quantization enabled and bfloat16 as the
# compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Never embed access tokens in a notebook: read the Hugging Face token from the
# HF_TOKEN environment variable instead. When it is unset, `token=None` makes
# the library fall back to whatever credentials are cached locally.
# NOTE: the token that used to be hardcoded here must be treated as leaked and
# revoked in the Hugging Face account settings.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    token=os.environ.get("HF_TOKEN"),
)

In [None]:
# Disable the generation KV cache while training (it is switched back on in
# the inference section further down).
model.config.use_cache = False
# NOTE(review): `pretraining_tp` looks like a Llama-specific config field;
# confirm it has any effect on a Mistral config.
model.config.pretraining_tp = 1

In [None]:
# Read the Hugging Face token from the HF_TOKEN environment variable rather
# than hardcoding it; `None` falls back to locally cached credentials.
# NOTE: the token that used to be hardcoded here must be treated as leaked and
# revoked.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    token=os.environ.get("HF_TOKEN"),
)
# No dedicated pad token is configured here, so EOS is reused for padding.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right so the real tokens stay at the start of each sequence.
tokenizer.padding_side = "right"

In [None]:
from peft import LoraConfig, get_peft_model
# LoRA configuration for causal-LM fine-tuning: rank-64 adapters with
# alpha=16 and 10% dropout, attached to the attention projections
# (q/k/v/o_proj) and the MLP projections (gate/up/down_proj); bias terms are
# not trained.
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=['q_proj', 'o_proj', 'v_proj', 'gate_proj', 'down_proj', 'k_proj', 'up_proj'],
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the quantized base model.
model.gradient_checkpointing_enable()
# Let PEFT prepare the quantized model for k-bit training before adapters are added.
model = prepare_model_for_kbit_training(model)

In [None]:
# Wrap the prepared base model with the LoRA adapters described by `peft_params`.
model = get_peft_model(model, peft_params)

# Hand the PEFT model to the accelerator (configured with the FSDP plugin).
# This line can be commented out to train without the accelerator.
model = accelerator.prepare_model(model)

In [None]:
# Display the first training example to check the raw columns.
train_dataset[0]

In [None]:
def tokenize(prompt):
    """Encode `prompt` as a fixed-length training example.

    The module-level `tokenizer` is called with truncation and padding so the
    encoded sequence is exactly 512 tokens long. A copy of the input ids is
    stored under "labels" so the example can be used for causal-LM training.

    Args:
        prompt: Text to encode.

    Returns:
        The tokenizer output with an extra "labels" entry.
    """
    encoded = tokenizer(prompt, padding="max_length", max_length=512, truncation=True)
    # Copy rather than alias so labels and input ids remain independent objects.
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

In [None]:
def generate_and_tokenize_prompt(data_point):
    """Build the training prompt for one example and tokenize it.

    Args:
        data_point: Mapping with an 'English' source sentence and its
            'Gujarati' translation.

    Returns:
        Whatever `tokenize` returns for the rendered prompt.
    """
    english = data_point['English']
    gujarati = data_point['Gujarati']

    # Every line after the first carries a four-space indent, and the prompt
    # ends with a newline followed by four spaces.
    prompt_lines = [
        "<s>[INST] <<SYS>>",
        "    Provide a precise translation of the following English sentence into Gujarati.",
        "    <</SYS>>",
        f"    English: {english}",
        f"    Gujarati: {gujarati} [/INST]</s>",
        "    ",
    ]
    # NOTE(review): the literal <s>/</s> markers are part of the text; confirm
    # how they interact with the tokenizer's own special-token handling.
    return tokenize("\n".join(prompt_lines))

In [None]:
# Render and tokenize the prompt for every row of the training set.
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)

In [None]:
import transformers
from datetime import datetime

# Run naming: checkpoints are written to ./Mistral-7B-v0.1-gujarati-finetune.
project = "gujarati-finetune"
base_model_name = "Mistral-7B-v0.1"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=5,
        # 50 sequences per device x 4 accumulation steps = 200 sequences per
        # optimizer step on each device.
        per_device_train_batch_size=50,
        gradient_checkpointing=True,
        gradient_accumulation_steps=4,
        max_steps=300,
        learning_rate=2.5e-5,
        logging_steps=5,              # Log every 5 steps
        bf16=False,
        optim="paged_adamw_8bit",
        logging_dir="./logs",         # Directory for storing logs
        save_strategy="steps",
        save_steps=50,                # Checkpoint every 50 steps (last one: checkpoint-300)
        # No eval_dataset is passed to the Trainer, so evaluation is left at
        # its default (disabled). The previous `evaluation_strategy="steps"`
        # with `eval_steps=500` contradicted `do_eval=False`, could never fire
        # within 300 steps, and is rejected by newer transformers versions
        # when no eval_dataset is supplied.
        do_eval=False,
        # Timestamped run name (picked up by experiment trackers such as W&B).
        run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
    ),
    # Causal-LM collator (mlm=False).
    # NOTE(review): the pad token is set to EOS earlier in the notebook; confirm
    # how the collator treats pad positions in the labels, since that would
    # also affect real EOS tokens.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# `model.config.use_cache` is already False at this point (set after loading).
trainer.train()

In [None]:
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Inference section: reload the quantized base model and tokenizer so the
# fine-tuned adapter can be attached to a clean copy.
base_model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Read the Hugging Face token from the HF_TOKEN environment variable instead
# of hardcoding it; `None` falls back to locally cached credentials.
# NOTE: the token that used to be hardcoded here must be treated as leaked and
# revoked.
hf_token = os.environ.get("HF_TOKEN")

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,                   # Same base checkpoint as for training
    quantization_config=bnb_config,  # Same quantization config as for training
    device_map="auto",
    trust_remote_code=True,
    token=hf_token,
)

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_bos_token=True,              # The tokenizer prepends BOS itself
    trust_remote_code=True,
    token=hf_token,
)
# Same padding setup as during training: EOS doubles as pad, padding on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
from peft import PeftModel
# Re-enable the KV cache for generation.
model.config.use_cache = True
# Load the LoRA adapter produced by this notebook's training run: the Trainer
# writes to ./Mistral-7B-v0.1-gujarati-finetune and, with save_steps=50 and
# max_steps=300, the final checkpoint is checkpoint-300. The path used before
# pointed at a Llama-2-13B Hinglish adapter, which does not belong to the
# Mistral-7B base model loaded here.
ft_model = PeftModel.from_pretrained(model, "./Mistral-7B-v0.1-gujarati-finetune/checkpoint-300")

In [None]:
def answer(text):
    """Translate an English sentence into Gujarati with the fine-tuned model.

    The prompt follows the template used to build the training examples,
    stopping right after "Gujarati:" so the model continues with the
    translation. The literal "<s>" of the training template is left out
    because the inference tokenizer is loaded with `add_bos_token=True`.

    Args:
        text: English sentence to translate.

    Returns:
        str: The decoded generation (prompt included, special tokens
        removed). The same string is also printed.
    """
    eval_prompt = (
        "[INST] <<SYS>>\n"
        "    Provide a precise translation of the following English sentence into Gujarati.\n"
        "    <</SYS>>\n"
        f"    English: {text}\n"
        "    Gujarati:"
    )
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

    ft_model.eval()
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_ids = ft_model.generate(**model_input, max_new_tokens=200)[0]

    decoded = tokenizer.decode(output_ids, skip_special_tokens=True)
    print(decoded)
    # Return the text as well as printing it, so callers do not receive None.
    return decoded

In [None]:
# Run a sample sentence through `answer` and keep the string form of its return value.
ans=str(answer('History is a story told by the winners.'))
# Display the result with surrounding whitespace removed.
ans.strip()