In [1]:
!pip install torch



In [2]:
!pip install transformers



In [3]:
!pip install peft



In [4]:
!pip install datasets



In [5]:
!pip install huggingface_hub



In [6]:
!pip install python-dotenv



In [7]:
!pip install -U bitsandbytes





In [8]:
!pip install -U transformers



In [9]:
import os
from dotenv import load_dotenv
import torch
import json
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training
)
from datasets import Dataset
from huggingface_hub import login

# Pull settings (e.g. HUGGINGFACE_TOKEN) from a local .env file into the environment
load_dotenv()

# Authenticate against the Hugging Face Hub when a token is configured
hf_token = os.getenv('HUGGINGFACE_TOKEN')
if not hf_token:
    print("Warning: HUGGINGFACE_TOKEN not found in environment variables")
else:
    try:
        login(token=hf_token)
        print("Successfully logged in to Hugging Face")
    except Exception as e:
        # A failed login is reported but does not stop the notebook
        print(f"Failed to login to Hugging Face: {str(e)}")

# Make sure the output folders exist before anything tries to write to them
for _output_dir in ("models", "models/checkpoints", "models/final", "logs"):
    os.makedirs(_output_dir, exist_ok=True)

def _get_quantization_config():
    """Build the bitsandbytes settings used to load the model quantized.

    Returns:
        A ``BitsAndBytesConfig`` for NF4 4-bit loading with float16 compute
        and double quantization. If building that config raises, the error
        is printed and an 8-bit config is returned instead.
    """
    try:
        four_bit_options = dict(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
        )
        config = BitsAndBytesConfig(**four_bit_options)
    except Exception as e:
        print(f"Error with 4-bit quantization: {str(e)}")
        print("Falling back to 8-bit quantization")
        config = BitsAndBytesConfig(load_in_8bit=True)
    return config

def _iter_message_pairs(messages):
    """Yield ``(user_content, assistant_content)`` tuples from one conversation.

    When every message is a dict carrying a ``role`` key, each ``assistant``
    message is paired with the most recent unanswered ``user`` message and all
    other roles (e.g. ``system``) are skipped, so a leading system prompt does
    not shift the pairing. When roles are absent, messages are paired
    positionally (0 with 1, 2 with 3, ...); a trailing unpaired message is
    dropped.
    """
    if messages and all(isinstance(m, dict) and 'role' in m for m in messages):
        pending_user = None
        for message in messages:
            role = message['role']
            if role == 'user':
                pending_user = message['content']
            elif role == 'assistant' and pending_user is not None:
                yield pending_user, message['content']
                pending_user = None
        return

    # No role information: fall back to strict alternation by position.
    for i in range(0, len(messages) - 1, 2):
        yield messages[i]['content'], messages[i + 1]['content']


def process_jsonl_dataset(file_path):
    """Convert a chat-style JSONL file into instruction/response examples.

    Each non-blank line is parsed as JSON. A record that has a ``messages``
    key contributes one example per user/assistant exchange, formatted as
    ``### Instruction: <user>`` followed by a blank line and
    ``### Response: <assistant>``.

    Args:
        file_path: Path to the JSONL file (UTF-8, with or without a BOM).

    Returns:
        A list of ``{"text": str}`` dicts. Malformed lines are reported and
        skipped; if the file cannot be read the error is printed and whatever
        was collected so far (possibly an empty list) is returned.
    """
    training_data = []
    try:
        # utf-8-sig transparently strips a BOM that would otherwise make the
        # first record fail to parse.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            for line in file:
                if not line.strip():
                    # Blank separator lines are not malformed records.
                    continue
                try:
                    data = json.loads(line)
                    if 'messages' not in data:
                        continue
                    for user_msg, assistant_msg in _iter_message_pairs(data['messages']):
                        text = f"### Instruction: {user_msg}\n\n### Response: {assistant_msg}"
                        training_data.append({"text": text})
                except json.JSONDecodeError:
                    print("Skipping invalid JSON line")
                except Exception as e:
                    print(f"Error processing line: {str(e)}")
    except Exception as e:
        print(f"Error reading file: {str(e)}")
    return training_data

def _load_base_model(llm_model_name, device):
    """Load the base causal LM, relaxing quantization until a load succeeds.

    On CUDA the attempts are, in order: 4-bit (via ``_get_quantization_config``),
    8-bit, then unquantized float16. On any other device the model is loaded
    once in float32 on the CPU. Only the final attempt's exception propagates.
    """
    # Options shared by every load attempt.
    common_kwargs = {
        "low_cpu_mem_usage": True,
        # NOTE(review): trust_remote_code executes code shipped with the
        # checkpoint; confirm it is required for the models used here.
        "trust_remote_code": True,
        "token": hf_token if hf_token else None,
    }

    if device != 'cuda':
        print("Loading model in CPU mode with reduced size...")
        return AutoModelForCausalLM.from_pretrained(
            llm_model_name,
            torch_dtype=torch.float32,
            device_map={"": "cpu"},
            **common_kwargs,
        )

    gpu_kwargs = {"torch_dtype": torch.float16, "device_map": "auto", **common_kwargs}

    try:
        print("Attempting to load with 4-bit quantization...")
        return AutoModelForCausalLM.from_pretrained(
            llm_model_name,
            quantization_config=_get_quantization_config(),
            **gpu_kwargs,
        )
    except Exception as e:
        print(f"4-bit loading failed: {str(e)}")

    try:
        print("Attempting to load with 8-bit quantization...")
        # Quantization is requested through a BitsAndBytesConfig rather than
        # the deprecated bare `load_in_8bit` keyword.
        return AutoModelForCausalLM.from_pretrained(
            llm_model_name,
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
            **gpu_kwargs,
        )
    except Exception as e2:
        print(f"8-bit loading failed: {str(e2)}")

    print("Falling back to standard precision (will use more memory)...")
    return AutoModelForCausalLM.from_pretrained(llm_model_name, **gpu_kwargs)


def fine_tune_model(training_data, model_name=None, llm_model_name="mistralai/Mistral-7B-Instruct-v0.2"):
    """Fine-tune a causal LM on instruction/response text and push it to the Hugging Face Hub.

    On CUDA the base model is loaded quantized when possible and wrapped with
    LoRA adapters on the attention projections; on CPU it is loaded in float32
    and trained without LoRA. Checkpoints and logs go to temporary directories
    that are removed when the function exits, whether it succeeds or fails.

    Args:
        training_data: Non-empty list of dicts, each with a ``"text"`` entry
            holding one training example.
        model_name: Hub repository name (without the user prefix). Defaults to
            ``fine_tuned_model_<timestamp>``.
        llm_model_name: Identifier of the base checkpoint to fine-tune.

    Raises:
        ValueError: If ``training_data`` is empty.
        Exception: Any error raised while loading, training or pushing is
            printed and re-raised unchanged.
    """
    import shutil
    from datetime import datetime

    # Fail fast: without this, an empty dataset is only discovered after the
    # multi-gigabyte base model has been downloaded and loaded.
    if not training_data:
        raise ValueError("training_data is empty; nothing to fine-tune on")

    if model_name is None:
        model_name = f"fine_tuned_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Get Hugging Face username from environment variable or use a default
    hf_username = os.getenv('HUGGINGFACE_USERNAME', 'rishikann')
    print(f"Starting fine-tuning process for model: {hf_username}/{model_name}")

    # Check for GPU availability
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # Initialize tokenizer and model
    try:
        print("Loading tokenizer and model...")
        tokenizer = AutoTokenizer.from_pretrained(llm_model_name, token=hf_token if hf_token else None)
        base_model = _load_base_model(llm_model_name, device)
        print("Model loaded successfully")
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise

    # The tokenizer is given a pad token by reusing EOS so that
    # padding="max_length" below has something to pad with.
    tokenizer.pad_token = tokenizer.eos_token

    # Prepare model for LoRA training if using GPU
    if device == 'cuda':
        base_model = prepare_model_for_kbit_training(base_model)

        lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
        model = get_peft_model(base_model, lora_config)
    else:
        print("Skipping LoRA configuration in CPU mode")
        model = base_model

    # Convert training data to dataset format
    dataset = Dataset.from_list(training_data)

    def tokenize_function(examples):
        """Tokenize a batch of examples, padded/truncated to 512 tokens."""
        return tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )

    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    # Set up temporary directories for checkpoints and logs
    temp_checkpoint_dir = os.path.join("temp_checkpoints", model_name)
    temp_log_dir = os.path.join("temp_logs", model_name)

    os.makedirs(temp_checkpoint_dir, exist_ok=True)
    os.makedirs(temp_log_dir, exist_ok=True)

    try:
        # No evaluation dataset is supplied, so best-model tracking stays off.
        # NOTE(review): warmup_steps=100 may exceed the total number of
        # optimizer steps on small datasets; confirm this is intended.
        training_args = TrainingArguments(
            output_dir=temp_checkpoint_dir,
            num_train_epochs=3,
            per_device_train_batch_size=4,
            gradient_accumulation_steps=4,
            warmup_steps=100,
            weight_decay=0.01,
            logging_dir=temp_log_dir,
            logging_steps=10,
            save_strategy="epoch",
            load_best_model_at_end=False,
            push_to_hub=True,
            hub_model_id=f"{hf_username}/{model_name}",
            hub_token=os.getenv('HUGGINGFACE_TOKEN')
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_dataset,
            data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
        )

        print("Starting training...")
        trainer.train()

        print(f"Training completed. Pushing model to Hugging Face Hub: {hf_username}/{model_name}")
        trainer.push_to_hub()
        print(f"Model successfully pushed to Hugging Face Hub: {hf_username}/{model_name}")

    except Exception as e:
        print(f"Error during fine-tuning or pushing to hub: {str(e)}")
        raise
    finally:
        # Runs on success, on error and on interrupt, so temporary
        # checkpoints and logs never outlive the call.
        shutil.rmtree(temp_checkpoint_dir, ignore_errors=True)
        shutil.rmtree(temp_log_dir, ignore_errors=True)

    print("Fine-tuning process completed")

if __name__ == "__main__":
    # Build instruction/response examples from the chat-style JSONL file.
    # NOTE(review): absolute Colab-style path; adjust when running elsewhere.
    training_data = process_jsonl_dataset("/content/pinecone_qa_dataset.jsonl")

    if training_data:
        print(f"Processing {len(training_data)} training examples...")
        try:
            # Fine-tune the model and push it to the Hub
            fine_tune_model(
                training_data=training_data,
                model_name="qa_finetuned_model"
            )
            print("Fine-tuning completed successfully!")
        except Exception as e:
            print(f"Error during fine-tuning: {str(e)}")
    else:
        # Previously the cell ended silently when no examples were produced.
        print("No training examples were loaded; skipping fine-tuning.")

Processing 99 training examples...
Starting fine-tuning process for model: rishikann/qa_finetuned_model
Using device: cuda
Loading tokenizer and model...
Attempting to load with 4-bit quantization...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded successfully


Map:   0%|          | 0/99 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting training...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrishikanaarayan2003[0m ([33mrishikanaarayan2003-pes-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,3.9127


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Training completed. Pushing model to Hugging Face Hub: rishikann/qa_finetuned_model


tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Model successfully pushed to Hugging Face Hub: rishikann/qa_finetuned_model
Fine-tuning process completed
Fine-tuning completed successfully!
