#### Installing Packages

In [None]:
# Install required packages
!pip install torch transformers datasets pandas numpy peft accelerate

#### Mounting google drive

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Path to your WhatsApp chat file in Google Drive
chat_file_path = "/content/drive/MyDrive/Data-ChatLLM/input.txt"  # Update this path

# Verify the file exists
import os
if os.path.exists(chat_file_path):
    print(f"File found: {chat_file_path}")
    # Show first few lines to verify content
    with open(chat_file_path, 'r', encoding='utf-8') as f:
        print("First 5 lines of the file:")
        for i, line in enumerate(f):
            if i < 5:
                print(line.strip())
            else:
                break
else:
    print(f"File not found: {chat_file_path}")
    print("Available files in MyDrive:")
    !ls "/content/drive/MyDrive"

#### importing

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, \
    DataCollatorForLanguageModeling
from datasets import Dataset
import numpy as np
from typing import Dict, List, Optional
import random

## The fine-tuning itself
(can be skipped if the fine-tuned model is already saved in Drive)

#### parsing

In [None]:
# Step 1: Parse the WhatsApp chat data
def parse_whatsapp_chat(file_path: str) -> pd.DataFrame:
    """
    Parse a WhatsApp chat export file into a structured DataFrame.

    A line starts a new entry when it looks like
    "DATE, TIME - SENDER: TEXT" (for example
    "10/29/23, 9:28 AM - Yoel Weisberg: ..."): the part before the first
    " - " must begin with a numeric date and be at least 10 characters long.
    Entries without a "SENDER: " part are treated as system notices and are
    skipped together with their continuation lines. Every other non-empty
    line is appended, separated by a space, to the previous message.

    Args:
        file_path: Path to the WhatsApp chat export file (read as UTF-8)

    Returns:
        DataFrame with columns: timestamp, sender, message (in file order)
    """
    import re  # local import: only this function needs it

    # Numeric date at the start of a header, e.g. "10/29/23" or "29.10.2023".
    date_prefix = re.compile(r"\d{1,4}[./-]\d{1,2}[./-]\d{1,4}")
    min_timestamp_length = 10  # Timestamp usually at least 10 chars
    # BOM / direction marks are not whitespace, so strip() leaves them in place.
    invisible_prefix = "\ufeff\u200e\u200f"

    timestamps = []
    senders = []
    messages = []

    # True while the current entry is a skipped system notice, so that its
    # continuation lines are not glued onto the previous user message.
    in_system_message = False

    with open(file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip().lstrip(invisible_prefix)

            # Split on the first " - " only; later occurrences belong to the text.
            timestamp, separator, remainder = line.partition(" - ")
            timestamp = timestamp.strip()
            starts_new_entry = (
                separator != ""
                and len(timestamp) >= min_timestamp_length
                and date_prefix.match(timestamp) is not None
            )

            if starts_new_entry:
                sender, colon, message = remainder.partition(": ")
                if not colon:
                    # No "SENDER: " part: a system notice (e.g. "X added Y").
                    in_system_message = True
                    continue

                in_system_message = False
                timestamps.append(timestamp)
                senders.append(sender.strip())
                messages.append(message.strip())
            elif line and messages and not in_system_message:
                # Continuation of a multi-line message.
                messages[-1] += " " + line

    return pd.DataFrame({
        'timestamp': timestamps,
        'sender': senders,
        'message': messages
    })

#### Preparing data for training

In [None]:
# Step 2: Prepare data for training
def prepare_data_for_training(df: pd.DataFrame, min_messages: int = 20) -> Dict[str, List[str]]:
    """
    Group chat messages by sender, keeping only sufficiently active senders.

    Args:
        df: DataFrame containing parsed WhatsApp chat ('sender' and 'message' columns are read)
        min_messages: Minimum number of messages required for a sender to be included

    Returns:
        Dictionary mapping each retained sender to the list of their messages.
        Senders are inserted in the order produced by value_counts(); each
        message list keeps the row order of `df`.
    """
    counts = df['sender'].value_counts()
    frequent_senders = counts[counts >= min_messages].index.tolist()

    return {
        name: df[df['sender'] == name]['message'].tolist()
        for name in frequent_senders
    }

#### creating dataset

In [None]:
# Step 3: Create training data for Llama fine-tuning
def create_training_dataset(sender_messages: Dict[str, List[str]],
                            context_length: int = 3,
                            max_length: int = 512) -> Dataset:
    """
    Build prompt/response pairs for fine-tuning from per-sender message lists.

    For every sender, each message that has `context_length` predecessors in
    that sender's own list becomes a target; the predecessors form the prompt.

    Args:
        sender_messages: Dictionary mapping sender names to their messages
        context_length: Number of previous messages to use as context
        max_length: Maximum length of input sequences (not used by this function)

    Returns:
        Dataset with the columns "input", "target" and "sender"
    """
    prompts = []
    replies = []
    authors = []

    for sender, messages in sender_messages.items():
        for position in range(context_length, len(messages)):
            # The window of messages immediately preceding the target
            window = messages[position - context_length:position]
            context_text = " ".join(f"Message: {msg}" for msg in window)

            # The prompt names the sender whose reply should be produced
            prompts.append(f"Context: {context_text}\nGenerate {sender}'s response:")
            replies.append(messages[position])
            authors.append(sender)

    return Dataset.from_dict({
        "input": prompts,
        "target": replies,
        "sender": authors
    })


#### Tokenising

In [None]:
# Step 4: Tokenization functions
def tokenize_function(examples, tokenizer, max_length):
    """
    Tokenize prompt/response pairs into single causal-LM training sequences.

    Each example becomes one sequence "PROMPT RESPONSE" followed by the
    tokenizer's EOS token (if it has one), padded/truncated to `max_length`.
    Prompt and response must share one sequence: a causal LM is trained to
    predict the next token of `input_ids`, so a response tokenized separately
    (as labels only) does not line up with the inputs and is never learned.

    Args:
        examples: Mapping with "input" and "target" entries, either two
            equally long lists of strings (batched) or two single strings
        tokenizer: Callable tokenizer returning "input_ids" and "attention_mask"
        max_length: Length every sequence is padded/truncated to

    Returns:
        The same `examples` mapping with "input_ids", "attention_mask" and
        "labels" added. "labels" equals "input_ids" with padding positions
        replaced by -100 (the index ignored by the loss).
    """
    prompts = examples["input"]
    responses = examples["target"]

    # Support both batched (lists) and single-example (plain strings) calls.
    single_example = isinstance(prompts, str)
    if single_example:
        prompts, responses = [prompts], [responses]

    eos = tokenizer.eos_token or ""
    texts = [
        f"{prompt} {response}{eos}"
        for prompt, response in zip(prompts, responses)
    ]

    encoded = tokenizer(texts, padding="max_length", truncation=True, max_length=max_length)
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    # Mask by attention rather than by pad id: the pad token may be the same
    # id as EOS, and the real EOS must stay a training target.
    # NOTE(review): a collator that rebuilds labels from input_ids (such as
    # DataCollatorForLanguageModeling with mlm=False) will override these.
    labels = [
        [token if keep else -100 for token, keep in zip(ids, mask)]
        for ids, mask in zip(input_ids, attention_mask)
    ]

    if single_example:
        input_ids, attention_mask, labels = input_ids[0], attention_mask[0], labels[0]

    examples["input_ids"] = input_ids
    examples["attention_mask"] = attention_mask
    examples["labels"] = labels

    return examples

#### fine tuning

In [None]:

# Step 5: Fine-tune the Llama model
def fine_tune_llama(dataset, model_name="meta-llama/Llama-2-7b-hf", output_dir="./fine_tuned_llama", max_length=512,
                    use_peft=True, batch_size=1, fp16=True):
    """
    Fine-tune a causal language model on WhatsApp chat data.

    When a CUDA device is available, PEFT is enabled and bitsandbytes is
    installed, the base model is loaded with 4-bit NF4 quantization. LoRA
    adapters are attached when `use_peft` is true and peft can be imported.
    The dataset is tokenized with `tokenize_function`, trained for 3 epochs
    with the Hugging Face `Trainer`, and the model and tokenizer are saved to
    `output_dir`.

    Args:
        dataset: Dataset with "input", "target" and "sender" columns
        model_name: HuggingFace model name/path
        output_dir: Directory to save the fine-tuned model
        max_length: Maximum sequence length
        use_peft: Whether to use PEFT/LoRA for more efficient fine-tuning
        batch_size: Batch size for training
        fp16: Whether to use mixed precision training (forced off on CPU)

    Returns:
        Fine-tuned model and tokenizer
    """
    import importlib.util
    import sys

    # BitsAndBytesConfig lives in transformers and can be imported even when
    # the bitsandbytes package itself is missing, so check for both.
    try:
        from transformers import BitsAndBytesConfig
    except ImportError:
        print("Warning: bitsandbytes not installed. Quantization won't be available.")
        BitsAndBytesConfig = None
    else:
        if importlib.util.find_spec("bitsandbytes") is None:
            print("Warning: bitsandbytes not installed. Quantization won't be available.")
            BitsAndBytesConfig = None

    try:
        import accelerate
        print(f"Using accelerate version: {accelerate.__version__}")
    except ImportError:
        print("Warning: accelerate not installed. Installing it now...")
        import subprocess
        # Use the running interpreter's pip so the package lands in this environment.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "accelerate>=0.26.0"])
        import accelerate
        print(f"Installed accelerate version: {accelerate.__version__}")

    # Set up quantization and PEFT if requested
    model_kwargs = {}
    peft_config = None

    if use_peft:
        try:
            from peft import LoraConfig, get_peft_model, TaskType
            print("Using PEFT/LoRA for efficient fine-tuning")

            # Define LoRA config
            peft_config = LoraConfig(
                task_type=TaskType.CAUSAL_LM,
                inference_mode=False,
                r=8,  # rank
                lora_alpha=32,
                lora_dropout=0.1,
                target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            )
        except ImportError:
            print("Warning: peft not installed. Falling back to full fine-tuning.")
            print("To use PEFT, install it with: pip install peft")
            use_peft = False

    # Check if GPU is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    if device == "cpu":
        print("WARNING: Training on CPU will be very slow")
        fp16 = False  # Disable fp16 on CPU

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Padding to max_length needs a pad token; reuse EOS when none is defined
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Quantization is only attempted together with LoRA on a GPU
    if BitsAndBytesConfig and device == "cuda" and use_peft:
        try:
            # Try to use 4-bit quantization for efficiency
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
            )
            model_kwargs["quantization_config"] = quantization_config
            print("Using 4-bit quantization")
        except Exception as e:
            print(f"Quantization setup failed: {e}")
            print("Falling back to standard loading")

    # Load the model
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=device if device == "cuda" else None,
            **model_kwargs
        )
    except Exception as e:
        print(f"Error loading model with advanced options: {e}")
        print("Trying with basic configuration...")
        model = AutoModelForCausalLM.from_pretrained(model_name)

    # Gradient checkpointing (enabled below) is incompatible with the
    # generation KV cache, so switch the cache off explicitly.
    if hasattr(model, "config"):
        model.config.use_cache = False

    # Apply LoRA if available and requested
    # NOTE(review): PEFT recommends calling prepare_model_for_kbit_training()
    # on a 4-bit model before attaching adapters - confirm whether it is needed here.
    if use_peft and peft_config:
        try:
            model = get_peft_model(model, peft_config)
            print("LoRA applied successfully")
        except Exception as e:
            print(f"Error applying LoRA: {e}")
            print("Continuing with full fine-tuning")

    # Tokenize the dataset; the raw text columns are dropped afterwards
    tokenized_dataset = dataset.map(
        lambda examples: tokenize_function(examples, tokenizer, max_length),
        batched=True,
        remove_columns=["input", "target", "sender"]
    )

    # Data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False  # We're not using masked language modeling
    )

    # Set up training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=True,
        num_train_epochs=3,
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=4,
        save_steps=1000,
        save_total_limit=2,
        prediction_loss_only=True,
        logging_dir="./logs",
        fp16=fp16 and device == "cuda",
        optim="adamw_torch",
        logging_steps=100,
        warmup_steps=100,
        learning_rate=2e-4,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        label_names=["labels"],
        # The string "none" disables reporting integrations; None does not
        # (it selects the library default, which may enable installed loggers).
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=tokenized_dataset
    )

    # Train the model
    trainer.train()

    # Save the model and tokenizer
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    return model, tokenizer


#### pipeline

In [None]:
def _print_system_resources():
    """Print process/system RAM and per-GPU memory; failures are reported and ignored."""
    try:
        import os
        # Imported inside the try block so that a missing psutil only skips
        # this diagnostic instead of aborting the whole pipeline.
        import psutil

        bytes_per_gb = 1024 * 1024 * 1024

        process = psutil.Process(os.getpid())
        memory_gb = process.memory_info().rss / bytes_per_gb
        print(f"Current process memory usage: {memory_gb:.2f} GB")

        total_memory = psutil.virtual_memory().total / bytes_per_gb
        print(f"Total system memory: {total_memory:.2f} GB")

        if torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
                gpu_memory = torch.cuda.get_device_properties(i).total_memory / bytes_per_gb
                print(f"GPU {i} total memory: {gpu_memory:.2f} GB")
    except Exception as e:
        print(f"Could not check system resources: {e}")


# Step 8: Complete pipeline function
def whatsapp_llama_pipeline(chat_file_path: str, model_name: str = "meta-llama/Llama-2-7b-hf",
                            output_dir: str = "./fine_tuned_llama", use_peft: bool = True,
                            batch_size: int = 1, fp16: bool = True, skip_training: bool = False,
                            min_messages: int = 20):
    """
    Complete pipeline from WhatsApp chat data to fine-tuned model.

    Parses the chat export, groups messages by sender, builds the training
    dataset and (unless `skip_training` is set) fine-tunes and saves the
    model. Any exception is caught, printed with its traceback and not
    re-raised. The function returns nothing; the trained model is only
    available through the files written to `output_dir`.

    Args:
        chat_file_path: Path to WhatsApp chat export file
        model_name: Base model to fine-tune
        output_dir: Directory to save the fine-tuned model
        use_peft: Whether to use PEFT/LoRA for efficient fine-tuning
        batch_size: Batch size for training
        fp16: Whether to use mixed precision training
        skip_training: Skip the training step (for testing the pipeline)
        min_messages: Minimum number of messages a sender needs to be included
    """
    try:
        print("Step 1: Parsing WhatsApp chat data...")
        df = parse_whatsapp_chat(chat_file_path)

        print(f"Found {len(df)} messages from {df['sender'].nunique()} different senders")

        print("\nStep 2: Preparing data for training...")
        sender_messages = prepare_data_for_training(df, min_messages=min_messages)
        print(f"Found {len(sender_messages)} senders with enough messages")

        if len(sender_messages) == 0:
            print("No senders with enough messages found. Try reducing the min_messages parameter.")
            return

        print("\nStep 3: Creating training dataset...")
        dataset = create_training_dataset(sender_messages)
        print(f"Created dataset with {len(dataset)} training examples")

        if not skip_training:
            print("\nStep 4: Fine-tuning Llama model...")
            print(f"Using base model: {model_name}")
            print("This may take a while depending on your hardware...")

            # Check system resources (purely informational)
            _print_system_resources()

            model, tokenizer = fine_tune_llama(
                dataset,
                model_name=model_name,
                output_dir=output_dir,
                use_peft=use_peft,
                batch_size=batch_size,
                fp16=fp16
            )

            print(f"\nModel successfully fine-tuned and saved to {output_dir}")
        else:
            print("\nSkipping training step as requested.")
            print("Loading tokenizer only for chat interface...")
            # NOTE(review): the tokenizer is loaded but neither returned nor
            # stored, so this only verifies that it can be loaded.
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained(model_name)

    except Exception as e:
        import traceback
        print(f"Error in pipeline: {e}")
        print(traceback.format_exc())




#### Running the *pipeline*

In [None]:
# Run the full pipeline on the chat export selected above and save the result to Google Drive
whatsapp_llama_pipeline(
    chat_file_path=chat_file_path,
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # Smaller model for faster training
    output_dir="/content/drive/MyDrive/Data-ChatLLM/whatsapp_model",  # Save to Drive
    use_peft=True,  # Use PEFT/LoRA for efficient training
    batch_size=1,
    fp16=True  # Use mixed precision (works well with Colab GPUs)
)

## Loading the model from Drive
(go here if the fine-tuned model is already in Drive)

In [None]:
# Load the fine-tuned model and tokenizer back from Google Drive
import os

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory the pipeline saved to (its output_dir) and the preferred
# Trainer checkpoint inside it.
model_path = "/content/drive/MyDrive/Data-ChatLLM/whatsapp_model"
checkpoint_path = os.path.join(model_path, "checkpoint-4000")

# Load the model and tokenizer
try:
    # Listing the directory is inside the try block so that a missing
    # checkpoint falls through to the main model directory instead of crashing.
    print("Files in checkpoint directory:")
    print(os.listdir(checkpoint_path))

    # First try loading from the checkpoint
    tokenizer = AutoTokenizer.from_pretrained(
        checkpoint_path,
        use_fast=True
    )

    model = AutoModelForCausalLM.from_pretrained(
        checkpoint_path,
        device_map="auto"  # This handles GPU placement automatically
    )

    print("Successfully loaded from checkpoint!")
except Exception as e:
    print(f"Error loading from checkpoint: {e}")
    print("\nTrying to load from the main model directory instead...")

    # If that fails, try the main model directory
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
    print("Successfully loaded from main model directory!")


#### interface function

In [None]:
def generate_response(input_message, friend_name, max_length=100, context=None, temperature=0.2,
                      max_prompt_length=512):
    """
    Generate a reply in the style of `friend_name` with the globally loaded model.

    The prompt uses the same layout as the training examples built by
    create_training_dataset:
    "Context: Message: ... Message: ...\\nGenerate NAME's response:".

    Args:
        input_message: The message to respond to
        friend_name: Sender name to put into the prompt
        max_length: Maximum number of new tokens to generate
        context: Optional list of earlier messages, oldest first
        temperature: Sampling temperature passed to `model.generate`
        max_prompt_length: Maximum number of prompt tokens kept. Longer
            prompts lose their oldest tokens so the trailing
            "Generate ... response:" instruction is always preserved.

    Returns:
        The generated reply text, without the prompt
    """
    history = list(context) if context else []

    # Build the prompt in the training format.
    context_text = " ".join(f"Message: {msg}" for msg in [*history, input_message])
    prompt = f"Context: {context_text}\nGenerate {friend_name}'s response:"

    # A single sequence needs no padding (and padding would fail for a
    # tokenizer without a pad token).
    encoded_input = tokenizer(prompt, return_tensors="pt")

    # Truncate from the left so the instruction at the end of the prompt survives.
    input_ids = encoded_input["input_ids"][:, -max_prompt_length:].to(model.device)
    attention_mask = encoded_input["attention_mask"][:, -max_prompt_length:].to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            # Only max_new_tokens is given: combining it with max_length is
            # redundant and makes transformers emit a warning.
            max_new_tokens=max_length,
            # min_length would count the prompt tokens too; min_new_tokens
            # actually avoids extremely short responses.
            min_new_tokens=min(5, max_length),
            temperature=temperature,  # Control randomness
            top_p=0.92,  # Slightly increased for more diversity
            top_k=50,  # Limit vocabulary to top 50 choices at each step
            do_sample=True,
            no_repeat_ngram_size=3,  # Avoid repeating the same phrases
            num_return_sequences=1,  # Generate one sequence
            pad_token_id=tokenizer.eos_token_id,  # Proper padding
        )

    # Decode only the newly generated tokens. Splitting the decoded text on
    # prompt fragments breaks when the reply itself contains them.
    prompt_token_count = input_ids.shape[-1]
    new_tokens = output[0][prompt_token_count:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Improved interface with context tracking
def test_model():
    """
    Run an interactive console chat with the loaded model.

    Commands typed at the message prompt:
        quit    - leave the loop
        switch  - choose another friend name
        clear   - forget the conversation history
        temp X  - set the sampling temperature (0.1-1.5) for later replies

    Every other non-empty input is sent to generate_response together with
    the last three history entries; the message and the reply are then
    appended to the history, which is capped at ten entries.
    """
    print("=== WhatsApp Friend Simulator (Enhanced Version) ===")
    print("Enter a friend's name or type 'quit' to exit")
    print("Type 'temp' followed by a number (0.1-1.5) to adjust creativity")

    context = []  # Keep track of conversation history
    current_friend = None
    temperature = 0.2  # Same as generate_response's default until changed via 'temp'

    while True:
        if current_friend is None:
            friend_name = input("\nFriend name: ")
            if friend_name.lower() == 'quit':
                break
            current_friend = friend_name

        message = input("Your message (or 'switch' to change friend, 'clear' for new convo): ")
        command = message.lower()

        if command == 'quit':
            break
        if command == 'switch':
            current_friend = None
            continue
        if command == 'clear':
            context = []
            print("Conversation history cleared.")
            continue
        if command.startswith('temp '):
            try:
                new_temp = float(message.split(' ')[1])
            except (ValueError, IndexError):
                print("Invalid temperature format. Use 'temp 0.9' for example.")
                continue
            if 0.1 <= new_temp <= 1.5:
                temperature = new_temp
                print(f"Temperature set to {temperature} (higher = more creative, lower = more consistent)")
            else:
                print("Temperature must be between 0.1 and 1.5")
            continue
        if not message.strip():
            # Nothing to respond to; ask again instead of prompting the model.
            continue

        response = generate_response(
            message,
            current_friend,
            max_length=100,  # Longer responses
            context=context[-3:] if context else None,  # Use last 3 messages
            temperature=temperature,  # Apply the value chosen with 'temp'
        )

        print(f"\n{current_friend}: {response}")

        # Update context
        context.append(message)
        context.append(response)

        # Keep context manageable
        if len(context) > 10:
            context = context[-10:]

In [None]:
# Start the interactive chat loop (type 'quit' to exit)
test_model()