In [None]:
# Install the third-party packages this notebook relies on (shell commands run via `!`).
# NOTE(review): versions are unpinned, so a fresh run may pull newer releases than the
# ones this notebook was written against — TODO pin known-good versions.
!pip install bitsandbytes accelerate
!pip install transformers peft datasets pypdf2

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import Trainer
from datasets import Dataset
import PyPDF2
import re

# ========== STEP 1: Extract Text from PDF ==========
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at `pdf_path`.

    Each page's extracted text is followed by a newline, so the result always
    ends with "\n" when the document has at least one page and is the empty
    string when it has none.
    """
    with open(pdf_path, 'rb') as handle:
        reader = PyPDF2.PdfReader(handle)
        # Extraction must happen while the file handle is still open.
        per_page = [page.extract_text() + "\n" for page in reader.pages]
    return "".join(per_page)

# ========== STEP 2: Create Conversational Dataset ==========
def _split_into_chunks(text, chunk_size):
    """Greedily pack the sentences of `text` into chunks of about `chunk_size` characters.

    Sentences are obtained by splitting on runs of '.', '!' or '?'. Each
    fragment is stripped, and fragments that are empty after stripping are
    skipped, so the empty piece that always follows the final terminator (or
    text containing only whitespace/punctuation) never turns into a stray
    ". " inside a chunk or into a chunk of its own. Every kept sentence is
    re-terminated with ". ". A single sentence longer than `chunk_size`
    becomes its own (oversized) chunk.
    """
    chunks = []
    current_chunk = ""

    for raw_sentence in re.split(r'[.!?]+', text):
        sentence = raw_sentence.strip()
        if not sentence:
            continue

        if len(current_chunk) + len(sentence) < chunk_size:
            current_chunk += sentence + ". "
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence + ". "

    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks


def create_qa_dataset(text, chunk_size=500):
    """
    Creates synthetic Q&A pairs from text chunks with proper special tokens.

    Args:
        text: Raw document text (e.g. the output of extract_text_from_pdf).
        chunk_size: Approximate maximum number of characters per answer chunk.

    Returns:
        A `Dataset` with a single "text" column. Each row is one conversation
        in the <|system|>/<|user|>/<|assistant|> layout, with every turn
        closed by "</s>". The question is built from the first five words of
        the chunk and the answer is the chunk itself. Text that contains no
        sentences yields an empty dataset.
    """
    chunks = _split_into_chunks(text, chunk_size)

    # Create Q&A pairs with conversational questions
    question_templates = [
        "Can you explain {topic}?",
        "What do you know about {topic}?",
        "Tell me more about {topic}",
        "I'm curious about {topic}",
        "Help me understand {topic}",
        "What's your take on {topic}?",
        "{topic} - can you elaborate?",
        "I'd like to learn about {topic}",
        "Could you discuss {topic}?",
        "What are your thoughts on {topic}?"
    ]

    conversations = []
    for i, chunk in enumerate(chunks):
        # Use the first few words of the chunk as a stand-in "topic".
        topic = ' '.join(chunk.split()[:5]).rstrip('.,!?')

        # Cycle through the templates so consecutive chunks get varied questions.
        template = question_templates[i % len(question_templates)]
        question = template.format(topic=topic)

        # Properly formatted conversation with EOS tokens
        conversation = f"<|system|>\nYou are a helpful assistant.</s>\n<|user|>\n{question}</s>\n<|assistant|>\n{chunk}</s>"
        conversations.append({"text": conversation})

    return Dataset.from_list(conversations)

# ========== STEP 3: Setup Model and Training ==========
def _mask_padding_labels(input_ids, attention_mask, ignore_index=-100):
    """Copy `input_ids` into labels, replacing padded positions with `ignore_index`.

    Both arguments are batches (lists of equal-length lists). A position is
    treated as padding when its attention-mask entry is falsy.
    """
    return [
        [token if keep else ignore_index for token, keep in zip(ids, mask)]
        for ids, mask in zip(input_ids, attention_mask)
    ]


def setup_model_and_train(dataset, output_dir="./tinyllama-finetuned"):
    """Setup and train the model with LoRA.

    Loads TinyLlama-1.1B-Chat with 4-bit NF4 quantization, attaches LoRA
    adapters to the attention projections, tokenizes the "text" column of
    `dataset` to fixed-length sequences of 512 tokens, trains for three
    epochs, and saves the adapter and tokenizer to `output_dir`.

    Args:
        dataset: Dataset with a "text" column (as built by create_qa_dataset).
        output_dir: Directory for checkpoints and the final saved model.

    Returns:
        (model, tokenizer): the trained PEFT-wrapped model and its tokenizer.

    Raises:
        ValueError: if `dataset` contains no examples.
    """
    # Fail fast before downloading/loading the model.
    if len(dataset) == 0:
        raise ValueError("Cannot fine-tune on an empty dataset: no training examples were provided.")

    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    # Quantization config for memory efficiency
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    # Load model and tokenizer
    print("Loading model and tokenizer...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # The pad token is set to EOS here, so padding cannot be told apart from a
    # real end-of-turn by token id alone; the attention mask is used instead
    # when building labels below.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Prepare model for training
    model = prepare_model_for_kbit_training(model)

    # LoRA configuration
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]
    )

    model = get_peft_model(model, peft_config)

    # Tokenize the dataset
    print("Tokenizing dataset...")
    def tokenize_function(examples):
        tokenized = tokenizer(
            examples["text"],
            truncation=True,
            padding="max_length",  # Fixed-length sequences for consistent batch size
            max_length=512,
            return_attention_mask=True,
            return_tensors=None
        )
        # Labels mirror the inputs, except padded positions are set to -100 so
        # the loss ignores them. Without this the model is trained to predict
        # a long run of EOS/pad tokens after every example.
        tokenized["labels"] = _mask_padding_labels(
            tokenized["input_ids"], tokenized["attention_mask"]
        )
        return tokenized

    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset.column_names
    )

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        fp16=True,
        save_steps=100,
        logging_steps=10,
        save_total_limit=2,
        warmup_steps=50,
        optim="paged_adamw_8bit",
        remove_unused_columns=False,
        report_to="none",  # Disable W&B and other logging
    )

    # Use standard Trainer (not SFTTrainer)
    trainer = Trainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    # Train
    print("Starting training...")
    trainer.train()

    # Save the final model
    print(f"Saving model to {output_dir}")
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    return model, tokenizer

# ========== STEP 4: Load and Chat with Fine-tuned Model ==========
def load_finetuned_model(model_path="./tinyllama-finetuned"):
    """Load the fine-tuned model for inference.

    Reads the tokenizer from `model_path`, loads the TinyLlama chat base model
    in float16, applies the LoRA adapter stored at `model_path`, and merges
    the adapter into the base weights.

    Returns:
        (model, tokenizer): the merged model and the tokenizer.
    """
    from peft import PeftModel

    base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    print("Loading fine-tuned model...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Base weights first, then the adapter on top, then fold the adapter in.
    base_kwargs = dict(device_map="auto", torch_dtype=torch.float16)
    adapted = PeftModel.from_pretrained(
        AutoModelForCausalLM.from_pretrained(base_model_name, **base_kwargs),
        model_path,
    )
    merged = adapted.merge_and_unload()

    return merged, tokenizer

def chat_with_model(model, tokenizer, user_message):
    """Generate a response to user message with proper token handling.

    Builds a single-turn prompt in the same <|system|>/<|user|>/<|assistant|>
    layout used for training, samples up to 256 new tokens, and returns only
    the newly generated text.

    Args:
        model: Model exposing `.device` and `.generate(...)`.
        tokenizer: Tokenizer matching `model`.
        user_message: The user's message as plain text.

    Returns:
        The assistant's reply with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = f"<|system|>\nYou are a helpful assistant.</s>\n<|user|>\n{user_message}</s>\n<|assistant|>\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id  # Stop at EOS
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # length instead of searching the decoded string for "<|assistant|>", so a
    # reply that itself contains that marker is not cut short.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]

    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    # Defensive: if an end-of-turn marker was produced as literal text, stop there.
    if "</s>" in response:
        response = response.split("</s>")[0].strip()

    return response



In [None]:
# ========== MAIN EXECUTION ==========
# End-to-end run: extract the PDF text, build the synthetic Q&A dataset,
# fine-tune with LoRA, then reload the saved adapter and start a chat loop.
if __name__ == "__main__":
    import gc

    # === TRAINING PHASE ===
    book = "depresion"  # Base name of the source PDF (no extension); also used in the output directory name
    PDF_PATH = f"{book}.pdf"  # Change this to your PDF path
    OUTPUT_DIR = f"./tinyllama-{book}-chat"

    print("Step 1: Extracting text from PDF...")
    book_text = extract_text_from_pdf(PDF_PATH)
    print(f"Extracted {len(book_text)} characters")

    print("\nStep 2: Creating dataset...")
    dataset = create_qa_dataset(book_text)
    print(f"Created {len(dataset)} training examples")

    print("\nStep 3: Fine-tuning model...")
    model, tokenizer = setup_model_and_train(dataset, OUTPUT_DIR)

    print("\n" + "="*50)
    print("Training complete! Model saved to:", OUTPUT_DIR)
    print("="*50)

    # Drop the references to the training-time model before loading the
    # inference copy, so both models are not held in memory at the same time.
    del model, tokenizer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # === INFERENCE PHASE ===
    print("\n\nLoading fine-tuned model for chat...")
    model, tokenizer = load_finetuned_model(OUTPUT_DIR)

    # Interactive chat loop
    print("\n=== Chat with your book! (type 'quit' to exit) ===\n")
    while True:
        user_input = input("You: ").strip()
        if user_input.lower() in ['quit', 'exit', 'q']:
            break
        if not user_input:
            # Nothing to send; prompt again instead of generating from an empty message.
            continue

        response = chat_with_model(model, tokenizer, user_input)
        print(f"\nAssistant: {response}\n")


In [None]:
import shutil

# Zip the contents of ./tinyllama-<book>-chat into tinyllama-<book>-chat.zip.
# Relies on `book` having been set by the main-execution cell.
shutil.make_archive(
    base_name=f"tinyllama-{book}-chat",
    format='zip',
    root_dir=f"./tinyllama-{book}-chat",
)

In [None]:
# Reload the saved fine-tuned model and start a chat session.
# Relies on OUTPUT_DIR having been set by the main-execution cell.
model, tokenizer = load_finetuned_model(OUTPUT_DIR)

# Interactive chat loop
print("\n=== Chat with your book! (type 'quit' to exit) ===\n")
while True:
    user_input = input("You: ")
    if user_input.lower() not in ['quit', 'exit', 'q']:
        response = chat_with_model(model, tokenizer, user_input)
        print(f"\nAssistant: {response}\n")
        continue
    # One of the exit words was typed.
    break