<a href="https://colab.research.google.com/github/frank-morales2020/Cloud_curious/blob/master/finetune_deepseek_tourism.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- 1. Set Up Your Environment ---
!pip install scikit-learn -q # For potential evaluation metrics (optional)
!pip install -U transformers -q
!pip install -U datasets -q
!pip install -U accelerate -q
!pip install -U peft -q
!pip install -U trl -q # For SFTTrainer
!pip install -U bitsandbytes -q
!pip install unsloth -q # Recommended for speed and efficiency
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git # For latest Unsloth

In [1]:
import torch
import io
import pandas as pd
import json
from datasets import load_dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer
from huggingface_hub import login # Optional: for pushing model to Hub

# Optional: Login to Hugging Face Hub for pushing models and Weights & Biases for logging
# login()
# import wandb
# wandb.login()

# --- 2. Load the Model and Tokenizer ---
print("Loading DeepSeek-R1 model and tokenizer...")
max_seq_length = 2048 # Adjust if your combined input/output is longer
dtype = None # Automatically chooses bfloat16 or float16 based on GPU
load_in_4bit = True # Enable 4-bit quantization for memory efficiency

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B", # Recommended for fine-tuning
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
print("Model and tokenizer loaded.")

# --- 3. Apply LoRA Adapters ---
print("Applying LoRA adapters...")
model = FastLanguageModel.get_peft_model(
    model,
    r=16, # Rank of the LoRA matrices (common values: 8, 16, 32, 64)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], # DeepSeek specific modules
    lora_alpha=16, # Scaling factor for LoRA weights
    lora_dropout=0, # Dropout rate for LoRA (set to 0 for inference)
    bias="none", # Or "all", "lora_only"
    use_gradient_checkpointing=True, # Recommended for memory saving
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)
print("LoRA adapters applied.")

# --- 4. Prepare the Training Dataset ---
print("Loading and preparing osunlp/TravelPlanner dataset...")

# Load the training split of the dataset, specifying the 'train' config
train_dataset = load_dataset("osunlp/TravelPlanner", name="train", split="train")
# Use the validation split for actual evaluation later, or for a small dev set, specifying the 'validation' config
eval_dataset = load_dataset("osunlp/TravelPlanner", name="validation", split="validation")
# Load the test split, specifying the 'test' config
test_dataset = load_dataset("osunlp/TravelPlanner", name="test", split="test")

# Define the formatting function for the dataset
# This is the most critical part: converting raw data into a fine-tunable format.
def format_travel_planner_example(example):
    user_query = example["query"]
    reference_info_str = example["reference_information"]

    # Attempt to parse the reference_information string into structured data
    parsed_info = []
    try:
        # osunlp/TravelPlanner's reference_information is a stringified list of dicts.
        # Sometimes it's malformed or empty.
        ref_info_list = json.loads(reference_info_str)
        if isinstance(ref_info_list, list):
            parsed_info = ref_info_list
    except json.JSONDecodeError:
        pass # If it fails to parse, leave parsed_info as empty

    # --- Programmatically generate a structured plan from parsed_info ---
    # This part requires robust logic to turn raw data into a coherent itinerary.
    # The complexity here directly impacts the quality of your fine-tuned model.
    # This is a *template* and you should expand this with more sophisticated parsing
    # and planning logic to reflect a real itinerary.

    plan_lines = ["Proposed Travel Itinerary:"]

    # Basic extraction logic for demonstration:
    flights_info = []
    hotels_info = []
    attractions_info = []
    restaurants_info = []

    for block in parsed_info:
        description = block.get("Description", "")
        content = block.get("Content", "")

        if "Flights" in description and content:
            try:
                df = pd.read_csv(io.StringIO(content))
                for _, row in df.iterrows():
                    flights_info.append(f"- Flight from {row.get('Origin', 'N/A')} to {row.get('Destination', 'N/A')} on {row.get('Departure Date', 'N/A')} at {row.get('Departure Time', 'N/A')} with {row.get('Airline', 'N/A')}")
            except Exception:
                pass
        elif "Hotels" in description and content:
            try:
                df = pd.read_csv(io.StringIO(content))
                for _, row in df.iterrows():
                    hotels_info.append(f"- Stay at {row.get('Name', 'N/A')} ({row.get('Star', 'N/A')} star) for {row.get('Price', 'N/A')} / night.")
            except Exception:
                pass
        elif "Attractions" in description and content:
            try:
                df = pd.read_csv(io.StringIO(content))
                for _, row in df.iterrows():
                    attractions_info.append(f"- Visit {row.get('Name', 'N/A')}")
            except Exception:
                pass
        elif "Restaurants" in description and content:
            try:
                df = pd.read_csv(io.StringIO(content))
                for _, row in df.iterrows():
                    restaurants_info.append(f"- Dine at {row.get('Name', 'N/A')} (Rating: {row.get('Rating', 'N/A')})")
            except Exception:
                pass

    if flights_info:
        plan_lines.append("\nFlights:")
        plan_lines.extend(flights_info)
    if hotels_info:
        plan_lines.append("\nAccommodation:")
        plan_lines.extend(hotels_info)
    if attractions_info:
        plan_lines.append("\nActivities:")
        plan_lines.extend(attractions_info)
    if restaurants_info:
        plan_lines.append("\nDining:")
        plan_lines.extend(restaurants_info)

    # If no specific plan elements were extracted, provide a generic fallback
    if not (flights_info or hotels_info or attractions_info or restaurants_info):
         plan_lines.append("Based on the provided information, here's a general plan (details are limited by available data):")
         plan_lines.append("Day 1: Explore the city center and enjoy local cuisine.")
         plan_lines.append("Day 2: Visit major landmarks and museums.")


    generated_plan_text = "\n".join(plan_lines)

    # Format into a chat template for DeepSeek-R1 fine-tuning
    messages = [
        {"role": "user", "content": f"User's travel request: {user_query}\n\nAvailable Information:\n{reference_info_str}\n\nBased on the request and available information, please generate a detailed travel itinerary including flights, accommodation, daily activities, and restaurant recommendations. Format as a day-by-day plan."},
        {"role": "assistant", "content": generated_plan_text}
    ]

    example["text"] = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_special_tokens=False
    )
    return example

# Apply the formatting function to the training and evaluation datasets
train_dataset = train_dataset.map(format_travel_planner_example, batched=False)
eval_dataset = eval_dataset.map(format_travel_planner_example, batched=False)

# Optional: Print a formatted example to check
print("\n--- Example of formatted training data ---")
print(train_dataset[0]["text"])
print("-" * 50)
print("Dataset preparation complete.")


# --- 5. Set Up and Configure the Trainer ---
print("Setting up SFTTrainer...")
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset, # Use eval_dataset for evaluation during training
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        per_device_train_batch_size=2, # Adjust based on GPU memory. Lower if OOM.
        gradient_accumulation_steps=4, # Increase to compensate for smaller batch size
        warmup_steps=10, # Short warmup to stabilize training
        num_train_epochs=3, # Train for a few epochs
        learning_rate=2e-4, # Standard learning rate for LoRA
        fp16=not torch.cuda.is_bf16_supported(), # Use fp16 if bfloat16 is not supported
        bf16=torch.cuda.is_bf16_supported(), # Use bfloat16 if supported (recommended)
        logging_steps=10, # Log training progress every 10 steps
        output_dir="./sft_results", # Directory to save checkpoints and logs
        optim="adamw_8bit", # Optimized AdamW for 8-bit
        seed=3407,
        save_steps=500, # Save checkpoint every X steps
        save_total_limit=2, # Keep only the last 2 checkpoints
        eval_strategy="steps", # Evaluate every 'eval_steps'
        eval_steps=500, # Evaluate every 500 steps
        load_best_model_at_end=True, # Load the best model at the end of training
        metric_for_best_model="eval_loss", # Metric to determine the best model
        greater_is_better=False, # Lower loss is better
        report_to="none", # Disable logging to Weights & Biases
    ),
)
print("SFTTrainer configured.")

# --- 6. Start Training ---
print("Starting training...")
trainer.train()
print("Training complete.")

# --- 7. Save Your Fine-tuned Model ---
output_dir = "./deepseek_r1_travel_planner_finetuned"
print(f"Saving fine-tuned model to {output_dir}...")
model.save_pretrained(output_dir, tokenizer)
print("Model saved locally.")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Loading DeepSeek-R1 model and tokenizer...
==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model and tokenizer loaded.
Applying LoRA adapters...


Unsloth 2025.6.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


LoRA adapters applied.
Loading and preparing osunlp/TravelPlanner dataset...

--- Example of formatted training data ---
<｜begin▁of▁sentence｜><｜User｜>User's travel request: Please help me plan a trip from St. Petersburg to Rockford spanning 3 days from March 16th to March 18th, 2022. The travel should be planned for a single person with a budget of $1,700.

Available Information:
[{'Description': 'Attractions in Rockford', 'Content': "                                   Name  Latitude  Longitude                                                          Address          Phone                                                                 Website     City\n       Burpee Museum of Natural History 42.277324 -89.088142                           737 N Main St, Rockford, IL 61103, USA (815) 965-3433                                                  http://www.burpee.org/ Rockford\n                  Midway Village Museum 42.280499 -88.984640                        6799 Guilford Rd, Rockford, IL 

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 45 | Num Epochs = 3 | Total steps = 18
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss


Training complete.
Saving fine-tuned model to ./deepseek_r1_travel_planner_finetuned...
Model saved locally.


In [2]:
!pip install colab-env -q
import colab_env

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for colab-env (setup.py) ... [?25l[?25hdone
Mounted at /content/gdrive


In [3]:
!rm -rf /content/gdrive/MyDrive/model/deepseek_r1_travel_planner_finetuned
!cp -pr /content/deepseek_r1_travel_planner_finetuned /content/gdrive/MyDrive/model/deepseek_r1_travel_planner_finetuned

In [5]:
# --- 8. Model Evaluation ---
print("\n" + "="*70)
print("Starting Model Evaluation")
print("="*70 + "\n")

output_dir = "/content/gdrive/MyDrive/model/deepseek_r1_travel_planner_finetuned"

# --- Before loading, ensure the directory only contains adapter files ---
# This is a defensive step to remove the base model's config.json
# if it was accidentally copied or saved there.
# Unsloth's model.save_pretrained for PEFT models should NOT save config.json,
# but if it does, or if it was copied from elsewhere, this cleans it.
!rm -f {output_dir}/config.json
!rm -f /content/deepseek_r1_travel_planner_finetuned/config.json # Also clean the source before copying

# Re-load the fine-tuned model if starting a new session or to ensure correct model loading
# This should now load only the adapter
print(f"Loading fine-tuned model from {output_dir}...")
model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=output_dir, # Load from your saved model directory
     max_seq_length=max_seq_length,
     dtype=dtype,
     load_in_4bit=load_in_4bit,
)
print("Model and tokenizer loaded from saved directory.")

# Use a TextStreamer for real-time output during generation
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

print(f"Evaluating on {min(10, len(test_dataset))} examples from the test set...\n")

import re # Import regex library here if not imported globally already

# Evaluate on a subset of the test dataset for demonstration
num_examples_to_evaluate = 5 # You can increase this for more comprehensive evaluation
for i in range(min(num_examples_to_evaluate, len(test_dataset))):
    example = test_dataset[i]
    original_query = example["query"]
    reference_info = example["reference_information"]

    # --- Construct the evaluation prompt for inference ---
    # This is the exact prompt you specified for evaluation.
    # We will try to extract relevant info for the prompt.
    # For osunlp/TravelPlanner, budget/interests are usually in the original query.
    # Example: "Plan a 7-day trip to Paris for a couple, budget-friendly, interested in museums and food"

    # Minimal attempt to extract common elements from the query
    destination = "Unknown Destination"
    days = "Unknown Days"
    budget = "Unknown Budget"
    interests = "Unknown Interests"
    travelers = "Unknown Travelers"
    specific_preference = "minimizing travel time between activities" # Default priority

    # VERY BASIC PARSING LOGIC - IMPROVE FOR ROBUSTNESS
    # --- Extract Destination ---
    destination_patterns = [
        r"trip to ([A-Za-z\s]+?)(?: for|\s*$)",
        r"visit ([A-Za-z\s]+?)(?: for|\s*$)",
        r"going to ([A-Za-z\s]+?)(?: for|\s*$)",
        r"travel to ([A-Za-z\s]+?)(?: for|\s*$)"
    ]
    for pattern in destination_patterns:
        match = re.search(pattern, original_query, re.IGNORECASE)
        if match:
            potential_dest = match.group(1).strip()
            if len(potential_dest.split()) > 0 and len(potential_dest) > 2: # Simple heuristic
                destination = potential_dest
                break
    # Fallback
    if destination == "Unknown Destination":
        match = re.search(r"plan a trip to ([A-Za-z\s]+?)(?: for|\s*$)", original_query, re.IGNORECASE)
        if match:
            potential_dest = match.group(1).strip()
            if len(potential_dest.split()) > 0 and len(potential_dest) > 2:
                destination = potential_dest

    # --- Extract Days/Duration ---
    days_match = re.search(r"(\d+)\s*days?", original_query, re.IGNORECASE)
    if days_match:
        days = f"{days_match.group(1)}-day"
    else:
        if "a week" in original_query.lower(): days = "7-day"
        elif "two weeks" in original_query.lower(): days = "14-day"
        elif "long weekend" in original_query.lower(): days = "3-4 day"

    # --- Extract Budget ---
    if "budget-friendly" in original_query.lower() or "cheap" in original_query.lower() or "economical" in original_query.lower():
        budget = "budget-friendly"
    elif "mid-range budget" in original_query.lower() or "moderate budget" in original_query.lower():
        budget = "mid-range"
    elif "luxury budget" in original_query.lower() or "high-end" in original_query.lower() or "indulgent" in original_query.lower():
        budget = "luxury"
    else:
        budget = "flexible budget"

    # --- Extract Travelers ---
    if "couple" in original_query.lower(): travelers = "a couple"
    elif "family" in original_query.lower(): travelers = "a family"
    elif "solo" in original_query.lower() or "myself" in original_query.lower(): travelers = "a solo traveler"
    elif "group of friends" in original_query.lower() or "friends" in original_query.lower(): travelers = "a group of friends"
    elif "business trip" in original_query.lower() or "colleagues" in original_query.lower(): travelers = "a business traveler"
    else: travelers = "an individual or small group"

    # --- Extract Interests ---
    common_interests = [
        "historical sites", "museums", "art", "culture", "history",
        "outdoor adventures", "hiking", "nature", "scenery", "beaches", "skiing",
        "food tours", "cuisine", "dining", "nightlife", "bars", "parties",
        "shopping", "markets", "fashion",
        "relaxation", "spa", "wellness",
        "family-friendly", "kid-friendly", "theme parks",
        "wildlife", "safari"
    ]
    found_interests = []
    for interest_keyword in common_interests:
        if interest_keyword in original_query.lower():
            found_interests.append(interest_keyword)

    if found_interests:
        if len(found_interests) > 1:
            interests = f"{', '.join(found_interests[:-1])} and {found_interests[-1]}"
        else:
            interests = found_interests[0]
    else:
        interests = "general sightseeing and exploration"

    # --- Extract Specific Preference ---
    if "prioritize" in original_query.lower():
        match = re.search(r"prioritize\s+(.+?)(?:\.|\s*$)", original_query, re.IGNORECASE)
        if match:
            specific_preference_candidate = match.group(1).strip()
            if specific_preference_candidate.lower() != "minimizing travel time between activities":
                specific_preference = specific_preference_candidate
    elif "focus on" in original_query.lower():
        match = re.search(r"focus on\s+(.+?)(?:\.|\s*$)", original_query, re.IGNORECASE)
        if match:
            specific_preference = match.group(1).strip()
    elif "main goal is" in original_query.lower():
        match = re.search(r"main goal is\s+(.+?)(?:\.|\s*$)", original_query, re.IGNORECASE)
        if match:
            specific_preference = match.group(1).strip()

    # Now, destination, days, budget, interests, travelers, specific_preference
    # are correctly updated for the *current* example in the loop.

    formatted_eval_prompt = f"""User's travel request: {original_query}

Additional user preferences:
- Budget: {budget}
- Interests: {interests}
- Travelers: {travelers}
- Specific Preference: {specific_preference}

Available Information (for planning):
{reference_info}

Based on the user's travel request and the available information, please generate a detailed {days} travel itinerary for {travelers} visiting {destination}. The itinerary should include flight suggestions, daily activities, restaurant recommendations, and accommodation options. Prioritize {specific_preference}. Format as a day-by-day plan.
"""

    # Format the prompt for the model's chat template
    messages = [
        {"role": "user", "content": formatted_eval_prompt}
    ]
    # Ensure the tokenizer is available
    if 'tokenizer' not in locals():
        # Re-load tokenizer if needed (e.g., running this cell standalone)
        print("Tokenizer not found, attempting to load from model directory...")
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(output_dir)

    # Truncate the input prompt to avoid exceeding max_seq_length
    # This is crucial before tokenization for models with strict max_seq_length
    tokenized_input = tokenizer(formatted_eval_prompt, return_tensors="pt", max_length=max_seq_length, truncation=True)
    input_ids = tokenized_input["input_ids"].to("cuda")
    attention_mask = tokenized_input["attention_mask"].to("cuda")

    # Ensure the model is available and on GPU
    if 'model' not in locals() or not isinstance(model, torch.nn.Module):
         # Re-load model if needed (e.g., running this cell standalone)
         print("Model not found or not a PyTorch module, attempting to load from directory...")
         from unsloth import FastLanguageModel
         model, tokenizer_check = FastLanguageModel.from_pretrained(
              model_name=output_dir,
              max_seq_length=max_seq_length, # Need max_seq_length defined or passed
              dtype=dtype, # Need dtype defined or passed
              load_in_4bit=load_in_4bit, # Need load_in_4bit defined or passed
         )
         if tokenizer_check != tokenizer:
             print("Warning: Re-loaded tokenizer differs.")
    model.to("cuda") # Ensure model is on GPU

    print(f"\n--- Evaluation Case {i+1} ---")
    print(f"Original Query: {original_query}")
    print(f"--- Prompt Sent to Model ---\n{formatted_eval_prompt}\n")
    print("--- Generated Itinerary ---")

    # Generate and stream the output
    # Using `max_new_tokens` to control response length
    # Adjust `temperature` and `top_p` for creativity vs. coherence
    # Ensure streamer is defined if needed
    if 'streamer' not in locals() or streamer is None:
         print("Streamer not found, creating a new one...")
         from transformers import TextStreamer
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Use input_ids and attention_mask directly from truncation
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=1024, # Maximum length of the generated response
        use_cache=True,
        temperature=0.7,    # Lower for more deterministic, higher for more creative
        top_p=0.95,         # Nucleus sampling
        do_sample=True,     # Enable sampling for varied responses
        pad_token_id=tokenizer.eos_token_id, # Important for generation
        streamer=streamer   # Use streamer to print tokens as they are generated
    )

    # After streaming, we can decode the full output if needed for saving/further processing
    # Decode starting from the end of the input prompt
    generated_text = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)

    print("\n" + "="*70 + "\n")

print("Evaluation complete. Review the generated itineraries above.")
print("Remember to manually assess quality, coherence, and adherence to constraints.")


Starting Model Evaluation

Loading fine-tuned model from /content/gdrive/MyDrive/model/deepseek_r1_travel_planner_finetuned...
==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model and tokenizer loaded from saved directory.
Evaluating on 10 examples from the test set...


--- Evaluation Case 1 ---
Original Query: Please plan a trip for me starting from Sarasota to Chicago for 3 days, from March 22nd to March 24th, 2022. The budget for this trip is set at $1,900.
--- Prompt Sent to Model ---
User's travel request: Please plan a trip for me starting from Sarasota to Chicago for 3 days, from M