<a href="https://colab.research.google.com/github/dhrumilpatel28/TravelPlanner/blob/main/final_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
import os
os.environ["TRANSFORMERS_OFFLINE"] = "1"
os.environ["HF_DATASETS_OFFLINE"] = "1"
os.environ["HF_HUB_OFFLINE"] = "1"


In [42]:
tokenizer = T5Tokenizer.from_pretrained("./t5-small")
model = T5ForConditionalGeneration.from_pretrained("./t5-small")


In [43]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
T5Tokenizer.from_pretrained("t5-small").save_pretrained("./t5-small")
T5ForConditionalGeneration.from_pretrained("t5-small").save_pretrained("./t5-small")


In [44]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Download model + tokenizer and save locally
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

tokenizer.save_pretrained("./t5-small")
model.save_pretrained("./t5-small")


In [46]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import pandas as pd
import torch

# Load and clean dataset
df = pd.read_csv("/content/synthetic_travel_training_data.csv")  # Must have 'query' and 'plan' columns
df = df[["query", "plan"]].dropna()

# Convert to HuggingFace Dataset
dataset = Dataset.from_pandas(df)

# Train-test split
dataset = dataset.train_test_split(test_size=0.1)
train_ds = dataset["train"]
eval_ds = dataset["test"]

# Load model/tokenizer from local folder (no login needed)
tokenizer = T5Tokenizer.from_pretrained("./t5-small")
model = T5ForConditionalGeneration.from_pretrained("./t5-small")

# Check for GPU and move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Preprocessing function
def preprocess(example):
    inputs = tokenizer(example["query"], padding="max_length", truncation=True, max_length=128)
    targets = tokenizer(example["plan"], padding="max_length", truncation=True, max_length=256)
    inputs["labels"] = targets["input_ids"]
    return inputs

# Apply preprocessing
train_ds = train_ds.map(preprocess, batched=True)
eval_ds = eval_ds.map(preprocess, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./t5_travel_model",
    eval_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    push_to_hub=False,  # CRUCIAL: must stay False
    report_to=[],         # Disable default reporting (e.g., wandb)
    # Add this line to specify the device
    ddp_find_unused_parameters=False if device.type == "cuda" else None, # This is to avoid an error when using DataParallel with unused parameters
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
)

# Train the model
trainer.train()

# Save the trained model
model.save_pretrained("t5_travel_model")
tokenizer.save_pretrained("t5_travel_model")

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,0.3659,0.294132
2,0.3038,0.274755
3,0.3212,0.270819


('t5_travel_model/tokenizer_config.json',
 't5_travel_model/special_tokens_map.json',
 't5_travel_model/spiece.model',
 't5_travel_model/added_tokens.json')

In [50]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

model = T5ForConditionalGeneration.from_pretrained("t5_travel_model")
tokenizer = T5Tokenizer.from_pretrained("t5_travel_model")


In [65]:
from transformers import pipeline

# Load the pipeline
planner = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# Example query
query = "Plan a 4-day trip from Surat to Mumbai for 3 person(s) with a budget of ₹24000 starting 2025-09-01."

# Generate output
result = planner(query, max_new_tokens=512)[0]["generated_text"]

print(" Suggested Travel Plan:\n", result)


Device set to use cpu


🔮 Suggested Travel Plan:
 Day 1: Stay at Travelers' Choice Deluxe. Visit Central Botanical Garden, Central Botanical Garden, Central Botanical Garden. Lunch at Tandoori Tadka Cafe. Dinner at Tandoori Tadka Cafe. Day 2: Stay at Travelers' Choice Deluxe. Visit Central Botanical Garden, Central Botanical Garden, Central Botanical Garden. Lunch at Tandoori Tadka Cafe. Dinner at Tandoori Tadka Cafe. Day 4: Stay at Travelers' Choice Deluxe. Visit Central Botanical Garden, Central Botanical Garden, Central Botanical Garden. Lunch at Taste Buds Point. Dinner at Tandoori Tadka Cafe. Day 5: Stay at Travelers' Choice Deluxe. Visit Central Botanical Garden, Central Botanical Garden, Central Botanical Garden. Lunch at Tandoori Tadka Cafe. Dinner at Tandoori Tadka Cafe.


In [94]:
from transformers import pipeline
import pandas as pd
import re
import unicodedata

#  Load the model and tokenizer
planner = pipeline("text2text-generation", model="t5_travel_model", tokenizer="t5_travel_model")

#  Load real-world datasets
attractions_df = pd.read_csv("/content/Top Indian Places to Visit.csv")
restaurants_df = pd.read_csv("/content/restaurant-real-world-swiggy.csv.zip")
hotels_df = pd.read_csv("/content/hotel-real-world-travel_sample.csv.zip")

#  Normalize function to clean city names
def normalize(s):
    if pd.isnull(s):
        return ""
    s = str(s)
    s = unicodedata.normalize("NFKD", s)
    s = s.encode("ascii", "ignore").decode("utf-8")
    return s.strip().lower()

#  Ask user for a travel query
query = input(" Enter your travel query:\n ")

#  Generate travel plan using the model
raw_plan = planner(query, max_new_tokens=512)[0]["generated_text"]

#  Extract destination city from the query
match = re.search(r'from\s+[\w\s]+\s+to\s+([\w\s]+?)\s+(for|with|starting)', query, re.IGNORECASE)
destination_city = match.group(1).strip() if match else "Delhi"
normalized_city = normalize(destination_city)

#  Filter Attractions
attractions = attractions_df[attractions_df["City"].apply(normalize) == normalized_city].head(5)

#  Filter Hotels
hotels = hotels_df[hotels_df["city"].apply(normalize) == normalized_city].head(3)

#  Filter Restaurants (use .str.contains for fuzzy match)
restaurants = restaurants_df[restaurants_df["city"].apply(normalize).str.contains(normalized_city, na=False)].head(5)

#  Debug info
print(f"\n Normalized destination city: {normalized_city}")
print(f" Matching Rows — Attractions: {len(attractions)}, Restaurants: {len(restaurants)}, Hotels: {len(hotels)}")

#  Display the full travel plan
print(f"\n Final Travel Plan for {destination_city}")
print("\n Model-Generated Plan:")
print(raw_plan)

#  Show Attractions
if not attractions.empty:
    print("\n Top Attractions:")
    for _, row in attractions.iterrows():
        print(f" - {row['Name']} ({row['City']}, {row['State']})")
else:
    print("\n No attractions found for this city.")

#  Show Restaurants
if not restaurants.empty:
    print("\n Recommended Restaurants:")
    for _, row in restaurants.iterrows():
        print(f" - {row['name']} | Cuisine: {row['cuisine']} | Rating: {row['rating']}")
else:
    print("\n No restaurants found for this city.")

#  Show Hotels
if not hotels.empty:
    print("\n Hotel Suggestions:")
    for _, row in hotels.iterrows():
        print(f" - {row['property_name']} | Rating: {row['hotel_star_rating']} | Area: {row['area']} | City: {row['city']}")
else:
    print("\n No hotels found for this city.")


Device set to use cpu


 Enter your travel query:
 plan a trip from mumbai to ahmedabad for 4 days in 10000Rs

 Normalized destination city: ahmedabad
 Matching Rows — Attractions: 5, Restaurants: 5, Hotels: 3

 Final Travel Plan for ahmedabad

 Model-Generated Plan:
Day 1: Stay at Travelers' Choice Deluxe. Visit Royal Botanical Garden, Royal Botanical Garden, Royal Botanical Garden. Lunch at Masala Magic Point. Dinner at Masala Magic Point. Day 2: Stay at Travelers' Choice Deluxe. Visit Royal Botanical Garden, Royal Botanical Garden, Royal Botanical Garden. Lunch at Masala Magic Point. Dinner at Masala Magic Point. Day 3: Stay at Travelers' Choice Deluxe. Visit Royal Botanical Garden, Royal Botanical Garden, Royal Botanical Garden. Lunch at Masala Magic Point. Dinner at Masala Magic Point. Day 4: Stay at Travelers' Choice Deluxe. Visit Royal Botanical Garden, Royal Botanical Garden, Royal Botanical Garden. Lunch at Masala Magic Point. Dinner at Masala Magic Point.

 Top Attractions:
 - Sabarmati Ashram (Ahme