In [1]:
# Mount Google Drive so the CSV files read later from /content/drive/MyDrive
# are available. This relies on the `google.colab` package, so the cell is
# expected to run inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import json
import joblib

# Read both CSV inputs from the mounted Drive folder. Header names are
# trimmed of surrounding whitespace as part of the load so that later
# column lookups by exact name succeed.
travel_df = (
    pd.read_csv('/content/drive/MyDrive/Updated_Travel.csv')
    .rename(columns=str.strip)
)
distance_df = (
    pd.read_csv('/content/drive/MyDrive/Distance.csv')
    .rename(columns=str.strip)
)


In [3]:
# Input columns passed to the encoder, and the label column the model predicts.
# Keep 'Duration' as the LAST entry: predict_trip_plan builds its input row
# from features[:-1] and then appends the duration value separately.
features = [
    'Destination Type',
    'Destination',
    'Month',
    'Climate',
    'Activities',
    'Travel Companions',
    'Special Requirement',
    'Duration',
]
target = 'Plan'

In [4]:
# One-hot encode categorical features.
# handle_unknown='ignore' makes categories unseen at fit time encode as
# all-zeros instead of raising, which predict_trip_plan relies on when the
# user leaves a feature blank.
# NOTE(review): the encoder is fitted on the full dataset before the
# train/test split, so the test rows' categories are always known at
# evaluation time.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
X = encoder.fit_transform(travel_df[features])
y = travel_df[target]

# Train the model on an 80/20 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out split.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Save the fitted encoder alongside the model: the model only accepts
# encoder-transformed rows, so the model file alone cannot be used for
# inference in a fresh session.
joblib.dump(encoder, 'travel_encoder.joblib')
joblib.dump(model, 'travel_model.joblib')


Accuracy: 0.9613095238095238


['travel_model.joblib']

In [5]:
def get_travel_time(from_loc, to_loc):
    """Return the travel time in hours between two destinations.

    The lookup is done against the module-level ``distance_df``. The
    ``from_loc -> to_loc`` direction is tried first; if no such row exists,
    the reverse direction is used. When several rows match, the first one
    wins.

    Returns:
        The 'Travel Time (Hours)' value of the first matching row, or
        ``None`` when neither direction is present in the table.
    """
    origins = distance_df['From Destination']
    targets = distance_df['To Destination']

    # Forward direction takes priority over the reverse one.
    for start, end in ((from_loc, to_loc), (to_loc, from_loc)):
        matching_rows = distance_df.loc[(origins == start) & (targets == end)]
        if not matching_rows.empty:
            return matching_rows['Travel Time (Hours)'].iat[0]

    return None


In [12]:
def predict_trip_plan(user_input, max_travel_hours=4):
    """Build a day-by-day itinerary and return it as a JSON string.

    For each day, candidate destinations are tried in rotation (starting at
    an offset equal to the day index). A candidate is accepted when:
      * it is reachable from the current location within ``max_travel_hours``
        according to ``get_travel_time``,
      * it is not a non-consecutive repeat of an earlier day's location,
      * the model's predicted plan exists for that destination in
        ``travel_df``, and
      * it still has activities that have not been suggested yet.
    If no candidate is accepted and at least one day is already planned, the
    previous day's location is reused.

    Args:
        user_input: dict keyed by feature name. 'Duration' (number of days,
            default 1) and 'Destination Type' (comma-separated, default '')
            are read directly; other feature values default to ''.
        max_travel_hours: upper bound on travel time between consecutive
            days' locations. Defaults to 4.

    Returns:
        JSON string (indent=4) of a list of single-key dicts shaped like
        ``{"Day N": {"Location": str, "Activities": [str, ...]}}``. A day
        is omitted when nothing could be planned and there is no earlier
        day to fall back on.
    """
    required_duration = int(user_input.get('Duration', 1))
    destination_types = [dt.strip() for dt in user_input.get('Destination Type', '').split(',')]
    combined_plan = []
    used_activities_by_destination = {dest: set() for dest in travel_df['Destination'].unique()}

    def activities_for(destination):
        # Activities from the first dataset row of this destination, kept in
        # their listed order and de-duplicated. Using an ordered list (not a
        # set) keeps the selection reproducible across kernel restarts.
        raw = travel_df.loc[travel_df['Destination'] == destination, 'Activities'].values[0]
        return list(dict.fromkeys(raw.split(', ')))

    def planned_location(day_entry):
        # Every plan entry is a single-key dict {'Day N': {...}}. Read the
        # value directly instead of reconstructing the key from the list
        # position, which breaks once a day has been skipped.
        return next(iter(day_entry.values()))['Location']

    # Filter destinations based on user-specified destination types if provided.
    # Concatenating per-type keeps destinations grouped in the user's type order.
    filtered_dfs = [travel_df[travel_df['Destination Type'].str.strip() == dt] for dt in destination_types]
    filtered_df = pd.concat(filtered_dfs).drop_duplicates()
    unique_destinations = list(filtered_df['Destination'].unique())

    # Top up with other destinations when the filter yields fewer than one per day.
    if len(unique_destinations) < required_duration:
        additional_destinations = [d for d in travel_df['Destination'].unique() if d not in unique_destinations]
        unique_destinations = unique_destinations + additional_destinations[:required_duration - len(unique_destinations)]

    current_location = None

    for day in range(required_duration):
        found_valid_plan = False
        visited_locations = [planned_location(entry) for entry in combined_plan]

        for retry in range(len(unique_destinations)):
            destination = unique_destinations[(day + retry) % len(unique_destinations)]

            # Check travel time constraint if there's a current location.
            # NOTE(review): staying at the current location also goes through
            # this lookup, so it is rejected unless the distance table has a
            # same-place row - confirm whether that is intended.
            if current_location:
                travel_time_hours_forward = get_travel_time(current_location, destination)
                if travel_time_hours_forward is None or travel_time_hours_forward > max_travel_hours:
                    continue  # No known route, or too far for one day

            # A location may repeat only on consecutive days.
            if visited_locations and visited_locations[-1] != destination and destination in visited_locations:
                continue

            # Build one input row: user-supplied values (blank when missing),
            # the candidate destination, and a fixed duration of 1 because
            # the plan is predicted one day at a time.
            user_data = [user_input.get(feature, '') if feature != 'Destination' else destination for feature in features[:-1]]
            user_data.append(1)

            user_encoded = encoder.transform([user_data])
            predicted_plan = model.predict(user_encoded)[0]

            # The predicted plan must actually exist for this destination.
            mask = (travel_df['Plan'] == predicted_plan) & (travel_df['Destination'] == destination)
            if not mask.any():
                continue

            used_here = used_activities_by_destination[destination]
            available_activities = [a for a in activities_for(destination) if a not in used_here]
            if not available_activities:
                continue  # Nothing new to do here; try another location

            selected_activities_list = available_activities[:3]  # Up to three new activities

            combined_plan.append({
                f'Day {day+1}': {
                    'Location': destination,
                    'Activities': selected_activities_list
                }
            })

            used_here.update(selected_activities_list)
            found_valid_plan = True
            current_location = destination
            break

        # Fallback: stay where the previous planned day was.
        if not found_valid_plan and combined_plan:
            last_location_name_str = planned_location(combined_plan[-1])
            all_activities_for_last_location = activities_for(last_location_name_str)
            used_here = used_activities_by_destination[last_location_name_str]

            fallback_activities = [a for a in all_activities_for_last_location if a not in used_here]
            if not fallback_activities:
                # Everything has been suggested already; repeat some activities.
                fallback_activities = all_activities_for_last_location

            selected_activities_list = fallback_activities[:3]

            combined_plan.append({
                f'Day {day+1}': {
                    'Location': last_location_name_str,
                    'Activities': selected_activities_list
                }
            })

            # Record them so a further fallback day offers different activities.
            used_here.update(selected_activities_list)

    return json.dumps(combined_plan, indent=4)


In [13]:
# Sample request for a manual smoke test. Only a subset of the feature
# columns is supplied; predict_trip_plan treats the missing ones as blank.
example_input_dict = {
    'Destination Type': 'Upcountry,Beachside',
    'Special Requirement': 'Private villa, surfing lessons',
    'Duration': 4
}

# Build the itinerary (returned as a JSON string) and show it.
predicted_plan_with_constraints_json_str = predict_trip_plan(example_input_dict)
print(f"Predicted Trip Plan:\n{predicted_plan_with_constraints_json_str}")



Predicted Trip Plan:
[
    {
        "Day 1": {
            "Location": "Nuwara Eliya",
            "Activities": [
                "Golfing",
                "Botanical Exploration",
                "Hiking"
            ]
        }
    },
    {
        "Day 2": {
            "Location": "Ella",
            "Activities": [
                "Flying Ravana Mega zipline",
                "Visit little Adam's Peak",
                "Hiking Ella rock"
            ]
        }
    },
    {
        "Day 3": {
            "Location": "Mirissa",
            "Activities": [
                "Temple Visit",
                "Visit coconut tree hill",
                "Surfing"
            ]
        }
    },
    {
        "Day 4": {
            "Location": "Galle",
            "Activities": [
                "Explore Galle fort",
                "Surfing",
                "Whale watching"
            ]
        }
    }
]


