# In [10]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors
import joblib

# -----------------------------
# Load dataset
# -----------------------------
df = pd.read_csv("travel_recommendation_dataset_v3.csv")

# Integer-encode the two categorical columns. Each fitted encoder is kept
# in its own variable so the same label -> code mapping can be reused later.
le_region = LabelEncoder()
le_type = LabelEncoder()
df["Region_encoded"] = le_region.fit_transform(df["Region"])
df["Type_encoded"] = le_type.fit_transform(df["Destination_Type"])

# -----------------------------
# Prepare training data
# -----------------------------
# Feature matrix: encoded region plus the three numeric trip attributes.
X = df[["Region_encoded", "Total_Days", "Total_Persons", "Budget_Per_Person"]]

# Standardise the features; the scaler is fitted on the DataFrame itself
# (not a bare array) and then applied to the same data.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Global base nearest-neighbour index over every row of the dataset.
knn = NearestNeighbors(n_neighbors=5, metric="euclidean").fit(X_scaled)

# -----------------------------
# Recommendation function
# -----------------------------
def recommend_destinations(region, days, persons, budget, dest_type, top_k=5):
    """Recommend up to ``top_k`` destinations of one type closest to a request.

    Rows of the module-level ``df`` whose ``Destination_Type`` equals
    ``dest_type`` are the candidates. Candidates and the request are scaled
    with the module-level ``scaler`` and compared by Euclidean distance over
    the encoded region, trip length, group size and per-person budget.

    Args:
        region: Region label; must be one of the labels ``le_region`` was
            fitted on.
        days: Requested trip length, compared against ``Total_Days``.
        persons: Requested group size, compared against ``Total_Persons``.
        budget: Requested budget, compared against ``Budget_Per_Person``.
        dest_type: Value of ``Destination_Type`` to restrict candidates to.
        top_k: Maximum number of recommendations; must be at least 1. Fewer
            rows are returned when fewer candidates exist.

    Returns:
        A DataFrame (index reset) with the columns ``Destination_Name``,
        ``Destination_Type``, ``Region``, ``Total_Days``, ``Total_Persons``
        and ``Budget_Per_Person``, in the order produced by the neighbour
        search. If no row matches ``dest_type``, the string
        ``"No destinations found for type: <dest_type>"`` is returned instead.

    Raises:
        ValueError: If ``region`` is not a known region label, or if
            ``top_k`` is smaller than 1.
    """
    feature_cols = ["Region_encoded", "Total_Days", "Total_Persons", "Budget_Per_Person"]
    output_cols = [
        "Destination_Name", "Destination_Type", "Region",
        "Total_Days", "Total_Persons", "Budget_Per_Person",
    ]

    # Restrict the search to the requested destination type. The subset is
    # only read, never modified, so no defensive copy is needed.
    candidates = df[df["Destination_Type"] == dest_type]
    if candidates.empty:
        return f"No destinations found for type: {dest_type}"

    # Fail early with an actionable message instead of letting the encoder /
    # neighbour search raise an opaque error further down.
    if region not in le_region.classes_:
        known_regions = ", ".join(map(str, le_region.classes_))
        raise ValueError(
            f"Unknown region {region!r}; expected one of: {known_regions}"
        )
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    # NOTE(review): the region enters the distance as an integer label code,
    # which gives regions an arbitrary numeric ordering; consider one-hot
    # encoding if that ordering is not intended.
    region_encoded = le_region.transform([region])[0]

    # Never ask for more neighbours than there are candidate rows.
    n_neighbors = min(top_k, len(candidates))

    # Fit a temporary neighbour index on the scaled candidate subset.
    candidates_scaled = scaler.transform(candidates[feature_cols])
    knn_temp = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean")
    knn_temp.fit(candidates_scaled)

    # Build the query as a DataFrame with the same column names the scaler
    # was fitted on, then scale it identically to the candidates.
    user_input = pd.DataFrame(
        [[region_encoded, days, persons, budget]],
        columns=feature_cols,
    )
    user_scaled = scaler.transform(user_input)

    # ``indices`` are positions within ``candidates``, hence ``iloc``.
    _, indices = knn_temp.kneighbors(user_scaled)
    recommendations = candidates.iloc[indices[0]][output_cols]

    return recommendations.reset_index(drop=True)

# -----------------------------
# Example usage
# -----------------------------
# Ask for the five closest "Hill Station" trips to a 7-day, 4-person
# request in the "East" region with a budget of 30000 per person.
user_recs = recommend_destinations(
    region="East", days=7, persons=4, budget=30000,
    dest_type="Hill Station", top_k=5,
)
print(user_recs)

# -----------------------------
# Save all required objects
# -----------------------------
# Both fitted label encoders are stored together, keyed by source column.
label_encoders = dict(Region=le_region, Destination_Type=le_type)
joblib.dump(label_encoders, "label_encoders.pkl")

# Fitted scaler, the base nearest-neighbour model, and the encoded dataset.
joblib.dump(scaler, "scaler.pkl")
joblib.dump(knn, "knn_model.pkl")
joblib.dump(df, "travel_dataset.pkl")


# Cell output:
#
#   Destination_Name Destination_Type Region  Total_Days  Total_Persons  \
# 0       Darjeeling     Hill Station   East           7              5
# 1          Kolkata     Hill Station   East           7              5
# 2           Konark     Hill Station   East           8              3
# 3       Darjeeling     Hill Station   East           6              2
# 4         Shillong     Hill Station   East           6              6
#
#    Budget_Per_Person
# 0              28440
# 1              23203
# 2              37073
# 3              33335
# 4              25031
#
#
# ['travel_dataset.pkl']
#
# ['travel_dataset.pkl']