In [140]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
import warnings
warnings.filterwarnings('ignore')
from joblib import dump, load


In [141]:
# Load dataset
# Hand-entered table of 15 treks, one list entry per trek in every column.
# 'Cost' and 'Days' are numeric and are rescaled in a later cell; 'Trip Grade' and
# 'Max Altitude' are kept for display and are not part of the similarity features.
data = pd.DataFrame({
    'Trek': ['Everest Base Camp Trek', 'Ama Yangri Trek', 'Mardi Himal Trek', 'Panch Pokhari Trek',
             'Annapurna Base Camp Trek', 'Manaslu Circuit Trek', 'Upper Mustang Trek', 'Shey Phoksundo Trek',
             'Langtang Valley Trek', 'Gosaikunda Trek', 'Peaky Peak Trek', 'Rara Lake Trek',
             'Kanchenjunga Trek', 'Upper Dolpo Trek', 'Kori - Kapuche Trek'],
    'Cost': [40000, 2500, 6000, 5000, 25000, 28000, 30000, 12000, 10000, 8000, 12000, 15000, 50000, 50000, 8000],
    'Days': [16, 3, 4, 5, 14, 14, 12, 7, 7, 3, 7, 8, 22, 20, 4],
    'Trip Grade': ['Difficult', 'Easy', 'Moderate', 'Moderate', 'Moderate', 'Difficult', 'Easy', 'Moderate',
                   'Easy', 'Moderate', 'Easy', 'Moderate', 'Difficult', 'Difficult', 'Moderate'],
    'Max Altitude': [5545, 3949, 4500, 4100, 4210, 5416, 4210, 3660, 5050, 4380, 4320, 2990, 5120, 5350, 3800],
    # NOTE(review): the 11th label ('Peaky Peak Trek') is spelled 'March-May & Sept-Dec', without
    # spaces around the hyphens, unlike 'March - May & Sept - Dec' used elsewhere. The one-hot
    # encoder therefore treats it as a separate category - presumably a typo; confirm and normalize.
    'Best Travel Month': ['March - May & Sept - Dec', 'Jan-Feb & March - May', 'March - May & Sept - Dec',
                          'March - May & Sept - Nov', 'March - May & Sept - Dec', 'March - May & Sept - Dec',
                          'March - May & Sept - Dec', 'March - May & Sept - Dec', 'March - May & Sept - Dec',
                          'March - May & Sept - Dec', 'March-May & Sept-Dec', 'March - May & Sept - Dec',
                          'March - May & Sept - Dec', 'March - May & Sept - Nov', 'March - May & Sept - Nov']
})

In [142]:
# Cast the integer 'Cost' and 'Days' columns to float in a single pass
data = data.astype({'Cost': float, 'Days': float})



In [143]:
# Min-max rescale 'Cost' and 'Days'; the fitted scaler is kept so that
# user inputs can be put on the same scale later.
scaled_columns = ['Cost', 'Days']
scaler = MinMaxScaler().fit(data[scaled_columns])
data[scaled_columns] = scaler.transform(data[scaled_columns])


In [144]:
# One-hot encode 'Best Travel Month'
# The encoder keeps its default (sparse) output and the result is densified with
# .toarray(): the `sparse=False` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and removed in 1.4, so this spelling runs on old and new releases.
month_encoder = OneHotEncoder()
months_encoded = month_encoder.fit_transform(data[['Best Travel Month']]).toarray()
months_df = pd.DataFrame(
    months_encoded,
    columns=month_encoder.get_feature_names_out(['Best Travel Month']),
    index=data.index,  # share data's index so the concat below aligns row-for-row
)
# Drop any one-hot columns left over from a previous run of this cell so that
# re-executing it does not append duplicate columns.
data = pd.concat([data.drop(columns=months_df.columns, errors='ignore'), months_df], axis=1)

In [145]:
# Feature columns for the similarity step: the two scaled numeric columns
# followed by every one-hot column produced for 'Best Travel Month'.
month_feature_names = month_encoder.get_feature_names_out(['Best Travel Month'])
features = ['Cost', 'Days', *month_feature_names]



In [146]:
# Sanity report: DataFrame columns, chosen input features, and overall dimensions,
# printed one "label value" line at a time.
diagnostics = [
    ("Number of columns in DataFrame after one-hot encoding:", len(data.columns)),
    ("Columns in DataFrame:", data.columns),
    ("Number of input features:", len(features)),
    ("Input features:", features),
    ("Shape of DataFrame:", data.shape),
]
for label, value in diagnostics:
    print(label, value)


Number of columns in DataFrame after one-hot encoding: 10
Columns in DataFrame: Index(['Trek', 'Cost', 'Days', 'Trip Grade', 'Max Altitude',
       'Best Travel Month', 'Best Travel Month_Jan-Feb & March - May',
       'Best Travel Month_March - May & Sept - Dec',
       'Best Travel Month_March - May & Sept - Nov',
       'Best Travel Month_March-May & Sept-Dec'],
      dtype='object')
Number of input features: 6
Input features: ['Cost', 'Days', 'Best Travel Month_Jan-Feb & March - May', 'Best Travel Month_March - May & Sept - Dec', 'Best Travel Month_March - May & Sept - Nov', 'Best Travel Month_March-May & Sept-Dec']
Shape of DataFrame: (15, 10)


In [147]:
# Count the nulls in each column
null_counts = data.isna().sum()
print(null_counts)

# Discard every row that contains at least one null
data = data.dropna()

# Summary statistics, to eyeball the value ranges
print(data.describe())

Trek                                          0
Cost                                          0
Days                                          0
Trip Grade                                    0
Max Altitude                                  0
Best Travel Month                             0
Best Travel Month_Jan-Feb & March - May       0
Best Travel Month_March - May & Sept - Dec    0
Best Travel Month_March - May & Sept - Nov    0
Best Travel Month_March-May & Sept-Dec        0
dtype: int64
            Cost       Days  Max Altitude  \
count  15.000000  15.000000     15.000000   
mean    0.370526   0.354386   4440.000000   
std     0.340260   0.327156    729.029786   
min     0.000000   0.000000   2990.000000   
25%     0.115789   0.078947   4024.500000   
50%     0.200000   0.210526   4320.000000   
75%     0.557895   0.578947   5085.000000   
max     1.000000   1.000000   5545.000000   

       Best Travel Month_Jan-Feb & March - May  \
count                                15.000000   
m

In [165]:
# Pairwise cosine similarity between all treks over the selected feature columns
feature_matrix = data[features]
similarity_matrix = cosine_similarity(feature_matrix)

In [166]:
def get_recommendations(input_price, input_days, input_month, data, similarity_matrix, scaler, top_n=5):
    """Return the treks whose features are most similar to a user's preferences.

    The request is turned into a single query vector over the same feature
    columns as the treks -- 'Cost', 'Days' and every 'Best Travel Month_*'
    one-hot column present in ``data`` -- and each row of ``data`` is ranked by
    its cosine similarity to that vector.

    Parameters
    ----------
    input_price : float
        Desired cost in the raw units the scaler was fitted on. It is passed
        through ``scaler.transform`` here, so it must not be pre-normalized.
    input_days : float
        Desired duration in raw days; scaled here as well.
    input_month : str
        Travel-window label; ``'Best Travel Month_' + input_month`` must be a
        column of ``data``.
    data : pandas.DataFrame
        Trek table holding 'Cost' and 'Days' (expected to be on the scaler's
        output scale already) plus the one-hot month columns.
    similarity_matrix : array-like
        Not used. Kept only so existing calls keep working: a trek-to-trek
        matrix cannot score a query that is not one of the treks.
    scaler : object
        Fitted scaler exposing ``transform`` for the ['Cost', 'Days'] columns.
    top_n : int, default 5
        Maximum number of rows to return; values <= 0 give an empty frame.

    Returns
    -------
    pandas.DataFrame
        Copy of the best-matching rows of ``data``, most similar first.

    Raises
    ------
    ValueError
        If ``input_month`` does not correspond to a one-hot column of ``data``.
    """
    month_prefix = 'Best Travel Month_'
    month_columns = [col for col in data.columns if str(col).startswith(month_prefix)]
    selected_month = month_prefix + input_month
    if selected_month not in month_columns:
        known_months = [col[len(month_prefix):] for col in month_columns]
        raise ValueError(f"Unknown travel month {input_month!r}; expected one of {known_months}")

    # Slicing with [-0:] would return every row, so handle "nothing requested" explicitly.
    if top_n <= 0 or len(data) == 0:
        return data.iloc[0:0].copy()

    feature_columns = ['Cost', 'Days'] + month_columns

    # One-row query laid out exactly like the trek feature matrix.
    query_frame = pd.DataFrame(0.0, index=[0], columns=feature_columns)
    raw_query = pd.DataFrame({'Cost': [input_price], 'Days': [input_days]})
    query_frame[['Cost', 'Days']] = scaler.transform(raw_query)
    query_frame[selected_month] = 1.0

    trek_matrix = data[feature_columns].to_numpy(dtype=float)
    query_vector = query_frame.to_numpy(dtype=float)[0]

    # Cosine similarity of every trek row against the query; an all-zero row
    # gets a denominator of 1 so it scores 0 instead of dividing by zero.
    norms = (trek_matrix ** 2).sum(axis=1) ** 0.5 * (query_vector ** 2).sum() ** 0.5
    norms[norms == 0] = 1.0
    similarities = trek_matrix @ query_vector / norms

    # Stable descending order: ties keep their original row order.
    recommended_indices = (-similarities).argsort(kind='stable')[:top_n]

    recommendations = data.iloc[recommended_indices].copy()
    return recommendations

In [167]:
# Persist the recommendation function to disk, then read it straight back.
# NOTE(review): a plain function is presumably pickled by reference (module + name),
# not by value, so the file only loads where get_recommendations is defined - confirm
# before relying on it from another process.
model_path = 'recommendation_model.joblib'
dump(get_recommendations, model_path)
loaded_model = load(model_path)

In [173]:
# Sample input values
# get_recommendations() passes price and days through scaler.transform itself, so
# these must be raw values; pre-normalized numbers would be scaled a second time.
input_price = 26250  # Raw cost; maps to 0.5 on the dataset's 2500-50000 cost range
input_days = 8.7  # Raw duration in days; maps to 0.3 on the dataset's 3-22 day range
input_month = 'Jan-Feb & March - May'  # Selected month
