In [None]:
import pandas as pd

In [None]:
df = pd.read_csv('/content/Travel details dataset.csv')

In [None]:
df.describe()

Unnamed: 0,Trip ID,Duration (days),Traveler age
count,139.0,137.0,137.0
mean,70.0,7.605839,33.175182
std,40.269923,1.601276,7.145441
min,1.0,5.0,20.0
25%,35.5,7.0,28.0
50%,70.0,7.0,31.0
75%,104.5,8.0,38.0
max,139.0,14.0,60.0


In [None]:
df.head()

Unnamed: 0,Trip ID,Destination,Start date,End date,Duration (days),Traveler name,Traveler age,Traveler gender,Traveler nationality,Accommodation type,Accommodation cost,Transportation type,Transportation cost
0,1,"London, UK",5/1/2023,5/8/2023,7.0,John Smith,35.0,Male,American,Hotel,1200,Flight,600
1,2,"Phuket, Thailand",6/15/2023,6/20/2023,5.0,Jane Doe,28.0,Female,Canadian,Resort,800,Flight,500
2,3,"Bali, Indonesia",7/1/2023,7/8/2023,7.0,David Lee,45.0,Male,Korean,Villa,1000,Flight,700
3,4,"New York, USA",8/15/2023,8/29/2023,14.0,Sarah Johnson,29.0,Female,British,Hotel,2000,Flight,1000
4,5,"Tokyo, Japan",9/10/2023,9/17/2023,7.0,Kim Nguyen,26.0,Female,Vietnamese,Airbnb,700,Train,200


In [None]:
#Checking if we have any NaN values
df.isnull().sum()

Trip ID                 0
Destination             2
Start date              2
End date                2
Duration (days)         2
Traveler name           2
Traveler age            2
Traveler gender         2
Traveler nationality    2
Accommodation type      2
Accommodation cost      2
Transportation type     3
Transportation cost     3
dtype: int64

In [None]:
df = df.dropna()

In [None]:
print(df['Transportation type'].unique())

['Flight' 'Train' 'Plane' 'Bus' 'Car rental' 'Subway' 'Car' 'Ferry'
 'Airplane']


In [None]:
df["Transportation type"].replace({"Flight":"Plane", "Airplane": "Plane"}, inplace=True)

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 136 entries, 0 to 138
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Trip ID               136 non-null    int64  
 1   Destination           136 non-null    object 
 2   Start date            136 non-null    object 
 3   End date              136 non-null    object 
 4   Duration (days)       136 non-null    float64
 5   Traveler name         136 non-null    object 
 6   Traveler age          136 non-null    float64
 7   Traveler gender       136 non-null    object 
 8   Traveler nationality  136 non-null    object 
 9   Accommodation type    136 non-null    object 
 10  Accommodation cost    136 non-null    object 
 11  Transportation type   136 non-null    object 
 12  Transportation cost   136 non-null    object 
dtypes: float64(2), int64(1), object(10)
memory usage: 14.9+ KB


In [None]:
#Let's keep our cost features more consistent
df["Transportation cost"] = df["Transportation cost"].replace("[^\d]+", "", regex=True)
df["Accommodation cost"] = df["Accommodation cost"].replace("[^\d]+", "", regex=True)

#Turn them integers
df["Transportation cost"] = df["Transportation cost"].astype(int)
df["Accommodation cost"] = df["Accommodation cost"].astype(int)

In [None]:
df.head(100)

Unnamed: 0,Trip ID,Destination,Start date,End date,Duration (days),Traveler name,Traveler age,Traveler gender,Traveler nationality,Accommodation type,Accommodation cost,Transportation type,Transportation cost
0,1,"London, UK",5/1/2023,5/8/2023,7.0,John Smith,35.0,Male,American,Hotel,1200,Plane,600
1,2,"Phuket, Thailand",6/15/2023,6/20/2023,5.0,Jane Doe,28.0,Female,Canadian,Resort,800,Plane,500
2,3,"Bali, Indonesia",7/1/2023,7/8/2023,7.0,David Lee,45.0,Male,Korean,Villa,1000,Plane,700
3,4,"New York, USA",8/15/2023,8/29/2023,14.0,Sarah Johnson,29.0,Female,British,Hotel,2000,Plane,1000
4,5,"Tokyo, Japan",9/10/2023,9/17/2023,7.0,Kim Nguyen,26.0,Female,Vietnamese,Airbnb,700,Train,200
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,98,London,2/14/2023,2/19/2023,5.0,Sarah Brown,37.0,Female,UK,Hotel,600,Plane,700
98,99,New York,5/8/2023,5/14/2023,6.0,Michael Wong,50.0,Male,China,Airbnb,800,Car rental,300
99,100,Rome,8/20/2023,8/27/2023,7.0,Jessica Chen,31.0,Female,Taiwan,Hotel,700,Plane,900
100,101,Bangkok,11/12/2023,11/20/2023,8.0,Ken Tanaka,42.0,Male,Japan,Hostel,300,Train,100


In [None]:
print(df.columns)

Index(['Trip ID', 'Destination', 'Start date', 'End date', 'Duration (days)',
       'Traveler name', 'Traveler age', 'Traveler gender',
       'Traveler nationality', 'Accommodation type', 'Accommodation cost',
       'Transportation type', 'Transportation cost'],
      dtype='object')


In [None]:
# df.to_csv('preprocessed1.csv')

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib

class DestinationRecommendationModel:
    def __init__(self):
        self.model = None
        self.df = None
        self.tfidf_matrix = None

    def preprocess_data(self, data):
        data['combined_features'] = data.apply(lambda x: ' '.join(x[['Traveler age', 'Traveler gender', 'Traveler nationality',
                                                                     'Accommodation type', 'Accommodation cost',
                                                                     'Transportation type', 'Transportation cost']].astype(str)), axis=1)
        return data

    def train_model(self, data):
        self.df = self.preprocess_data(data)
        tfidf_vectorizer = TfidfVectorizer()
        self.tfidf_matrix = tfidf_vectorizer.fit_transform(self.df['combined_features'])
        self.model = cosine_similarity(self.tfidf_matrix, self.tfidf_matrix)

    def save_model(self, filename):
        joblib.dump((self.model, self.df, self.tfidf_matrix), filename)

    def load_model(self, filename):
        self.model, self.df, self.tfidf_matrix = joblib.load(filename)

    def recommend_destination(self, trip_id, top_n=5):
        if self.model is None:
            raise Exception("Model has not been trained or loaded.")
        trip_index = self.df[self.df['Trip ID'] == trip_id].index[0]
        sim_scores = list(enumerate(self.model[trip_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n+1]  # Exclude the first item (itself)
        trip_indices = [i[0] for i in sim_scores]
        recommended_destinations = []
        for index in trip_indices:
            destination = self.df.iloc[index]
            recommended_destinations.append({
                'Destination': destination['Destination'],
                'Accommodation Cost': destination['Accommodation cost'],
                'Transportation Cost': destination['Transportation cost']
            })
        return recommended_destinations

# Example usage:
if __name__ == "__main__":
    data = df

    model = DestinationRecommendationModel()
    model.train_model(pd.DataFrame(data))

    recommended_destinations = model.recommend_destination(10)
    print("Recommended Destinations:")
    for destination in recommended_destinations:
        print(destination['Destination'], destination['Accommodation Cost'], destination['Transportation Cost'])


Recommended Destinations:
Dubai 1000 800
Auckland, New Zealand 7000 2500
Cancun, Mexico 2200 800
Hawaii 2000 800
Phuket, Thailand 800 500


In [None]:
from sklearn.model_selection import train_test_split

# Assuming you have your dataset stored in a DataFrame called 'df'
# You can load your dataset here

# Define features and target variable
X = df.drop(columns=['Destination'])  # Features
y = df['Destination']  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shapes of the training and testing sets
print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)


Training set shape: (108, 13) (108,)
Testing set shape: (28, 13) (28,)


In [None]:
!pip install ipywidgets

import ipywidgets as widgets
from IPython.display import display
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class DestinationRecommendationModel:
    def __init__(self):
        self.model = None
        self.df = None
        self.tfidf_matrix = None
        self.tfidf_vectorizer = None

    def preprocess_data(self, data):
        data['combined_features'] = data.apply(lambda x: ' '.join(x[['Traveler age', 'Traveler gender', 'Traveler nationality',
                                                                     'Accommodation type', 'Accommodation cost',
                                                                     'Transportation type', 'Transportation cost']].astype(str)), axis=1)
        return data

    def train_model(self, data):
        self.df = self.preprocess_data(data)
        self.tfidf_vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.df['combined_features'])
        self.model = cosine_similarity(self.tfidf_matrix, self.tfidf_matrix)

    def recommend_destination(self, user_preferences, top_n=5):
        if self.model is None:
            raise Exception("Model has not been trained or loaded.")

        # Extract the user preferences
        user_features = ' '.join([str(value) for value in user_preferences.values()])
        user_tfidf = self.tfidf_vectorizer.transform([user_features])

        # Compute similarity between user preferences and destinations
        user_destination_similarity = cosine_similarity(user_tfidf, self.tfidf_matrix)

        sim_scores = user_destination_similarity[0]
        sim_scores = list(enumerate(sim_scores))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[:top_n]  # Consider top n destinations
        trip_indices = [i[0] for i in sim_scores]

        recommended_destinations = []
        for index in trip_indices:
            destination = self.df.iloc[index]
            recommended_destinations.append({
                'Destination': destination['Destination'],
                'Accommodation Cost': destination['Accommodation cost'],
                'Transportation Cost': destination['Transportation cost']
            })
        return recommended_destinations

# Load your data into 'df'
# Example data loading:
# df = pd.read_csv('your_data.csv')

# Initialize and train the recommendation model
model = DestinationRecommendationModel()
model.train_model(df)

# Create form elements for user preferences
age = widgets.IntSlider(description='Age:', min=0, max=100, value=30)
gender = widgets.Dropdown(options=['Male', 'Female', 'Other'], description='Gender:')
nationality = widgets.Text(description='Nationality:')
accommodation_type = widgets.Dropdown(options=['Hotel', 'Hostel', 'Airbnb', 'Other'], description='Accommodation Type:')
accommodation_cost = widgets.FloatSlider(description='Accommodation Cost:', min=0, max=1000, value=200)
transportation_type = widgets.Dropdown(options=['Flight', 'Train', 'Bus', 'Other'], description='Transportation Type:')
transportation_cost = widgets.FloatSlider(description='Transportation Cost:', min=0, max=2000, value=500)

# Create a button to trigger recommendation
button = widgets.Button(description='Get Recommendations')

# Define the function to get recommendations
def get_recommendations(b):
    user_preferences = {
        'Traveler age': age.value,
        'Traveler gender': gender.value,
        'Traveler nationality': nationality.value,
        'Accommodation type': accommodation_type.value,
        'Accommodation cost': accommodation_cost.value,
        'Transportation type': transportation_type.value,
        'Transportation cost': transportation_cost.value
    }
    recommendations = model.recommend_destination(user_preferences)
    print("Recommended Destinations based on User Preferences:")
    for destination in recommendations:
        print(destination['Destination'], destination['Accommodation Cost'], destination['Transportation Cost'])

# Attach the function to the button
button.on_click(get_recommendations)

# Display the form and button
display(age, gender, nationality, accommodation_type, accommodation_cost, transportation_type, transportation_cost, button)




IntSlider(value=30, description='Age:')

Dropdown(description='Gender:', options=('Male', 'Female', 'Other'), value='Male')

Text(value='', description='Nationality:')

Dropdown(description='Accommodation Type:', options=('Hotel', 'Hostel', 'Airbnb', 'Other'), value='Hotel')

FloatSlider(value=200.0, description='Accommodation Cost:', max=1000.0)

Dropdown(description='Transportation Type:', options=('Flight', 'Train', 'Bus', 'Other'), value='Flight')

FloatSlider(value=500.0, description='Transportation Cost:', max=2000.0)

Button(description='Get Recommendations', style=ButtonStyle())

Recommended Destinations based on User Preferences:
Japan 800 500
Bangkok, Thailand 500 50
Rome 200 350
Bangkok 400 500
Paris 900 400


In [None]:
!pip install ipywidgets

import ipywidgets as widgets
from IPython.display import display
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class DestinationRecommendationModel:
    def __init__(self):
        self.model = None
        self.df = None
        self.tfidf_matrix = None
        self.tfidf_vectorizer = None

    def preprocess_data(self, data):
        data['combined_features'] = data.apply(lambda x: ' '.join(x[['Traveler age', 'Traveler gender', 'Traveler nationality',
                                                                     'Accommodation type', 'Accommodation cost',
                                                                     'Transportation type', 'Transportation cost']].astype(str)), axis=1)
        return data

    def train_model(self, data):
        self.df = self.preprocess_data(data)
        self.tfidf_vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.df['combined_features'])
        self.model = cosine_similarity(self.tfidf_matrix, self.tfidf_matrix)

    def recommend_destination(self, user_preferences, top_n=5):
        if self.model is None:
            raise Exception("Model has not been trained or loaded.")

        # Extract the user preferences
        user_features = ' '.join([str(value) for value in user_preferences.values()])
        user_tfidf = self.tfidf_vectorizer.transform([user_features])

        # Compute similarity between user preferences and destinations
        user_destination_similarity = cosine_similarity(user_tfidf, self.tfidf_matrix)

        sim_scores = user_destination_similarity[0]
        sim_scores = list(enumerate(sim_scores))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[:top_n]  # Consider top n destinations
        trip_indices = [i[0] for i in sim_scores]

        recommended_destinations = []
        for index in trip_indices:
            destination = self.df.iloc[index]
            # Convert costs to float for comparison
            accommodation_cost = float(destination['Accommodation cost'])
            transportation_cost = float(destination['Transportation cost'])
            # Check if the accommodation and transportation costs are within the user's budget
            if accommodation_cost <= user_preferences['Accommodation cost'] and \
               transportation_cost <= user_preferences['Transportation cost']:
                recommended_destinations.append({
                    'Destination': destination['Destination'],
                    'Accommodation Cost': accommodation_cost,
                    'Transportation Cost': transportation_cost
                })
                # Break if we have found enough recommendations
                if len(recommended_destinations) == top_n:
                    break
        return recommended_destinations


# Initialize and train the recommendation model
model = DestinationRecommendationModel()
model.train_model(df)

# Create form elements for user preferences
age = widgets.IntSlider(description='Age:', min=0, max=100, value=30)
gender = widgets.Dropdown(options=['Male', 'Female', 'Other'], description='Gender:')
nationality = widgets.Text(description='Nationality:')
accommodation_type = widgets.Dropdown(options=['Hotel', 'Hostel', 'Airbnb', 'Other'], description='Accommodation Type:')
accommodation_cost = widgets.FloatSlider(description='Accommodation Cost:', min=0, max=1000, value=200)
transportation_type = widgets.Dropdown(options=['Flight', 'Train', 'Bus', 'Other'], description='Transportation Type:')
transportation_cost = widgets.FloatSlider(description='Transportation Cost:', min=0, max=2000, value=500)

# Create a button to trigger recommendation
button = widgets.Button(description='Get Recommendations')

# Define the function to get recommendations
def get_recommendations(b):
    user_preferences = {
        'Traveler age': age.value,
        'Traveler gender': gender.value,
        'Traveler nationality': nationality.value,
        'Accommodation type': accommodation_type.value,
        'Accommodation cost': accommodation_cost.value,
        'Transportation type': transportation_type.value,
        'Transportation cost': transportation_cost.value
    }
    recommendations = model.recommend_destination(user_preferences)
    print("Recommended Destinations based on User Preferences:")
    for destination in recommendations:
        print(destination['Destination'], destination['Accommodation Cost'], destination['Transportation Cost'])

# Attach the function to the button
button.on_click(get_recommendations)

# Display the form and button
display(age, gender, nationality, accommodation_type, accommodation_cost, transportation_type, transportation_cost, button)




IntSlider(value=30, description='Age:')

Dropdown(description='Gender:', options=('Male', 'Female', 'Other'), value='Male')

Text(value='', description='Nationality:')

Dropdown(description='Accommodation Type:', options=('Hotel', 'Hostel', 'Airbnb', 'Other'), value='Hotel')

FloatSlider(value=200.0, description='Accommodation Cost:', max=1000.0)

Dropdown(description='Transportation Type:', options=('Flight', 'Train', 'Bus', 'Other'), value='Flight')

FloatSlider(value=500.0, description='Transportation Cost:', max=2000.0)

Button(description='Get Recommendations', style=ButtonStyle())

Recommended Destinations based on User Preferences:
Rome 200.0 350.0
Recommended Destinations based on User Preferences:
Bangkok 400.0 500.0
Bangkok, Thailand 500.0 50.0
Recommended Destinations based on User Preferences:
Bangkok 400.0 500.0
Bangkok, Thailand 500.0 50.0
Bangkok, Thailand 400.0 50.0
Recommended Destinations based on User Preferences:
Bangkok, Thailand 400.0 50.0


In [None]:
df = pd.read_csv('/content/preprocessed2.csv')

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136 entries, 0 to 135
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Unnamed: 0            136 non-null    int64 
 1   Trip ID               136 non-null    int64 
 2   Destination           136 non-null    object
 3   Start date            136 non-null    object
 4   End date              136 non-null    object
 5   Duration (days)       136 non-null    int64 
 6   Traveler name         136 non-null    object
 7   Traveler age          136 non-null    int64 
 8   Traveler gender       136 non-null    object
 9   Traveler nationality  136 non-null    object
 10  Accommodation type    136 non-null    object
 11  Accommodation cost    136 non-null    int64 
 12  Transportation type   136 non-null    object
 13  Transportation cost   136 non-null    int64 
 14  Budget                136 non-null    int64 
dtypes: int64(7), object(8)
memory usage: 16.

In [None]:
df.describe()

Unnamed: 0.1,Unnamed: 0,Trip ID,Duration (days),Traveler age,Accommodation cost,Transportation cost,Budget
count,136.0,136.0,136.0,136.0,136.0,136.0,136.0
mean,67.5,69.463235,7.610294,33.117647,1253.529412,645.183824,1898.713235
std,39.403892,40.388284,1.606343,7.139933,1338.644162,584.476153,1834.553533
min,0.0,1.0,5.0,20.0,150.0,20.0,200.0
25%,33.75,34.75,7.0,28.0,600.0,200.0,1000.0
50%,67.5,68.5,7.0,31.0,900.0,550.0,1400.0
75%,101.25,104.25,8.0,37.25,1200.0,800.0,1900.0
max,135.0,139.0,14.0,60.0,8000.0,3000.0,10500.0


In [None]:
!pip install ipywidgets

import ipywidgets as widgets
from IPython.display import display
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class DestinationRecommendationModel:
    def __init__(self):
        self.model = None
        self.df = None
        self.tfidf_matrix = None
        self.tfidf_vectorizer = None

    def preprocess_data(self, data):
        data['combined_features'] = data.apply(lambda x: ' '.join(x[['Traveler age', 'Traveler gender', 'Traveler nationality',
                                                                     'Accommodation type','Transportation type', 'Budget']].astype(str)), axis=1)
        return data

    def train_model(self, data):
        self.df = self.preprocess_data(data)
        self.tfidf_vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.df['combined_features'])
        self.model = cosine_similarity(self.tfidf_matrix, self.tfidf_matrix)

    def recommend_destination(self, user_preferences, top_n=5):
        if self.model is None:
            raise Exception("Model has not been trained or loaded.")

        # Extract the user preferences
        user_features = ' '.join([str(value) for value in user_preferences.values()])
        user_tfidf = self.tfidf_vectorizer.transform([user_features])

        # Compute similarity between user preferences and destinations
        user_destination_similarity = cosine_similarity(user_tfidf, self.tfidf_matrix)

        sim_scores = user_destination_similarity[0]
        sim_scores = list(enumerate(sim_scores))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[:top_n]  # Consider top n destinations
        trip_indices = [i[0] for i in sim_scores]

        recommended_destinations = []
        for index in trip_indices:
            destination = self.df.iloc[index]
            # Convert budget to float for comparison
            budget = float(destination['Budget'])
            # Check if the budget is within the user's budget
            if budget <= user_preferences['Budget']:
                recommended_destinations.append({
                    'Destination': destination['Destination'],
                    'Budget': budget
                })
                # Break if we have found enough recommendations
                if len(recommended_destinations) == top_n:
                    break
        return recommended_destinations


model = DestinationRecommendationModel()
model.train_model(df)

# Create form elements for user preferences
age = widgets.IntSlider(description='Age:', min=0, max=100, value=30)
gender = widgets.Dropdown(options=['Male', 'Female', 'Other'], description='Gender:')
nationality = widgets.Text(description='Nationality:')
accommodation_type = widgets.Dropdown(options=['Hotel', 'Hostel', 'Airbnb', 'Other'], description='Accommodation Type:')
transportation_type = widgets.Dropdown(options=['Flight', 'Train', 'Bus', 'Other'], description='Transportation Type:')
budget = widgets.FloatSlider(description='Budget:', min=0, max=3000, value=1000)

# Create a button to trigger recommendation
button = widgets.Button(description='Get Recommendations')

# Define the function to get recommendations
def get_recommendations(b):
    user_preferences = {
        'Traveler age': age.value,
        'Traveler gender': gender.value,
        'Traveler nationality': nationality.value,
        'Accommodation type': accommodation_type.value,
        'Transportation type': transportation_type.value,
        'Budget': budget.value
    }
    recommendations = model.recommend_destination(user_preferences)
    print("Recommended Destinations based on User Preferences:")
    for destination in recommendations:
        print(destination['Destination'], destination['Budget'])

# Attach the function to the button
button.on_click(get_recommendations)

# Display the form and button
display(age, gender, nationality, accommodation_type, transportation_type, budget, button)




IntSlider(value=30, description='Age:')

Dropdown(description='Gender:', options=('Male', 'Female', 'Other'), value='Male')

Text(value='', description='Nationality:')

Dropdown(description='Accommodation Type:', options=('Hotel', 'Hostel', 'Airbnb', 'Other'), value='Hotel')

Dropdown(description='Transportation Type:', options=('Flight', 'Train', 'Bus', 'Other'), value='Flight')

FloatSlider(value=1000.0, description='Budget:', max=3000.0)

Button(description='Get Recommendations', style=ButtonStyle())

Recommended Destinations based on User Preferences:
Bangkok 1000.0
Greece 1000.0
London, UK 1000.0
Bangkok, Thailand 550.0
Bangkok 900.0
Recommended Destinations based on User Preferences:
Greece 1000.0
London, UK 1000.0
Egypt 800.0
Recommended Destinations based on User Preferences:
Bangkok 1000.0
Greece 1000.0
London, UK 1000.0
Bangkok, Thailand 550.0
Bangkok 900.0
