<a href="https://colab.research.google.com/github/esterna25/ConvolutionalNeuralNetwork/blob/main/Trial_CBF3_Ester.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Recommendation System: Content-Based Filtering of Activities and Tour Packages

**Step 1: Import Libraries**

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import re
import random

In [None]:
# Load the dataset: the CSV is semicolon-delimited and its 'id' column becomes the index
dataset = pd.read_csv("dataset_coba2.csv", sep=';', index_col='id')
# Preview the first rows
dataset.head()

Unnamed: 0_level_0,travel_preference,transportation_preference,physical_activity_level,activity_preferences,activity_description,tour_preferences,tour_description,budget_range,trip_duration
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Group tour,Biking,Challenging,Village Tour Cycling,Biking is the activity of cycling on various t...,3D/2N Authentic Rural Experience at Tegal duku...,Tour packages that offer an authentic rural ex...,Rp 500.000 - Rp 1.000.000,2 or 3 Days
1,Group tour,Walking,Easy,Rice Field Trekking,Trekking is an activity of walking in nature t...,Treasure Hunt & Cooking Battle,Treasure Hunt & Cooking Battle adalah paket tu...,Rp 250.000 - Rp 500.000,1 Day
2,Solo,Biking,Medium,Village Tour Cycling,Biking is the activity of cycling on various t...,Treasure Hunt & Cooking Battle,Treasure Hunt & Cooking Battle adalah paket tu...,Rp 250.000 - Rp 500.000,1 Day
3,Solo,Car rental,Easy,Fireflies Watching,Activities that involve observing and witnessi...,Hiking Journey In Taro Village,Hiking trips in Taro Village that offer advent...,Rp 500.000 - Rp 1.000.000,1 Day
4,Group tour,Biking,Medium,Cooking Class,The highlight of the experience is travel back...,Treasure Hunt & Cooking Battle,Treasure Hunt & Cooking Battle adalah paket tu...,Rp 250.000 - Rp 500.000,1 Day


In [None]:
# Summarise column dtypes and non-null counts
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7 entries, 0 to 6
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   travel_preference          7 non-null      object
 1   transportation_preference  7 non-null      object
 2   physical_activity_level    7 non-null      object
 3   activity_preferences       7 non-null      object
 4   activity_description       7 non-null      object
 5   tour_preferences           7 non-null      object
 6   tour_description           7 non-null      object
 7   budget_range               7 non-null      object
 8   trip_duration              7 non-null      object
dtypes: object(9)
memory usage: 560.0+ bytes


In [None]:
# Check activity_description
def print_description(index):
    """Print the raw activity description of the row whose id equals ``index``.

    Three lines are printed: the ``activity_description`` text, then the
    row's ``travel_preference`` and ``activity_preferences`` values.

    Args:
        index: Value to look up in ``dataset.index``.

    Returns:
        None. Nothing is printed when no row has that index value.
    """
    columns = ['activity_description', 'travel_preference', 'activity_preferences']
    matches = dataset.loc[dataset.index == index, columns].values

    # Test the number of matching rows *before* taking the first one; indexing
    # an empty result would raise IndexError instead of skipping quietly.
    if len(matches) == 0:
        return

    description, travel_preference, activity_preference = matches[0]
    print(description)
    print('travel_preference:', travel_preference)
    print('activity_preferences:', activity_preference)

In [None]:
# Show the raw activity description for the row with id 1
print_description(1)

Trekking is an activity of walking in nature to explore mountain paths, forests or other natural areas. This activity is quite easy, followed by at least 2 people or a group. In this activity, there are Rice Field Trekking and Jungle Trekking.
travel_preference: Group tour
activity_preferences: Rice Field Trekking


In [None]:
# List the column names available in the dataset
print(dataset.columns)

Index(['travel_preference', 'transportation_preference',
       'physical_activity_level', 'activity_preferences',
       'activity_description', 'tour_preferences', 'tour_description',
       'budget_range', 'trip_duration'],
      dtype='object')


In [None]:
# Inspect the index values (the 'id' column chosen as index when loading)
print(dataset.index)

Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64', name='id')


In [None]:
# Download the NLTK stop-word corpus that stopwords.words(...) reads during preprocessing
import nltk
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Text preprocessing activity_description
# The patterns are raw strings so that the regex escapes (\[ \] \|) are not
# treated as (invalid) Python string escapes, which triggers a warning.
clean_spcl_1 = re.compile(r'[/(){}\[\]\|@,;]')   # punctuation replaced by a space
clean_symbol_1 = re.compile(r'[^0-9a-z #+_]')    # every other disallowed character is deleted
stopworda_1 = set(stopwords.words('english'))

def clean_text_1(text_1):
    """Normalise one activity description for TF-IDF vectorisation.

    Steps, in order: lowercase the text, replace the characters matched by
    ``clean_spcl_1`` with a space, delete the characters matched by
    ``clean_symbol_1``, then drop English stop words.

    Args:
        text_1: The description string to clean.

    Returns:
        The cleaned text with the remaining words joined by single spaces.
    """
    # lowercase text
    text_1 = text_1.lower()
    text_1 = clean_spcl_1.sub(' ', text_1)
    text_1 = clean_symbol_1.sub('', text_1)

    # remove stopwords from the activity_description text
    text_1 = ' '.join(word for word in text_1.split() if word not in stopworda_1)
    return text_1

# Create an additional column for the description of the data that has been cleaned
dataset['act_descrip_clean'] = dataset['activity_description'].apply(clean_text_1)

In [None]:
# Activity description after preprocessing
def print_act_descrip_clean(index):
    """Print the cleaned activity description of the row whose id equals ``index``.

    Three lines are printed: the ``act_descrip_clean`` text, then the row's
    ``travel_preference`` and ``activity_preferences`` values.

    Args:
        index: Value to look up in ``dataset.index``.

    Returns:
        None. Nothing is printed when no row has that index value.
    """
    columns = ['act_descrip_clean', 'travel_preference', 'activity_preferences']
    matches = dataset.loc[dataset.index == index, columns].values

    # Test the number of matching rows *before* taking the first one; indexing
    # an empty result would raise IndexError instead of skipping quietly.
    if len(matches) == 0:
        return

    cleaned_text, travel_preference, activity_preference = matches[0]
    print(cleaned_text)
    print('Travel Preference:', travel_preference)
    print('Activity Preference:', activity_preference)

In [None]:
# Show the cleaned activity description for the row with id 1
print_act_descrip_clean(1)

trekking activity walking nature explore mountain paths forests natural areas activity quite easy followed least 2 people group activity rice field trekking jungle trekking
Travel Preference: Group tour
Activity Preference: Rice Field Trekking


In [None]:
# Text preprocessing tour_description
# The patterns are raw strings so that the regex escapes (\[ \] \|) are not
# treated as (invalid) Python string escapes, which triggers a warning.
clean_spcl_2 = re.compile(r'[/(){}\[\]\|@,;]')   # punctuation replaced by a space
clean_symbol_2 = re.compile(r'[^0-9a-z #+_]')    # every other disallowed character is deleted
# NOTE(review): some tour descriptions in the dataset preview appear to be
# written in Indonesian; English stop words will not filter those words.
# Consider also removing Indonesian stop words -- confirm with the data owner.
stopworda_2 = set(stopwords.words('english'))

def clean_text_2(text_2):
    """Normalise one tour description for TF-IDF vectorisation.

    Steps, in order: lowercase the text, replace the characters matched by
    ``clean_spcl_2`` with a space, delete the characters matched by
    ``clean_symbol_2``, then drop English stop words.

    Args:
        text_2: The description string to clean.

    Returns:
        The cleaned text with the remaining words joined by single spaces.
    """
    text_2 = text_2.lower()  # lowercase text
    text_2 = clean_spcl_2.sub(' ', text_2)
    text_2 = clean_symbol_2.sub('', text_2)

    # remove stopwords from the tour_description text
    text_2 = ' '.join(word for word in text_2.split() if word not in stopworda_2)
    return text_2

# Create an additional column holding the cleaned tour descriptions
dataset['tour_descrip_clean'] = dataset['tour_description'].apply(clean_text_2)

In [None]:
# Tour description after preprocessing
def print_tour_descrip_clean(index):
    """Print the cleaned tour description of the row whose id equals ``index``.

    Three lines are printed: the ``tour_descrip_clean`` text, then the row's
    ``travel_preference`` and ``tour_preferences`` values.

    Args:
        index: Value to look up in ``dataset.index``.

    Returns:
        None. Nothing is printed when no row has that index value.
    """
    columns = ['tour_descrip_clean', 'travel_preference', 'tour_preferences']
    matches = dataset.loc[dataset.index == index, columns].values

    # Test the number of matching rows *before* taking the first one; indexing
    # an empty result would raise IndexError instead of skipping quietly.
    if len(matches) == 0:
        return

    cleaned_text, travel_preference, tour_preference = matches[0]
    print(cleaned_text)
    print('Travel Preference:', travel_preference)
    print('Tour Preference:', tour_preference)

In [None]:
# Show the cleaned tour description for the row with id 3
print_tour_descrip_clean(3)

hiking trips taro village offer adventurous experiences natural exploration hiking trip participants pass predetermined paths various levels difficulty
Travel Preference: Solo
Tour Preference: Hiking Journey In Taro Village


In [None]:
# Feature extraction for act_descrip_clean
# Fit a TF-IDF vectorizer on the cleaned activity descriptions and transform
# them into a feature matrix (rows follow the row order of `dataset`).
tfidf_activity = TfidfVectorizer()
activity_features = tfidf_activity.fit_transform(dataset['act_descrip_clean'])

In [None]:
# Similarity matrix for activity_description
# Pairwise cosine similarity between the rows of activity_features.
similarity_matrix_activity = cosine_similarity(activity_features)

In [None]:
# Feature extraction for tour_descrip_clean
# Fit a separate TF-IDF vectorizer on the cleaned tour descriptions and
# transform them into a feature matrix (rows follow the row order of `dataset`).
tfidf_tour = TfidfVectorizer()
tour_features = tfidf_tour.fit_transform(dataset['tour_descrip_clean'])

In [None]:
# Similarity matrix for tour_description
# Pairwise cosine similarity between the rows of tour_features.
similarity_matrix_tour = cosine_similarity(tour_features)

In [None]:
# Function to get similar preferences based on activity and tour preferences
def _top_similar_names(column, query, similarity_matrix, top_n):
    """Return up to ``top_n`` distinct ``column`` values most similar to ``query``.

    ``similarity_matrix`` is indexed by row *position*; it is expected to be
    aligned with the row order of ``dataset`` (it is built from a column of
    that frame). Index labels are never used to address it, so the lookup
    stays correct even if the ids are not 0..n-1.

    Raises:
        ValueError: If ``query`` does not occur in ``dataset[column]``.
    """
    names = dataset[column].tolist()
    if query not in names:
        raise ValueError(f"{query!r} was not found in the '{column}' column")
    if top_n <= 0:
        return []

    # Position of the first row carrying the requested value.
    query_position = names.index(query)
    scores = similarity_matrix[query_position]

    # sorted() is stable, so equally similar rows keep their dataset order.
    ranked_positions = sorted(range(len(names)), key=lambda pos: scores[pos], reverse=True)

    picked = []
    for pos in ranked_positions:
        candidate = names[pos]
        # Skip the queried item itself (several rows may share its name and
        # description) and names already selected, so that the caller gets
        # ``top_n`` genuinely different suggestions whenever they exist.
        if candidate == query or candidate in picked:
            continue
        picked.append(candidate)
        if len(picked) == top_n:
            break
    return picked


def get_similar_preferences(activity_preferences, tour_preferences, main_preference, top_n=3):
    """Recommend activities and tours similar to the user's choices.

    Args:
        activity_preferences: Activity name; must match a value of the
            ``activity_preferences`` column exactly.
        tour_preferences: Tour name; must match a value of the
            ``tour_preferences`` column exactly.
        main_preference: The user's primary choice; always placed first in
            the result.
        top_n: Maximum number of similar activities and of similar tours.

    Returns:
        A list of names without duplicates: ``main_preference`` first, then
        the similar activities, then the similar tours, each group ordered by
        decreasing similarity.

    Raises:
        ValueError: If either preference is not present in the dataset.
    """
    similar_activities = _top_similar_names(
        'activity_preferences', activity_preferences, similarity_matrix_activity, top_n
    )
    similar_tours = _top_similar_names(
        'tour_preferences', tour_preferences, similarity_matrix_tour, top_n
    )

    # Put the user's primary choice first, then combine both groups.
    combined = [main_preference] + similar_activities + similar_tours

    # dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(combined))

In [None]:
# Prompt user for activity_preferences and tour_preferences.
# The names are compared to the dataset with exact equality, so surrounding
# whitespace typed by accident is stripped before the lookup.
activity_preferences = input("Enter your activity preferences: ").strip()
tour_preferences = input("Enter your tour preferences: ").strip()
# The activity is treated as the user's primary choice and is listed first.
main_preference = activity_preferences

similar_preferences = get_similar_preferences(activity_preferences, tour_preferences, main_preference, top_n=3)
print("Recommendation Activities and Packet Tours:")
# Number the recommendations starting from 1.
for rank, preference in enumerate(similar_preferences, start=1):
    print(f"{rank}. {preference}")

Enter your activity preferences: Jungle Trekking
Enter your tour preferences: Treasure Hunt & Cooking Battle
Recommendation Activities and Packet Tours:
1. Jungle Trekking
2. Rice Field Trekking
3. Fireflies Watching
4. Down Hill Cycling
5. Treasure Hunt & Cooking Battle
6. Hiking Journey In Taro Village
