<a href="https://colab.research.google.com/github/gitnurrahman/Bangkit-2023/blob/main/TourSik_Content_Based_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import random
import os
from google.colab import drive
from sklearn.preprocessing import LabelEncoder

In [None]:
# Mount Google Drive inside the Colab runtime; the dataset folder is read from there.
drive.mount('/content/drive')
path = "/content/drive/MyDrive/Dataset"
# List the files in the dataset folder. os.listdir() returns entries in arbitrary
# order, so positions in `fnames` are not guaranteed to be stable between sessions.
fnames = os.listdir(path)
print(fnames)

Mounted at /content/drive
['Datasets_03.csv', 'Data.csv', 'dataset.csv', 'datafix .csv']


In [None]:
# Load the attractions table by its explicit file name. Picking the file by its
# position in the os.listdir() result (fnames[3]) is fragile: the listing order is
# arbitrary, so a different session could silently load a different CSV.
DATAFIX_FILENAME = 'datafix .csv'  # the file name really contains a space before ".csv"
datafix = pd.read_csv(os.path.join(path, DATAFIX_FILENAME), on_bad_lines='skip', sep=',')

# Fill missing values with 0 in the columns used downstream (a missing Name becomes
# the placeholder value 0; a missing Rating/Price is treated as 0).
datafix['Rating'] = datafix['Rating'].fillna(0)
datafix['Name'] = datafix['Name'].fillna(0)
datafix['Price'] = datafix['Price'].fillna(0)
# datafix['Category'] = datafix['Category'].replace(np.nan,'-')
datafix

Unnamed: 0,Province,Regency,Category,Name,Price,Rating
0,Aceh,Kabupaten Aceh Barat,Religi,Masjid Agung Baitul Makmur Meulaboh,0.0,4.7
1,Aceh,Kabupaten Aceh Barat,Alam,Pantai Ceuremen,0.0,0.0
2,Aceh,Kabupaten Aceh Barat,Alam,Meureubo,0.0,0.0
3,Aceh,Kabupaten Aceh Barat,Sejarah,Tugu Kupiah Teuku Umar,0.0,4.6
4,Aceh,Kabupaten Aceh Barat,Alam,Pantai Suak Ribe,0.0,4.2
...,...,...,...,...,...,...
2233,Papua Selatan,Merauke,Wisata Alam,Pantai Payum,0.0,4.0
2234,Papua Selatan,Merauke,Wisata Alam,Pantai Lampu Satu,5000.0,4.2
2235,Papua Selatan,Merauke,Wisata Alam,Pantai Wisata Onggaya,20000.0,4.1
2236,Papua Selatan,Merauke,Sejarah,Tugu 0 KM Merauke - Sabang,0.0,4.7


In [None]:
# Label Mapping
# Encode every distinct place name as an integer id, numbered in order of first
# appearance. The step is guarded so that re-running this cell on the already
# encoded frame does not rebuild `label_mapping` from the integer ids, which would
# silently discard the original names and make the ids impossible to decode.
if not pd.api.types.is_integer_dtype(datafix['Name']):
    label_mapping = {}
    for label in datafix['Name']:
        if label not in label_mapping:
            new_label = len(label_mapping)
            label_mapping[label] = new_label
    datafix['Name'] = datafix['Name'].map(label_mapping)

# Handling Missing Values
# '-' is used as a "missing" marker in the data; turn it into NaN and drop every
# row that lacks one of the required fields.
datafix.replace('-', np.nan, inplace=True)
datafix.dropna(subset=['Name', 'Category', 'Regency', 'Province', 'Price', 'Rating'], inplace=True)

# Data Type Conversion, then round 'Price' to the nearest thousand
datafix['Price'] = datafix['Price'].astype(float).round(-3)

# Encode the categorical variables
# Guarded for the same reason as the name mapping: fitting a fresh encoder on the
# already-encoded integers would replace `encoder.classes_` (the regency names)
# with the integer codes themselves.
if not pd.api.types.is_integer_dtype(datafix['Regency']):
    encoder = LabelEncoder()
    datafix['Regency'] = encoder.fit_transform(datafix['Regency'])

# Concatenate Price and Regency as text
datafix['Price_Regency'] = datafix['Price'].astype(str) + ' ' + datafix['Regency'].astype(str)
datafix

Unnamed: 0,Province,Regency,Category,Name,Price,Rating,Price_Regency
0,Aceh,24,Religi,0,0.0,4.7,0.0 24
1,Aceh,24,Alam,1,0.0,0.0,0.0 24
2,Aceh,24,Alam,2,0.0,0.0,0.0 24
3,Aceh,24,Sejarah,3,0.0,4.6,0.0 24
4,Aceh,24,Alam,4,0.0,4.2,0.0 24
...,...,...,...,...,...,...,...
2233,Papua Selatan,95,Wisata Alam,1893,0.0,4.0,0.0 95
2234,Papua Selatan,95,Wisata Alam,1894,5000.0,4.2,5000.0 95
2235,Papua Selatan,95,Wisata Alam,1895,20000.0,4.1,20000.0 95
2236,Papua Selatan,95,Sejarah,1896,0.0,4.7,0.0 95


In [None]:
def _select_max_items(items, budget):
    """Choose the largest number of items whose prices sum to at most ``budget``.

    This is a 0/1 knapsack in which every item has value 1 and a weight equal to
    its price.

    Args:
        items: sequence of tuples whose first element is the item's price as a
            non-negative integer.
        budget: non-negative integer spending limit.

    Returns:
        A list with the chosen tuples, collected while walking ``items`` from the
        last element back to the first.
    """
    n = len(items)
    prices = [int(item[0]) for item in items]

    # Express prices and budget in units of the prices' greatest common divisor.
    # Every affordable subset stays affordable (and vice versa), but the table
    # shrinks from `budget + 1` columns to `budget // unit + 1`.
    unit = int(np.gcd.reduce(prices)) if prices else 0
    unit = unit or 1  # no items, or all items free
    weights = [price // unit for price in prices]
    capacity = budget // unit

    # dp[i, j] = max number of items picked among the first i with total weight <= j.
    dp = np.zeros((n + 1, capacity + 1), dtype=int)
    for i in range(1, n + 1):
        w = weights[i - 1]
        dp[i] = dp[i - 1]  # default: item i is skipped
        if w <= capacity:
            # Column 0 is included on purpose so free items count even when no
            # budget is left.
            dp[i, w:] = np.maximum(dp[i - 1, w:], dp[i - 1, :capacity + 1 - w] + 1)

    # Walk back through every item (not only while budget remains) so that free
    # items are recovered after the budget has been fully spent.
    included = []
    j = capacity
    for i in range(n, 0, -1):
        if dp[i, j] != dp[i - 1, j]:
            included.append(items[i - 1])
            j -= weights[i - 1]
    return included


def recommendations():
    """Interactively recommend as many places as possible within a budget.

    Prompts for a budget ('Price:') and an encoded regency id ('Regency:'), keeps
    the rows of the global ``datafix`` frame whose 'Regency' equals that id, picks
    the largest set of places whose total price fits the budget and prints the
    name, price, province and rating of each pick. Names are decoded through the
    global ``label_mapping``.

    Raises:
        ValueError: if either input is not an integer or the budget is negative.
    """
    input_price = int(input('Price:')) # Input price
    input_regency = int(input('Regency:'))  # Input regency
    if input_price < 0:
        raise ValueError("Price must be a non-negative integer")

    # De-encode the output labels
    inverse_mapping = {v: k for k, v in label_mapping.items()}

    # Filter the data based on input regency
    filtered_data = datafix[datafix['Regency'] == input_regency]

    # Create a list of items with required fields (price, name, province, rating)
    items = [(int(price), name, province, rating) for price, name, province, rating in zip(
        filtered_data['Price'], filtered_data['Name'], filtered_data['Province'], filtered_data['Rating']
    )]

    ##### KNAPSACK PROBLEM #####
    # Shuffle the items so that ties between equally good selections are broken
    # differently from call to call.
    random.shuffle(items)
    included_items = _select_max_items(items, input_price)

    # De-encode the included items
    deencoded_included_items = [
        (price, inverse_mapping[name], province, rating) for price, name, province, rating in included_items
    ]

    # Print the selected items
    print("Recommendations based on Price:", input_price, "and Regency:", input_regency)
    for price, name, province, rating in deencoded_included_items:
        print("Name:", name)
        print("Price:", price)
        print("Province:", province)
        print("Rating:", rating)
        print()

In [None]:
# Run the interactive recommender: it prompts for 'Price:' and 'Regency:' (the
# encoded regency id) and prints the selected places.
recommendations()

Price:100000
Regency:12
Recommendations based on Price: 100000 and Regency: 12
Name: 1640
Price: 5000
Province: Sulawesi Selatan
Rating: 4.6

Name: 1638
Price: 0
Province: Sulawesi Selatan
Rating: 4.4

Name: 1631
Price: 10000
Province: Sulawesi Selatan
Rating: 4.8

Name: 1636
Price: 0
Province: Sulawesi Selatan
Rating: 4.4

Name: 1630
Price: 5000
Province: Sulawesi Selatan
Rating: 4.7

Name: 1641
Price: 10000
Province: Sulawesi Selatan
Rating: 4.7

Name: 1633
Price: 5000
Province: Sulawesi Selatan
Rating: 4.4

Name: 1639
Price: 0
Province: Sulawesi Selatan
Rating: 4.3

Name: 1635
Price: 5000
Province: Sulawesi Selatan
Rating: 4.4

Name: 1632
Price: 0
Province: Sulawesi Selatan
Rating: 4.5

Name: 1634
Price: 5000
Province: Sulawesi Selatan
Rating: 4.4

Name: 1637
Price: 0
Province: Sulawesi Selatan
Rating: 4.3

