# Catatan yang Diulik

1. Modality Based, mempertimbangkan kolom rating, jumlah reviews, nama tempat, tipe wisata, Waktu buka, dan koordinat
2. Interkoneksi antartempat wisata, dengan mempertimbangkan nilai dari user. Nilai yang dimaksud, misalnya: user A pernah memberi rating berapa di tempat A, lalu telusuri di mana saja dia pernah memberi rating. Hasilnya dijadikan identitas user yang digunakan untuk rekomendasi item, tetapi sebagai *soft recommendation*, bukan *user-based*.

Catatan:
- Untuk koordinat bisa coba ulik rumus **Haversine**
- Poin ke-2 bisa memakai konsep input baru ke model rekomendasi, jadi input tersebut nanti di-*cross-calculation* dengan hasil rekomendasi dari poin 1.
- Waktu membuka website atau waktu komputer dijadikan pertimbangan rekomendasi dari nilai waktu buka di data item.

In [44]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from math import radians, sin, cos, sqrt, atan2
from datetime import datetime

# Load the places dataset from CSV (path is relative to the notebook's working directory).
fixedDf = pd.read_csv('data/fixedData.csv')
# Print the column names, non-null counts and dtypes as a quick sanity check of the load.
fixedDf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 455 entries, 0 to 454
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   place_id               455 non-null    int64  
 1   name                   455 non-null    object 
 2   address                455 non-null    object 
 3   coordinates            455 non-null    object 
 4   workday_timing         455 non-null    object 
 5   closed_on              455 non-null    object 
 6   reviews                455 non-null    float64
 7   rating                 455 non-null    float64
 8   most_popular_times     455 non-null    object 
 9   popular_times          455 non-null    object 
 10  reviewer_name          409 non-null    object 
 11  rating_review          409 non-null    object 
 12  review_text            402 non-null    object 
 13  accessibility_enabled  455 non-null    object 
 14  children_enabled       455 non-null    object 
 15  types 

In [45]:
# Preview the first rows to see what each column actually contains.
fixedDf.head()

Unnamed: 0,place_id,name,address,coordinates,workday_timing,closed_on,reviews,rating,most_popular_times,popular_times,reviewer_name,rating_review,review_text,accessibility_enabled,children_enabled,types,city
0,0,Agro Tainment Wisata Kampung Markisa,"Agro Tainment Wisata Kampung Markisa, Blunyahr...","-7.774189,110.3647986",Not Present,Buka Setiap Hari,2.0,5.0,"07.00, 08.00, 09.00\n---\nLabel Waktu: 07.00\n...",Senin:\n 06.00: 0% | Diam\n 07.00: 25% |...,"['Deyze Nghokeh', 'Khoari Koutsar', 'Deyze Ngh...","[5.0, 5.0, 5.0, 5.0]","['Teks review tidak tersedia', 'Teks review ti...","Pintu masuk khusus pengguna kursi roda, Tempat...",Cocok untuk anak-anak,Tourist attraction,"Kota Yogyakarta, Daerah Istimewa Yogyakarta"
1,1,Ancol Wisata Alam,"Ancol Wisata Alam, Unnamed Road, Selingan, Kar...","-7.6668655999999995,110.266713",Not Present,Buka Setiap Hari,390.0,4.4,Tidak Tersedia,Tidak Tersedia,"['Ifandri Dwi Aryono (IFANDRI DWI ARYONO)', 'C...","[5.0, 5.0, 4.0, 5.0, 4.0, 2.0, 5.0, 4.0]",['salah catu calon destinasi wisata alam berik...,"Pintu masuk khusus pengguna kursi roda, Tempat...",Cocok untuk anak-anak,Tourist attraction,"Kabupaten Magelang, Jawa Tengah"
2,2,BANYU WIYOSO DAGEN (Kolam Ikan),"4846+WH5 BANYU WIYOSO DAGEN (Kolam Ikan), Gumu...","-7.8927344999999995,110.3114727",Not Present,Buka Setiap Hari,4.0,5.0,Tidak Tersedia,Tidak Tersedia,"['Laili Isna Fatkhurrahmah', 'Dian Kusuma Dewi...","[5.0, 5.0, 5.0, 5.0]",['Tempat yang asik buat jalan2 pagi atau sore....,"Pintu masuk khusus pengguna kursi roda, Tempat...",Cocok untuk anak-anak,Tourist attraction,"Kabupaten Bantul, Daerah Istimewa Yogyakarta"
3,3,Bendhung Lepen,"Bendhung Lepen, Mrican UH7/ 338, Giwangan, Kec...","-7.8316668,110.3945119",Not Present,Buka Setiap Hari,1336.0,4.7,"17.00, 16.00, 18.00\n---\nLabel Waktu: 17.00\n...",Senin:\n 04.00: 0% | Diam\n 05.00: 6% | ...,"['Muhammad Hidayatullah', 'suwarso doang', 'L ...","[5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 5.0, 5.0]","['tempat menyenangkan untuk rekreasi, bisa dud...","Pintu masuk khusus pengguna kursi roda, Tempat...","Cocok untuk anak-anak, Cocok untuk ulang tahun...",Tourist attraction,"Kota Yogyakarta, Daerah Istimewa Yogyakarta"
4,4,Bendungan Irigasi Jalan Samas,"278H+G39 Bendungan Irigasi Jalan Samas, Unname...","-7.9837082,110.2777382",Not Present,Buka Setiap Hari,1.0,5.0,Tidak Tersedia,Tidak Tersedia,['Bastian Viery'],[5.0],['Bagus'],Tempat parkir khusus pengguna kursi roda,Tidak cocok untuk anak-anak,Tourist attraction,"Kabupaten Bantul, Daerah Istimewa Yogyakarta"


# Data Preparation

## Standarisasi dan Normalisasi

### Workday Timing

In [46]:
# List the distinct opening-hours strings to see which formats the time filter has to handle.
fixedDf.workday_timing.unique()

array(['Not Present', '07.00-11.00', '06.30-04.30', '04.30-05.00',
       '01.00-08.00', '09.00-04.30', '08.00-05.00', 'Open 24 hours',
       '08.00-03.00', '08.00-04.00', '09.00-03.00', '08.30-02.30',
       '08.00-02.00', '04.00-06.00', '08.30-03.30', '09.00-05.00',
       'Closed', '08.00-09.00', '07.00-04.00', '07.30-04.00',
       '11.00-05.00', '04.00-03.00', '06.00-06.00', '08.30-03.00',
       '10.00-10.00', '05.00-04.00', '08.00-06.00', '09.00-09.00',
       '08.30-04.30', '00.00-10.00', '09.00-02.30', '10.00-05.00',
       '09.00-10.00', '06.30-09.00', '06.00-11.00', '07.00-05.00',
       '07.00-06.00', '08.00-08.00', '08.00-06.15', '09.00-04.00',
       '06.00-04.00', '08.17-09.50', '09.30-06.00', '04.00-06.30',
       '07.30-06.30', '05.45-05.00', '09.00-05.30', '07.00-08.00',
       '08.30-09.00', '04.30-09.00', '09.00-08.00', '10.00-03.00',
       '06.00-07.00', '05.00-06.00', '05.00-07.00', '08.00-11.50',
       '09.00-06.00', '07.30-06.00', '08.00-06.30', '03.00-05.00'

## Rating

In [47]:
# Standardize the rating column using a z-score
scaler = StandardScaler()

# Fit the scaler on 'rating' and store the standardized values in a new 'rating_scaled' column
fixedDf['rating_scaled'] = scaler.fit_transform(fixedDf[['rating']])

## Types

# Model Development

### Perhitungan Haversine (Coordinates)

In [48]:
# Great-circle distance using the Haversine formula
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal degrees.

    Returns:
        The distance in kilometres on a sphere of radius 6371 km.
    """
    R = 6371.0  # Earth radius in kilometres
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can leave `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError; cap it at 1.
    a = min(1.0, a)
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c  # Result in kilometres

In [49]:
# Keep only the places within a given radius of the user (Haversine distance)
def filter_by_distance(df, user_lat, user_lon, max_distance_km=10):
    """Return the rows of ``df`` that lie within ``max_distance_km`` of the user.

    Args:
        df: DataFrame with a 'coordinates' column holding "lat,lon" strings.
        user_lat: User latitude in decimal degrees.
        user_lon: User longitude in decimal degrees.
        max_distance_km: Inclusive radius in kilometres.

    Returns:
        A new DataFrame with an added 'distance' column (kilometres), restricted
        to rows whose distance is <= ``max_distance_km``. ``df`` is not modified.
    """
    def distance_to_user(coord):
        # "lat,lon" -> two floats, passed on as the second point
        return haversine(user_lat, user_lon, *map(float, coord.split(',')))

    # assign() builds a new frame: the caller's DataFrame keeps its columns and
    # no SettingWithCopyWarning is raised when `df` is itself a filtered slice.
    with_distance = df.assign(distance=df['coordinates'].apply(distance_to_user))
    return with_distance[with_distance['distance'] <= max_distance_km]

### Perhitungan Cosine Similarity

In [50]:
# Score every place name against the user's query with TF-IDF and cosine similarity
def search_by_name(df, user_input):
    """Attach a name-similarity score for ``user_input`` to every row.

    The TF-IDF vectorizer is fitted on ``df['name']`` and the query is
    transformed with that same fitted vectorizer.

    Args:
        df: DataFrame with a 'name' column of strings.
        user_input: Free-text query typed by the user.

    Returns:
        A new DataFrame equal to ``df`` plus a 'similarity' column holding the
        cosine similarity between the query and each name. No rows are
        removed and ``df`` itself is not modified.
    """
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(df['name'])
    user_input_tfidf = tfidf_vectorizer.transform([user_input])

    # One query against all names: row 0 holds the score for every place.
    cosine_sim = cosine_similarity(user_input_tfidf, tfidf_matrix)

    # assign() returns a new frame instead of writing into the caller's DataFrame.
    return df.assign(similarity=cosine_sim[0])

### Filterisasi Waktu Kerja (Working Day)

In [51]:
# Keep only the places that are open at a given time (workday_timing)
def filter_by_workday(df, current_time):
    """Return the rows of ``df`` whose ``workday_timing`` covers ``current_time``.

    Handling of ``workday_timing`` values:
      * 'Open 24 hours'          -> always open
      * 'Not Present', 'Closed'  -> never open
      * 'HH.MM-HH.MM'            -> open between the two times, inclusive; a
                                    closing time earlier than the opening time
                                    is read as a window that crosses midnight
      * anything else            -> treated as closed

    NOTE(review): values seen in this dataset such as '08.00-05.00' or
    '09.00-04.30' look like 12-hour times whose AM/PM marker was lost.
    TODO confirm and normalise them during data preparation; until then such
    rows are interpreted as overnight windows.

    Args:
        df: DataFrame with a 'workday_timing' column.
        current_time: ``datetime.time`` to test against. ``None`` falls back to
            the current local time.

    Returns:
        A new DataFrame with an added boolean 'is_open' column, restricted to
        the open rows. ``df`` itself is not modified.
    """
    if current_time is None:
        current_time = datetime.now().time()

    def is_open(workday_timing):
        if not isinstance(workday_timing, str):
            return False  # e.g. NaN for a missing value
        if workday_timing == 'Open 24 hours':
            return True
        if workday_timing in ('Not Present', 'Closed'):
            return False
        try:
            open_text, close_text = workday_timing.split('-')
            open_time = datetime.strptime(open_text.strip(), '%H.%M').time()
            close_time = datetime.strptime(close_text.strip(), '%H.%M').time()
        except ValueError:
            # Unrecognised format: best effort, treat the place as closed.
            return False
        if open_time <= close_time:
            return open_time <= current_time <= close_time
        # Closing time is earlier than opening time: the window wraps past midnight.
        return current_time >= open_time or current_time <= close_time

    # astype(bool) keeps the mask boolean even when `df` has no rows.
    open_mask = df['workday_timing'].apply(is_open).astype(bool)
    with_status = df.assign(is_open=open_mask)

    return with_status[with_status['is_open']]

### Filterisasi Tipe Wisata

In [52]:
# Keep only the places whose type matches the selected type
def filter_by_type(df, selected_type):
    """Return the rows of ``df`` whose 'types' value contains ``selected_type``.

    The match is a case-insensitive literal substring test; rows with a missing
    'types' value are excluded.

    Args:
        df: DataFrame with a 'types' column of strings.
        selected_type: Type label chosen by the user, e.g. "Tourist attraction".

    Returns:
        The matching rows of ``df``.
    """
    # regex=False: the label is user-supplied text, not a pattern. Without it,
    # characters such as "(", "+" or "." would be interpreted as regex syntax
    # and either match the wrong rows or raise.
    matches = df['types'].str.contains(selected_type, case=False, na=False, regex=False)
    return df[matches]

## Filterisasi Umum

In [53]:
# Order the candidates: nearest first, ties broken by name similarity, then by rating
def sort_by_distance_similarity_rating(df):
    """Return ``df`` sorted by distance (ascending), then similarity and rating (both descending)."""
    # column -> sort ascending?
    sort_order = {'distance': True, 'similarity': False, 'rating': False}
    return df.sort_values(by=list(sort_order), ascending=list(sort_order.values()))

## Fungsi Rekomendasi

In [54]:
# Main entry point of the recommender
def recommend(df, user_input, user_lat, user_lon, selected_type, max_distance_km=10, top_n=5):
    """Return the top-N recommended places for a user query.

    Pipeline: score names against ``user_input``, keep places that are open
    now, keep places of ``selected_type``, keep places within
    ``max_distance_km`` of the user, then sort by distance (ascending),
    similarity and rating (both descending).

    Args:
        df: Places DataFrame with at least the columns 'name', 'address',
            'coordinates', 'workday_timing', 'types' and 'rating'.
        user_input: Free-text query matched against place names.
        user_lat: User latitude in decimal degrees.
        user_lon: User longitude in decimal degrees.
        selected_type: Place type to keep.
        max_distance_km: Search radius in kilometres.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with the columns 'name', 'address', 'distance',
        'workday_timing' and 'rating', holding at most ``top_n`` rows.
    """
    # Work on a private copy: the steps below attach helper columns
    # (similarity, is_open, distance) and must not leak them into the caller's
    # DataFrame, which other notebook cells keep using.
    candidates = df.copy()

    # Step 1: score every place name against the query
    candidates = search_by_name(candidates, user_input)

    # Step 2: keep only the places that are open right now
    candidates = filter_by_workday(candidates, datetime.now().time())

    # Step 3: keep only the requested place type
    candidates = filter_by_type(candidates, selected_type)

    # Step 4: keep only the places within the search radius
    candidates = filter_by_distance(candidates, user_lat, user_lon, max_distance_km)

    # Step 5: nearest first, then most similar name, then highest rating
    candidates = sort_by_distance_similarity_rating(candidates)

    # Step 6: take the top-N rows
    return candidates[['name', 'address', 'distance', 'workday_timing', 'rating']].head(top_n)

In [55]:
# Example query. The position equals the coordinates of the first row shown by fixedDf.head().
user_lat = -7.774189
user_lon = 110.3647986
user_input = "Agro"
selected_type = "Tourist attraction"
max_distance_km = 0.2
top_n = 3

# Get the top-N recommendations
# NOTE(review): the recorded output is an empty frame. The row located at exactly these
# coordinates has workday_timing 'Not Present', which filter_by_workday treats as closed,
# so it is dropped before the distance filter runs.
recommendations = recommend(fixedDf, user_input, user_lat, user_lon, selected_type, max_distance_km, top_n)
print(recommendations)

Empty DataFrame
Columns: [name, address, distance, workday_timing, rating]
Index: []
