# Catatan yang Diulik

1. Modality based: mempertimbangkan kolom rating, jumlah review, nama tempat, tipe wisata, waktu buka, dan koordinat.
2. Interkoneksi antar tempat wisata dengan mempertimbangkan nilai dari user. Nilai yang dimaksud, misalnya: user A pernah memberi rating berapa di tempat A, lalu telusuri di mana saja ia pernah memberi rating. Riwayat rating tersebut dijadikan identitas user yang digunakan untuk rekomendasi item, tetapi sebagai soft recommendation, bukan user-based.

Catatan:
- Untuk koordinat bisa coba ulik rumus **Haversine**
- Poin ke-2 bisa memakai konsep input baru ke model rekomendasi, jadi input tersebut nanti di-cross-calculation dengan hasil rekomendasi dari poin 1.
- Waktu saat user membuka website (atau waktu komputer) dijadikan pertimbangan rekomendasi dengan membandingkannya terhadap nilai waktu buka di data item.

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from math import radians, sin, cos, sqrt, atan2
from datetime import datetime

# Load the dataset from data/fixedData.csv and print its column/dtype summary.
fixedDf = pd.read_csv('data/fixedData.csv')
fixedDf.info()

In [None]:
# Preview the first rows of the dataset.
fixedDf.head()

# Data Preparation

## Standarisasi dan Normalisasi

### Workday Timing

In [None]:
# List the distinct values present in the workday_timing column.
fixedDf.workday_timing.unique()

## Rating

In [47]:
# Standardize the rating column using z-scores
scaler = StandardScaler()

# Fit the scaler on 'rating' and store the standardized values in 'rating_scaled'
fixedDf['rating_scaled'] = scaler.fit_transform(fixedDf[['rating']])

## Types

# Model Development

### Perhitungan Haversine (Coordinates)

In [48]:
# Distance calculation using the haversine formula
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance in kilometres, computed with an Earth radius of
        6371 km.
    """
    R = 6371.0  # Earth radius in kilometres
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise
    # ValueError, so clamp it to the mathematically valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c  # Result in kilometres

In [49]:
# Filter by distance from the user (haversine)
def filter_by_distance(df, user_lat, user_lon, max_distance_km=10):
    """Keep only the rows located within ``max_distance_km`` of the user.

    Args:
        df: DataFrame with a ``coordinates`` column holding
            ``"<latitude>,<longitude>"`` strings.
        user_lat: User latitude, passed to ``haversine``.
        user_lon: User longitude, passed to ``haversine``.
        max_distance_km: Maximum allowed distance (inclusive).

    Returns:
        A new DataFrame with an added ``distance`` column, restricted to
        rows whose distance is at most ``max_distance_km``. The input
        frame is left unmodified.
    """
    def distance_to_user(coord):
        # Missing coordinates cannot be parsed; NaN never satisfies `<=`,
        # so such rows are dropped by the filter below instead of crashing.
        if not isinstance(coord, str):
            return float('nan')
        lat, lon = map(float, coord.split(','))
        return haversine(user_lat, user_lon, lat, lon)

    # Copy first: the input is typically a filtered slice of another frame,
    # and writing a column into it would mutate caller state and trigger
    # pandas' SettingWithCopyWarning.
    result = df.copy()
    result['distance'] = result['coordinates'].apply(distance_to_user).astype(float)
    return result[result['distance'] <= max_distance_km]

### Perhitungan Cosine Similarity

In [50]:
# Name search using TF-IDF and cosine similarity
def search_by_name(df, user_input):
    """Score every place name against the user's query text.

    A TF-IDF vocabulary is fitted on the ``name`` column, the query is
    projected into the same space, and the cosine similarity between the
    query and each name is stored per row.

    Args:
        df: DataFrame with a ``name`` column.
        user_input: Free-text query string.

    Returns:
        A new DataFrame with an added ``similarity`` column. The input
        frame is left unmodified.
    """
    # Copy so the caller's frame does not silently gain a 'similarity' column.
    scored = df.copy()

    # TfidfVectorizer rejects NaN documents, so treat missing names as empty text.
    names = scored['name'].fillna('').astype(str)

    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(names)
    user_input_tfidf = tfidf_vectorizer.transform([user_input])

    # The result has one row (the query) and one column per place.
    cosine_sim = cosine_similarity(user_input_tfidf, tfidf_matrix)
    scored['similarity'] = cosine_sim[0]

    return scored

### Filterisasi Waktu Kerja (Working Day)

In [51]:
# Filter by opening hours (workday_timing)
def filter_by_workday(df, current_time=None):
    """Keep only the rows whose opening window contains ``current_time``.

    Args:
        df: DataFrame with a ``workday_timing`` column holding strings in
            the form ``"HH.MM-HH.MM"`` (for example ``"08.00-17.00"``).
            The values ``'Not Present'`` and ``'Closed'``, missing values
            and anything that cannot be parsed are treated as closed.
        current_time: ``datetime.time`` to test against. Defaults to the
            current local time when omitted or ``None``.

    Returns:
        A new DataFrame restricted to open rows, with an added boolean
        ``is_open`` column. The input frame is left unmodified.
    """
    # Honour the caller-supplied time; only fall back to "now" when absent.
    if current_time is None:
        current_time = datetime.now().time()

    def is_open(workday_timing):
        if not isinstance(workday_timing, str):
            return False
        if workday_timing in ('Not Present', 'Closed'):
            return False
        try:
            open_text, close_text = workday_timing.split('-')
            open_time = datetime.strptime(open_text.strip(), '%H.%M').time()
            close_time = datetime.strptime(close_text.strip(), '%H.%M').time()
        except ValueError:
            # Wrong number of parts or an unparseable clock value.
            return False
        if open_time <= close_time:
            return open_time <= current_time <= close_time
        # The window wraps past midnight (e.g. 22.00-02.00).
        return current_time >= open_time or current_time <= close_time

    result = df.copy()
    # astype(bool) keeps the mask boolean even when the frame is empty.
    result['is_open'] = result['workday_timing'].apply(is_open).astype(bool)

    return result[result['is_open']]

### Filterisasi Tipe Wisata

In [52]:
# Filter by attraction type
def filter_by_type(df, selected_type):
    """Return the rows whose ``types`` value contains ``selected_type``.

    The match is a case-insensitive literal substring test, so characters
    such as ``(`` or ``+`` in ``selected_type`` are matched as written
    instead of being interpreted as a regular expression. Rows with a
    missing ``types`` value are excluded.

    Args:
        df: DataFrame with a string ``types`` column.
        selected_type: Text to look for inside ``types``.

    Returns:
        The subset of ``df`` whose ``types`` contains ``selected_type``.
    """
    matches = df['types'].str.contains(selected_type, case=False, na=False, regex=False)
    return df[matches]

## Filterisasi Umum

In [53]:
# Order results by distance, then similarity, then rating
def sort_by_distance_similarity_rating(df):
    """Return ``df`` sorted by nearest distance, then highest similarity, then highest rating."""
    # Maps each sort column to whether it is sorted ascending.
    sort_spec = {
        'distance': True,
        'similarity': False,
        'rating': False,
    }
    columns = list(sort_spec)
    directions = [sort_spec[column] for column in columns]
    return df.sort_values(by=columns, ascending=directions)

## Fungsi Rekomendasi

In [54]:
# Main recommendation entry point
def recommend(df, user_input, user_lat, user_lon, selected_type, max_distance_km=10, top_n=5):
    """Return the top-N places matching the user's query, type and location.

    Args:
        df: DataFrame of places with at least the columns ``name``,
            ``address``, ``workday_timing``, ``types``, ``coordinates``
            and ``rating``.
        user_input: Free-text query scored against the place names.
        user_lat: User latitude.
        user_lon: User longitude.
        selected_type: Type text that a place's ``types`` must contain.
        max_distance_km: Maximum distance from the user.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns ``name``, ``address``, ``distance``,
        ``workday_timing`` and ``rating`` holding at most ``top_n`` rows,
        ordered by distance, then similarity, then rating.
    """
    # Work on a private copy: the pipeline steps may add helper columns to
    # the frame they are given, and those must not leak into the caller's data.
    df = df.copy()

    # Step 1: score the place names against the query
    df = search_by_name(df, user_input)

    # Step 2: keep only the places that are open right now
    df = filter_by_workday(df, datetime.now().time())

    # Step 3: keep only the requested type; the copy detaches the filtered
    # slice so the distance step can add its column without pandas'
    # SettingWithCopyWarning
    df = filter_by_type(df, selected_type).copy()

    # Step 4: keep only the places within range
    df = filter_by_distance(df, user_lat, user_lon, max_distance_km)

    # Step 5: order by nearest distance, then similarity, then highest rating
    df = sort_by_distance_similarity_rating(df)

    # Step 6: take the top-N results
    return df[['name', 'address', 'distance', 'workday_timing', 'rating']].head(top_n)

In [None]:
# Example query: user position in degrees, name keyword and type filter.
user_lat = -7.774189
user_lon = 110.3647986
user_input = "Agro"
selected_type = "Tourist attraction"
max_distance_km = 0.2  # search radius in kilometres (0.2 km = 200 m)
top_n = 3

# Get the top-N recommendations
recommendations = recommend(fixedDf, user_input, user_lat, user_lon, selected_type, max_distance_km, top_n)
print(recommendations)