<a href="https://colab.research.google.com/github/chesyaivana/Kel.04_sysrec/blob/main/hybrid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Import Library yang Dibutuhkan


In [None]:
# Import library untuk analisis data dan pemrosesan teks
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import nltk
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Download the NLTK "stopwords" corpus so stopwords.words(...) can be used below.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# 2. Preprocessing Data

In [None]:
# Load the review dataset.
data = pd.read_csv('Tempat-Wisata-Toba-Preprocessing.csv')

# Indonesian stop words that are stripped from every review.
stop_words = set(stopwords.words('indonesian'))


def _clean_review(text):
    """Lower-case a review and remove Indonesian stop words.

    Args:
        text: Raw review value; may be a string or a missing value (NaN/None).

    Returns:
        The remaining tokens joined by single spaces. Missing reviews yield an
        empty string instead of the literal token "nan".
    """
    if pd.isna(text):
        return ''
    # Lower-case BEFORE the stop-word lookup so capitalised stop words are removed too.
    tokens = str(text).lower().split()
    return ' '.join(token for token in tokens if token not in stop_words)


# Cleaned text used by the TF-IDF step.
data['clean_reviews'] = data['Reviews'].apply(_clean_review)

# Show the first processed rows.
data.head()


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,address,PlaceID,Nama_tempat_wisata,Category,ReviewerId,Rating,Reviews,clean_reviews
0,0,0,"Jl. Sibola Hotang, Sibola Hotangsas, Kec. Bali...",0,PASIR PUTIH LUMBAN BULBUL,Wisata Bahari,1.12603e+20,5.0,,
1,1,1,"Jl. Sibola Hotang, Sibola Hotangsas, Kec. Bali...",0,PASIR PUTIH LUMBAN BULBUL,Wisata Bahari,1.11909e+20,5.0,bagus,bagus
2,2,2,"Jl. Sibola Hotang, Sibola Hotangsas, Kec. Bali...",0,PASIR PUTIH LUMBAN BULBUL,Wisata Bahari,1.07886e+20,5.0,,
3,3,3,"Jl. Sibola Hotang, Sibola Hotangsas, Kec. Bali...",0,PASIR PUTIH LUMBAN BULBUL,Wisata Bahari,1.13072e+20,5.0,sangat menyenagkan,menyenagkan
4,4,4,"Jl. Sibola Hotang, Sibola Hotangsas, Kec. Bali...",0,PASIR PUTIH LUMBAN BULBUL,Wisata Bahari,1.06173e+20,5.0,bebas foto dimana aja cuma 2k,bebas foto dimana aja 2k


# 3. Representasi Teks Menggunakan TF-IDF

In [None]:
# Encode the cleaned reviews as a dense TF-IDF matrix with at most 500 features.
tfidf = TfidfVectorizer(max_features=500)
X = tfidf.fit_transform(data['clean_reviews'].fillna('')).toarray()


def _rating_to_sentiment(rating):
    """Map a numeric rating to a sentiment label.

    Ratings above 3 are 'positif', ratings below 3 are 'negatif'; everything
    else (exactly 3, or a value for which both comparisons are false) is 'netral'.
    """
    if rating > 3:
        return 'positif'
    if rating < 3:
        return 'negatif'
    return 'netral'


data['sentiment_label'] = data['Rating'].apply(_rating_to_sentiment)


# 4. Training Model Analisis Sentimen

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    data['sentiment_label'],
    test_size=0.2,
    random_state=42,
)

# Multinomial Naive Bayes sentiment classifier trained on the TF-IDF features.
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict the held-out rows and print the per-class precision / recall / F1 report.
predictions = model.predict(X_test)
report = classification_report(y_test, predictions)
print("Laporan Klasifikasi:\n", report)

Laporan Klasifikasi:
               precision    recall  f1-score   support

     negatif       0.45      0.01      0.02       403
      netral       0.29      0.01      0.01       634
     positif       0.88      1.00      0.94      7609

    accuracy                           0.88      8646
   macro avg       0.54      0.34      0.32      8646
weighted avg       0.82      0.88      0.83      8646



# 5. Content-Based Filtering

In [None]:
# Pairwise cosine similarity between the TF-IDF vectors of all reviews.
# NOTE(review): this is a dense (rows x rows) matrix, so memory grows
# quadratically with the number of reviews in `data`.
cosine_sim = cosine_similarity(X, X)


def recommend_based_on_content(index, cosine_sim=cosine_sim, top_n=5):
    """Recommend places whose reviews are most similar to the review at `index`.

    Args:
        index: Positional row number of the query review in `data`.
        cosine_sim: Square similarity matrix whose row `index` holds the
            similarity of the query review to every other review.
        top_n: Number of recommendations to return (default 5).

    Returns:
        DataFrame with the columns 'Nama_tempat_wisata', 'Rating' and
        'clean_reviews' for the `top_n` most similar reviews, most similar first.
    """
    scores = np.asarray(cosine_sim[index])
    # Resolve a negative index to its position so it can be compared below.
    position = index if index >= 0 else index + len(scores)

    # Descending by score; the stable sort keeps ties in ascending row order.
    order = np.argsort(-scores, kind='stable')

    # Exclude the query row explicitly: with tied scores (duplicate or empty
    # reviews) it is not guaranteed to be the first entry, so skipping the first
    # entry could drop a valid match and recommend the query itself.
    recommended_indices = [int(i) for i in order if i != position][:top_n]
    return data.iloc[recommended_indices][['Nama_tempat_wisata', 'Rating', 'clean_reviews']]


print("Rekomendasi Berdasarkan Ulasan Serupa:")
recommend_based_on_content(0)

# 6. Collaborative Filtering (Sederhana)

In [None]:
# Use reviewer ids as string labels so they can serve as index keys.
# NOTE(review): if the CSV stores ids as floats (e.g. 1.12603e+20), distinct
# reviewers may already share a value before this cast — confirm against the raw data.
data['ReviewerId'] = data['ReviewerId'].astype(str)

# Reviewer x place matrix of ratings (NaN where a reviewer did not rate a place).
user_item_matrix = data.pivot_table(index='ReviewerId', columns='Nama_tempat_wisata', values='Rating')

# Ratings centred on each reviewer's own mean rating.
user_item_matrix_normalized = user_item_matrix.sub(user_item_matrix.mean(axis=1), axis=0)


def collaborative_recommendation(user_id, user_item_matrix=user_item_matrix):
    """Find the reviewers whose ratings correlate best with those of `user_id`.

    Args:
        user_id: Reviewer id; converted to `str` before the lookup.
        user_item_matrix: Reviewer x place rating matrix indexed by reviewer id.

    Returns:
        None (after printing a message) when the reviewer is not in the matrix.
        Otherwise a Series named 'similarity', indexed by reviewer id, holding
        up to five correlations sorted from highest to lowest. The Series is
        empty when no other reviewer has a defined correlation with the user.
    """
    user_key = str(user_id)
    if user_key not in user_item_matrix.index:
        print(f"User ID {user_id} tidak ditemukan.")
        return None

    user_ratings = user_item_matrix.loc[user_key].dropna()
    if user_ratings.empty:
        return pd.Series(dtype=float, name='similarity')

    # Only the places this user rated can contribute to a correlation, and the
    # user must not be compared with themselves.
    others = user_item_matrix.drop(index=user_key)[user_ratings.index]

    # axis=1 correlates each reviewer ROW with the user's place-indexed ratings.
    # The default axis aligns place names against reviewer ids, which never
    # overlap, so every correlation would be NaN.
    similar_users = others.corrwith(user_ratings, axis=1)
    similar_users = similar_users.dropna().sort_values(ascending=False)
    return similar_users.head(5).rename('similarity')


input_user_id = '1.07886e+20'
print("Rekomendasi Berdasarkan Collaborative Filtering:")
collaborative_recommendation(input_user_id)

# 7. Penggabungan Hybrid

In [None]:
def hybrid_recommendation(index, user_id):
    """Combine content-based and collaborative signals into one recommendation table.

    The Series returned by `collaborative_recommendation` is interpreted as
    reviewer id -> similarity. The places rated by those reviewers (looked up in
    `data`) receive a 'collab_rating' equal to their mean rating. Labels that do
    not match any ReviewerId in `data` contribute nothing.

    Args:
        index: Positional row number of the query review, passed to
            `recommend_based_on_content`.
        user_id: Reviewer id passed to `collaborative_recommendation`.

    Returns:
        DataFrame with at most five rows. When the reviewer is unknown, only the
        content-based recommendations are returned. Otherwise the table has an
        extra 'collab_rating' column and is ordered by it (highest first, rows
        without a collaborative rating last, content order kept among ties).
    """
    content_recommendations = recommend_based_on_content(index)

    # Fall back to content-based results when the reviewer is unknown.
    collab_recommendations = collaborative_recommendation(user_id)
    if collab_recommendations is None:
        print("Rekomendasi Collaborative Filtering tidak tersedia untuk user ini.")
        return content_recommendations.head(5)

    # Translate the reviewer-level result into place-level scores. The two
    # results cannot be joined on their indexes: the content table is indexed by
    # review row labels, which are unrelated to the collaborative index.
    neighbour_ids = collab_recommendations.index.astype(str)
    neighbour_rows = data[data['ReviewerId'].astype(str).isin(neighbour_ids)]
    collab_scores = neighbour_rows.groupby('Nama_tempat_wisata')['Rating'].mean()

    hybrid_recommendations = content_recommendations.copy()
    hybrid_recommendations['collab_rating'] = (
        hybrid_recommendations['Nama_tempat_wisata'].map(collab_scores)
    )

    # Append places suggested only by the collaborative side.
    already_listed = collab_scores.index.isin(hybrid_recommendations['Nama_tempat_wisata'])
    collab_only = collab_scores[~already_listed]
    if not collab_only.empty:
        extra_rows = pd.DataFrame({
            'Nama_tempat_wisata': collab_only.index,
            'collab_rating': collab_only.to_numpy(),
        })
        hybrid_recommendations = pd.concat([hybrid_recommendations, extra_rows], ignore_index=True)

    hybrid_recommendations = hybrid_recommendations.sort_values(
        'collab_rating', ascending=False, na_position='last', kind='stable'
    )
    return hybrid_recommendations.head(5).reset_index(drop=True)


# Example usage
print("Rekomendasi Hybrid:")
hybrid_recommendation(0, '1.00003e+20')

# 8. Menghitung Metrik Evaluasi

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

def calculate_rmse(y_true, y_pred):
    """Return the root-mean-squared error between true and predicted values."""
    return np.sqrt(mean_squared_error(y_true, y_pred))


def calculate_mae(y_true, y_pred):
    """Return the mean absolute error between true and predicted values."""
    return mean_absolute_error(y_true, y_pred)


# Hand-written example ratings used to demonstrate the two helpers.
actual_ratings = [4, 3, 5, 2, 5]  # true ratings
predicted_ratings = [3.5, 3, 4.5, 2, 4.8]  # predicted ratings

# Evaluate both error metrics on the example.
rmse = calculate_rmse(actual_ratings, predicted_ratings)
mae = calculate_mae(actual_ratings, predicted_ratings)

print("RMSE:", rmse)
print("MAE:", mae)

# 9. Mengukur Precision, Recall, dan F1-Score

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Hand-written example: 1 marks a relevant item, 0 an irrelevant one.
true_labels = [1, 0, 1, 1, 0]
# Labels produced by the model for the same items.
predicted_labels = [1, 0, 1, 0, 0]

precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

# Print the three scores in the same order they were computed.
for label, value in (("Precision:", precision), ("Recall:", recall), ("F1-Score:", f1)):
    print(label, value)

# 10. Evaluasi Khusus untuk Rekomendasi: MAP@K dan Hit Rate


In [None]:
def mean_average_precision_at_k(y_true, y_pred, k=10):
    """Compute average precision at K for a single recommendation list.

    Args:
        y_true: Collection of relevant items.
        y_pred: Ordered list of recommended items (best first).
        k: Number of leading recommendations to evaluate.

    Returns:
        The sum of precision values at each rank where a new relevant item
        appears within the first `k` recommendations, divided by
        min(len(y_true), k). Returns 0.0 when there are no relevant items or
        `k` is not positive, instead of dividing by zero.
    """
    max_hits = min(len(y_true), k)
    if max_hits <= 0:
        return 0.0

    top_k = y_pred[:k]
    score = 0.0
    num_hits = 0.0
    for rank, item in enumerate(top_k, start=1):
        # Count a relevant item only the first time it appears in the list.
        if item in y_true and item not in top_k[:rank - 1]:
            num_hits += 1.0
            score += num_hits / rank
    return score / max_hits


# Example usage
actual_items = [101, 102, 103, 104]  # relevant items
predicted_items = [101, 105, 106, 102, 104, 108, 110]  # model recommendations

mapk_score = mean_average_precision_at_k(actual_items, predicted_items, k=5)
print("MAP@K:", mapk_score)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Metric values computed in the earlier evaluation cells. Reading the variables
# (instead of retyping rounded numbers) keeps the chart in sync with them.
# NOTE(review): those cells evaluate small hand-written example lists, so the
# chart reflects the examples rather than a measured run of the recommender.
metrics = ['RMSE', 'MAE', 'Precision', 'Recall', 'MAP']
values = [rmse, mae, precision, recall, mapk_score]

# Bar chart, one colour per metric.
fig, ax = plt.subplots(figsize=(8, 6))
bars = ax.bar(metrics, values, color=['blue', 'orange', 'green', 'red', 'purple'])

# Annotate each bar with its value, rounded to two decimals.
for bar in bars:
    yval = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2.0, yval + 0.02, f'{yval:.2f}', ha='center', fontsize=10)

# Title and axis labels.
ax.set_title('Performance Metrics for Hybrid Recommender System', fontsize=14)
ax.set_xlabel('Metrics', fontsize=12)
ax.set_ylabel('Values', fontsize=12)
ax.set_ylim(0, 1.2)  # headroom so the label above a 1.0 bar stays visible

# Horizontal guide lines, then render.
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
