<table style="float: left; width: 50%;">
    <thead>
        <tr>
            <th style="background-color: #ADD8E6; color: black; text-align: left; padding: 8px;">NIM</th>
            <th style="background-color: #ADD8E6; color: black; text-align: left; padding: 8px;">Nama</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td style="text-align: left; padding: 8px;">12S19036</td>
            <td style="text-align: left; padding: 8px;">Lucas Hutabarat</td>
        </tr>
        <tr>
            <td style="text-align: left; padding: 8px;">12S21004</td>
            <td style="text-align: left; padding: 8px;">Estomihi Pangaribuan</td>
        </tr>
        <tr>
            <td style="text-align: left; padding: 8px;">12S21014</td>
            <td style="text-align: left; padding: 8px;">Fritz Kevin Manurung</td>
        </tr>
    </tbody>
</table>

Import Library dan Load Dataset

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import matplotlib.pyplot as plt

# Machine-specific absolute path to the preprocessed review CSV; it has to be
# adjusted before the notebook can run on any other machine.
file_path = 'D:/Semester 7/SISREK/W11/Tempat-Wisata-Toba-Preprocessing.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

Pembersihan Data

In [None]:
# Drop reviews that miss any field needed downstream. PlaceID is part of the
# subset because astype(str) would otherwise turn a missing PlaceID into the
# literal string 'nan', which groupby/pivot would then treat as a real place.
# .copy() gives an independent frame so the column assignments below cannot
# raise pandas' SettingWithCopyWarning.
data = data.dropna(subset=['ReviewerId', 'PlaceID', 'Rating']).copy()

# The IDs are only used as labels (groupby keys, pivot index/columns), so they
# are stored as strings.
data['ReviewerId'] = data['ReviewerId'].astype(str)
data['PlaceID'] = data['PlaceID'].astype(str)

Tangani Duplikasi Data

In [None]:
# The same reviewer may appear several times for the same place; collapse every
# (ReviewerId, PlaceID) pair into a single row holding the mean of its ratings.
data_cleaned = (
    data
    .groupby(['ReviewerId', 'PlaceID'], as_index=False)['Rating']
    .mean()
)

Pembagian Dataset (Train, Validation, Test)

In [None]:
# First hold out 30% of the rows, then cut that hold-out in half, which yields
# a 70% / 15% / 15% train / validation / test partition.
train_data, temp_data = train_test_split(data_cleaned, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# Give every split a fresh 0..n-1 index instead of the shuffled original labels.
train_data, val_data, test_data = (
    split.reset_index(drop=True) for split in (train_data, val_data, test_data)
)

Buat Matriks User-Item dan Implementasi SVD

In [None]:
# Build the user-item matrix from the training split: one row per reviewer,
# one column per place. (reviewer, place) pairs with no training rating are
# filled with 0.
user_item_matrix = train_data.pivot(index='ReviewerId', columns='PlaceID', values='Rating').fillna(0)
user_item_matrix_np = user_item_matrix.values

# SVD model with the chosen setting (n_components=20).
svd = TruncatedSVD(n_components=20, random_state=42)
# NOTE: fit_transform returns the matrix projected onto the components, i.e.
# the left singular vectors ALREADY scaled by the singular values (U * Sigma).
U = svd.fit_transform(user_item_matrix_np)
Sigma = svd.singular_values_
VT = svd.components_

# Low-rank reconstruction of the rating matrix. Because U already carries
# Sigma, it is multiplied by VT only; inserting diag(Sigma) again would apply
# the singular values twice (U * Sigma^2 * VT) and inflate every prediction.
reconstructed_matrix = np.dot(U, VT)
# Clamp the predictions to the [1, 5] range.
reconstructed_matrix = np.clip(reconstructed_matrix, 1, 5)

# Keep the reconstruction as a labelled frame so it can be looked up by
# ReviewerId / PlaceID during evaluation.
user_item_reconstructed = pd.DataFrame(reconstructed_matrix, index=user_item_matrix.index, columns=user_item_matrix.columns)

Prediksi dengan Matriks Rekonstruksi

In [None]:
def lookup_predicted_ratings(ratings, reconstructed):
    """Look up the reconstructed rating for every (ReviewerId, PlaceID) row.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Frame with 'ReviewerId' and 'PlaceID' columns.
    reconstructed : pandas.DataFrame
        Predicted ratings indexed by ReviewerId with PlaceID columns. Both
        axes must hold unique labels (get_indexer requires that).

    Returns
    -------
    pandas.Series
        Float series aligned with ``ratings.index``. Rows whose reviewer or
        place is not present in ``reconstructed`` get NaN.
    """
    # get_indexer yields -1 for labels that are absent from the axis.
    row_positions = reconstructed.index.get_indexer(ratings['ReviewerId'])
    col_positions = reconstructed.columns.get_indexer(ratings['PlaceID'])
    known = (row_positions >= 0) & (col_positions >= 0)

    predicted = np.full(len(ratings), np.nan)
    predicted[known] = reconstructed.to_numpy()[row_positions[known], col_positions[known]]
    return pd.Series(predicted, index=ratings.index)


# One shared, vectorized lookup for both splits instead of two copies of a
# row-by-row apply; it also works when a split is empty.
val_data['PredictedRating'] = lookup_predicted_ratings(val_data, user_item_reconstructed)
test_data['PredictedRating'] = lookup_predicted_ratings(test_data, user_item_reconstructed)

Evaluasi Metrik

In [None]:
# Only rows that actually received a prediction can be scored.
test_data_clean = test_data.dropna(subset=['PredictedRating'])

# RMSE and MAE between the true and the predicted ratings.
y_true = test_data_clean['Rating']
y_pred = test_data_clean['PredictedRating']
rmse_test = np.sqrt(mean_squared_error(y_true, y_pred))
mae_test = mean_absolute_error(y_true, y_pred)
print(f"RMSE: {rmse_test:.2f}, MAE: {mae_test:.2f}")

# Precision@K
def precision_at_k(test_data, k):
    """Mean per-reviewer precision among the k highest-predicted rows.

    For every 'ReviewerId' group the rows are ordered by 'PredictedRating'
    (highest first); the number of rows among the first k whose 'Rating' is
    at least 4 is divided by k. The per-reviewer values are then averaged.
    """
    per_user = [
        (ratings.sort_values(by='PredictedRating', ascending=False).head(k)['Rating'] >= 4).sum() / k
        for _, ratings in test_data.groupby('ReviewerId')
    ]
    return np.mean(per_user)

# Recall@K
def recall_at_k(test_data, k):
    """Mean per-reviewer recall among the k highest-predicted rows.

    A row counts as relevant when its 'Rating' is at least 4. For every
    'ReviewerId' group, the relevant rows found among the k highest
    'PredictedRating' rows are divided by all relevant rows of that reviewer;
    a reviewer without any relevant row contributes 0.
    """
    scores = []
    for _, ratings in test_data.groupby('ReviewerId'):
        # Relevance flags in ranking order (best prediction first).
        liked = ratings.sort_values(by='PredictedRating', ascending=False)['Rating'] >= 4
        total_liked = liked.sum()
        if total_liked > 0:
            scores.append(liked.head(k).sum() / total_liked)
        else:
            scores.append(0)
    return np.mean(scores)

# MAP@K
def mean_average_precision_at_k(test_data, k):
    """Mean over reviewers of the average precision at cutoff k.

    A row is relevant when its 'Rating' is at least 4. For each 'ReviewerId'
    group the rows are ranked by 'PredictedRating' (highest first), precision
    is taken at every rank within the first k that holds a relevant row, and
    the sum is divided by ``min(k, number of relevant rows of the reviewer)``.
    Normalizing by that count (rather than by k) lets a perfect ranking score
    1.0 even when a reviewer has fewer than k relevant rows. A reviewer
    without any relevant row contributes 0, in line with recall_at_k.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Needs 'ReviewerId', 'Rating' and 'PredictedRating' columns.
    k : int
        Ranking cutoff.

    Returns
    -------
    float
        Mean of the per-reviewer average precisions.
    """
    average_precisions = []
    for _, group in test_data.groupby('ReviewerId'):
        ranked = group.sort_values(by='PredictedRating', ascending=False)
        hits = (ranked['Rating'] >= 4).to_numpy()
        total_relevant = int(hits.sum())
        if total_relevant == 0:
            average_precisions.append(0.0)
            continue

        top_hits = hits[:k]
        ranks = np.arange(1, len(top_hits) + 1)
        # Precision at each rank; only ranks holding a relevant row are summed.
        precision_at_rank = np.cumsum(top_hits) / ranks
        cumulative_precision = precision_at_rank[top_hits].sum()
        average_precisions.append(cumulative_precision / min(k, total_relevant))
    return np.mean(average_precisions)

# The three ranking metrics, evaluated in the same order for each cutoff.
ranking_metric_functions = (precision_at_k, recall_at_k, mean_average_precision_at_k)

# Metrics for the top 5
precision_5, recall_5, map_5 = (
    metric(test_data_clean, k=5) for metric in ranking_metric_functions
)

# Metrics for the top 10
precision_10, recall_10, map_10 = (
    metric(test_data_clean, k=10) for metric in ranking_metric_functions
)

print(f"Precision@5: {precision_5:.2f}, Recall@5: {recall_5:.2f}, MAP@5: {map_5:.2f}")
print(f"Precision@10: {precision_10:.2f}, Recall@10: {recall_10:.2f}, MAP@10: {map_10:.2f}")

Visualisasi Performa

In [None]:
def plot_metric_bars(labels, values, title, color, figsize):
    """Draw one bar chart of metric values with each value written above its bar."""
    plt.figure(figsize=figsize)
    plt.bar(labels, values, color=color)
    plt.title(title)
    plt.ylabel('Nilai')
    # Leave head-room above the tallest bar for its text label.
    plt.ylim(0, max(values) + 0.1)
    for position, value in enumerate(values):
        plt.text(position, value + 0.01, f'{value:.2f}', ha='center')
    plt.show()


# Data for the rating metrics
rating_metrics = ['RMSE', 'MAE']
rating_values = [rmse_test, mae_test]
plot_metric_bars(rating_metrics, rating_values, 'Rating Metriks (RMSE dan MAE)', 'blue', (8, 6))

# Data for the ranking metrics
ranking_metrics = ['Precision@5', 'Recall@5', 'MAP@5', 'Precision@10', 'Recall@10', 'MAP@10']
ranking_values = [precision_5, recall_5, map_5, precision_10, recall_10, map_10]
plot_metric_bars(ranking_metrics, ranking_values, 'Ranking Metriks (Precision, Recall, MAP)', 'orange', (10, 6))

Top 5 Rekomendasi

In [None]:
def get_top_5_recommendations_svd(test_data, user_item_matrix, reconstructed_matrix,
                                  top_n=5, exclude_rated=True):
    """Rank places for every test reviewer that also appears in the training matrix.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Frame with a 'ReviewerId' column; each distinct reviewer is handled once.
    user_item_matrix : pandas.DataFrame
        Training ratings, reviewers as index and places as columns. A cell
        that is 0 (or NaN) is taken to mean "not rated in training".
    reconstructed_matrix : numpy.ndarray
        Predicted ratings, positionally aligned with ``user_item_matrix``.
    top_n : int, default 5
        How many places to keep per reviewer.
    exclude_rated : bool, default True
        When True, places the reviewer already rated in training are removed
        before ranking, so the list contains only new suggestions. Pass False
        to rank over every place.

    Returns
    -------
    list of (reviewer_id, pandas.DataFrame)
        One entry per reviewer found in ``user_item_matrix``; the frame has
        'PlaceID' and 'PredictedRating' columns sorted by descending
        prediction and holds at most ``top_n`` rows.
    """
    recommendations = []
    place_ids = user_item_matrix.columns
    train_ratings = user_item_matrix.to_numpy()

    for user_id in test_data['ReviewerId'].unique():
        # Reviewers absent from training have no row in the reconstruction.
        if user_id not in user_item_matrix.index:
            continue
        user_index = user_item_matrix.index.get_loc(user_id)

        candidates = pd.DataFrame({
            'PlaceID': place_ids,
            'PredictedRating': reconstructed_matrix[user_index],
        })
        if exclude_rated:
            user_row = train_ratings[user_index]
            not_rated = pd.isna(user_row) | (user_row == 0)
            candidates = candidates[not_rated]

        ranked = candidates.sort_values(by='PredictedRating', ascending=False)
        recommendations.append((user_id, ranked.head(top_n)))
    return recommendations

top_5_recommendations = get_top_5_recommendations_svd(test_data_clean, user_item_matrix, reconstructed_matrix)

# Show the recommendation tables of the first five reviewers only, each
# followed by a blank line.
preview = top_5_recommendations[:5]
for user, recs in preview:
    print(f"Top 5 tempat wisata untuk user {user}:")
    print(recs, end="\n\n")