In [2]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import numpy as np


In [4]:
df = pd.read_csv("C:/Users/DEEPADHARSHINI/OneDrive/Desktop/Tourism/MergedTourismData.csv")

# Filter necessary columns
df = df[['UserId', 'Attraction', 'Rating']].dropna()

In [32]:
#user_encoder = LabelEncoder()
#item_encoder = LabelEncoder()

#df['UserId_enc'] = user_encoder.fit_transform(df['UserId'])
#df['Attraction_enc'] = item_encoder.fit_transform(df['Attraction'])

In [5]:
user_item_matrix = df.pivot_table(index='UserId', columns='Attraction', values='Rating').fillna(0)

print(user_item_matrix.head())

user_item_matrix.shape

# Transpose to get item-item matrix
#item_user_matrix = user_item_matrix.T

# Compute item-item similarity
#item_similarity = cosine_similarity(item_user_matrix)

Attraction  Balekambang Beach  Bromo Tengger Semeru National Park  \
UserId                                                              
14                        0.0                                 0.0   
16                        0.0                                 0.0   
20                        0.0                                 0.0   
23                        0.0                                 0.0   
25                        0.0                                 5.0   

Attraction  Coban Rondo Waterfall  Goa Cina Beach  Jodipan Colorful Village  \
UserId                                                                        
14                            0.0             0.0                       0.0   
16                            0.0             0.0                       0.0   
20                            0.0             0.0                       0.0   
23                            0.0             0.0                       0.0   
25                            0.0         

(33526, 30)

In [6]:
n_components = min(20, user_item_matrix.shape[1]) 
svd = TruncatedSVD(n_components=n_components)
user_attraction_matrix_reduced = svd.fit_transform(user_item_matrix)
user_attraction_matrix_reduced.shape

(33526, 20)

In [7]:
knn_model = NearestNeighbors(n_neighbors=5, metric='cosine', algorithm='auto')
knn_model.fit(user_attraction_matrix_reduced)


In [9]:
def recommend_attractions(user_id, num_recommendations=5):
    if user_id not in user_item_matrix.index:
        return "User ID not found! Try with a different ID."
    
   
    user_idx = user_item_matrix.index.get_loc(user_id)
    distances, indices = knn_model.kneighbors([user_attraction_matrix_reduced[user_idx]], n_neighbors=5)
    
    
    similar_users = user_item_matrix.index[indices.flatten()[1:]]  

    
    user_ratings = user_item_matrix.loc[user_id]
    unseen_attractions = user_ratings[user_ratings == 0].index  

   
    attraction_scores = {}
    for sim_user in similar_users:
        for attraction in unseen_attractions:
            attraction_scores[attraction] = attraction_scores.get(attraction, 0) + user_item_matrix.loc[sim_user, attraction]

    
    recommended_attractions = sorted(attraction_scores, key=attraction_scores.get, reverse=True)[:num_recommendations]
    
    return recommended_attractions if recommended_attractions else "No new recommendations found."


In [12]:
user_id = 16
recommended = recommend_attractions(user_id)

print(f"Recommended Attractions for User {user_id}: {recommended}")

Recommended Attractions for User 16: ['Tanah Lot Temple', 'Tegenungan Waterfall', 'Balekambang Beach', 'Bromo Tengger Semeru National Park', 'Coban Rondo Waterfall']


In [13]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

    
# EVALUATING THE RECONSTRUCTION QUALITY OF THE SVD MODEL BY COMPARING THE ORIGINAL AND RECONSTRUCTED USER-ATTRACTION MATRIX  
# COMPUTING RMSE, MSE, MAE, AND R² SCORE TO ASSESS MODEL PERFORMANCE  
reconstructed_matrix = svd.inverse_transform(user_attraction_matrix_reduced)

original = user_item_matrix.to_numpy()
reconstructed = reconstructed_matrix
print(original)

rmse = np.sqrt(mean_squared_error(original, reconstructed))
mse = mean_squared_error(original, reconstructed)
mae = mean_absolute_error(original, reconstructed)
r2 = r2_score(original, reconstructed)

print(f"Reconstruction RMSE: {rmse:.4f}")
print(f"Reconstruction MSE: {mse:.4f}")
print(f"Reconstruction MAE: {mae:.4f}")
print(f"Reconstruction R² Score: {r2:.4f}")

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 5. 0.]
 [0. 0. 0. ... 0. 4. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Reconstruction RMSE: 0.0961
Reconstruction MSE: 0.0092
Reconstruction MAE: 0.0035
Reconstruction R² Score: 0.6685


In [14]:
# Save everything
import joblib
joblib.dump(user_item_matrix, "user_item_matrix.pkl")
joblib.dump(user_attraction_matrix_reduced, "user_matrix_reduced.pkl")
joblib.dump(knn_model, "knn_model.pkl")


['knn_model.pkl']

In [13]:
import streamlit as st
num_rated = (user_item_matrix > 0).sum(axis=1)
st.write("Avg. attractions rated per user:", num_rated.mean())



