# Basic Idea-  Here we are going to use machine learning to monitor the condition of historical structures and also build a recommendation engine

# Step-1 --- Preprocessing of the data if needed
# Step-2 --- use image classification and object detection model to identify stuructural issues with the help of open cv and tensorflow
# Step-3 ---split the data
# Step-4 ----Build and train the model
# Step-5 --- evaluate the performance of the model and fine tuning

In [24]:
# Import libraries

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input

In [25]:
train_dir = "C:\\Users\\hp\\Desktop\\Dataset\\Part 1\\train"
test_dir =  "C:\\Users\\hp\\Desktop\\Dataset\\Part 1\\test"

In [26]:
image_height= 150
image_width = 150
batch_size = 32

In [27]:
# preprocee the image
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

In [28]:
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='categorical',  
)
test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='categorical',
)

Found 10235 images belonging to 10 classes.
Found 1474 images belonging to 10 classes.


In [29]:
# Model building
model = Sequential([
    Input(shape=(image_height, image_width, 3)),  
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(len(train_data.class_indices), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [30]:
# Training the model
history = model.fit(
    train_data,
    epochs=10,
    validation_data=test_data
)

Epoch 1/10
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 851ms/step - accuracy: 0.3623 - loss: 1.8296 - val_accuracy: 0.5739 - val_loss: 1.2212
Epoch 2/10
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 793ms/step - accuracy: 0.6491 - loss: 1.0088 - val_accuracy: 0.6567 - val_loss: 0.9865
Epoch 3/10
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 771ms/step - accuracy: 0.7496 - loss: 0.7416 - val_accuracy: 0.6588 - val_loss: 0.9818
Epoch 4/10
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 765ms/step - accuracy: 0.8189 - loss: 0.5456 - val_accuracy: 0.6784 - val_loss: 0.9512
Epoch 5/10
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 748ms/step - accuracy: 0.8778 - loss: 0.3553 - val_accuracy: 0.6479 - val_loss: 1.3777
Epoch 6/10
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 787ms/step - accuracy: 0.9174 - loss: 0.2492 - val_accuracy: 0.6588 - val_loss: 1.4979
Epoc

In [31]:
test_loss, test_acc = model.evaluate(test_data)
print(f"Test accuracy: {test_acc}")

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 183ms/step - accuracy: 0.7150 - loss: 1.4204
Test accuracy: 0.7130258083343506


In [32]:
# second part

In [38]:
tourism_rating = pd.read_csv("tourism_rating.csv",  encoding='ISO-8859-1')
tourism_id = pd.read_csv("tourism_with_id.csv",  encoding='ISO-8859-1')
user_data = pd.read_csv("user.csv", encoding='ISO-8859-1')

In [39]:
print("Tourism Rating Dataset:")
print(tourism_rating.head())

Tourism Rating Dataset:
   User_Id  Place_Id  Place_Ratings
0        1       179              3
1        1       344              2
2        1         5              5
3        1       373              3
4        1       101              4


In [40]:
print("\nTourism ID Dataset:")
print(tourism_id.head())


Tourism ID Dataset:
   Place_Id                         Place_Name  \
0         1                   Monumen Nasional   
1         2                           Kota Tua   
2         3                      Dunia Fantasi   
3         4  Taman Mini Indonesia Indah (TMII)   
4         5           Atlantis Water Adventure   

                                         Description       Category     City  \
0  Monumen Nasional atau yang populer disingkat d...         Budaya  Jakarta   
1  Kota tua di Jakarta, yang juga bernama Kota Tu...         Budaya  Jakarta   
2  Dunia Fantasi atau disebut juga Dufan adalah t...  Taman Hiburan  Jakarta   
3  Taman Mini Indonesia Indah merupakan suatu kaw...  Taman Hiburan  Jakarta   
4  Atlantis Water Adventure atau dikenal dengan A...  Taman Hiburan  Jakarta   

    Price  Rating  Time_Minutes  \
0   20000     4.6          15.0   
1       0     4.6          90.0   
2  270000     4.6         360.0   
3   10000     4.5           NaN   
4   94000     4.5     

In [41]:
print("\nUser Dataset:")
print(user_data.head())


User Dataset:
   User_Id                   Location  Age
0        1      Semarang, Jawa Tengah   20
1        2         Bekasi, Jawa Barat   21
2        3        Cirebon, Jawa Barat   23
3        4         Bekasi, Jawa Barat   21
4        5  Lampung, Sumatera Selatan   20


In [42]:
tourism_id = pd.read_csv("tourism_with_id.csv", encoding='ISO-8859-1', usecols=lambda column: column not in ['Unnamed: 11', 'Unnamed: 12'])


In [43]:
print("\nTourism ID Dataset:")
print(tourism_id.head())


Tourism ID Dataset:
   Place_Id                         Place_Name  \
0         1                   Monumen Nasional   
1         2                           Kota Tua   
2         3                      Dunia Fantasi   
3         4  Taman Mini Indonesia Indah (TMII)   
4         5           Atlantis Water Adventure   

                                         Description       Category     City  \
0  Monumen Nasional atau yang populer disingkat d...         Budaya  Jakarta   
1  Kota tua di Jakarta, yang juga bernama Kota Tu...         Budaya  Jakarta   
2  Dunia Fantasi atau disebut juga Dufan adalah t...  Taman Hiburan  Jakarta   
3  Taman Mini Indonesia Indah merupakan suatu kaw...  Taman Hiburan  Jakarta   
4  Atlantis Water Adventure atau dikenal dengan A...  Taman Hiburan  Jakarta   

    Price  Rating  Time_Minutes  \
0   20000     4.6          15.0   
1       0     4.6          90.0   
2  270000     4.6         360.0   
3   10000     4.5           NaN   
4   94000     4.5     

In [45]:
tourism_data = pd.merge(tourism_rating, tourism_id, on="Place_Id")

In [46]:
print(tourism_data.head())

   User_Id  Place_Id  Place_Ratings       Place_Name  \
0        1       179              3  Candi Ratu Boko   
1       22       179              4  Candi Ratu Boko   
2       40       179              3  Candi Ratu Boko   
3       49       179              5  Candi Ratu Boko   
4       74       179              3  Candi Ratu Boko   

                                         Description Category        City  \
0  Situs Ratu Baka atau Candi Boko (Hanacaraka:Í¶...   Budaya  Yogyakarta   
1  Situs Ratu Baka atau Candi Boko (Hanacaraka:Í¶...   Budaya  Yogyakarta   
2  Situs Ratu Baka atau Candi Boko (Hanacaraka:Í¶...   Budaya  Yogyakarta   
3  Situs Ratu Baka atau Candi Boko (Hanacaraka:Í¶...   Budaya  Yogyakarta   
4  Situs Ratu Baka atau Candi Boko (Hanacaraka:Í¶...   Budaya  Yogyakarta   

   Price  Rating  Time_Minutes                               Coordinate  \
0  75000     4.6          90.0  {'lat': -7.7705416, 'lng': 110.4894158}   
1  75000     4.6          90.0  {'lat': -7.7705416

In [47]:
# Build a recommendation engine

In [60]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

user_item_matrix = tourism_data.pivot_table(
    index="User_Id", columns="Place_Name", values="Place_Ratings"
).fillna(0)


In [61]:
print(user_item_matrix.head())

Place_Name  Air Mancur Menari  Air Terjun Kali Pancur  \
User_Id                                                 
1                         0.0                     0.0   
2                         0.0                     0.0   
3                         0.0                     0.0   
4                         0.0                     0.0   
5                         0.0                     0.0   

Place_Name  Air Terjun Kedung Pedut  Air Terjun Semirang  \
User_Id                                                    
1                               0.0                  0.0   
2                               0.0                  0.0   
3                               0.0                  0.0   
4                               0.0                  0.0   
5                               0.0                  0.0   

Place_Name  Air Terjun Sri Gethuk  Alive Museum Ancol  \
User_Id                                                 
1                             0.0                 0.0   
2       

In [62]:
# Calculate user-user similarities using cosine similarity
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(
    user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index
)

# Preview user similarity matrix
print("User Similarity Matrix:")
print(user_similarity_df.head())


User Similarity Matrix:
User_Id       1         2         3         4         5         6         7    \
User_Id                                                                         
1        1.000000  0.058921  0.010902  0.120602  0.041520  0.027104  0.000000   
2        0.058921  1.000000  0.048176  0.000000  0.086006  0.029943  0.011765   
3        0.010902  0.048176  1.000000  0.028665  0.063653  0.011081  0.065302   
4        0.120602  0.000000  0.028665  1.000000  0.032752  0.116877  0.131601   
5        0.041520  0.086006  0.063653  0.032752  1.000000  0.166165  0.000000   

User_Id       8         9         10   ...       291       292       293  \
User_Id                                ...                                 
1        0.090879  0.097536  0.106227  ...  0.055298  0.016140  0.000000   
2        0.059059  0.010775  0.088733  ...  0.072546  0.086185  0.000000   
3        0.101078  0.069780  0.039720  ...  0.007065  0.076983  0.089604   
4        0.073093  0.061550 

In [71]:
def recommend_for_user(user_id, top_n=5):
    if user_id not in user_similarity_df.index:
        return "User not found!"

    
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)

  
    similar_users_ratings = user_item_matrix.loc[similar_users.index]

 
    weighted_ratings = similar_users_ratings.T.dot(similar_users) / similar_users.sum()

   
    user_ratings = user_item_matrix.loc[user_id]
    recommendations = weighted_ratings[user_ratings == 0].sort_values(ascending=False)

    return recommendations.head(top_n)


user_id = 5
recommendations = recommend_for_user(user_id)

print(f"Top recommendations for user {user_id}:")
print(recommendations)


Top recommendations for user 5:
Place_Name
Keraton Surabaya                 0.393517
Gunung Lalakon                   0.383507
Museum Pos Indonesia             0.375689
Geoforest Watu Payung Turunan    0.375465
Alive Museum Ancol               0.369141
dtype: float64
