In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load datasets
city_df = pd.read_csv("Data/City.csv")
places_df = pd.read_csv("Data/Places.csv")

# Merge datasets on 'City' column
merged_df = pd.merge(city_df, places_df, on="
                     ")


In [2]:
merged_df = merged_df.dropna(subset=['Type'])

In [3]:
merged_df.head(3)

Unnamed: 0,City,Ratings_x,Ideal_duration,Best_time_to_visit,City_desc,Type,Place,Ratings_y,Distance,DistanceFromCity,Place_desc
1,Manali,4.5,2-4,October-June,[' One of the most popular hill stations in Hi...,Valley,2. Engage in the Adventures of Solang Valley,4.6,8 km from city center,8.0,Solang Valley is one of the most popular tour...
2,Manali,4.5,2-4,October-June,[' One of the most popular hill stations in Hi...,Waterfall,3. Jogini Waterfall,4.6,4 km from city center,4.0,Jogini Waterfall is located about 3 kilometres...
3,Manali,4.5,2-4,October-June,[' One of the most popular hill stations in Hi...,Temple,4. Hadimba Temple,4.4,1 km from city center,1.0,"Hadimba temple, away from the hustle and bust..."


In [4]:
merged_df["Best_time_to_visit"].unique()

array(['October-June', 'JulyOctober', 'September-June', 'October-March',
       'September-February', 'November-February', 'April-October', nan,
       'September-March', 'SeptemberJune', '-February', 'March-June',
       'November-March', 'October-February', 'OctoberMarch',
       'October-May', 'December-February', 'July-March', '-'],
      dtype=object)

In [5]:
merged_df["Type"].unique()

array(['Valley', 'Waterfall', 'Temple', 'Gufa', 'Culture', 'Lake', 'Trek',
       'Falls', 'Club', 'Sanctuary', 'Estate', 'Museum', 'Palace',
       'Shopping', 'Festival', 'Fort', 'Park', 'Hills', 'Beach', 'Dam',
       'Lighthouse', 'Church', 'Market', 'Ghat', 'Zoo', 'Aquarium',
       'Gram', 'Garden', 'Bazar', 'Smarak', 'Mahal', 'View Point', 'Mutt',
       'Cave', 'Trail'], dtype=object)

In [6]:
merged_df["City"].unique()

array(['Manali', 'Leh Ladakh', 'Coorg', 'Andaman', 'Lakshadweep', 'Goa',
       'Udaipur', 'Srinagar', 'Gangtok', 'Munnar', 'Varkala',
       'Mcleodganj', 'Rishikesh', 'Alleppey', 'Darjeeling', 'Nainital',
       'Shimla', 'Ooty', 'Jaipur', 'Lonavala', 'Mussoorie', 'Kodaikanal',
       'Dalhousie', 'Pachmarhi', 'Varanasi', 'Mumbai', 'Agra', 'Kolkata',
       'Jodhpur', 'Bangalore', 'Amritsar', 'Delhi', 'Jaisalmer',
       'Mount Abu', 'Wayanad', 'Hyderabad', 'Pondicherry', 'Khajuraho',
       'Chennai', 'Vaishno Devi', 'Ajanta and Ellora Caves', 'Haridwar',
       'Kanyakumari', 'Pune', 'Kochi', 'Ahmedabad', 'Mysore',
       'Chandigarh', 'Hampi', 'Gulmarg', 'Almora', 'Shirdi', 'Auli',
       'Madurai', 'Amarnath', 'Bodh Gaya', 'Mahabaleshwar',
       'Visakhapatnam', 'Kasol', 'Nashik', 'Tirupati', 'Ujjain',
       'Jim Corbett National Park', 'Gwalior', 'Mathura', 'Alibaug',
       'Rameshwaram', 'Vrindavan', 'Coimbatore', 'Lucknow', 'Digha',
       'Dharamshala', 'Kovalam', 'Madiker

In [7]:
# Calculate median values for each column
median_ratings_y = merged_df['Ratings_y'].median()
median_distance = merged_df['DistanceFromCity'].median()

# Fill NaN values in specified columns with median
merged_df['Ratings_y'].fillna(median_ratings_y, inplace=True)
merged_df['DistanceFromCity'].fillna(median_distance, inplace=True)
merged_df['Best_time_to_visit'].fillna(method='ffill', inplace=True)
merged_df['City_desc'].fillna(method='ffill', inplace=True)
merged_df['Distance'].fillna(method='ffill', inplace=True)
merged_df['City_desc'].fillna(method='ffill', inplace=True)
merged_df['Place_desc'].fillna(method='ffill', inplace=True)

In [8]:
merged_df.isna().sum()

City                  0
Ratings_x             0
Ideal_duration        0
Best_time_to_visit    0
City_desc             0
Type                  0
Place                 0
Ratings_y             0
Distance              0
DistanceFromCity      0
Place_desc            0
dtype: int64

In [9]:
merged_df.reset_index(drop=True, inplace=True)
merged_df['Place'] = merged_df['Place'].str.replace(r'^\d+\.\s*', '', regex=True)

In [10]:
merged_df.head(3)

Unnamed: 0,City,Ratings_x,Ideal_duration,Best_time_to_visit,City_desc,Type,Place,Ratings_y,Distance,DistanceFromCity,Place_desc
0,Manali,4.5,2-4,October-June,[' One of the most popular hill stations in Hi...,Valley,Engage in the Adventures of Solang Valley,4.6,8 km from city center,8.0,Solang Valley is one of the most popular tour...
1,Manali,4.5,2-4,October-June,[' One of the most popular hill stations in Hi...,Waterfall,Jogini Waterfall,4.6,4 km from city center,4.0,Jogini Waterfall is located about 3 kilometres...
2,Manali,4.5,2-4,October-June,[' One of the most popular hill stations in Hi...,Temple,Hadimba Temple,4.4,1 km from city center,1.0,"Hadimba temple, away from the hustle and bust..."


In [11]:
merged_df.tail(3)

Unnamed: 0,City,Ratings_x,Ideal_duration,Best_time_to_visit,City_desc,Type,Place,Ratings_y,Distance,DistanceFromCity,Place_desc
1614,Poovar,4.3,1,-,[' Poovar is a small rustic town situated 27 k...,Falls,Thirparappu Falls,4.0,13 km from city center,13.0,Located at a distance of about 55 kilometres ...
1615,Poovar,4.3,1,-,[' Poovar is a small rustic town situated 27 k...,Shopping,Shopping in Poovar,3.0,0 km from city center,0.0,Poovar is a small coastal area where one does...
1616,Poovar,4.3,1,-,[' Poovar is a small rustic town situated 27 k...,Beach,Kovalam Beach,3.1,12 km from city center,12.0,A captivating beach with nature's beauty at i...


In [12]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1617 entries, 0 to 1616
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   City                1617 non-null   object 
 1   Ratings_x           1617 non-null   float64
 2   Ideal_duration      1617 non-null   object 
 3   Best_time_to_visit  1617 non-null   object 
 4   City_desc           1617 non-null   object 
 5   Type                1617 non-null   object 
 6   Place               1617 non-null   object 
 7   Ratings_y           1617 non-null   float64
 8   Distance            1617 non-null   object 
 9   DistanceFromCity    1617 non-null   float64
 10  Place_desc          1617 non-null   object 
dtypes: float64(3), object(8)
memory usage: 139.1+ KB


# Predictive Model

In [13]:
#merged_df.to_csv("Data/training_data.csv")

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Assuming df is the DataFrame containing the data
df = pd.read_csv("Data/mahat.csv")

# Drop irrelevant columns and rows with missing values
df = df.dropna(subset=['Best_time_to_visit', 'Type', 'DistanceFromCity', 'Place'])

# Encode categorical variables
label_encoders = {}
for col in ['Best_time_to_visit', 'Type', 'DistanceFromCity', 'Place', 'City']:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Split the data into features (X) and target (y)
X = df[['City',  'Type']]
y = df['Place']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Example prediction
user_input = {'City': 'Ahmednagar', 'Type': 'Shopping'}
user_input_encoded = {col: label_encoders[col].transform([user_input[col]])[0] if user_input[col] in label_encoders[col].classes_ else -1 for col in user_input}
predicted_place_encoded = clf.predict([list(user_input_encoded.values())])[0]
predicted_place = label_encoders['Place'].inverse_transform([predicted_place_encoded])[0]
print("Predicted place to visit:", predicted_place)

Accuracy: 0.00911854103343465
Predicted place to visit: Radhe Shopping Mall




In [15]:
df = pd.read_csv("Data/training_data.csv")

predicted_row = df[df['Place'] == predicted_place]

# Retrieve the corresponding City and Place_desc
recommended_city = predicted_row['City'].iloc[0]
recommended_place_desc = predicted_row['Place_desc'].iloc[0]
recommended_place_ratings = predicted_row['Ratings_y'].iloc[0]

# Print the results
print("Recommended place to visit:", predicted_place, "\n")
print("Recommended City:", recommended_city, "\n")
print("Place Description:", recommended_place_desc, "\n")
print("Ratings:", recommended_place_ratings, "\n")

Recommended place to visit: Radhe Shopping Mall 

Recommended City: Ahmedabad 

Place Description: Enjoy shopping at Radhe Shopping Mall for a great experience and a variety of brands to choose from. 

Ratings: 3.0 



# Recommender Engine

In [16]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [17]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Convert 'Best_time_to_visit' to dummy variables using one-hot encoding
df = pd.get_dummies(merged_df, columns=['Best_time_to_visit'])

# Concatenate 'City' and numerical features into a single feature vector
df['features'] = df['City'] + ' ' + df['Ratings_y'].astype(str) + ' ' + df['DistanceFromCity'].astype(str)

# TF-IDF Vectorization
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['features'])

# Compute cosine similarity
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [18]:
# Function to recommend places based on similarity
def recommend(place, cosine_sim=cosine_sim):
    places = df[df['Place'] == place]
    
    if places.empty:
        print(f"Error: Place '{place}' not found in the dataset.")
        return
    
    idx = places.index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    place_indices = [i[0] for i in sim_scores]
    return df['Place'].iloc[place_indices]


# Example: Recommend places similar to 'Manali Valley'
recommendations = recommend(predicted_place)
if recommendations is not None:
    print(recommendations)

944                       Radhe Shopping Mall
930                             Kankaria Lake
932                Bhadra Fort & Teen Darwaza
934                    Hathisingh Jain Temple
935                   Law Garden Night Market
936                             City Shopping
937                     Calico Textile Museum
938    Sardar Vallabhai Patel National Museum
940                          Kamala Nehru Zoo
941                             ISKCON Temple
Name: Place, dtype: object
