In [99]:
import pandas as pd
import numpy as np
from itertools import chain
import re
import math
from math import cos, sin, atan2, sqrt
import plotly.express as px
import datetime
from datetime import timedelta, datetime
from plotly.express import timeline
import pickle

# Function to get key from a dictionary based on value
def dict_index_key(val_to_find, DICT):
    for k, v in DICT.items():
        if v == val_to_find:
            return k

# Function to get the next minimum value from a list (negated)
def next_min(lis):
    m = [x * (-1) for x in lis]
    return min(m)

# Function to generate PID from index
def get_pid_from_index(no):
    return 'POI' + str(no + 1)

# Function to extract POI from DataFrame
def get_place(POI, chennai_poi_df):
    c = chennai_poi_df.loc[POI, 'POIs']
    return c


In [100]:
# Load CSV files for Chennai POI data and distance matrix
def load_data():
    chennai_poi_df = pd.read_csv('data/chennai-poi.csv')
    dist_only_matrix_df = pd.read_csv('data/dist_only_matrix.csv')

    # Set PID as index for dataframes
    chennai_poi_df.set_index('PID', inplace=True)
    dist_only_matrix_df.set_index('PID', inplace=True)

    return chennai_poi_df, dist_only_matrix_df

# Example of calling the data loading function
chennai_poi_df, dist_only_matrix_df = load_data()


In [101]:
# Helper function to extract ID part from PID
def extract(st):
    if len(st) == 8:
        return st[2:6]
    else:
        return st[2:7]

# Function to find the distance between two POIs
def poi_finddist(poi1, poi2, chennai_poi_df, dist_only_matrix_df):
    POI1 = poi1.upper()
    POI2 = poi2.upper()

    # Extract the indices for the POIs
    P1 = extract(np.array_str(chennai_poi_df[chennai_poi_df['POIs'] == POI1].index.values))
    P2 = extract(np.array_str(chennai_poi_df[chennai_poi_df['POIs'] == POI2].index.values))

    x = int(P1[3:]) - 1
    y = int(P2[3:]) - 1

    # Return distance based on the order of indices
    if x >= y:
        return dist_only_matrix_df.iloc[x, y]
    else:
        return dist_only_matrix_df.iloc[y, x]


In [102]:
# Define vacation types with associated categories
vac_type = {
    'Adventure and Outdoors': ['Adventure'],
    'Spiritual': ['Religious'],
    'Relaxing': ['Scenic'],
    'City Life': ['Food and Drinks', 'Shopping', 'Shows and Concerts'],
    'Cultural': ['Local Experiences', 'History and Culture', 'Museum']
}

# Function to print user information
def user_info():
    type_print = []
    type_print.append(str('User type choices: '))

    for i in range(0,len(Type)):
      type_print.append(str(str(i+1) +'. '+ Type[i]))

    type_print.append(str('No. of days: '+ str(Duration)))
    type_print.append(str('Budget: '+ str(Budget)))
    type_print.append(str('No. of POIs: '+str(no_of_pois)))
    type_print.append(str('Type: '+TYPE))
    if (Ques == 'y'):
      type_print.append(str('Covering maximum places is a priority.'))
    else:
      type_print.append(str('Covering maximum places is NOT a priority.'))
      #type_print.append(str('Suggested Hotel/Accomodation: '+nearest_hotel))
    type_print.append(str(nearest_hotel))
    
    return type_print

In [103]:
# Sample variable definitions
Type = ['Adventure']  # Example user types
Duration = 5                                   # Example duration in days
Budget = 1500                                  # Example budget
TYPE = 'Adventure'                             # Example trip type
Ques = 'y'                                    # Example priority question response
no_of_pois = 10                               # Number of points of interest
nearest_hotel = 'Hotel XYZ'                   # Suggested hotel/accommodation

# Function definition
def user_info(Type, Duration, Budget, TYPE, Ques, no_of_pois, nearest_hotel):
    type_print = []
    type_print.append('User type choices: ')

    for i in range(0, len(Type)):
        type_print.append(f'{i+1}. {Type[i]}')

    type_print.append(f'No. of days: {Duration}')
    type_print.append(f'Budget: {Budget}')
    type_print.append(f'No. of POIs: {no_of_pois}')
    type_print.append(f'Type: {TYPE}')

    if Ques == 'y':
        type_print.append('Covering maximum places is a priority.')
    else:
        type_print.append('Covering maximum places is NOT a priority.')

    type_print.append(f'Suggested Hotel/Accommodation: {nearest_hotel}')
    
    return type_print

# Example call
user_info_output = user_info(Type, Duration, Budget, TYPE, Ques, no_of_pois, nearest_hotel)
print(user_info_output)


['User type choices: ', '1. Adventure', 'No. of days: 5', 'Budget: 1500', 'No. of POIs: 10', 'Type: Adventure', 'Covering maximum places is a priority.', 'Suggested Hotel/Accommodation: Hotel XYZ']


In [104]:
# Example vacation type mapping
vac_type = {
    'Adventure and Outdoors': ['Adventure'],
    'Spiritual': ['Religious'],
    'Relaxing': ['Scenic'],
    'City Life': ['Food and Drinks', 'Shopping', 'Shows and Concerts'],
    'Cultural': ['Local Experiences', 'History and Culture', 'Museum']
}

# Example user type choices
Type = ['Adventure and Outdoors', 'Cultural']  # Ensure these keys match those in vac_type

def create_user_matrix(Type, vac_type):
    user_matrix = {
        'Shows and Concerts': 0, 'Scenic': 0, 'Local Experiences': 0,
        'Religious': 0, 'History and Culture': 0, 'Museum': 0,
        'Food and Drinks': 0, 'Adventure': 0, 'Shopping': 0
    }

    # Creating user 1D array of vacation types according to input choices
    for i in range(len(Type)):
        pvalue = 5 - i
        if Type[i] in vac_type:  # Check if the type is a valid key
            for j in vac_type[Type[i]]:
                if j == 'History and Culture':
                    user_matrix[j] = pvalue + (0.075 * pvalue)
                elif j == 'Local Experiences':
                    user_matrix[j] = pvalue + (0.055 * pvalue)
                else:
                    user_matrix[j] = pvalue
        else:
            print(f"Warning: '{Type[i]}' not found in vac_type")  # Informative message

    return user_matrix

# Example call
user_matrix = create_user_matrix(Type, vac_type)
print(user_matrix)


{'Shows and Concerts': 0, 'Scenic': 0, 'Local Experiences': 4.22, 'Religious': 0, 'History and Culture': 4.3, 'Museum': 4, 'Food and Drinks': 0, 'Adventure': 5, 'Shopping': 0}


In [105]:
# Convert user matrix to DataFrame
def user_matrix_to_dataframe(user_matrix):
    val = list(user_matrix.values())
    typ = list(user_matrix.keys())  # to be used in cosine similarity
    TEMP = [typ, val]
    
    user_df = pd.DataFrame([val], columns=typ)
    return user_df

# Example call
user_df = user_matrix_to_dataframe(user_matrix)
print(user_df)


   Shows and Concerts  Scenic  Local Experiences  Religious  \
0                   0       0               4.22          0   

   History and Culture  Museum  Food and Drinks  Adventure  Shopping  
0                  4.3       4                0          5         0  


In [106]:
# Helper function to calculate centered cosine values for a list
def cen_cos_h(lis):
    avg = sum(lis) / len(lis)
    return [x - avg for x in lis]

# Cosine similarity formula for two lists
def cencos_formula_h(LIS1, LIS2):
    lis1 = cen_cos_h(LIS1)
    lis2 = cen_cos_h(LIS2)

    prod_lis = [lis1[i] * lis2[i] for i in range(len(lis1)) if lis1[i] != 0 and lis2[i] != 0]

    sum_sq_lis1 = sum([x ** 2 for x in lis1])
    sum_sq_lis2 = sum([x ** 2 for x in lis2])

    sqrt_lis1 = math.sqrt(sum_sq_lis1)
    sqrt_lis2 = math.sqrt(sum_sq_lis2)

    num = sum(prod_lis)
    den = sqrt_lis1 * sqrt_lis2

    try:
        cos = num / den
    except ZeroDivisionError:
        cos = 0

    return cos

# Example call
result = cencos_formula_h([1, 2, 3], [4, 5, 6])
print(result)


0.9999999999999998


In [107]:
# Load J_priority_mapping.csv
def load_priority_mapping():
    J_priority_df = pd.read_csv('data/J_priority_mapping.csv')
    J_priority_df.set_index('PID', inplace=True)
    return J_priority_df

# Example call
J_priority_df = load_priority_mapping()
print(J_priority_df.head())


                        POI  Shows and Concerts  Scenic  Local Experiences  \
PID                                                                          
POI1           Marina Beach                   0       5                  3   
POI2        Fort St. George                   0       3                  2   
POI3     Besant Nagar Beach                   0       4                  3   
POI4  Kapaleeshwarar Temple                   0       0                  4   
POI5          Madras Museum                   0       0                  2   

      Religious  History and Culture  Museum  Food and Drinks  Adventure  \
PID                                                                        
POI1          0                    2       0                4          3   
POI2          0                    5       4                0          1   
POI3          0                    2       0                4          2   
POI4          5                    0       0                0          0 

In [108]:
import pandas as pd

# Sample DataFrame creation for J_priority_df
data = {
    'POI': ['POI1', 'POI2', 'POI3'],
    'Feature1': [0.8, 0.6, 0.9],
    'Feature2': [0.4, 0.5, 0.7],
    'Feature3': [0.3, 0.9, 0.5]
}

J_priority_df = pd.DataFrame(data)

# Example user values (val) - this should match the number of features in J_priority_df
val = [0.7, 0.5, 0.6]  # Example user preferences for the features

# Example cosine similarity function
def cencos_formula_h(poi_values, user_values):
    # Calculate cosine similarity here (this is a placeholder)
    from numpy import dot
    from numpy.linalg import norm
    return dot(poi_values, user_values) / (norm(poi_values) * norm(user_values))

# Function to calculate cosine similarity
def calculate_cosine_similarity(J_priority_df, user_values, cencos_formula_h):
    cos_sim_list_h = []
    cos_sim_dict_h = {}

    for i in range(len(J_priority_df.iloc[:, 1:])):  # Skip the first column (POI names)
        poi_values = list(J_priority_df.iloc[i, 1:])  # Extract POI feature values
        result = cencos_formula_h(poi_values, user_values)  # Calculate cosine similarity
        cos_sim_list_h.append(result)
        cos_sim_dict_h[i] = result

    return cos_sim_list_h, cos_sim_dict_h

# Example call
cos_sim_list_h, cos_sim_dict_h = calculate_cosine_similarity(J_priority_df, val, cencos_formula_h)
print(cos_sim_dict_h)


{0: 0.9500282238697605, 1: 0.9681549171811153, 2: 0.9802746637663793}


In [109]:
# Select and filter POIs based on cosine similarity and user preference
def decide_places(cos_sim_dict_h, Duration, Ques):
    selected = {k: v for k, v in cos_sim_dict_h.items() if v > 0}
    no_of_pois = len(selected)

    # Sort places by priority
    sorted_selected = dict(sorted(selected.items(), key=lambda item: item[1], reverse=True))

    # Adjust POI count based on user preferences
    max_pois_per_day = 6 if Ques == 'y' else 3
    while (no_of_pois / Duration) > max_pois_per_day:
        sorted_selected.popitem()
        no_of_pois = len(sorted_selected)

    return sorted_selected, no_of_pois

# Example call
sorted_selected, no_of_pois = decide_places(cos_sim_dict_h, Duration, Ques)
print(sorted_selected)


{2: 0.9802746637663793, 1: 0.9681549171811153, 0: 0.9500282238697605}


In [110]:
import pandas as pd

# Sample DataFrame for chennai_poi_df
chennai_poi_data = {
    'POIs': ['Beach', 'Temple', 'Museum', 'Park'],
}
# Assuming the POIs are indexed by 'POI1', 'POI2', etc.
chennai_poi_df = pd.DataFrame(chennai_poi_data, index=['POI1', 'POI2', 'POI3', 'POI4'])

# Sample sorted_selected dictionary (mapping index to priority)
sorted_selected = {
    0: 10,  # Example: POI1 has the highest priority
    1: 8,   # Example: POI2 has the second highest priority
    2: 6,   # Example: POI3 has the third highest priority
    3: 4    # Example: POI4 has the lowest priority
}

# Create a DataFrame for the selected POIs with priorities
def create_selected_pois_df(sorted_selected, chennai_poi_df):
    ll = [[k, v] for k, v in sorted_selected.items()]
    sorted_selected_df = pd.DataFrame(ll)
    sorted_selected_df.set_index(0, inplace=True)
    sorted_selected_df.columns = ['sorted priority']

    # Add PID and POIs to the DataFrame
    l1, l2 = [], []
    for index in sorted_selected_df.index:
        pid = 'POI' + str(index + 1)
        l1.append(pid)
        l2.append(chennai_poi_df.loc[pid, 'POIs'])

    sorted_selected_df['PID'] = l1
    sorted_selected_df['POIs'] = l2

    return sorted_selected_df

# Example call
sorted_selected_df = create_selected_pois_df(sorted_selected, chennai_poi_df)
print(sorted_selected_df)


   sorted priority   PID    POIs
0                               
0               10  POI1   Beach
1                8  POI2  Temple
2                6  POI3  Museum
3                4  POI4    Park


In [111]:
import pandas as pd

# Load sorted_selected_df from CSV file
sorted_selected_df = pd.read_csv('data/sorted_selected_df.csv')

# Ensure the 'PID' column is present and correctly formatted
# You might need to adjust the header or index column based on your CSV structure
sorted_selected_df.set_index('PID', inplace=True)

# Sample DataFrame for lat_lng_df containing latitude and longitude
lat_lng_data = {
    'Lat,Lng': ['13.0678,80.2785', '13.0358,80.2498', '13.0604,80.2448', '13.0064,80.2264']
}
# Assuming the POIs are indexed by 'POI1', 'POI2', etc.
lat_lng_df = pd.DataFrame(lat_lng_data, index=['POI1', 'POI2', 'POI3', 'POI4'])

# Function to calculate center coordinates
def calculate_center_coordinates(selected_pois_df, lat_lng_df):
    selected_coord = {}
    for pid in selected_pois_df.index:  # Use index to get PID
        selected_coord[pid] = lat_lng_df.loc[pid, 'Lat,Lng']

    coord_pairs = [list(map(float, v.split(','))) for v in selected_coord.values()]

    return coord_pairs

# Find the center of the coordinates
def find_center(coord_pairs):
    sum_lat = sum([i for i, j in coord_pairs])
    sum_lng = sum([j for i, j in coord_pairs])
    
    return [sum_lat / len(coord_pairs), sum_lng / len(coord_pairs)]

# Example call
coord_pairs = calculate_center_coordinates(sorted_selected_df, lat_lng_df)
center_coordinates = find_center(coord_pairs)
print(center_coordinates)


[13.0426, 80.249875]


In [112]:
# Save the sorted selected POIs to a CSV file
def save_selected_pois_df(sorted_selected_df, file_path='data/sorted_selected_df.csv'):
    sorted_selected_df.to_csv(file_path)

# Example call
save_selected_pois_df(sorted_selected_df)


In [113]:
# Load hotel data and latitudes
def load_hotel_data():
    chennai_Hotels_df = pd.read_csv('data/chennai_Hotels.csv')
    h_lat_df = pd.read_csv('data/h_lat_df.csv')
    h_lat_df.set_index('HID', inplace=True)
    return chennai_Hotels_df, h_lat_df

# Convert index to HID
def ind_to_id(ind):
    return 'HID' + str(ind + 1)

# Convert HID to index
def id_to_ind(hid):
    return int(hid[3:]) - 1

# Calculate the distance between two latitude/longitude points
def calculate_distance(l1, l2):
    R = 6373.0
    lat1, lon1 = map(math.radians, l1)
    lat2, lon2 = map(math.radians, l2)
    dlon, dlat = lon2 - lon1, lat2 - lat1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c  # in kilometers

# Example call
chennai_Hotels_df, h_lat_df = load_hotel_data()


In [121]:
import pandas as pd
import math

# Example DataFrame for lat_lng_df
lat_lng_data = {
    'Lat,Lng': ['13.0678,80.2785', '13.0358,80.2498', '13.0604,80.2448', '13.0064,80.2264']
}
lat_lng_df = pd.DataFrame(lat_lng_data, index=['POI1', 'POI2', 'POI3', 'POI4'])

# Sample sorted_selected_df DataFrame
sorted_selected_df = pd.DataFrame({
    'sorted priority': [10, 8, 6, 4],
    'PID': ['POI1', 'POI2', 'POI3', 'POI4'],
    'POIs': ['Beach', 'Temple', 'Museum', 'Park']
}).set_index('PID')

# Calculate center coordinates
def calculate_center_coordinates(selected_pois_df, lat_lng_df):
    selected_coord = {}
    for pid in selected_pois_df.index:
        selected_coord[pid] = lat_lng_df.loc[pid, 'Lat,Lng']

    coord_pairs = [list(map(float, v.split(','))) for v in selected_coord.values()]
    return coord_pairs

def find_center(coord_pairs):
    sum_lat = sum([i for i, j in coord_pairs])
    sum_lng = sum([j for i, j in coord_pairs])
    
    return [sum_lat / len(coord_pairs), sum_lng / len(coord_pairs)]

# Calculate the center
coord_pairs = calculate_center_coordinates(sorted_selected_df, lat_lng_df)
result = find_center(coord_pairs)  # This should now return [latitude, longitude]

# Validate and use result
if isinstance(result, (list, tuple)) and len(result) == 2:
    center = result
else:
    print("Error: The result does not contain valid center coordinates.")

# Prepare hotel latitude/longitude
h_lat_data = {
    'Lat,Lng': ['13.0678,80.2785', '13.0358,80.2498']  # Example hotel data
}
h_lat_df = pd.DataFrame(h_lat_data)

def prepare_hotel_lat_lng_list(h_lat_df):
    return [list(map(float, lat_lng.split(','))) for lat_lng in h_lat_df['Lat,Lng']]

# Calculate distances
h_lat_lis = prepare_hotel_lat_lng_list(h_lat_df)
hotel_distances = calculate_distances_from_center(center, h_lat_lis)  # Use validated center
print(hotel_distances)


[4.1805555816444, 0.7564065278808132]


In [115]:
# Filter hotels within a certain distance threshold
def filter_by_distance_threshold(threshold_km, hotel_distances, chennai_Hotels_df):
    # Get indices of hotels within the distance threshold
    filtered_indices = [i for i, dist in enumerate(hotel_distances) if dist <= threshold_km]
    
    # Use indices to filter the DataFrame
    filtered_hotels = chennai_Hotels_df.iloc[filtered_indices]
    return filtered_hotels

# Sort hotels based on desirability and price
def sort_hotels_by_priority(hotel_df):
    # Sort first by desirability (descending) and then by price (ascending)
    sorted_hotels = hotel_df.sort_values(by=['Desirable', 'Price'], ascending=[False, True])
    return sorted_hotels

# Example call
threshold_km = 6  # Set the distance threshold
filtered_hotels = filter_by_distance_threshold(threshold_km, hotel_distances, chennai_Hotels_df)
sorted_hotels = sort_hotels_by_priority(filtered_hotels)  # Sort the filtered hotels
print(sorted_hotels)


    HID                        Hotel    Price  Number of Reviews  Desirable
1  HID2               Hilton Chennai  2771.67               4358   4.057939
0  HID1  Hotel Anuraag Villa Chennai   898.92               1305   3.690358


In [116]:
import pandas as pd

# Budget percent table based on user type and budget
def get_percent_table():
    percent_table = {
        '10k': [70, 60, 55],
        '15k': [65, 55, 50],
        '20k': [60, 50, 45],
        '25k': [60, 40, 40],
        '30k': [60, 45, 40],
        '45k': [60, 50, 50],
        '60k': [65, 55, 60],
        'greater': [70, 60, 70]
    }
    percent_table_df = pd.DataFrame(percent_table, index=['Family', 'Friends', 'Individual']) * 0.01
    return percent_table_df

# Determine percent threshold based on budget
def percent_check(budget, user_type, percent_table_df):
    if budget <= 10000:
        column = '10k'
    elif 10000 < budget <= 15000:
        column = '15k'
    elif 15000 < budget <= 20000:
        column = '20k'
    elif 20000 < budget <= 25000:
        column = '25k'
    elif 25000 < budget <= 30000:
        column = '30k'
    elif 30000 < budget <= 45000:
        column = '45k'
    elif 45000 < budget <= 60000:
        column = '60k'
    else:
        column = 'greater'

    return percent_table_df.loc[user_type, column]

# Example call
percent_table_df = get_percent_table()

# User input example
budget = 22000  # Example budget
user_type = 'Friends'  # Example user type

percent_threshold = percent_check(budget, user_type, percent_table_df)

# Output the result
print(f"The percent threshold for a budget of {budget} for {user_type} is: {percent_threshold}")


The percent threshold for a budget of 22000 for Friends is: 0.4


In [117]:
# Check if a hotel fits within the user's budget
def is_hotel_suitable(hid, budget, duration, chennai_Hotels_df, percent_threshold):
    ind = id_to_ind(hid)
    price_per_night = chennai_Hotels_df.loc[ind, 'Price']
    total_price = price_per_night * duration
    price_percent = total_price / budget

    return price_percent >= percent_threshold

# Example call
hotel_suitability = is_hotel_suitable('HID90', Budget, Duration, chennai_Hotels_df, percent_threshold)
print(hotel_suitability)


True


In [125]:
import pandas as pd

# Load hotel data from CSV with 'HID' as the index
def load_hotel_data(file_path):
    df = pd.read_csv(file_path)
    
    # Set 'HID' as the index
    if 'HID' in df.columns:
        df.set_index('HID', inplace=True)
    else:
        raise ValueError("The CSV does not contain the 'HID' column.")
    
    return df

# Filter hotels within a certain distance threshold
def filter_by_distance_threshold(threshold_km, hotel_distances):
    filtered_hotels = [hotel_id for hotel_id, distance in hotel_distances.items() if distance <= threshold_km]
    return filtered_hotels

# Check if a hotel fits within the user's budget
def is_hotel_suitable(hid, budget, duration, chennai_Hotels_df, percent_threshold):
    # Check if hotel exists in the dataframe
    if hid in chennai_Hotels_df.index:
        price_per_night = chennai_Hotels_df.loc[hid, 'Price']
        total_price = price_per_night * duration
        price_percent = total_price / budget
        return price_percent <= percent_threshold
    return False

# Main function to select the final hotel based on all conditions
def select_final_hotel(des_pri, dist_pri, sorted_hotels, hotel_dis_result_dict, budget, duration, chennai_Hotels_df, percent_threshold):
    for des_threshold in des_pri:
        one_row = sorted_hotels.nlargest(des_threshold, 'Desirable')  # Sort hotels by desirability

        for distance_threshold in dist_pri:
            dist_selected = filter_by_distance_threshold(distance_threshold, hotel_dis_result_dict)

            if not dist_selected:
                continue

            price_dict = {k: chennai_Hotels_df.loc[k, 'Price'] for k in dist_selected if k in chennai_Hotels_df.index}
            sorted_prices = sorted(price_dict.items(), key=lambda x: x[1], reverse=True)

            for hotel_id, price in sorted_prices:
                if is_hotel_suitable(hotel_id, budget, duration, chennai_Hotels_df, percent_threshold):
                    return hotel_id

    return None  # No suitable hotel found

# Example initialization of required variables
des_pri = [2]  # Example desired priorities
dist_pri = [5, 10]  # Example distance priorities
Budget = 30000  # User's budget
Duration = 3    # Duration of stay in nights
user_type = 'Family'  # Example user type

# Load hotel data from CSV
chennai_Hotels_df = load_hotel_data('data/Chennai_Hotels.csv')

# Example distance dictionary (Hotel ID -> distance)
hotel_dis_result_dict = {
    'HID1': 4.5,
    'HID2': 6.0,
    'HID3': 5.0
}

# Get the budget percent table
percent_table_df = get_percent_table()
percent_threshold = percent_check(Budget, user_type, percent_table_df)

# Example call to select final hotel
FINAL_HID = select_final_hotel(des_pri, dist_pri, chennai_Hotels_df, hotel_dis_result_dict, Budget, Duration, chennai_Hotels_df, percent_threshold)
print(FINAL_HID)


HID3


In [76]:
# Ensure chennai_Hotels_df is loaded correctly before using it in this module
# Example loading from CSV, adjust the path as necessary.
chennai_Hotels_df = pd.read_csv('data/chennai_hotels.csv')

# Hotel Identification Function
def identify_hotel(FINAL_HID, chennai_Hotels_df):
    INDEX = id_to_ind(FINAL_HID)  # id_to_ind should be defined elsewhere in your code
    nearest_hotel = chennai_Hotels_df.iloc[INDEX, 1]
    return nearest_hotel

# Example call (ensure you pass FINAL_HID when you know its value)
nearest_hotel = identify_hotel(FINAL_HID, chennai_Hotels_df)
print("Nearest Hotel:", nearest_hotel)


Nearest Hotel: Trident, Chennai


In [126]:
print(time_slots_df.head())
print(time_slots_df.dtypes)


          0   1   2   3   4   5   6   7   8   9  ...  56  57  58  59  60  61  \
Section                                          ...                           
1       NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN  ... NaN NaN NaN NaN NaN NaN   
2       NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN  ... NaN NaN NaN NaN NaN NaN   
3       NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN  ... NaN NaN NaN NaN NaN NaN   
4       NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN  ... NaN NaN NaN NaN NaN NaN   
5       NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN  ... NaN NaN NaN NaN NaN NaN   

         62  63  64  65  
Section                  
1       NaN NaN NaN NaN  
2       NaN NaN NaN NaN  
3       NaN NaN NaN NaN  
4       NaN NaN NaN NaN  
5       NaN NaN NaN NaN  

[5 rows x 66 columns]
0     float64
1     float64
2     float64
3     float64
4     float64
       ...   
61    float64
62    float64
63    float64
64    float64
65    float64
Length: 66, dtype: object


In [128]:
import pandas as pd
import numpy as np

# Function to generate Gantt chart data
def generate_gantt_chart(timepoisnew, sorted_selected_df):
    main = []
    
    # Loop over POIs in timepoisnew
    for i in range(len(timepoisnew)):
        # Check for non-null start and finish times
        if pd.notna(timepoisnew.iloc[i, 2]) and pd.notna(timepoisnew.iloc[i, 3]):
            n = 2  # Assuming there are two time slots for POIs
        elif pd.notna(timepoisnew.iloc[i, 2]) and pd.isna(timepoisnew.iloc[i, 3]):
            n = 1  # Assuming one time slot for POIs
        else:
            continue  # Skip if no valid time slots
        
        # Append data for each time slot
        for j in range(n):
            temp = []
            temp.append(timepoisnew.iloc[i, 0])  # PID (or POI)
            temp.append(timepoisnew.iloc[i, 2 + j * 2])  # Start time
            temp.append(timepoisnew.iloc[i, 3 + j * 2])  # Finish time
            main.append(temp)
    
    # Create a DataFrame from the Gantt chart data
    timepoiplotlydf = pd.DataFrame(main, columns=['PID', 'Start', 'Finish'])
    
    # Convert 'Start' and 'Finish' to datetime for proper handling
    timepoiplotlydf['Start'] = pd.to_datetime(timepoiplotlydf['Start'], format='%H:%M', errors='coerce')
    timepoiplotlydf['Finish'] = pd.to_datetime(timepoiplotlydf['Finish'], format='%H:%M', errors='coerce')

    return timepoiplotlydf

# Example call
timepoiplotlydf = generate_gantt_chart(timepoisnew, sorted_selected_df)
print(timepoiplotlydf)


      PID               Start              Finish
0    POI1 1900-01-01 07:00:00 1900-01-01 02:45:00
1    POI1 1900-01-01 10:00:00 1900-01-01 18:30:00
2    POI2 1900-01-01 07:30:00                 NaT
3    POI3 1900-01-01 08:00:00                 NaT
4    POI4 1900-01-01 07:30:00                 NaT
..    ...                 ...                 ...
78  POI69 1900-01-01 09:00:00                 NaT
79  POI70 1900-01-01 04:00:00                 NaT
80  POI71 1900-01-01 23:59:00                 NaT
81  POI72 1900-01-01 12:00:00                 NaT
82  POI73 1900-01-01 09:00:00                 NaT

[83 rows x 3 columns]


In [135]:
from geopy.distance import geodesic

# Function to calculate geodesic distance
def distance(point1, point2):
    """
    point1, point2: tuples containing latitude and longitude (lat, lng)
    Returns: distance in kilometers between point1 and point2.
    """
    return geodesic(point1, point2).kilometers

# Function to calculate distance from hotel (centre) to each POI in the dictionary
def dis_from_centre2(centre, dicti):
    """
    centre: a tuple (lat, lng) representing the hotel location.
    dicti: a dictionary where keys are POI names/IDs and values are tuples of (lat, lng).
    Returns: a dictionary where keys are POI names/IDs and values are the distance from the hotel.
    """
    centre_dict = {}
    for k, v in dicti.items():
        dis = distance(centre, v)  # Calculate distance between the hotel and each POI
        centre_dict[k] = dis
    return centre_dict

# Example data
nearest_hotel = (13.0827, 80.2707)  # Example: Chennai coordinates
sorted_selected = {
    'POI1': (13.067439, 80.237617),  # Example POI 1
    'POI2': (13.0358, 80.2457),      # Example POI 2
    'POI3': (13.0524, 80.2503)       # Example POI 3
}

# Example call
dummy_poi_dis = dis_from_centre2(nearest_hotel, sorted_selected)
print(dummy_poi_dis)


{'POI1': 3.9653074155710737, 'POI2': 5.854365332783321, 'POI3': 4.016444145513646}


In [137]:
import pandas as pd

# Placeholder function: handles edge cases related to time slot data
def easy_on_the_len(lst):
    """
    This function processes the list by removing potential NaN or irrelevant values.
    It ensures only valid time slots are kept for further filtering.
    """
    return [x for x in lst if pd.notna(x)]  # Filter out NaN values or any invalid entries

# Function to filter POIs based on time slots
def filter_pois(df, time_slots_df):
    """
    df: DataFrame containing POIs and their relevant data
    time_slots_df: DataFrame containing time slots for different POIs
    Returns: A DataFrame where each row represents the POIs that match the time slot sections
    """
    s_lis = []
    for i in range(len(df)):
        s_lis.append(df.iloc[i, 1])  # Collect POI names from the df (assumed in column index 1)
    
    a_of_a = []
    for i in range(len(time_slots_df)):  # Iterate through sections of time slots
        b = easy_on_the_len(list(time_slots_df.iloc[i, :]))  # Clean up time slots data for this section
        a = []
        for k in range(len(s_lis)):  # Check if the selected POIs are in the cleaned time slots list
            if s_lis[k] in b:
                a.append(s_lis[k])  # Keep the POI if it matches the time slot
        a_of_a.append(a)  # Append filtered POIs for this time slot section

    # Convert filtered results into a DataFrame
    DF = pd.DataFrame(a_of_a)
    return DF

# Example usage with assumed sorted_selected_df and timepoiplotlydf
# Ensure 'sorted_selected_df' has POI data and 'timepoiplotlydf' has time slot data
selected_time_slots = filter_pois(sorted_selected_df, timepoiplotlydf)

# Print the output DataFrame for filtered POIs based on time slots
print(selected_time_slots)



Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82]

[83 rows x 0 columns]


In [138]:
# Function to calculate the distance between two POIs using a distance matrix
def pid_finddist(P1, P2, dist_only_matrix_df):
    x = int(P1[3:]) - 1
    y = int(P2[3:]) - 1
    if x >= y:
        return dist_only_matrix_df.iloc[x, y]
    else:
        return dist_only_matrix_df.iloc[y, x]

# Example usage (ensure dist_only_matrix_df is defined beforehand)
distance = pid_finddist('PID001', 'PID002', dist_only_matrix_df)
print("Distance between PIDs:", distance)


Distance between PIDs: 8155


In [140]:
# Function to count non-empty lists in a dictionary
def len_wo_null(dict_of_lis):
    c = 0
    for v in dict_of_lis.values():
        if v != []:  # Only count non-empty lists
            c += 1
    return c  # Return the count of non-empty lists

# Example usage
rings = {
    'ring1': [1, 2, 3],
    'ring2': [],
    'ring3': [4, 5],
    'ring4': [],
    'ring5': [6]
}

result = len_wo_null(rings)
print("Non-empty lists count:", result)



Non-empty lists count: 3


In [147]:
import pandas as pd
from itertools import chain

# Function to calculate distance between two points
def pid_finddist(pid1, pid2, dist_only_matrix_df):
    # Get the distance from the distance matrix
    return dist_only_matrix_df.loc[pid1, pid2]

# Function to count non-empty lists in a dictionary
def len_wo_null(dict_of_lis):
    c = 0
    for k, v in dict_of_lis.items():
        if v == []:
            c += 1
    return len(dict_of_lis) - c

# Function to calculate routes for each day
def calculate_routes(Duration, rings, dist_only_matrix_df, chennai_poi_df):
    main_routes = []

    for day in range(Duration):
        day_route = []
        route_counter = rings[5][day]  # Starting point for the day
        day_route.append(route_counter)

        i = 1
        flag = 0
        while i != len_wo_null(rings):
            store_dist = {}
            for l in rings[5 + i]:
                dist = pid_finddist(route_counter, l, dist_only_matrix_df)
                store_dist[l] = dist

            dist_sorted = dict(sorted(store_dist.items(), key=lambda item: item[1], reverse=False))

            if flag == len(rings[5 + i]):
                break

            min_pid = list(dist_sorted.keys())[flag]

            if min_pid in chain(*main_routes):
                flag += 1
                continue
            else:
                route_counter = min_pid
                day_route.append(route_counter)
                i += 1
                flag = 0

        main_routes.append(day_route)

    # Convert routes to POI names
    new_all_routes = []
    for i in main_routes:
        row = []
        for j in i:
            # Ensure that j is a valid index in the POI DataFrame
            if j in chennai_poi_df.index:
                row.append(chennai_poi_df.loc[j, 'POIs'])
            else:
                print(f"Warning: {j} not found in chennai_poi_df index.")
        new_all_routes.append(row)

    return new_all_routes

# Load data from CSV files
dist_only_matrix_df = pd.read_csv('data/dist_only_matrix.csv', index_col=0)
chennai_poi_df = pd.read_csv('data/chennai-poi.csv', index_col=0)

# Print the indices of the POI DataFrame to verify
# print("POI DataFrame index:", chennai_poi_df.index.tolist())

# Initialize required variables
Duration = 3  # Number of days
rings = {
    5: ['POI1', 'POI2', 'POI3'],  # Updated to match the POI DataFrame indices
    6: ['POI4', 'POI5'],          # Updated
    7: ['POI6', 'POI7']           # Updated
}

# Calculate routes
all_routes = calculate_routes(Duration, rings, dist_only_matrix_df, chennai_poi_df)
print("All routes:", all_routes)


All routes: [['Marina Beach', 'Fort St. George', 'Valluvar Kottam'], ['Kapaleeshwarar Temple', 'Government Museum Chennai', 'Arignar Anna Zoological Park'], ['Santhome Cathedral Basilica']]


In [161]:
import pandas as pd
import numpy as np
import random

# Load chennai_poi_df from CSV
chennai_poi_df = pd.read_csv('data/chennai-poi.csv', index_col=0)

# Initialize time_rings
time_rings = {
    0: "09:00 - 10:00",
    1: "10:00 - 11:00",
    2: "11:00 - 12:00",
    3: "12:00 - 13:00",
    4: "13:00 - 14:00",
    5: "14:00 - 15:00",
    6: "15:00 - 16:00",
    7: "16:00 - 17:00",
    8: "17:00 - 18:00",
    9: "18:00 - 19:00",
    10: "19:00 - 20:00",
    11: "20:00 - 21:00",
}

# Initialize rings
rings = {
    5: ['POI1', 'POI2'],  # Day 1
    6: ['POI3', 'POI4'],  # Day 2
    7: ['POI5', 'POI6'],  # Day 3
    8: [],                # No POIs for this day
    9: [],                # No POIs for this day
    10: [],               # No POIs for this day
    11: [],               # No POIs for this day
    12: [],               # No POIs for this day
    13: [],               # No POIs for this day
}

# Function to get PID from place name
def get_pid(place, chennai_poi_df):
    matched_pids = chennai_poi_df[chennai_poi_df['POIs'] == place].index.values
    if matched_pids.size > 0:
        return matched_pids[0]
    return None

# Function to get the time slot for a place
def get_time(name_place, rings, time_rings, chennai_poi_df):
    pid = get_pid(name_place, chennai_poi_df)
    
    if pid is None:
        # Generate a random time slot
        random_time_slot = random.choice(list(time_rings.values()))
        return random_time_slot

    for i in range(5, 14):
        if rings[i]:
            if pid in rings[i]:
                time_slot = dict_index_key(rings[i], rings)  # Ensure dict_index_key is defined
                return time_rings[time_slot]
    return None

# Example usage
time = get_time('POI1', rings, time_rings, chennai_poi_df)  # Use a POI name, even if it doesn't exist
print("Time slot:", time)


Time slot: 11:00 - 12:00


In [176]:
import pandas as pd

# Load the data from the CSV file
df = pd.read_csv('data/sorted_selected_df.csv')

# Extracting POIs and their PIDs into a structured format
big_la = []

# Assuming the DataFrame has the columns as described: 'PID' and 'POIs'
for day in range(3):  # Let's say you want to create itineraries for 3 days
    day_pois = df['POIs'].tolist()[day*3: (day + 1) * 2  # Taking 3 POIs per day
    big_la.append(day_pois)

# Printing the structured POIs for verification
for i, day in enumerate(big_la):
    print(f'Day {i + 1}: {day}')

# Example output formatting
final_itinerary = []

for day in range(len(big_la)):
    day_itinerary = f'Day {day + 1}: '
    final_itinerary.append(day_itinerary)
    
    for poi in big_la[day]:
        # Dummy placeholders for time and endtime
        time = "08:00"  # Replace with actual time fetching logic
        endtime = get_endtime(poi)  # Assuming you have this function defined
        itinerary_entry = f"{poi} (Anytime after {time} and before {endtime[0]})"
        
        final_itinerary.append(itinerary_entry)

# Print the final output
print("Final Output:")
for line in final_itinerary:
    print(line)


Day 1: ['Beach', 'Temple', 'Museum']
Day 2: ['Park']
Day 3: []
Final Output:
Day 1: 
Beach (Anytime after 08:00 and before 17:00)
Temple (Anytime after 08:00 and before 17:00)
Museum (Anytime after 08:00 and before 17:00)
Day 2: 
Park (Anytime after 08:00 and before 17:00)
Day 3: 
