## Load API keys

In [2]:
# Import libraries
import requests
import os
from dotenv import load_dotenv

# Load the tripadvisor .env file explicitly
trip_advisor_env_loaded = load_dotenv("tripadvisor.env")

# Check it loaded succesfully
print("Was .env loaded successfully?", trip_advisor_env_loaded)

# Fetch API key
TA_API_KEY = os.getenv("TRIP_ADVISOR_API_KEY")

#print("Trip Advisor API Key:", TA_API_KEY[:5])  # Print first 5 characters of the key to ensure it loaded

Was .env loaded successfully? True


In [3]:
# Base URL for API
BASE_URL = 'https://api.content.tripadvisor.com/api/v1'

print("Using BASE_URL:", BASE_URL)

Using BASE_URL: https://api.content.tripadvisor.com/api/v1


## Function to cache location name with corresponding location ID (This to avoid querying the API repeatedly)

In [6]:
# Initialize empty location ID cache
locations_cache = {}

In [8]:
# Define functio to cache query location IDs
def update_location_cache(location_name, location_ID):
    # If not cached add to cache dictionary
    if location_name not in locations_cache:
        locations_cache[location_name] = location_ID
        # Debug print
        print(f"Added {location_name}: {location_ID} to cache.")
    # Else, do not add and skip
    else:
        # Debug print
        print(f"{location_name} already exists in cache.")

# App Features

## Location ID Function

In [10]:
# Define function to get location ID (Location ID is a dependancy for the rest of TripAdvisor's GET methods)
def get_location_id(location_name):
    # Get's location ID from cache or trip advisor
    if location_name in locations_cache:
        print(f"Cache hit: {location_name} → {locations_cache[location_name]}")
        return locations_cache[location_name]
    
    # API request to TripAdvisor
    url = "https://api.content.tripadvisor.com/api/v1/location/search"
    
    headers = {"accept": "application/json"}
    
    params = {
        "key": TA_API_KEY,
        "searchQuery": location_name,
    }
    
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        data = response.json()
        if "data" in data and data["data"]:  # Check if results exist
            location_id = data["data"][0]["location_id"]  # Get first result
            update_location_cache(location_name, location_id)  # Cache it
            return location_id
        else:
            print(f"No results found for {location_name}.")
            return None
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

## Fetch Location Data Function
- Fetches location_id, address, category, rating, number of reviews, reviews, photos, and descriptions

In [12]:
# Define function to get static Trip Advisor Data and store in cache
"""
This function's purpose is to fetch from the Trip Advisor API static data that does not change frequently
such as the ones listed in the function below in a cache to prevent from repeatedly querying the API, 
esentially saving on API costs and computational power.
"""

def fetch_static_location_info(location_name):
    """
    Fetches static information for a given location from TripAdvisor,
    then caches and returns the data in a structured dictionary.
    """
    # Get location id from a previously defined function.
    location_id = get_location_id(location_name)
    if not location_id:
        print(f"Could not retrieve location ID for {location_name}")
        return None

    # Build the endpoint URLs using the location ID.
    details_url = f"https://api.content.tripadvisor.com/api/v1/location/{location_id}/details"
    photos_url = f"https://api.content.tripadvisor.com/api/v1/location/{location_id}/photos"
    reviews_url = f"https://api.content.tripadvisor.com/api/v1/location/{location_id}/reviews"

    # Define headers
    headers = {'accept': 'application/json'}

    # Define parameters
    params = {'key': TA_API_KEY}

    # Fetch data from the endpoints.
    details = requests.get(details_url, headers=headers, params=params).json()
    photos = requests.get(photos_url, headers=headers, params=params).json()
    reviews = requests.get(reviews_url, headers=headers, params=params).json()

   # Process photos: extract a list of dictionaries containing both original and thumbnail URLs.
    photo_data = [
        {
            "original": photo.get("images", {}).get("original", {}).get("url", ""),
            "thumbnail": photo.get("images", {}).get("thumbnail", {}).get("url",
                             photo.get("images", {}).get("original", {}).get("url", ""))
        }
        for photo in photos.get("data", [])
    ][:5] # Only display the top 5 photos


    # Extract a summary of reviews. For now, we take the first review text.
    review_summary = ""
    reviews_data = reviews.get("data", [])
    if reviews_data:
        review_summary = reviews_data[0].get("text", "")

    # Cache the static data with additional fields
    locations_cache[location_name] = {
        'location_id': location_id,
        'name': details.get('name', 'N/A'),
        'description': details.get('description', 'No description available.'),
        'address': details.get('address', 'N/A'),
        'phone_number': details.get('phone', 'N/A'),
        'web_url': details.get('web_url', 'N/A'),
        'category': details.get('category', 'N/A'),
        'latitude': details.get('latitude', 'N/A'),
        'longitude': details.get('longitude', 'N/A'),
        'timezone': details.get('timezone', 'N/A'),
        'rating': details.get('rating', 'N/A'),
        'num_reviews': details.get('num_reviews', 0),
        'photo_count': details.get('photo_count', 'N/A'),
        'photos': photo_data,
        'review_summary': review_summary,
        'hours': details.get('hours', {}),  # May include periods and weekday_text
        'ancestors': details.get('ancestors', []),
    }

    print(f"Stored static information for {location_name} in cache.")
    return locations_cache[location_name]

## Function that retrieves location info (To be used conditionally for the RAG Framework)

In [14]:
# Define function to retrieve location info
def get_location_info(location_name, info_type=None):
    """
    Retrieve specific or full location info from the cache.
    If not present, fetch the static information using fetch_static_location_info.
    """
    if location_name not in locations_cache:
        print(f"{location_name} not found in cache. Fetching now...")
        # Use fetch static function to get the information if not in the cache
        fetch_static_location_info(location_name)
    
    place_data = locations_cache.get(location_name, {})
    
    # If no specific info type is requested, return everything
    if not info_type:
        return place_data

    # Otherwise, return the specific requested info
    return place_data.get(info_type, "Info not available")

# RAG System

In [26]:
# Define function to load json files
def load_json_files(json_folder):
    json_files = []
    for filename in os.listdir(json_folder):
        if filename.endswith('.json'):
            file_path = os.path.join(json_folder, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                json_files.append(data)

    return json_files

In [28]:
# Batch process the pertinent json files
json_folder_path = 'json files'
json_files = load_json_files(json_folder_path)

#Print success
print('Successfully loaded files!')

#print(json_files)

Successfully loaded files!


In [30]:
# Get names from the json files above to pass onto the fetch_static_information function 
location_queries = []

for data in json_files:
    for entry in data:
        metadata = entry.get("metadata", {})
        landmark_name = metadata.get("name")
        city = metadata.get("city")
        if landmark_name and city:
            # Construct specific search query(best format for getting information):
            search_query = f"{landmark_name}, {city}, Puerto Rico"
            location_queries.append(search_query)

In [4]:
# Uncomment to use
#print(location_queries)

In [6]:
# Uncomment to use

# # Get static info for each name in the location list
# import time

# for query in location_queries:
#     fetch_static_location_info(query)
#     print(f"Processed: {query}")
#     time.sleep(10) # Wait 10 seconds before the next query

In [39]:
# Check location cache
first_five = list(locations_cache.items())[:5]
print(first_five)

[('El Yunque', {'location_id': '1448148', 'name': 'El Yunque National Forest', 'description': 'Plan a day trip to the only tropical rainforest in the U.S. National Forest System. With 29,000 acres of lush foliage and 25 miles of year-round trails, this is a nature enthusiast’s sanctuary. Attempt to spot elusive wildlife like the Puerto Rican parrot on a rejuvenating hike, soar through the canopies on a zipline, or go horseback riding at Carabalí Rainforest Park. If time permits, visit the Yokahú Observation Tower which has an elevation of 1,575 feet and provides expansive views of the mountains. Sporadic showers are common here—check their website for the latest information about trail statuses and park closures before setting out.', 'address': 'N/A', 'phone_number': '+1 787-888-1880', 'web_url': 'https://www.tripadvisor.com/Attraction_Review-g147324-d1448148-Reviews-El_Yunque_National_Forest-El_Yunque_National_Forest_Puerto_Rico.html?m=66827', 'category': {'name': 'attraction', 'local

In [41]:
# Make new json file (this approach makes the a new json file with the information for landmarks and municipalities all merged)
import json

with open("final_locations_info_cache.json", "w", encoding="utf-8") as f:
    json.dump(locations_cache, f, indent=4, ensure_ascii=False)

print("Merged JSON file created: final_locations_info_cache.json")

Merged JSON file created: final_locations_info_cache.json


In [8]:
# Load Weather Api Key
weather_api_env_loaded = load_dotenv("weatherapi.env")

# Check it loaded succesfully
print("Was .env loaded successfully?", weather_api_env_loaded)

# Fetch API key
WEATHER_API_KEY = os.getenv("WEATHER_API_KEY")

# print("Open Weather API Key:", WEATHER_API_KEY[:5]) # Show the first 5 characters

Was .env loaded successfully? True


In [101]:
BASE_URL = ' http://api.weatherapi.com/v1'
print(BASE_URL)

 http://api.weatherapi.com/v1


In [89]:
# Define folder and file name
folder_path = "json files"  # Folder name
file_name = "updated_puerto_rico_municipalities.json"  # File name

# Construct full path
file_path = os.path.join(folder_path, file_name)

# Read the JSON file
with open(file_path, "r", encoding="utf-8") as file:
    json_file = json.load(file)

print("File loaded successfully!")

File loaded successfully!


In [91]:
# Empty Weather query list to store queries
weather_queries = []

# Iterate through JSON data
for entry in json_file:
    metadata = entry.get("metadata", {})
    city = metadata.get("city")
    
    if city:
        # Construct specific search query
        weather_query = f"{city}, Puerto Rico"
        weather_queries.append(weather_query)

In [93]:
print("Weather Queries List:", weather_queries)

Weather Queries List: ['Adjuntas, Puerto Rico', 'Aguada, Puerto Rico', 'Aguadilla, Puerto Rico', 'Aguas Buenas, Puerto Rico', 'Aibonito, Puerto Rico', 'Añasco, Puerto Rico', 'Arecibo, Puerto Rico', 'Arroyo, Puerto Rico', 'Barceloneta, Puerto Rico', 'Barranquitas, Puerto Rico', 'Bayamón, Puerto Rico', 'Cabo Rojo, Puerto Rico', 'Caguas, Puerto Rico', 'Camuy, Puerto Rico', 'Canóvanas, Puerto Rico', 'Carolina, Puerto Rico', 'Cataño, Puerto Rico', 'Cayey, Puerto Rico', 'Ceiba, Puerto Rico', 'Ciales, Puerto Rico', 'Cidra, Puerto Rico', 'Coamo, Puerto Rico', 'Comerío, Puerto Rico', 'Corozal, Puerto Rico', 'Culebra, Puerto Rico', 'Dorado, Puerto Rico', 'Fajardo, Puerto Rico', 'Florida, Puerto Rico', 'Guánica, Puerto Rico', 'Guayama, Puerto Rico', 'Guayanilla, Puerto Rico', 'Guaynabo, Puerto Rico', 'Gurabo, Puerto Rico', 'Hatillo, Puerto Rico', 'Hormigueros, Puerto Rico', 'Humacao, Puerto Rico', 'Isabela, Puerto Rico', 'Jayuya, Puerto Rico', 'Juana Díaz, Puerto Rico', 'Juncos, Puerto Rico', 'La

In [107]:
# Define function to get weather for locations based on municipality
def get_location_weather(location, weather_type):  
    
    # API endpoints
    endpoint_mapping = {
        "forecast": "forecast.json",
        "current": "current.json",
        "alerts": "alerts.json"
    }

    # Ensure valid weather_type
    if weather_type not in endpoint_mapping:
        return {"error": "Invalid weather type. Choose 'forecast', 'current', or 'alerts'."}

    # Construct URL
    api_url = f"{BASE_URL}/{endpoint_mapping[weather_type]}"

    # Define parameters
    parameters = {
        "key": WEATHER_API_KEY,
        "q": location,
    }

    # Add 'days' parameter only for forecast
    if weather_type == "forecast":
        parameters["days"] = 5  # Adjust based on need (1-10)

    # Make API request
    response = requests.get(api_url, params=parameters)

    # Check response status
    if response.status_code == 200:
        return response.json()
    else:
        return {"error": f"Failed to fetch data: {response.status_code}"}

In [109]:
weather_data = get_location_weather("San Juan, Puerto Rico", "current")
print(weather_data)

{'location': {'name': 'San Juan', 'region': 'Puerto Rico', 'country': 'United States of America', 'lat': 18.4683, 'lon': -66.1061, 'tz_id': 'America/Puerto_Rico', 'localtime_epoch': 1739481815, 'localtime': '2025-02-13 17:23'}, 'current': {'last_updated_epoch': 1739481300, 'last_updated': '2025-02-13 17:15', 'temp_c': 26.1, 'temp_f': 79.0, 'is_day': 1, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 18.6, 'wind_kph': 29.9, 'wind_degree': 86, 'wind_dir': 'E', 'pressure_mb': 1019.0, 'pressure_in': 30.1, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 84, 'cloud': 75, 'feelslike_c': 28.5, 'feelslike_f': 83.3, 'windchill_c': 26.2, 'windchill_f': 79.1, 'heatindex_c': 28.6, 'heatindex_f': 83.5, 'dewpoint_c': 21.9, 'dewpoint_f': 71.5, 'vis_km': 16.0, 'vis_miles': 9.0, 'uv': 0.6, 'gust_mph': 25.0, 'gust_kph': 40.2}}


In [111]:
weather_data = get_location_weather("San Juan, Puerto Rico", "forecast")
print(weather_data)

{'location': {'name': 'San Juan', 'region': 'Puerto Rico', 'country': 'United States of America', 'lat': 18.4683, 'lon': -66.1061, 'tz_id': 'America/Puerto_Rico', 'localtime_epoch': 1739481846, 'localtime': '2025-02-13 17:24'}, 'current': {'last_updated_epoch': 1739481300, 'last_updated': '2025-02-13 17:15', 'temp_c': 26.1, 'temp_f': 79.0, 'is_day': 1, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 18.6, 'wind_kph': 29.9, 'wind_degree': 86, 'wind_dir': 'E', 'pressure_mb': 1019.0, 'pressure_in': 30.1, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 84, 'cloud': 75, 'feelslike_c': 28.5, 'feelslike_f': 83.3, 'windchill_c': 26.2, 'windchill_f': 79.1, 'heatindex_c': 28.6, 'heatindex_f': 83.5, 'dewpoint_c': 21.9, 'dewpoint_f': 71.5, 'vis_km': 16.0, 'vis_miles': 9.0, 'uv': 0.6, 'gust_mph': 25.0, 'gust_kph': 40.2}, 'forecast': {'forecastday': [{'date': '2025-02-13', 'date_epoch': 1739404800, 'day': {'maxtemp_c': 29.2,

In [113]:
weather_data = get_location_weather("San Juan, Puerto Rico", "alerts")
print(weather_data)

{'location': {'name': 'San Juan', 'region': 'Puerto Rico', 'country': 'United States of America', 'lat': 18.4683, 'lon': -66.1061, 'tz_id': 'America/Puerto_Rico', 'localtime_epoch': 1739481850, 'localtime': '2025-02-13 17:24'}, 'alerts': {'alert': []}}
