In [1]:
from bs4 import BeautifulSoup
import re
import requests
import json
from tabulate import tabulate
from concurrent.futures import ThreadPoolExecutor

# URLs and API endpoints
main_page = "https://www.csusb.edu/recreation-wellness/adventure/trips/all-trips-date"
base_url = "https://www.csusb.edu"
api_url = "https://recshop.csusb.edu/Program/GetOfferingsForSemester"
querystring = {
    "semesterId": "29ff794d-548e-407f-b511-10d04ec2798e",
    "programId": "d921b108-4059-4eef-bd42-9bd9c72c2557"
}
headers = {"cookie": "ASP.NET_SessionId=b4omal1okxattmuwb1rmyqpn"}

# Fetch JS-rendered data from the API
def fetch_api_data():
    try:
        response = requests.get(api_url, headers=headers, params=querystring)
        if response.status_code == 200:
            return json.loads(response.text)
        else:
            print(f"API request failed with status code: {response.status_code}")
            return []
    except Exception as e:
        print(f"Error fetching API data: {e}")
        return []

# Get trip data from the API
api_data = fetch_api_data()

# Create lookup dictionary for spots information from API data
spots_lookup = {}
if api_data:
    for offering in api_data:
        trip_name = offering.get('Name', '')
        spots_total = offering.get('MaxRegistrants', 0)
        spots_taken = offering.get('CurrentRegistrations', 0)
        spots_left = max(0, spots_total - spots_taken)
        spots_lookup[trip_name.strip().lower()] = f"{spots_left} spot(s) left"

# Getting the adventure page to get all the current trips
response = requests.get(main_page)
soup = BeautifulSoup(response.text, 'html.parser')

list_of_href = soup.find_all('a', href=True)
pattern = re.compile(r'\/event\/\d{5,6}')

href_trips = []

for i in list_of_href:
    match = re.search(pattern, str(i))
    if match:
        href_trips.append(match.group())

def fetch_trip_info(page):
    try:
        trip = requests.get(base_url + page)
        trip_soup = BeautifulSoup(trip.text, 'html.parser')
        
        # Extract basic trip information
        trip_date = trip_soup.find('div', class_="event--date").text.strip()
        trip_name = trip_soup.find('h2', class_="event--title").text.strip()
        
        # Try to get spots left information
        fusion_button = trip_soup.find('p', class_="card-text")
        spots_left = "0 spot(s)"
        
        # First check if we have this trip in our API data
        trip_key = trip_name.lower()
        if trip_key in spots_lookup:
            spots_left = spots_lookup[trip_key]
        # If not in API data, try the traditional scraping method
        elif fusion_button is not None and hasattr(fusion_button, 'get') and fusion_button.get('href'):
            try:
                trip_spots = requests.get(fusion_button.get('href'))
                fusions_page = BeautifulSoup(trip_spots.text, 'html.parser')
                spots_left_outer = fusions_page.find('div', class_="tag-bg-grey spots-tag rounded p-1 mr-3 mb-2")
                if spots_left_outer and spots_left_outer.find('p', class_="card-text"):
                    spots_left = spots_left_outer.find('p', class_="card-text").text
            except Exception as e:
                print(f"Error fetching fusion page for {trip_name}: {e}")
        
        return [trip_name, trip_date, spots_left]
    except Exception as e:
        print(f"Error processing {page}: {e}")
        return ["Error", "Error", "Error"]

# Using ThreadPoolExecutor to fetch trip info concurrently
with ThreadPoolExecutor(max_workers=16) as executor:
    scraped_data = list(executor.map(fetch_trip_info, href_trips))

# Filter out error entries
scraped_data = [entry for entry in scraped_data if entry[0] != "Error"]

# Insert header row
scraped_data.insert(0, ["Trip Name", "Trip Date", "Spots left"])

# Print the tabulated data
table = tabulate(scraped_data, headers="firstrow", tablefmt="grid")
print(table)

# Optionally save to a file
with open("csusb_trips.txt", "w") as f:
    f.write(table)

Error fetching API data: Expecting value: line 3 column 5 (char 8)
+------------------------------------------------+---------------------------------+--------------+
| Trip Name                                      | Trip Date                       | Spots left   |
| Disneyland Shuttle - SBC                       | March 2, 2025                   | 0 spot(s)    |
+------------------------------------------------+---------------------------------+--------------+
| Queer Skies Ahead Hike - SBC                   | March 7, 2025                   | 0 spot(s)    |
+------------------------------------------------+---------------------------------+--------------+
| Silverwood Lake Kayak - SBC                    | March 9, 2025                   | 0 spot(s)    |
+------------------------------------------------+---------------------------------+--------------+
| Morro Bay Camp and Kayak - SBC                 | March 14, 2025 - March 16, 2025 | 0 spot(s)    |
+--------------------------------

In [2]:
from bs4 import BeautifulSoup
import re
import requests
import json
from tabulate import tabulate
from concurrent.futures import ThreadPoolExecutor
import time

# URLs and API endpoints
main_page = "https://www.csusb.edu/recreation-wellness/adventure/trips/all-trips-date"
base_url = "https://www.csusb.edu"
api_url = "https://recshop.csusb.edu/Program/GetOfferingsForSemester"
querystring = {
    "semesterId": "29ff794d-548e-407f-b511-10d04ec2798e",
    "programId": "d921b108-4059-4eef-bd42-9bd9c72c2557"
}
headers = {"cookie": "ASP.NET_SessionId=b4omal1okxattmuwb1rmyqpn"}

def fetch_api_data():
    """Fetch trip data from the RecShop API"""
    try:
        response = requests.get(api_url, params=querystring, headers=headers)
        print(f"API Response Status: {response.status_code}")
        
        if response.status_code == 200:
            # Print a sample of the response to debug
            print(f"API Response Preview: {response.text[:200]}...")
            return json.loads(response.text)
        else:
            print(f"API request failed with status code: {response.status_code}")
            return []
    except Exception as e:
        print(f"Error fetching API data: {e}")
        return []

# Fetch API data and create lookup dictionary
print("Fetching data from API...")
api_data = fetch_api_data()
spots_lookup = {}

if api_data:
    print(f"Successfully retrieved {len(api_data)} items from API")
    for offering in api_data:
        try:
            trip_name = offering.get('Name', '').strip()
            max_registrants = offering.get('MaxRegistrants', 0)
            current_registrations = offering.get('CurrentRegistrations', 0)
            spots_left = max(0, max_registrants - current_registrations)
            
            # Add to lookup with multiple key variations to improve matching
            spots_lookup[trip_name.lower()] = f"{spots_left} spot(s)"
            
            # Create additional keys with simplified text for better matching
            simple_name = re.sub(r'[^\w\s]', '', trip_name).lower()
            spots_lookup[simple_name] = f"{spots_left} spot(s)"
            
            print(f"Added to lookup: {trip_name} -> {spots_left} spots")
        except Exception as e:
            print(f"Error processing API offering: {e}")
else:
    print("No data retrieved from API, will rely on web scraping only")

# Getting the adventure page to get all the current trips
print("Fetching main trips page...")
response = requests.get(main_page)
soup = BeautifulSoup(response.text, 'html.parser')

list_of_href = soup.find_all('a', href=True)
pattern = re.compile(r'\/event\/\d{5,6}')

href_trips = []

for i in list_of_href:
    match = re.search(pattern, str(i))
    if match:
        href_trips.append(match.group())

print(f"Found {len(href_trips)} trips on main page")

def find_fusion_link(trip_soup):
    """Find the link to the fusion page which contains spots left information"""
    # Try different selectors that might contain the fusion link
    fusion_button = trip_soup.find('a', class_="fusion-button")
    if not fusion_button:
        fusion_button = trip_soup.find('p', class_="card-text")
    if not fusion_button:
        fusion_button = trip_soup.find('a', text=re.compile(r'register|sign up', re.IGNORECASE))
    
    if fusion_button and hasattr(fusion_button, 'get') and fusion_button.get('href'):
        return fusion_button.get('href')
    return None

def fetch_trip_info(page):
    """Fetch information for a specific trip page"""
    try:
        trip_url = base_url + page
        print(f"Fetching trip info from: {trip_url}")
        
        trip = requests.get(trip_url)
        trip_soup = BeautifulSoup(trip.text, 'html.parser')
        
        # Extract basic trip information
        trip_date_element = trip_soup.find('div', class_="event--date")
        trip_date = trip_date_element.text.strip() if trip_date_element else "No date found"
        
        trip_name_element = trip_soup.find('h2', class_="event--title")
        trip_name = trip_name_element.text.strip() if trip_name_element else "No title found"
        
        print(f"Found trip: {trip_name} on {trip_date}")
        
        # Try to get spots left information
        spots_left = "N/A"
        
        # First check API data (most reliable)
        trip_key = trip_name.lower()
        simple_key = re.sub(r'[^\w\s]', '', trip_name).lower()
        
        if trip_key in spots_lookup:
            spots_left = spots_lookup[trip_key]
            print(f"Found in API lookup: {spots_left}")
        elif simple_key in spots_lookup:
            spots_left = spots_lookup[simple_key]
            print(f"Found in API lookup (simple match): {spots_left}")
        else:
            # If not in API, try to get the fusion link and check there
            fusion_link = find_fusion_link(trip_soup)
            if fusion_link:
                print(f"Found fusion link: {fusion_link}")
                try:
                    if not fusion_link.startswith('http'):
                        if fusion_link.startswith('/'):
                            fusion_link = base_url + fusion_link
                        else:
                            fusion_link = 'https://' + fusion_link
                    
                    trip_spots = requests.get(fusion_link)
                    fusion_soup = BeautifulSoup(trip_spots.text, 'html.parser')
                    
                    # Try different selectors that might contain spots info
                    spots_element = fusion_soup.find('div', class_="tag-bg-grey spots-tag rounded p-1 mr-3 mb-2")
                    if spots_element and spots_element.find('p', class_="card-text"):
                        spots_left = spots_element.find('p', class_="card-text").text
                        print(f"Found spots from fusion page: {spots_left}")
                    else:
                        # Try alternative selectors
                        spots_text = fusion_soup.find(text=re.compile(r'\d+\s*spot', re.IGNORECASE))
                        if spots_text:
                            spots_left = spots_text
                            print(f"Found spots using regex: {spots_left}")
                except Exception as e:
                    print(f"Error fetching fusion page: {e}")
        
        return [trip_name, trip_date, spots_left]
    except Exception as e:
        print(f"Error processing {page}: {e}")
        return ["Error", "Error", "Error"]

# Using ThreadPoolExecutor to fetch trip info concurrently
print("Starting concurrent fetch of all trips...")
with ThreadPoolExecutor(max_workers=8) as executor:  # Reduced workers to avoid rate limiting
    scraped_data = list(executor.map(fetch_trip_info, href_trips))
    
    # Add small delay between batches to avoid rate limiting
    for i in range(0, len(href_trips), 5):
        batch = href_trips[i:i+5]
        partial_results = list(executor.map(fetch_trip_info, batch))
        scraped_data.extend(partial_results)
        time.sleep(1)  # Small delay between batches

# Filter out error entries
scraped_data = [entry for entry in scraped_data if entry[0] != "Error"]

# Insert header row
scraped_data.insert(0, ["Trip Name", "Trip Date", "Spots left"])

# Print the tabulated data
table = tabulate(scraped_data, headers="firstrow", tablefmt="grid")
print("\n\n--- FINAL RESULTS ---\n")
print(table)

# Save to a file
with open("csusb_trips.txt", "w") as f:
    f.write(table)

print("\nResults saved to csusb_trips.txt")

Fetching data from API...
API Response Status: 200
API Response Preview: 

    <div class="list-wrapper">
<input data-val="true" data-val-required="The PromptForFamily field is required." id="PromptForFamily" name="PromptForFamily" type="hidden" value="False" />
<input...
Error fetching API data: Expecting value: line 3 column 5 (char 8)
No data retrieved from API, will rely on web scraping only
Fetching main trips page...
Found 33 trips on main page
Starting concurrent fetch of all trips...
Fetching trip info from: https://www.csusb.edu/event/577464
Fetching trip info from: https://www.csusb.edu/event/503544
Fetching trip info from: https://www.csusb.edu/event/582012
Fetching trip info from: https://www.csusb.edu/event/99841
Fetching trip info from: https://www.csusb.edu/event/570378
Fetching trip info from: https://www.csusb.edu/event/577528
Fetching trip info from: https://www.csusb.edu/event/100206
Fetching trip info from: https://www.csusb.edu/event/503542
Found trip: Black Canyon

  fusion_button = trip_soup.find('a', text=re.compile(r'register|sign up', re.IGNORECASE))
  spots_text = fusion_soup.find(text=re.compile(r'\d+\s*spot', re.IGNORECASE))


Fetching trip info from: https://www.csusb.edu/event/577507Fetching trip info from: https://www.csusb.edu/event/500487

Fetching trip info from: https://www.csusb.edu/event/99601
Fetching trip info from: https://www.csusb.edu/event/500382
Fetching trip info from: https://www.csusb.edu/event/577531
Fetching trip info from: https://www.csusb.edu/event/585714
Fetching trip info from: https://www.csusb.edu/event/577534
Fetching trip info from: https://www.csusb.edu/event/99596
Found trip: Six Flags Magic Mountain - SBC on April 6, 2025
Found fusion link: /sites/default/files/Who%20Can%20Register%20And%20How%20To%20Do%20It_100.pdf
Found trip: Leo Carrillo Beach Camp and Explore - SBC on April 12, 2025 - April 13, 2025
Found fusion link: https://recshop.csusb.edu/Program/GetProgramDetails?courseId=54d62972-c7bb-4097-9675-302d6a501d9c
Found trip: La Jolla Snorkel and Hike - SBC on April 13, 2025
Found fusion link: /sites/default/files/Who%20Can%20Register%20And%20How%20To%20Do%20It_57.pdf
Fou