In [1]:
import common_functions
import pandas as pd

In [2]:
# Name of the site this notebook targets; passed to the file-path manager.
site = "Musement"

# Lookup table handed to the scraper. Every value is a CSS selector except
# 'js_script_for_shadow_root', which is a JavaScript snippet.
css_selectors = dict(
    # Currency picker
    currency='div[data-test*="dropdown-currency"]',
    currency_list='section[class*="row-start-center"]',
    # Listing page: result count and pagination buttons
    products_count='span[data-test-id*="search-component-activity-count-text"]',
    view_more_button='button[data-test-id="search-component-test-btn"]',
    show_more_button='a[data-qa-marker*="loading-button"]',
    # Product card and the fields read from it
    product_card='div[data-test*="ActivityCard"]',
    tour_price='span[data-test="realPrice"]',
    tour_price_discount='div[class="tour-scratch-price"]',
    ratings='div[data-test="reviewTest"]',
    review_count='p[class*="reviewsNumber"]',
    category_label='div[data-test="main-category"]',
    # Cookie banner: script returning the banner's shadow root, then the decline button
    js_script_for_shadow_root='return document.querySelector("msm-cookie-banner").shadowRoot',
    cookies_banner='button[data-test*="decline-cookies"]',
    # Sort dropdown and its options
    sort_by='select[data-test-id="search-component-sort-selector"]',
    option_rating='option[value*="rating"]',
    option_popularity='option[value*="relevance-city"]',
    # Provider block on the product page (presumably what get_provider_name reads; confirm in common_functions)
    provider='div[class*="src-shared_component-blockBody"]',
)


In [3]:
# Path manager for this site; "NA" is passed as the second argument
# (its meaning is defined by common_functions.FilePathManager).
file_manager = common_functions.FilePathManager(site, "NA")

# Logger bundle built from the path manager.
logger = common_functions.LoggerManager(file_manager)

# Pick the operator workbook path out of the manager's path mapping.
operator_paths = file_manager.get_file_paths()
file_path_xlsx_operator = operator_paths['file_path_xlsx_operator']


In [4]:

# Read the operator workbook. Each row carries a product 'Link' and an
# 'Operator' column; rows whose 'Operator' is still the placeholder "ToDo"
# are scraped and filled in below.
df = pd.read_excel(file_path_xlsx_operator)

# Last scraper that was successfully constructed. It is used after the loop to
# save `df`, and stays None when no row needed scraping or every constructor
# call failed, so the save step never touches an undefined name.
scraper = None

for index, row in df.iterrows():
    url = row['Link']
    # Log the current row being processed
    logger.logger_info.info(f"Processing row {index} with URL: {url}")

    # Only rows still marked "ToDo" in the 'Operator' column need scraping.
    if row['Operator'] != "ToDo":
        logger.logger_info.info(f"Skipping row {index} as the Operator is not 'ToDo'.")
        continue

    # Scraper owned by this iteration only. Tracking it separately means the
    # `finally` clause never quits a driver left over from an earlier row
    # when the constructor itself raises.
    row_scraper = None
    try:
        row_scraper = common_functions.ProductScraperMusment(
            url, None, css_selectors, file_manager, logger, provider=True
        )
        scraper = row_scraper
        # Log the initiation of the scraping process
        logger.logger_info.info(f"Initialized scraper for URL: {url}")
        row_scraper.get_url()

        provider_name = row_scraper.get_provider_name()
        # Log that the provider name was successfully fetched
        logger.logger_done.info(f"Provider name fetched for row {index}: {provider_name.text}")

        # Write the scraped operator back into the frame in place.
        df.at[index, 'Operator'] = provider_name.text
    except Exception as e:
        # Best-effort per row: log the failure and move on to the next row.
        logger.logger_err.error(f"Error processing row {index} with URL {url}: {str(e)}")
    finally:
        # Close the driver only if this iteration actually created a scraper.
        if row_scraper is not None:
            row_scraper.quit_driver(row_scraper.driver)
            logger.logger_done.info(f"Closed scraper for URL: {url}")

# Save through the scraper's helper, as before. With no scraper there were no
# updates to `df`, so there is nothing to save.
if scraper is not None:
    scraper._save_dataframe(df)
else:
    logger.logger_info.info("No scraper was created; nothing to save.")


2024-09-28 14:59:22,995 - Info_logger - INFO - Processing row 0 with URL: https://www.musement.com/uk/lisbon/lisboa-card-for-24h-48h-or-72h-83764/
2024-09-28 14:59:22,998 - Info_logger - INFO - Skipping row 0 as the URL is not 'ToDo'.
2024-09-28 14:59:22,999 - Info_logger - INFO - Processing row 1 with URL: https://www.musement.com/uk/lisbon/lisbon-oceanarium-entrance-tickets-133813/
2024-09-28 14:59:22,999 - Info_logger - INFO - Skipping row 1 as the URL is not 'ToDo'.
2024-09-28 14:59:23,000 - Info_logger - INFO - Processing row 2 with URL: https://www.musement.com/uk/lisbon/belem-and-modern-lisbon-bus-hop-on-hop-off-combined-tickets-347911/
2024-09-28 14:59:23,001 - Info_logger - INFO - Initializing the Chrome driver and logging into the website
2024-09-28 14:59:24,228 - Info_logger - INFO - Successfully initiated ProductScraper for city: None
2024-09-28 14:59:24,229 - Info_logger - INFO - Initialized scraper for URL: https://www.musement.com/uk/lisbon/belem-and-modern-lisbon-bus-ho

In [5]:
# Repeats the save call from the end of the loop cell. This cell relies on
# `scraper` and `df` still being bound in the kernel from that cell; on a fresh
# kernel it fails unless the loop cell has run and created a scraper.
# NOTE(review): `_save_dataframe` is a private helper of the scraper class;
# presumably it writes `df` to disk. Confirm in common_functions.
scraper._save_dataframe(df)
