In [1]:
import json
import common_functions

In [2]:
# CSS selectors used to locate elements on the listing page, keyed by the
# logical name the scraper looks them up under.
css_selectors = dict(
    # Currency picker
    currency='button[class="elementText"]',
    currency_list='div[class*="symbol-bold"]',
    # Listing size and pagination
    products_count='span[class*="product-count-text"]',
    show_more_button='a[data-qa-marker*="loading-button"]',
    # Individual product card and the fields read from it
    product_card='div[id*="product-card-container"]',
    tour_price='span[class*="tour-price"]',
    tour_price_discount='div[class="tour-scratch-price"]',
    ratings='span[class*="rating-count"]',
    review_count='span[class*="review-count"]',
    category_label='span[class*="booster-label"]',
)

# JavaScript snippets, keyed by logical name. The single entry returns the
# shadow root of the "msm-cookie-banner" element.
js_selectors = dict(
    js_script_for_shadow_root='return document.querySelector("msm-cookie-banner").shadowRoot',
)

# Name of the site being scraped; it is also the key used to pick this site's
# section out of config.json further down.
site = "Headout"
# Path manager created with "NA" in place of a city name (presumably a
# placeholder for "no specific city" — confirm in common_functions). In the
# visible cells it is only used to build the logger.
file_manager_logger = common_functions.FilePathManager(site, "NA")
# Shared logger handed to every scraper and to the Azure uploader; the loop
# below writes its status messages through `logger.logger_info`.
logger = common_functions.LoggerManager(file_manager_logger)

In [3]:
# Load this site's section of the JSON configuration.
# JSON is UTF-8 by specification, so the encoding is pinned rather than left to
# the platform default (a legacy code page on Windows), which would corrupt
# non-ASCII city names.
with open('config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)
config = config.get(site)
if config is None:
    raise KeyError(f"No '{site}' section found in config.json")

# The settings do not change between cities, so read them once up front and
# fail with a clear message instead of an AttributeError/NameError later on.
settings = config.get('settings') or {}
cities = settings.get('city')
url_template = settings.get('url')
if not cities or not url_template:
    raise ValueError(
        f"config.json must define a non-empty 'city' list and a 'url' for {site}"
    )

# Scrape each configured city in turn. `file_manager` and `scraper` stay bound
# to the last city that was reached; the upload cell further down relies on
# `file_manager`, and the combine step below relies on `scraper`.
for city in cities:
    # Every occurrence of the literal substring "city" in the URL template is
    # replaced with the actual city name.
    url = url_template.replace("city", city)
    file_manager = common_functions.FilePathManager(site, city)
    scraper = common_functions.ScraperHeadout(url, city, css_selectors, file_manager, logger)

    # Skip cities whose data already exists; stop the whole run if today's
    # data already exists.
    if scraper.is_city_already_done():
        logger.logger_info.info(f"Data for {city} already exists. Skipping...")
        continue
    if scraper.is_today_already_done():
        logger.logger_info.info("Data for today already exists. Exitng...")
        break

    # Open the page, switch currency, expand the full product list, then
    # scrape it and persist the result as a per-city CSV.
    scraper.get_url()
    scraper.select_currency()
    products_count = scraper.get_product_count()
    scraper.load_all_products(products_count)
    df = scraper.scrape_products(global_category=True)
    scraper.save_to_csv(df)

# Runs once after the loop (also after an early `break`), on the last scraper
# instance. Presumably merges the per-city CSV files into one workbook —
# confirm in common_functions. The guard above guarantees `scraper` is bound by
# this run rather than missing or left over from a previous kernel session.
scraper.combine_csv_to_xlsx()

2024-09-29 11:54:18,089 - Info_logger - INFO - Initializing the Chrome driver and logging into the website
2024-09-29 11:54:19,371 - Info_logger - INFO - Successfully initiated ProductScraper for city: Lisbon
2024-09-29 11:54:47,152 - Done_logger - INFO - Rows: 197 Data saved to G:/.shortcut-targets-by-id/1ER8hilqZ2TuX2C34R3SMAtd1Xbk94LE2/MyOTAs/Baza Excel/Headout/Daily/2024-09-29-Lisbon-Headout.csv
2024-09-29 11:54:47,161 - Info_logger - INFO - Initializing the Chrome driver and logging into the website
2024-09-29 11:54:48,423 - Info_logger - INFO - Successfully initiated ProductScraper for city: Rome
2024-09-29 11:55:33,170 - Done_logger - INFO - Rows: 349 Data saved to G:/.shortcut-targets-by-id/1ER8hilqZ2TuX2C34R3SMAtd1Xbk94LE2/MyOTAs/Baza Excel/Headout/Daily/2024-09-29-Rome-Headout.csv
2024-09-29 11:55:33,171 - Info_logger - INFO - Initializing the Chrome driver and logging into the website
2024-09-29 11:55:34,377 - Info_logger - INFO - Successfully initiated ProductScraper for ci

In [4]:
# Create the Azure uploader from a path manager and the shared logger.
# NOTE(review): `file_manager` is the variable left bound by the last iteration
# of the city loop in the previous cell, so this cell only works after that
# cell has run in the same kernel session — confirm that a city-specific path
# manager is what the uploader expects.
blob_uploader = common_functions.AzureBlobUploader(file_manager, logger)
# Two upload steps run in order; the captured log output reports a "(raw)"
# upload followed by a "(refined)" upload (presumably one per call — confirm
# in common_functions).
blob_uploader.upload_excel_to_azure_storage_account()
blob_uploader.transform_upload_to_refined()

2024-09-29 11:59:17,409 - Info_logger - INFO - Sucessfuly initiated AzureBlobUploader
2024-09-29 11:59:18,168 - Done_logger - INFO - File uploaded successfully to Azure Blob Storage (raw).
2024-09-29 11:59:19,563 - Done_logger - INFO - File uploaded successfully to Azure Blob Storage (refined).
