# v1

In [1]:
# # Import packages

from datetime import datetime
from typing import Optional, Tuple, Union
from urllib.request import urlopen

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
from tqdm import tqdm
from webdriver_manager.chrome import ChromeDriverManager

from loguru import logger
from bs4 import BeautifulSoup
import time


from src.Vendors.scraper import Scraper

# # STATICS
VENDOR_URL = 'https://www.foscam.com/downloads/index.html'
DOWNLOAD_PATH = 'downloads/'

# Selenium Webdriver Options, Download Path, Headless, Screensize, Webbrowser Version
options = Options()
options.headless = False

options.add_experimental_option("prefs", {
    "download.default_directory": rf"{DOWNLOAD_PATH}"
})

user_agent = 'foscam Download Assistant/1.0'
options.add_argument(f'user-agent={user_agent}')
# # Initialize Chrome and open Vendor Website


In [22]:
class FoscamScraper(Scraper):

    def __init__(
            self,
            logger,
            url: str = VENDOR_URL,
            headless: bool = True,
            options: Options = options,
            max_products: int = float('inf')

    ):
        self.headless = headless
        self.url = url
        self.name = "foscam"
        self.max_products = max_products
        self.driver = webdriver.Chrome(service=Service(
            ChromeDriverManager().install()), options=options)
        self.driver.implicitly_wait(0.5)  # has to be set only once
        self.logger = logger

    def _open_website(self, url: str = '') -> None:
        try:
            if not url:
                url = self.url
            self.driver.get(url)
            self.logger.debug(f'Opened foscam website {url}')
        except Exception as e:
            self.logger.error(f"Could not open foscam website {url}!")
            self.logger.error(e)

    def _close_website(self) -> None:
        self.driver.close()
        self.logger.debug('Closes Window')

    def _get_current_displayed_product_list(self, html) -> list:
        """parses html and returns a list of tuples with product name and product url

        Args:
            html (_type_): html of current product list

        Returns:
            list: list of tuples with product name and product url
        """    
        self.driver.implicitly_wait(1)
        product_list = BeautifulSoup(html, 'html.parser').find_all('li')
        products_tuple_list = []
        for product in product_list:
            # append product name and product url
            products_tuple_list.append((product.find_all('a')[1].text, product.find_all('a')[1]['href']))
        return products_tuple_list

    def _next_page(self, counter:int=0) -> bool:
        """clicks next page button if it exists

        Returns:
            bool: True if next page exists and performs action, False if not
        """
        self.driver.implicitly_wait(1)
        #self.logger.debug(f'{counter = }')
        if counter == 0:
            # do nothing in first run
            return True
        if counter == 1:
            SELECTOR_NEXT_PAGE = '/html/body/div[6]/div[4]/a'
        else:
            SELECTOR_NEXT_PAGE = '/html/body/div[6]/div[4]/a[2]'
        try:
            self.driver.find_element(
                By.XPATH, value=SELECTOR_NEXT_PAGE).click()
            self.driver.implicitly_wait(1)
            return True
        except Exception as e:
            self.logger.debug('No next page')
            self.logger.debug(f'Found {counter=} pages')
            return False

    
    def _create_product_catalog(self) -> list:
        """clicks once through all product lines and products and saves them in a dict

        Returns:
            dict: product catalog
        """
        SELECTOR_PRODUCTS = '/html/body/div[6]/div[3]' # product caroussel 
        self.driver.implicitly_wait(1)
        self.logger.debug(f'get html of product caroussel')
        counter=0
        products_list = []
        self.logger.debug(f'Iterare over pages to create product catalog')
        while self._next_page(counter):
            counter+=1
            time.sleep(2)
            a = self.driver.find_element(By.XPATH, value=SELECTOR_PRODUCTS).get_attribute('innerHTML')
            products_list.append(self._get_current_displayed_product_list(a))
            
        # flat python list
        products_list = [item for sublist in products_list for item in sublist]
        self.products_list = products_list

    def _find_metadata_table(self, product_url: str):
        """scrapes product page and returns a dict with product name, product url, product line, product line url, release note url, release note text, checksum

        Args:
            product_url (str): url of product page
        """

        SELECTOR_METADATA_TABLE = '//*[@id="val"]/div/table'
        self.driver.implicitly_wait(1)
        try:
            product_metadata = self.driver.find_element(
                By.XPATH, value=SELECTOR_METADATA_TABLE).get_attribute('innerHTML')
            self.driver.implicitly_wait(1)
            return product_metadata
        except Exception as e:
            self.logger.debug('No metadata table, skip product')

    def _convert_date(self, date_str: str):
        try:
            return datetime.strptime(date_str, "%Y/%m/%d").strftime("%Y-%m-%d")
        except Exception as ex:
            self.logger.error(f'Could not convert date {date_str}')
            self.logger.error(ex)
            return None

    def scrape_metadata(self):
        """scrapes metadata from foscam website

        Returns:
            list[dict]: list of dicts with metadata
        """
        self._open_website()
        self._create_product_catalog()
        
        metadata = []
        self.logger.debug(f'Iterate over product catalog and scrape metadata')
        for product, product_url in self.products_list:
            if len(metadata) > self.max_products:
                    break
            try:
                self._open_website(f'https://www.foscam.com{product_url}')
                metadata_html = self._find_metadata_table(product_url)
                fw_releases_list = BeautifulSoup(metadata_html, 'html.parser').find_all('tr')
                for fw_release in fw_releases_list[1:]:
                    metadata_current = fw_release.find_all('td')
                    tmp_metadata_dict = {'manufacturer': 'foscam',
                                                    'version': metadata_current[0].text,
                                                    'product_type': None, #  not available
                                                    'product_name': product,
                                                    'url': f'https://www.foscam.com{product_url}',
                                                    'checksum_scraped': None, # not available
                                                    'download_link': f"https://www.foscam.com{metadata_current[-1].find_all('a')[0]['href']}",
                                                    'release_date': self._convert_date(metadata_current[1].text),
                                                    "additional_data": {} # nothing valuable
                                                    }
                                                    
                    metadata.append(tmp_metadata_dict)
                print(f'Finished scraping {product=}, {product_url=}')
            except Exception as ex:
                print('{product}, {product_url} is missing crucial data')
                print(ex)
                pass
            time.sleep(1)
        self.driver.quit()
        return metadata

    

In [24]:
if __name__ == '__main__':
    from loguru import logger
    import json 
    logger.debug('Start foscam')
    foscam = FoscamScraper(logger)
    metadata = foscam.scrape_metadata()
    
    # save metadata to json file
    with open("scraped_metadata/firmware_data_foscam.json", "w") as firmware_file:
        json.dump(metadata, firmware_file)

    foscam.logger.debug('Finished foscam')


2022-12-11 19:52:11.767 | DEBUG    | __main__:<module>:4 - Start foscam
2022-12-11 19:53:52.104 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/index.html
2022-12-11 19:53:52.108 | DEBUG    | __main__:_create_product_catalog:86 - get html of product caroussel
2022-12-11 19:53:52.109 | DEBUG    | __main__:_create_product_catalog:89 - Iterare over pages to create product catalog
2022-12-11 19:54:07.727 | DEBUG    | __main__:_next_page:73 - No next page
2022-12-11 19:54:07.728 | DEBUG    | __main__:_next_page:74 - Found counter=7 pages
2022-12-11 19:54:07.729 | DEBUG    | __main__:scrape_metadata:136 - Iterate over product catalog and scrape metadata
2022-12-11 19:54:08.324 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=161


2021/12/08
2022/10/26
Finished scraping product='VD1/DBW5', product_url='/downloads/firmware_details.html?id=161'


2022-12-11 19:54:09.791 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=163


2021/12/08
2022/10/26
Finished scraping product='F41/FLC', product_url='/downloads/firmware_details.html?id=163'


2022-12-11 19:54:11.231 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=162


2021/12/08
2022/10/26
Finished scraping product='S41/SPC', product_url='/downloads/firmware_details.html?id=162'


2022-12-11 19:54:12.658 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=148


2021/5/10
Finished scraping product='C2M/C2M V2/C2M V3', product_url='/downloads/firmware_details.html?id=148'


2022-12-11 19:54:14.074 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=134


2022/3/30
2022/05/26
Finished scraping product='R2M', product_url='/downloads/firmware_details.html?id=134'


2022-12-11 19:54:15.482 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=143


2022/4/25
2022/05/26
Finished scraping product='R4M', product_url='/downloads/firmware_details.html?id=143'


2022-12-11 19:54:16.942 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=135


2022/4/28
Finished scraping product='FI9926P', product_url='/downloads/firmware_details.html?id=135'


2022-12-11 19:54:18.348 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=132


2022/3/30
2022/05/26
Finished scraping product='FI9902P', product_url='/downloads/firmware_details.html?id=132'


2022-12-11 19:54:19.745 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=137


2022/4/25
2022/05/26
Finished scraping product='G4', product_url='/downloads/firmware_details.html?id=137'


2022-12-11 19:54:21.248 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=144


2022/4/25
2022/05/26
Finished scraping product='G4P', product_url='/downloads/firmware_details.html?id=144'


2022-12-11 19:54:23.747 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=142


2022/4/25
2022/05/26
Finished scraping product='G4EP', product_url='/downloads/firmware_details.html?id=142'


2022-12-11 19:54:25.458 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=156


2022/4/25
2022/05/26
Finished scraping product='D4Z/VZ4', product_url='/downloads/firmware_details.html?id=156'


2022-12-11 19:54:27.123 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=154


2022/4/25
2022/05/26
Finished scraping product='SD2X', product_url='/downloads/firmware_details.html?id=154'


2022-12-11 19:54:28.737 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=155


2022/04/25
2022/05/26
Finished scraping product='SD2/HT2', product_url='/downloads/firmware_details.html?id=155'


2022-12-11 19:54:30.138 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=62


2022/4/28
Finished scraping product='FI9928P', product_url='/downloads/firmware_details.html?id=62'


2022-12-11 19:54:31.544 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=141


2022/3/30
2022/05/26
Finished scraping product='G2EP', product_url='/downloads/firmware_details.html?id=141'


2022-12-11 19:54:32.986 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=151


2022/3/30
2022/05/26
Finished scraping product='D2EP', product_url='/downloads/firmware_details.html?id=151'


2022-12-11 19:54:34.430 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=159


2022/09/23
Finished scraping product='FN7108W-B4', product_url='/downloads/firmware_details.html?id=159'


2022-12-11 19:54:35.861 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=43


2019/07/25
2018/11/20
2018/5/17
2018/5/17
2017/07/07
2015/03/16
2014/12/22
2014/05/16
2014/04/08
2014/02/19
2013/11/13
2013/08/22
Finished scraping product='FI9821W V2', product_url='/downloads/firmware_details.html?id=43'


2022-12-11 19:54:37.251 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=1


2018/10/16
Finished scraping product='C1/C1 V2', product_url='/downloads/firmware_details.html?id=1'


2022-12-11 19:54:38.655 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=2


2018/10/16
Finished scraping product='C1 Lite/C1 Lite V2', product_url='/downloads/firmware_details.html?id=2'


2022-12-11 19:54:40.120 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=3


2022/4/28
Finished scraping product='FI9900P', product_url='/downloads/firmware_details.html?id=3'


2022-12-11 19:54:41.542 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=4


2022/4/28
Finished scraping product='FI9961EP', product_url='/downloads/firmware_details.html?id=4'


2022-12-11 19:54:42.956 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=5
2022-12-11 19:54:42.998 | ERROR    | __main__:_convert_date:122 - Could not convert date 2014/01/02 
2022-12-11 19:54:42.999 | ERROR    | __main__:_convert_date:123 - unconverted data remains:  


2019/07/25
2018/11/20
2018/5/17
2018/5/17
2017/07/07
2015/03/16
2014/12/22
2014/05/16
2014/04/10
2014/02/13
2014/01/02 
2013/09/22
Finished scraping product='FI9821P', product_url='/downloads/firmware_details.html?id=5'


2022-12-11 19:54:44.394 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=6


2019/07/25
2018/11/20
2018/5/17
2018/5/17
2017/07/07
2015/03/16
2014/12/22
2014/08/05
Finished scraping product='FI9831P', product_url='/downloads/firmware_details.html?id=6'


2022-12-11 19:54:45.810 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=7
2022-12-11 19:54:45.845 | ERROR    | __main__:_convert_date:122 - Could not convert date 2014/05/16 
2022-12-11 19:54:45.846 | ERROR    | __main__:_convert_date:123 - unconverted data remains:  


2019/07/25
2018/11/20
2018/5/17
2018/5/17
2017/07/07
2015/03/16
2014/12/22
2014/05/16 
2014/04/10
Finished scraping product='FI9826P', product_url='/downloads/firmware_details.html?id=7'


2022-12-11 19:54:47.291 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=8


2018/10/16
Finished scraping product='FI9816P/FI9816P V2', product_url='/downloads/firmware_details.html?id=8'


2022-12-11 19:54:48.740 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=11


2022/4/28
Finished scraping product='C2', product_url='/downloads/firmware_details.html?id=11'


2022-12-11 19:54:50.191 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=12


2019/07/25
2018/11/19
2018/5/11
2017/6/14
Finished scraping product='FI9821EP', product_url='/downloads/firmware_details.html?id=12'


2022-12-11 19:54:51.688 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=13


2018/5/11
2016/05/26
Finished scraping product='FosBaby', product_url='/downloads/firmware_details.html?id=13'


2022-12-11 19:54:53.132 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=14


2018/5/11
2016/05/26
Finished scraping product='FosBaby P1', product_url='/downloads/firmware_details.html?id=14'


2022-12-11 19:54:54.534 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=15


2022/4/28
Finished scraping product='R4', product_url='/downloads/firmware_details.html?id=15'


2022-12-11 19:54:55.997 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=16


2019/07/25
2018/11/19
2018/5/11
2017/6/14
Finished scraping product='FI9803EP', product_url='/downloads/firmware_details.html?id=16'


2022-12-11 19:54:57.416 | DEBUG    | __main__:_open_website:26 - Opened foscam website https://www.foscam.com/downloads/firmware_details.html?id=120


2017/08/30
Finished scraping product='FI9800E', product_url='/downloads/firmware_details.html?id=120'


KeyboardInterrupt: 