# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, WebDriverException, TimeoutException
import time
import os
import shutil
from Constants import months_to_download 
from Constants import stations_mapping
from Utils import rename_downloaded_file


# Clearing the browser cache and cookies ensures that the test environment is reset. 
# This helps to prevent issues such as the website skipping login steps or other flow interruptions that might occur due to cached data or cookies
def clear_cache_and_cookies(driver):
    """Reset the browser's HTTP cache and cookies.

    Args:
        driver: Selenium WebDriver that supports ``execute_cdp_cmd``
            (i.e. a Chromium-based driver).
    """
    # Work from a blank page so no site is loaded while state is reset.
    driver.get("about:blank")
    driver.execute_cdp_cmd('Network.clearBrowserCache', {})
    print("Browser cache cleared.")
    # delete_all_cookies() only covers cookies visible to the current page,
    # and about:blank has none; the CDP command clears them browser-wide.
    driver.execute_cdp_cmd('Network.clearBrowserCookies', {})
    driver.delete_all_cookies()
    print("Browser cookies cleared.")


# Logging into the website
def initialize_and_login_chrome_driver(download_directory, login_email=None, login_password=None):
    """Start Chrome, open the BMKG data portal and submit the login form.

    Chrome is configured to save downloads into ``download_directory``
    without prompting. The page is switched to English before the login
    form is filled in and submitted.

    Args:
        download_directory: Directory Chrome should download files into.
        login_email: Account e-mail. When ``None``, the ``BMKG_EMAIL``
            environment variable is used, falling back to the account that
            was previously hard-coded here.
        login_password: Account password. When ``None``, the
            ``BMKG_PASSWORD`` environment variable is used, with the same
            legacy fallback.

    Returns:
        The WebDriver after the sign-in form was submitted, or ``None`` if
        any step failed (the browser is closed in that case).
    """
    # NOTE(review): the literal fallbacks keep existing callers working, but
    # credentials should not live in source control - move them to the
    # environment and drop the literals.
    if login_email is None:
        login_email = os.environ.get("BMKG_EMAIL", 'noy@vespertool.com')
    if login_password is None:
        login_password = os.environ.get("BMKG_PASSWORD", 'IndonesiaWeather15')

    driver = None
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_experimental_option("prefs", {
            "download.default_directory": download_directory,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True
        })

        driver = webdriver.Chrome(options=chrome_options)

        clear_cache_and_cookies(driver)

        driver.get('https://dataonline.bmkg.go.id/data_iklim')
        print("Navigated to BMKG website")

        language = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "/html/body/div[1]/div/div/form/div/button/i")))
        language.click()
        print("Language button clicked")

        english = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "/html/body/div[1]/div/div/form/div/ul/li[2]/button/i")))
        english.click()
        print("English language selected")

        # The language switch may re-render the page, so wait for the form
        # instead of looking the field up immediately.
        email = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "/html/body/div[2]/div/div/div/div[1]/div[1]/div/form[1]/div[1]/div/input")))
        email.click()
        email.send_keys(login_email)
        print("Email entered")

        passw = driver.find_element(By.XPATH, "/html/body/div[2]/div/div/div/div[1]/div[1]/div/form[1]/div[2]/div/input")
        passw.click()
        passw.send_keys(login_password)
        print("Password entered")

        capcha = driver.find_element(By.ID, "captcha")
        capcha.send_keys('X')
        print("Captcha entered")

        # NOTE(review): presumably this pause lets an operator replace the
        # placeholder with the real captcha text - confirm.
        time.sleep(10)

        sign_in = driver.find_element(By.XPATH, "//button[@type='submit' and contains(@class, 'btn btn-success pull-right') and contains(text(), 'Sign in')]")
        sign_in.click()
        print("Sign in clicked")

        # Fixed pause after submitting; presumably for the post-login page to load.
        time.sleep(15)

        return driver
    except NoSuchElementException as e:
        print(f"Error initializing and logging into the driver: Element not found - {e}")
    except Exception as e:
        print(f"Error initializing and logging into the driver: {e}")

    # Only reached on failure: close the half-initialised browser so it does
    # not linger, since the caller receives None and cannot quit it.
    if driver is not None:
        try:
            driver.quit()
        except Exception as e:
            print(f"Error closing the driver after a failed login: {e}")
    return None


# The website is buggy, so a hard refresh is sometimes needed.
def hard_refresh(driver):
    """Reload the current page with a ``cache=false`` query parameter.

    The parameter is set rather than appended, so repeated refreshes do not
    grow the URL, and any existing query string or fragment is preserved.

    Args:
        driver: Selenium WebDriver exposing ``current_url`` and ``get``.
    """
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    parts = urlsplit(driver.current_url)
    # Drop any previous "cache" entry (including ones left behind by naive
    # "?cache=false" appends) before adding a single fresh one.
    query = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != "cache"
    ]
    query.append(("cache", "false"))
    driver.get(urlunsplit(parts._replace(query=urlencode(query))))



# Select the meteorological station whose data we need
def select_station(driver, station_name, max_attempts=20):
    """Open the Daily Data form and choose ``station_name`` on it.

    Each attempt navigates via the "Climate Data" > "Daily Data" menu, picks
    the second entry of the station-type dropdown, ticks the parameter
    checkboxes and selects the station by typing its name into the search
    box. On a Selenium error the page is hard-refreshed and the sequence is
    retried.

    Args:
        driver: Selenium WebDriver positioned on the BMKG site.
        station_name: Station name to type into the search box and to match
            in the resulting option list.
        max_attempts: Maximum number of tries before giving up.

    Returns:
        ``True`` if the station was selected, ``False`` if every attempt
        failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            select_data = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "//a[contains(text(), 'Climate Data') and @data-hover='dropdown' and @data-toggle='dropdown']")))
            select_data.click()

            select_daily = driver.find_element(By.XPATH, "//a[@href='https://dataonline.bmkg.go.id/data_iklim' and text()='Daily Data']")
            select_daily.click()

            station_type_select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[1]/span/span[1]/span/span[1]")))
            station_type_select.click()

            # Second entry of the station-type dropdown.
            upt_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/span/span/span[2]/ul/li[2]")))
            upt_option.click()

            parameter_checkboxes = driver.find_elements(By.XPATH, "//input[@type='checkbox' and @name='parameter[]']")
            for checkbox in parameter_checkboxes:
                # Clicking toggles, so skip boxes that are already ticked;
                # otherwise a pre-checked box would end up deselected.
                if not checkbox.is_selected():
                    checkbox.click()

            station_name_select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[5]/span/span[1]/span/span[1]")))
            station_name_select.click()

            station_name_input = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/span/span/span[1]/input")))
            station_name_input.send_keys(station_name)

            station_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, f"//li[contains(text(), '{station_name}')]")))
            station_option.click()

            print(f"Station {station_name} selected successfully")
            return True
        except (NoSuchElementException, WebDriverException) as e:
            print(f"Station selection attempt {attempt} failed: {e}. Performing a hard refresh and retrying...")
            hard_refresh(driver)

    print("All station selection attempts exhausted. Exiting without successful station selection.")
    return False

# Once the right data is selected, download it
def download_data(driver, from_date_str, to_date_str, download_directory, station_name, max_download_attempts=20):
    """Request, rate and download the data for one date range.

    Each attempt fills the "from"/"to" date fields, presses the process
    button, completes the survey that follows, clicks the download button
    and finally renames the downloaded file. Any exception triggers a page
    reload, a fresh station selection and another attempt.

    Args:
        driver: Selenium WebDriver with the station already selected.
        from_date_str: Start date typed into the ``from`` field.
        to_date_str: End date typed into the ``to`` field.
        download_directory: Directory passed to ``rename_downloaded_file``.
        station_name: Station name, used for renaming and for re-selecting
            the station after a failed attempt.
        max_download_attempts: Maximum number of tries before giving up.

    Returns:
        ``True`` if an attempt completed through the rename step, ``False``
        if every attempt failed.
    """
    for attempt in range(1, max_download_attempts + 1):
        try:
            from_date = driver.find_element(By.ID, 'from')
            from_date.clear()
            from_date.send_keys(from_date_str)

            to_date = driver.find_element(By.ID, 'to')
            to_date.clear()
            to_date.send_keys(to_date_str)

            # Click the page body after typing the dates; presumably this
            # dismisses the date picker - confirm.
            driver.find_element(By.TAG_NAME, 'body').click()

            process_button = WebDriverWait(driver, 30).until(
                EC.element_to_be_clickable((By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[7]/div/div/button/span[1]")))
            # NOTE(review): the double click is kept from the original code;
            # presumably a single click is not always registered.
            process_button.click()
            process_button.click()

            time.sleep(20)

            _submit_excellent_survey(driver)

            time.sleep(20)

            download_button = WebDriverWait(driver, 30).until(
                EC.element_to_be_clickable((By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[2]/div/div/div[2]/div[4]/div/form/button[1]")))
            download_button.click()

            # Fixed pause before renaming; presumably to let the download finish.
            time.sleep(10)

            rename_downloaded_file(download_directory, from_date_str, to_date_str, station_name)

            print("Download successful")
            return True
        except Exception as e:
            print(f"Download attempt {attempt} failed: {e}. Refreshing the page and retrying...")
            driver.execute_script("location.reload();")

            # The reload discards the form state, so pick the station again.
            select_station(driver, station_name)

    print("All download attempts exhausted. Exiting without successful download.")
    return False


def _submit_excellent_survey(driver):
    """Give the top rating on the survey shown after processing and send it."""
    first_rating = WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
        (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[2]/div/div/div[2]/div[3]/form/div[1]/div/div[1]/ul/a[5]/i")))
    first_rating.click()

    excellent_star_xpath = "//a[@class='star' and @title='Excellent' and @data-value='5']"
    remaining_question_labels = (
        "//label[contains(text(), 'Easiness of data access')]",
        "//label[contains(text(), 'Data has relevancy with your work')]",
        "//label[contains(text(), 'DataOnline services help your work ')]",
    )
    for label_xpath in remaining_question_labels:
        # The label lookup only verifies the question is present (it raises
        # if missing); the star lookup is not scoped to that label.
        # NOTE(review): find_element returns the first matching star each
        # time - confirm this rates every question, not just one.
        driver.find_element(By.XPATH, label_xpath)
        driver.find_element(By.XPATH, excellent_star_xpath).click()

    send_selection = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//i[@class='fa fa-send-o']")))
    send_selection.click()


def main():
    """Download every configured date range for every configured station.

    For each ``station name -> download directory`` entry in
    ``stations_mapping`` a fresh logged-in Chrome session is started, the
    station is selected, and each ``(from, to)`` pair in
    ``months_to_download`` is downloaded. Stations whose login fails are
    skipped. An unexpected exception stops the remaining stations.
    """
    try:
        for station_name, download_directory in stations_mapping.items():
            driver = initialize_and_login_chrome_driver(download_directory)
            if not driver:
                continue

            try:
                select_station(driver, station_name)

                for from_date_str, to_date_str in months_to_download:
                    download_data(driver, from_date_str, to_date_str, download_directory, station_name)
            finally:
                # Close this station's browser before the next one opens;
                # previously only the last driver created was ever quit.
                driver.quit()
                print("Chrome driver closed successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
