<a href="https://githubtocolab.com/giswqs/geemap/blob/master/examples/notebooks/126_selenium_fetch_community_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/></a>

## Retrieving code scripts from Google Earth Engine via the web

- Requires logging in to a Google account
- This login is blocked by default in many current [selenium](https://www.selenium.dev/) drivers
- It is still possible using [undetected_chromedriver](https://pypi.org/project/undetected-chromedriver/)

Tested with Jupyter running locally on Windows.

In [None]:
# Uncomment the lines below to install the packages this notebook depends on.
#! pip install geemap
#! pip install selenium
#! pip install undetected_chromedriver

## Open browser

- Needs a small helper class (`ChromeWithPrefs`) so that the download location can be set
- Modify the download directory in `prefs` as required, for example:

    prefs = {"download.default_directory": r"%\geemap\geemap\data\awesome_gee",}

In [None]:
import os

# Chrome preferences handed to the driver. "download.default_directory" is the
# folder the browser saves downloads to; the rest of the notebook reads the
# downloaded files back from this same folder.
# Set the GEE_DOWNLOAD_DIR environment variable to use another folder without
# editing the notebook; otherwise the original machine-specific path is used.
prefs = {
    "download.default_directory": os.environ.get(
        "GEE_DOWNLOAD_DIR",
        r"C:\Users\karel\geemap\geemap\geemap\data\awesome_gee",
    ),
}

In [None]:
### https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/524#issuecomment-1058340938

import json
import os
import tempfile
from functools import reduce

import undetected_chromedriver as webdriver


class ChromeWithPrefs(webdriver.Chrome):
    """Chrome driver that applies the ``prefs`` experimental option via a profile.

    Before the parent driver starts, the ``prefs`` entry of ``options`` is
    written to a ``Preferences`` file inside a freshly created user-data
    directory, and the entry is removed from the options again.
    """

    def __init__(self, *args, options=None, **kwargs):
        """Start the browser, handling the ``prefs`` option first.

        Args:
            *args: Positional arguments forwarded to the parent class.
            options: Optional Chrome options object. When given, its ``prefs``
                experimental option (if any) is moved into a profile directory.
            **kwargs: Keyword arguments forwarded to the parent class.
        """
        if options:
            self._handle_prefs(options)

        super().__init__(*args, options=options, **kwargs)

        # remove the user_data_dir when quitting
        self.keep_user_data_dir = False

    @staticmethod
    def _undot_key(key, value):
        """Turn a (dotted key, value) pair into a nested single-path dict."""
        if "." in key:
            key, rest = key.split(".", 1)
            value = ChromeWithPrefs._undot_key(rest, value)
        return {key: value}

    @staticmethod
    def _merge_nested(target, extra):
        """Recursively merge ``extra`` into ``target`` in place and return ``target``.

        Nested dicts are copied while merging, so the caller's preference
        dicts are never mutated.
        """
        for key, value in extra.items():
            if isinstance(value, dict):
                existing = target.get(key)
                if not isinstance(existing, dict):
                    existing = target[key] = {}
                ChromeWithPrefs._merge_nested(existing, value)
            else:
                target[key] = value
        return target

    @staticmethod
    def _handle_prefs(options):
        """Move the ``prefs`` experimental option of ``options`` into a profile.

        Does nothing when ``options`` carries no (or empty) ``prefs``.
        Otherwise a temporary user-data directory is created, registered on
        ``options`` through ``--user-data-dir`` and seeded with a
        ``Default/Preferences`` JSON file holding the un-dotted preferences.

        Args:
            options: Chrome options object; modified in place.
        """
        # Read the prefs from the options object itself, not from a global:
        # comparing against a module-level dict skipped this whole branch
        # whenever both were the same dict.
        option_prefs = options.experimental_options.get("prefs")
        if not option_prefs:
            return

        # undot prefs dict keys; merge deeply so that keys sharing a prefix
        # (e.g. "download.a" and "download.b") do not overwrite each other
        undot_prefs = {}
        for key, value in option_prefs.items():
            ChromeWithPrefs._merge_nested(
                undot_prefs, ChromeWithPrefs._undot_key(key, value)
            )

        # create an user_data_dir and add its path to the options
        user_data_dir = os.path.normpath(tempfile.mkdtemp())
        options.add_argument(f"--user-data-dir={user_data_dir}")

        # create the preferences json file in its default directory
        default_dir = os.path.join(user_data_dir, "Default")
        os.mkdir(default_dir)

        prefs_file = os.path.join(default_dir, "Preferences")
        with open(prefs_file, encoding="latin1", mode="w") as f:
            json.dump(undot_prefs, f)

        # pylint: disable=protected-access
        # remove the experimental_options to avoid an error
        options._experimental_options.pop("prefs", None)

## Manually log in

In the newly opened browser window, fill in:

- username
- password


Leave the browser open. 

In [None]:
# Build the Chrome options and attach the download preferences to them.
options = webdriver.ChromeOptions()
options.add_experimental_option("prefs", prefs)

# use the derived Chrome class that handles prefs
driver = ChromeWithPrefs(options=options)
# Open the Google sign-in page; the login itself is done by hand in the browser.
driver.get('https://www.google.com/accounts/Login')

## Go get a cup of coffee / tea

- Attempts to fetch each code example from the community datasets
- Plus 900 data sources (with duplicates; 140 unique ones)
- Takes about 1-3 seconds per page
- Not all can be loaded (e.g. those using https://earthenv-dot-map-of-life.appspot.com/)

In [None]:
import geemap
import os
import time

# Scripts fetched so far, keyed by script id; starts out empty.
memory = dict()

In [None]:
def load_all():
    """Fetch the sample script of every community dataset into ``memory``.

    Each community dataset whose sample-code link points at the Earth Engine
    code editor is opened through the global ``driver``; the JSON file the
    browser downloads is read and its ``code`` entry is stored in the global
    ``memory`` dict under the script id. Ids already present in ``memory``
    are not fetched again.

    Returns:
        tuple: ``(failed, known)`` - the list of dataset titles whose script
        could not be read, and the number of scripts that were already in
        ``memory``.

    Raises:
        Exception: any unexpected error while fetching a script is re-raised
        after the dataset title has been added to the failed list.
    """
    failed = []
    known = 0

    def get_community_examples():
        """Return the community assets, each with a ``uri`` for the script-load endpoint."""
        assets = geemap.search_ee_data('', source='community')
        for asset in assets:
            asset['uri'] = asset['sample_code'].replace(
                'https://code.earthengine.google.com/',
                'https://code.earthengine.google.com/scripts/public/load?id=',
            )
        return [x for x in assets if 'google' in x['uri']]

    def get_ee_code(uri, label):
        """Download the script behind ``uri`` and cache it in ``memory``.

        ``label`` is the dataset title recorded in ``failed`` when the
        downloaded file cannot be read.
        """
        # `known` lives in load_all(); without this the increment below
        # raises UnboundLocalError.
        nonlocal known

        name = "f.json"
        path = os.path.join(prefs["download.default_directory"], name)

        # Remove a stale download so the wait loop only sees a fresh file.
        if os.path.exists(path):
            os.remove(path)

        uuid = uri.replace('https://code.earthengine.google.com/scripts/public/load?id=', '')
        if uuid in memory:
            known += 1
            return

        driver.get(uri)
        time.sleep(2)

        # Poll for the downloaded file, giving up after six more waits.
        counter = 0
        while not os.path.isfile(path) and counter < 6:
            counter += 1
            time.sleep(2)

        try:
            with open(path) as f:
                script = json.load(f)['code']
        except (OSError, ValueError, KeyError, TypeError):
            # Missing file, invalid JSON or no 'code' entry: record the title.
            failed.append(label)
            return

        memory[uuid] = script
        # Delete only after the file is closed; removing an open file fails
        # on Windows.
        os.remove(path)

    for ix, asset in enumerate(get_community_examples()):
        if not ix % 20:
            print(f'Fetch {ix}')
        try:
            get_ee_code(asset['uri'], asset['title'])
        except Exception:
            failed.append(asset['title'])
            raise

    return failed, known

# Run three passes. Scripts already stored in `memory` are not fetched again,
# so the later passes only retry pages that could not be read before.
# Unpack on every pass so `failed` is always the list of titles, never the
# whole (failed, known) tuple.
for _attempt in range(3):
    failed, known = load_all()

In [None]:
from collections import Counter

# Summary of the last pass: scripts that were already in `memory`, number of
# failed titles, number of distinct failed titles, and the count per title.
known, len(failed), len(set(failed)), Counter(failed)

## Unable to import these

In [None]:
# Dataset titles that could not be imported, each with a count of 1.
# NOTE(review): presumably pasted from the Counter(failed) output of an
# earlier run - confirm before relying on it.
{'Cloud Forest Prediction': 1,
          'Mapbiomas: Final Integrated Land Use Land Cover Maps': 1,
          'Mapbiomas: Maps of Annual water coverage Collection 6': 1,
          'Mapbiomas: Maps of Annual water frequency Collection 6': 1,
          'Mapbiomas: Maps of Deforestation Regeneration Collection 6': 1,
          'Mapbiomas: Maps of Mined substances Collection 6': 1,
          'Mapbiomas: Maps of Pasture quality Collection 6': 1,
          'Mapbiomas: Maps of annual burned coverage Collection 6': 1,
          'Mapbiomas: Maps of fire frequency Collection 6': 1,
          'Mapbiomas: Maps of irrigated agriculture': 1,
          'Mapbiomas: Maps of monthly burned coverage Collection 6': 1,
          'Mapbiomas: Maps of quality (change in coverage or use) between selected years': 1,
          'Mapbiomas: Maps of transition areas (change in coverage or use) between selected years': 1,
          'Mapbiomas: Mosaic of Landsat Images for each year of Collection 6': 1,
          'Mean annual cloud frequency': 1,
          'Mean between-year seasonality': 1,
          'Mean monthly cloud frequency': 1,
          'Rangeland-Analysis-Platform: NPP partitioned v2': 1,
          'Rangeland-Analysis-Platform: Vegetation Cover v2': 1,
          'Seasonality concentration and angle (theta)': 1,
          'Seasonality single band with color table': 1,
          'Spatial variability (1 degree SD)': 1,
          'Timing of peak seasonal cloud concentration': 1,
          'Within-year seasonality': 1}

In [None]:
# Number of scripts collected so far (shown as a plain integer rather than a
# one-element tuple).
len(memory)

In [None]:
# Write every collected script to a single JSON file in the download folder.
name = "community_f.json"
path = os.path.join(prefs["download.default_directory"], name)

with open(path, 'w') as f:
    json.dump(memory, f)

## Update to repository

https://github.com/giswqs/geemap/data/community_f.json

## Verify usage

In [None]:
# Create a geemap Map and show it as the output of this cell.
Map = geemap.Map()
Map