# ESGF Data Search

Please report any [issues](https://github.com/esgf-nimbus/getting_started/issues/new?labels=bug,esgf-search) or [suggestions](https://github.com/esgf-nimbus/getting_started/issues/new?labels=suggestion,esgf-search) for this Jupyter Notebook.

In [1]:
import configparser
import math

import requests

class ESGFSearchFacets:
    """Look up the facets, and their values, available for a search preset.

    Facet names are read from the search configuration file of a preset
    (downloaded from ``base_url``); facet values are obtained from a
    zero-result query against the ESGF search service.
    """

    def __init__(self):
        # Common URL prefix of the config files; get_config() appends
        # ``.cfg`` or ``_<variant>.cfg`` to it.
        self.base_url = 'https://raw.githubusercontent.com/ESGF/config/master/search-configs/search'
        # Display name -> config variant. The empty variant selects the
        # unsuffixed config file.
        self.presets = {
            'all': '',
            'cc4e': 'cc4e',
            'cmip5': 'cmip5',
            'cmip6': 'cmip6',
            'cordex': 'cordex',
            'input4mips': 'input4mips',
            'isimip-ft': 'isimip-ft',
            'obs4mips': 'obs4mips',
            'specs': 'specs',
        }

        # Preset name -> raw ``facet_fields`` mapping of the search response,
        # i.e. lists that alternate value, count, value, count, ...
        self.cache = {}

    def get_config(self, name):
        """Download and parse the search config of one preset.

        Args:
            name: Config variant (a value of ``self.presets``); ``''``
                selects the unsuffixed config file.

        Returns:
            A ``configparser.ConfigParser`` holding the parsed file, or
            ``None`` when the downloaded text has no section header (i.e. it
            is not an INI-style config).
        """
        if name == '':
            url = '{base_url}.cfg'
        else:
            url = '{base_url}_{variant}.cfg'

        url = url.format(base_url=self.base_url, variant=name)

        response = requests.get(url)

        config = configparser.ConfigParser()

        try:
            config.read_string(response.text)
        except configparser.MissingSectionHeaderError:
            return None

        return config

    def load_config(self, name):
        """Return the facet names of a preset, grouped by section title.

        Args:
            name: Config variant, as accepted by ``get_config``.

        Returns:
            A dict mapping each section title to the list of facet names
            found in that section. Empty when no config could be parsed.
        """
        config = self.get_config(name)

        if config is None:
            # get_config() signals an unparsable download with None; treat it
            # as "no facets" instead of failing on ``None.sections()``.
            return {}

        return self._group_facets(config)

    @staticmethod
    def _group_facets(config):
        """Collect the facet names of a parsed config, keyed by section title."""
        data = {}

        for section in config.sections():
            if section == 'GLOBAL':
                continue

            # The title is the text after the first '=' of the section name;
            # a name without '=' is used as its own title rather than raising
            # an IndexError.
            parts = section.split('=')
            title = parts[1] if len(parts) > 1 else section

            # Only the part of each option value before the first '|' is kept.
            names = [value.split('|')[0] for value in config[section].values()]

            data.setdefault(title, []).extend(names)

        return data

    def get_facets(self, name, include_counts=False, overwrite=False, **kwargs):
        """Return the values of every facet configured for a preset.

        Args:
            name: Config variant, as accepted by ``get_config``.
            include_counts: When true, each list alternates value and count
                as delivered by the search service; otherwise only the
                values are returned.
            overwrite: When true, query the service even if the preset is
                already cached and replace the cached entry.
            **kwargs: Extra query parameters; they override the generated
                ones. NOTE: the cache is keyed by ``name`` only, so these are
                ignored when a cached entry is served.

        Returns:
            A dict mapping facet name to a list, or ``None`` when the search
            response carries no ``facet_counts``/``facet_fields`` entry.
        """
        if overwrite or name not in self.cache:
            fields = self._fetch_facet_fields(name, kwargs)

            if fields is None:
                return None

            # Cache the raw response so that one entry can serve both values
            # of include_counts; caching the stripped form made a later
            # include_counts=True call return lists without counts.
            self.cache[name] = fields
        else:
            fields = self.cache[name]

        if include_counts:
            return fields

        # Every second element, starting at index 0, is a facet value.
        return {facet: values[::2] for facet, values in fields.items()}

    def _fetch_facet_fields(self, name, extra_params):
        """Query the search service for the raw facet fields of a preset."""
        facet_groups = self.load_config(name)

        search_params = {
            'format': 'application/solr+json',
            'limit': '0',
            'project': [],
            'facets': ','.join(
                facet for group in facet_groups.values() for facet in group
            ),
        }

        search_params.update(extra_params)

        search_url = 'https://esgf-node.llnl.gov/esg-search/search'

        response = requests.get(search_url, params=search_params)

        try:
            return response.json()['facet_counts']['facet_fields']
        except KeyError:
            return None


class ESGFSearch:
    """Paged client for the ESGF search service.

    Pages are zero-based: ``page`` 0 is the first page and ``pages - 1`` the
    last one.
    """

    def __init__(self):
        self.base_url = 'https://esgf-node.llnl.gov/esg-search/search'
        # Total number of matches reported by the most recent response.
        self.numFound = 0
        # Zero-based index of the page most recently requested.
        self.page = 0
        self.items_per_page = 10
        # Applied on top of the caller's parameters in search(), so these
        # take precedence over same-named keyword arguments.
        self.default_params = {
            'format': 'application/solr+json',
            'type': 'File',
        }
        # Parameters of the current query, reused by next() and previous().
        self.user_params = {}
        self.cache = {}

    @property
    def pages(self):
        """Number of pages needed for ``numFound`` results."""
        return math.ceil(self.numFound / self.items_per_page)

    def parse_results(self, result):
        """Record the total match count and return the result documents.

        Args:
            result: Decoded JSON response with a ``response`` object that
                holds ``numFound`` and ``docs``.

        Returns:
            The ``docs`` list of the response.
        """
        self.numFound = result['response']['numFound']

        return result['response']['docs']

    def search(self, **kwargs):
        """Start a new query and return its first page.

        Args:
            **kwargs: Query parameters. ``offset`` defaults to 0; entries of
                ``default_params`` override same-named keys.

        Returns:
            The list of result documents, or the response text when the
            request was not successful.
        """
        self.user_params = kwargs.copy()
        self.user_params.update(self.default_params)

        offset = self.user_params.setdefault('offset', 0)

        # A new query restarts paging; otherwise next()/previous() would
        # continue from the page index of the previous query.
        self.page = int(offset) // self.items_per_page

        return self._search(self.user_params)

    def _search(self, kwargs):
        """Send one request, limited to ``items_per_page`` results.

        Note that ``kwargs`` is modified in place (its ``limit`` is set).
        """
        kwargs['limit'] = self.items_per_page

        response = requests.get(self.base_url, params=kwargs)

        if response.ok:
            data = self.parse_results(response.json())
        else:
            data = response.text

        return data

    def _goto(self, page):
        """Make ``page`` the current page and fetch it."""
        self.page = page

        self.user_params['offset'] = page * self.items_per_page

        return self._search(self.user_params)

    def next(self):
        """Fetch the page after the current one.

        Raises:
            Exception: If the current page is already the last one.
        """
        # Valid page indexes are 0 .. pages - 1, hence ">=": with ">" it was
        # possible to step onto an empty page one past the end.
        if self.page + 1 >= self.pages:
            raise Exception('Your past the last page')

        return self._goto(self.page + 1)

    def previous(self):
        """Fetch the page before the current one.

        Raises:
            Exception: If the current page is already the first one.
        """
        if self.page - 1 < 0:
            raise Exception('Your past the first page')

        return self._goto(self.page - 1)

In [2]:
import pandas as pd
import IPython
from IPython.display import display, Javascript
import ipywidgets as widgets
import nbformat
from uuid import uuid4

# Facet lookup helper and paged search client used by the callbacks below.
facets = ESGFSearchFacets()

esgf = ESGFSearch()

# Most recent search results; display_results() rebinds this to a
# pandas.DataFrame.
df = None

# Selection widgets: preset, facet, the values of that facet, the chosen
# 'facet=value' entries, and the page size.
preset_dd = widgets.Dropdown(options=facets.presets.items(), description='Presets')
facet_dd = widgets.Dropdown(description='Facets')
items_dd = widgets.Dropdown(description='Items', options=[10, 25, 50, 100, 500, 1000])
value_ms = widgets.SelectMultiple(description='Values', rows=10)
select_ms = widgets.SelectMultiple(description='Selected')
# NOTE(review): field_ms is created here but not used anywhere else in the
# visible code.
field_ms = widgets.SelectMultiple(description='Fields')

# Action buttons; their handlers are attached further down.
add_btn = widgets.Button(description='Add', layout=widgets.Layout(left='auto'))
remove_btn = widgets.Button(description='Remove')
search_btn = widgets.Button(description='Search')
next_btn = widgets.Button(description='Next')
prev_btn = widgets.Button(description='Previous')
copy_btn = widgets.Button(description='Copy Notebook')

# Shows the current page, the page count and the number of matches.
page_label = widgets.Label()

# Output areas: one for the result table, one capturing what the callbacks
# decorated with @log.capture() emit.
search_result = widgets.Output()
log = widgets.Output()

def disable_controls():
    """Disable the search button, the facet dropdown and the value list."""
    for control in (search_btn, facet_dd, value_ms):
        control.disabled = True
    
def enable_controls():
    """Re-enable the search button, the facet dropdown and the value list."""
    for control in (search_btn, facet_dd, value_ms):
        control.disabled = False

@log.capture()
def load_facet_values(change):
    """Show the values of the newly selected facet in the value list.

    Args:
        change: Change notification whose ``'new'`` entry is the selected
            facet name (``None`` when the facet dropdown has no selection).
    """
    # get_facets() returns None when the search response has no facet data.
    f = facets.get_facets(preset_dd.value) or {}

    # An unknown or missing facet empties the list instead of raising.
    value_ms.options = f.get(change['new'], ())

def _load_facets(f):
    """Fill the facet dropdown from a facet mapping and refresh the values.

    Args:
        f: Mapping of facet name to values as returned by
            ``facets.get_facets``, or ``None`` when that lookup found no
            facet data.
    """
    if f is None:
        # Show an empty dropdown rather than failing on ``None.keys()``.
        print('No facets could be loaded for the selected preset.')
        f = {}

    facet_dd.options = f.keys()

    load_facet_values({'new': facet_dd.value})
    
@log.capture()
def load_facets(change):
    """Reload the facet widgets for the newly selected preset.

    Clears the list of selected facet values, because it belongs to the
    previous preset, and then repopulates the facet dropdown.

    Args:
        change: Change notification whose ``'new'`` entry is the preset's
            config variant.
    """
    disable_controls()

    try:
        select_ms.options = tuple()

        f = facets.get_facets(change['new'])

        _load_facets(f)
    finally:
        # Re-enable even when the lookup fails; otherwise the controls would
        # stay disabled after an error.
        enable_controls()
    
@log.capture()
def add_facet_values(b):
    """Append the highlighted values to the Selected list as 'facet=value'.

    Entries that are already in the Selected list are not added again.

    Args:
        b: The clicked button (unused).
    """
    disable_controls()

    candidates = ['{}={}'.format(facet_dd.value, x) for x in value_ms.value]
    add_values = [item for item in candidates if item not in select_ms.options]

    select_ms.options = select_ms.options + tuple(add_values)

    enable_controls()
    
@log.capture()
def remove_facet_values(b):
    """Drop the highlighted entries from the Selected list.

    Args:
        b: The clicked button (unused).
    """
    remaining = []

    for option in select_ms.options:
        if option not in select_ms.value:
            remaining.append(option)

    select_ms.options = tuple(remaining)

def search_facets_to_dict(items):
    """Turn 'facet=value' strings into search parameters.

    Args:
        items: Iterable of strings of the form ``'facet=value'``.

    Returns:
        A dict mapping each facet to its values joined with commas, in the
        order in which they appear in ``items``.

    Raises:
        ValueError: If an item contains no ``'='``.
    """
    values = {}

    for item in items:
        # Split on the first '=' only, so a value may itself contain '='.
        facet, value = item.split('=', 1)

        values.setdefault(facet, []).append(value)

    return {facet: ','.join(group) for facet, group in values.items()}

def display_results(result):
    """Show one page of search results and update the page label.

    Args:
        result: What ``esgf`` returned for the page: a list of result
            documents, or the response text (a ``str``) when the request
            failed.
    """
    global df

    page_label.value = '{} - {} ({})'.format(esgf.page, esgf.pages, esgf.numFound)

    search_result.clear_output()

    if isinstance(result, str):
        # A failed request yields the raw response text, which cannot be
        # turned into a DataFrame; show it as is and keep the previous df.
        with search_result:
            print(result)

        return

    df = pd.DataFrame(result)

    with search_result:
        display(df)

@log.capture()
def search(b):
    """Search with the entries of the Selected list and show the results.

    Args:
        b: The clicked button (unused).
    """
    display_results(esgf.search(**search_facets_to_dict(select_ms.options)))
    
@log.capture()
def next(b):
    """Fetch and show the next page of results.

    Args:
        b: The clicked button (unused).
    """
    display_results(esgf.next())
    
@log.capture()
def previous(b):
    """Fetch and show the previous page of results.

    Args:
        b: The clicked button (unused).
    """
    display_results(esgf.previous())
    
@log.capture()
def update_items_per_page(change):
    """Apply a new page size and, if results are showing, search again.

    Args:
        change: Change notification whose ``'new'`` entry is the page size.
    """
    esgf.items_per_page = change['new']

    # Nothing has been searched yet, so there is nothing to refresh.
    if df is not None:
        disable_controls()

        search(None)

        enable_controls()
    
@log.capture()
def copy_notebook(b):
    """Save a fresh copy of this notebook and redirect the browser to it.

    The notebook is downloaded from GitHub, written to the current working
    directory under a name with a random suffix, and the page is then pointed
    at that file below the JupyterHub service prefix.

    Args:
        b: The clicked button (unused).

    Raises:
        requests.HTTPError: If the notebook cannot be downloaded.
        KeyError: If ``JUPYTERHUB_SERVICE_PREFIX`` is not set.
    """
    # Imported here because this function needs os.environ and no import of
    # os is visible among the notebook's imports.
    import os

    notebook_url = 'https://raw.githubusercontent.com/esgf-nimbus/getting_started/master/esgf_search.ipynb'

    response = requests.get(notebook_url, stream=True)
    # Fail with the HTTP error instead of handing an error page to nbformat.
    response.raise_for_status()

    nb = nbformat.reads(response.text, as_version=4)

    output_name = 'esgf_search_{}.ipynb'.format(str(uuid4())[:8])

    with open(output_name, 'w', encoding='utf-8') as outfile:
        nbformat.write(nb, outfile)

    display(Javascript('alert("You will be redirect to the new notebook.")'))

    display(Javascript('window.location="{}lab/tree/getting_started/{}"'.format(os.environ['JUPYTERHUB_SERVICE_PREFIX'], output_name)))
    
# Attach each handler to its widget.
copy_btn.on_click(copy_notebook)
    
# Selecting another preset reloads the facet list.
preset_dd.observe(load_facets, names='value')
    
# Selecting another facet reloads its values.
facet_dd.observe(load_facet_values, names='value')
  
# Changing the page size re-runs the search once results exist.
items_dd.observe(update_items_per_page, names='value')
    
add_btn.on_click(add_facet_values)
    
remove_btn.on_click(remove_facet_values)
   
search_btn.on_click(search)

next_btn.on_click(next)

prev_btn.on_click(previous)
    
# Populate the facet widgets for the first preset right away; the observer
# above only runs when the preset changes.
load_facets({'new': preset_dd.options[0][1]})

# Usage instructions, rendered as Markdown at the top of the interface.
header = widgets.Output()
header.append_display_data(IPython.display.Markdown('''
# ESGF Search

This is a simple ESGF search interface. 

Select the facets you would like to add to the search. Next click the `Search` button and the results will display below. Use the `Next` and `Previous` buttons to navigate results.

The results are stored in the variable `df` which is a `pandas.DataFrame`. You can further manipulate the results, refer to `pandas` [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/frame.html) documentation.

You can convert the results back to json with `df.to_json(orient='records')`.
'''))

# Paging controls: Previous | page label | Next.
page_box = widgets.HBox(children=[prev_btn, page_label, next_btn])

# Paging controls followed by the page-size dropdown.
_nav_box = widgets.HBox(children=[page_box, items_dd])

# Search and copy buttons.
_nav_box2 = widgets.HBox(children=[search_btn, copy_btn])

# Navigation row: buttons first, paging second, spread across the row.
nav_box = widgets.HBox(children=[_nav_box2, _nav_box])
nav_box.layout.justify_content = 'space-between'

add_btn.layout.align_self = 'flex-end'

# Left column: choose preset, facet and values, then add them.
facet_box = widgets.VBox(children=[preset_dd, facet_dd, value_ms, add_btn])

remove_btn.layout.align_self = 'flex-end'

# Right column: the selected 'facet=value' entries and the remove button.
selected_box = widgets.VBox(children=[select_ms, remove_btn])

select_box = widgets.HBox(children=[facet_box, selected_box])
select_box.layout.justify_content = 'flex-start'

# Whole interface, top to bottom: header, selection, navigation, results, log.
main = widgets.VBox(children=[header, select_box, nav_box, search_result, log])
main.layout.width = '100%'

# Last expression of the cell, so the notebook displays the widget.
main

VBox(children=(Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<IPython.core.display.M…