In [1]:
import database
import traceback
import sys
from snowballing.operations import load_work, reload, work_by_varname
from snowballing.operations import work_by_varname, set_display, extract_info, set_place
from snowballing.selenium_scholar import SeleniumScholarQuerier
from snowballing.scholar import ScholarConf
from snowballing.selenium_scholar import SearchScholarQuery
from snowballing.dbmanager import insert, set_attribute

from snowballing.snowballing import ArticleNavigator
from ipywidgets import HBox, VBox, IntSlider, ToggleButton, Text, Layout
from ipywidgets import Dropdown, Button, Tab, Label, Textarea, Output
from IPython.display import clear_output, display, HTML, Javascript

import pandas as pd


In [2]:
# Instantiate the SeleniumScholarQuerier that is handed to FindNames below.
querier = SeleniumScholarQuerier()
# NOTE(review): the meaning of the positional arguments (10, 4) is not visible
# in this notebook — presumably results per page and citation format; confirm
# against snowballing.selenium_scholar before changing them.
querier.apply_settings(10, 4)

[ INFO]  settings applied


<snowballing.selenium_scholar.SeleniumScholarQuerier at 0x21a674a4ef0>

In [3]:
# Load the spreadsheet whose rows FindNames steps through (it reads column 0
# as the paper name and column 2 as a ", "-separated list of attribute names).
df = pd.read_excel("todo_francisco.xlsx")

In [4]:
class FindNames(ArticleNavigator):
    """Widget for curating database.

    Steps through the rows of a DataFrame one at a time. For the current row
    it sends the paper name as a ``SearchScholarQuery`` through ``querier``
    and displays the first article returned.

    Row layout read by this class:

    * column 0 -- paper name, used as the query words;
    * column 2 -- ", "-separated attribute names; each one is set to ``"1"``
      on the article info before the article is shown.
    """

    def __init__(self, querier, df, force=False, debug=False, index=0):
        """Build the widgets and run the query for the initial row.

        Arguments:

        * `querier` -- object exposing `tasks`, `send_query(query)` and
          `articles` (a SeleniumScholarQuerier in this notebook)
        * `df` -- pandas DataFrame with the rows to navigate

        Keyword arguments:

        * `force` -- stored as `self.force`; not read in this class
          (presumably consumed by ArticleNavigator -- TODO confirm)
        * `debug` -- initial state of the "Debug" toggle
        * `index` -- row to start from
        """
        # NOTE(review): ArticleNavigator.__init__ is not called here; this
        # class builds its own widgets instead. Confirm this is intentional.
        reload()
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same array and is available in old and new pandas alike.
        self.matrix = df.values
        self.force = force
        self.querier = querier
        self.paper_name = Text(layout=Layout(width="99%"))
        self.next_page_widget = Button(description='Next Work', icon='fa-arrow-right')
        self.reload_widget = Button(description='Reload', icon='fa-refresh')
        self.previous_page_widget = Button(description='Previous Work', icon='fa-arrow-left')
        self.debug_widget = ToggleButton(value=debug, description="Debug")
        self.textarea_widget = ToggleButton(value=True, description="Refresh")
        self.page_number_widget = Label(value="")
        self.output_widget = Output()
        self.next_page_widget.on_click(self.next_page)
        self.reload_widget.on_click(self.reload)
        self.previous_page_widget.on_click(self.previous_page)
        # Any change on the "Refresh" toggle redraws the current article.
        self.textarea_widget.observe(self.show)
        self.view = VBox([
            self.paper_name,
            HBox([
                self.previous_page_widget,
                self.reload_widget,
                self.next_page_widget,
                self.debug_widget,
                self.textarea_widget,
                self.page_number_widget
            ]),
            self.output_widget
        ])
        self.index = index
        self.name = ""
        self.articles = []
        self.work = None
        self.citation_var = ""
        self.set_attrs = []
        # Run the first query now, but do not render until the widget is shown.
        self.reload(show=False)

    def next_page(self, b):
        """Go to next page (clamped to the last row) and reload"""
        self.index = min(len(self.matrix) - 1, self.index + 1)
        self.reload(b)

    def previous_page(self, b):
        """Go to previous page (clamped to the first row) and reload"""
        # Fix: the original assigned to `self.query`, so the index never
        # moved and "Previous Work" just re-ran the current row.
        self.index = max(0, self.index - 1)
        self.reload(b)

    def set_index(self):
        """Set page index label and enable/disable the navigation buttons"""
        self.page_number_widget.value = str(self.index)
        self.next_page_widget.disabled = self.index == len(self.matrix) - 1
        self.previous_page_widget.disabled = self.index == 0

    def show(self, b=None):
        """Show comparison

        Clears the output area, prints the current paper name and renders the
        first article of the last query. Prints "<unknown>" when the query
        returned nothing. `b` is the button/change payload passed by the
        widget callbacks and is ignored.
        """
        self.output_widget.clear_output()
        with self.output_widget:
            print(self.name)
            if not self.articles:
                print("<unknown>")
                return
            try:
                article = self.articles[0]
                if 'div' in article:
                    self.show_article_html(article['div'])
                else:
                    print(article['name'])
                for article, nwork, info in self.valid_articles([article], show=True):
                    # Mark every attribute listed in the spreadsheet row.
                    for attr in self.set_attrs:
                        info[attr] = "1"
                    self.show_article(article, nwork, info)
            except Exception:
                # Print the traceback to stdout so it lands in the output
                # widget. Only Exception is caught so that KeyboardInterrupt
                # and SystemExit still propagate.
                traceback.print_exc(file=sys.stdout)

    def reload(self, b=None, show=True):
        """Reload

        Reloads the database module state, queries Scholar for the paper name
        of the current row and stores the extracted results in
        `self.articles`.

        Arguments:

        * `b` -- button payload passed by widget callbacks (ignored)
        * `show` -- when True, render the result through `show()` afterwards
        """
        self.output_widget.clear_output()
        with self.output_widget:
            # The "Debug" toggle selects the ScholarConf log level (3 vs. 2).
            ScholarConf.LOG_LEVEL = 3 if self.debug_widget.value else 2
            reload()
            self.querier.tasks.clear()

            if self.index >= len(self.matrix):
                # Nothing to query past the last row; just refresh the label.
                self.set_index()
                return
            matrix_row = self.matrix[self.index]
            self.paper_name.value = self.name = matrix_row[0]
            # An empty spreadsheet cell is read as NaN (a float), which has no
            # .split(); treat any non-string cell as "no attributes".
            raw_attrs = matrix_row[2]
            self.set_attrs = raw_attrs.split(", ") if isinstance(raw_attrs, str) else []
            print(self.name)
            query = SearchScholarQuery()

            query.set_scope(False)
            query.set_words(self.name)
            query.set_num_page_results(1)
            self.querier.send_query(query)

            self.articles = list(map(extract_info, self.querier.articles))
        if show:
            self.show()

        self.set_index()

    def browser(self):
        """Present widget: render the current article and return the view"""
        self.show()
        return self.view

    def _ipython_display_(self):
        """ Displays widget """
        self.show()
        display(self.view)

# Build the navigator. Construction already runs the query for the first row,
# because FindNames.__init__ ends with self.reload(show=False).
find = FindNames(querier, df)

In [5]:
# Display the widget container (the VBox assembled in FindNames.__init__).
find.view

A Jupyter Widget

In [None]:
# Temp: scratch cell that passes a hard-coded Work definition (gupta2009a) to
# snowballing.dbmanager.insert. Its prompt is "In [None]", i.e. it was not
# executed in this saved run.
# NOTE(review): presumably insert() writes the entry into the database source
# files — confirm before running, and delete this cell once it is no longer needed.
# The trailing ";" suppresses the cell's displayed return value.
insert('''
gupta2009a = DB(Work(
    2009, "A case study comparing defect profiles of a reused framework and of applications reusing it",
    display="gupta",
    authors="Gupta, Anita and Li, Jingyue and Conradi, Reidar and Rønneberg, Harald and Landre, Einar",
    place=ESE,
    pp="227--255",
    entrytype="article",
    volume="14",
    number="2",
    publisher="Springer",
    ID="gupta2009case",
    cluster_id="8288715595896582988",
    scholar="http://scholar.google.com/scholar?cites=8288715595896582988&as_sdt=2005&sciodt=0,5&hl=en",
    springer2015="1",
))
''');

In [6]:
# Number of rows in the spreadsheet, i.e. how many entries FindNames can step through.
len(df)

288