## Konkordanser for NAOB mot Bokhylla

Versjon: **Mai 2019**

Velg `Run all` fra Cell-menyen over. 


Søket skiller mellom stor og liten forbokstav i søkeordet. Forfatter og tittel kan trunkeres; for eksempel kan *Agnar Mykle* skrives som *mykl*.

In [1]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

import pandas as pd
import requests
import inspect
from IPython.display import HTML

def get_konk(word, params=None, kind='html'):
    """Fetch concordances for *word* from the NB ngram ``konk`` endpoint.

    Args:
        word: The search word; sent to the service as the ``word`` parameter.
        params: Optional dict of extra query parameters. If it has no
            ``corpus`` entry, ``'bok'`` is used. The dict is copied, so the
            caller's object is never modified.
        kind: Output format. ``'html'`` returns an ``IPython.display.HTML``
            table, ``'json'`` returns the decoded JSON response, and any
            other value returns a ``pandas.DataFrame``.

    Returns:
        The concordances in the format selected by *kind*. In DataFrame
        mode an empty DataFrame is returned when the response cannot be
        decoded or lacks the expected columns.
    """
    # Copy so that adding 'word'/'corpus' does not leak into the caller's dict.
    para = {} if params is None else dict(params)
    para['word'] = word
    corpus = para.setdefault('corpus', 'bok')

    r = requests.get('https://api.nb.no/ngram/konk', params=para)

    if kind == 'json':
        return r.json()

    if kind == 'html':
        row_template = ("<tr>"
                        "<td><a href='{urn}?searchText={kw}' target='_'>{urnredux}</a></td>"
                        "<td>{b}</td>"
                        "<td>{w}</td>"
                        "<td style='text-align:left'>{a}</td>"
                        "</tr>\n")
        if corpus == 'bok':
            def link_text(hit):
                # Book hits carry bibliographic fields used as the link text.
                return ','.join([hit['author'], hit['title'], str(hit['year'])])
        else:
            def link_text(hit):
                # Other corpora: link text is the 3rd and 6th '_'-separated
                # fields of the URN (presumably source name and date --
                # confirm against the service's URN format).
                return '-'.join(hit['urn'].split('_')[2:6:3])

        # Join once instead of growing a string with += for every hit.
        rows = "".join(
            row_template.format(
                kw=word,
                urn=hit['urn'],
                urnredux=link_text(hit),
                b=hit['before'],
                w=hit['word'],
                a=hit['after'])
            for hit in r.json()
        )
        return HTML("<table>{rows}</table>".format(rows=rows))

    if corpus == 'bok':
        columns = ['urn', 'author', 'title', 'year', 'before', 'word', 'after']
    else:
        columns = ['urn', 'before', 'word', 'after']
    try:
        return pd.DataFrame(r.json())[columns]
    except Exception:
        # Deliberate best effort: a bad or empty response yields an empty
        # frame. Unlike a bare except, KeyboardInterrupt/SystemExit propagate.
        return pd.DataFrame()

def get_urnkonk(word, params=None, html=True):
    """Fetch concordances for *word* from the NB ngram ``urnkonk`` endpoint.

    Args:
        word: The search word; sent to the service as ``word``.
        params: Optional dict with further request fields, posted as the
            JSON body. The dict is copied, so the caller's object is never
            modified.
        html: If true, return an ``IPython.display.HTML`` table with one row
            per hit; otherwise return a ``pandas.DataFrame`` with the columns
            ``urn``, ``before``, ``word`` and ``after``.

    Returns:
        The concordances as an HTML table or a DataFrame, depending on *html*.
    """
    # Copy so that adding 'word' does not leak into the caller's dict.
    para = {} if params is None else dict(params)
    para['word'] = word
    r = requests.post('https://api.nb.no/ngram/urnkonk', json=para)

    if not html:
        res = pd.DataFrame(r.json())
        return res[['urn', 'before', 'word', 'after']]

    row_template = """<tr>
                <td>
                    <a href='{urn}?searchText={kw}' target='_blank' style='text-decoration:none'>{urnredux}</a>
                </td>
                <td>{b}</td>
                <td>{w}</td>
                <td style='text-align:left'>{a}</td>
            </tr>\n"""
    # Join once instead of growing a string with += for every hit.
    rows = "".join(
        row_template.format(
            kw=word,
            urn=hit['urn'],
            urnredux="{t}, {f}, {y}".format(t=hit['title'], f=hit['author'], y=hit['year']),
            b=hit['before'],
            w=hit['word'],
            a=hit['after'])
        for hit in r.json()
    )
    return HTML("""<table>{rows}</table>""".format(rows=rows))

def refine_book_urn(urns = None, words = None, author = None, 
             title = None, ddk  = None, subject = None, period=(1100, 2020), gender=None, lang = None, trans= None, limit=20 ):
    """Refine a list of book URNs using metadata criteria.

    Args:
        urns: URNs to refine. May also be a list of metadata rows (lists),
            in which case the first element of each row is used as the URN.
        words, author, title, ddk, subject, gender, lang, trans, limit:
            Optional criteria. Each one that is not ``None`` is forwarded
            under its own name to ``refine_urn``.
        period: ``(first_year, last_year)``; forwarded as ``year`` (the
            first year) and ``next`` (the length of the span in years).

    Returns:
        ``[]`` when *urns* is ``None`` or empty, otherwise whatever
        ``refine_urn`` returns for the constructed query.
    """
    # Nothing to refine. len() also covers empty tuples, which the old
    # comparison with [] let through to urns[0] (IndexError).
    if urns is None or len(urns) == 0:
        return []

    # A metadata list has rows of [urn, ...]; keep only the URN of each row.
    if isinstance(urns[0], list):
        urns = [x[0] for x in urns]

    # Spell the criteria out instead of reading them back from the call
    # frame: frame introspection is fragile and holds a frame reference.
    criteria = {
        'words': words,
        'author': author,
        'title': title,
        'ddk': ddk,
        'subject': subject,
        'gender': gender,
        'lang': lang,
        'trans': trans,
        'limit': limit,
    }
    query = {key: value for key, value in criteria.items() if value is not None}
    query['year'] = period[0]
    query['next'] = period[1] - period[0]
    return refine_urn(urns, query)

def refine_urn(urns, metadata=None):
    """Refine a list of URNs using extra information.

    Posts the URNs together with *metadata* to the NB ngram ``refineurn``
    endpoint and returns the decoded JSON response.

    Args:
        urns: The URNs to refine; sent as ``urns``.
        metadata: Optional dict of further criteria. Missing entries get
            defaults: ``words`` -> ``[]``, ``year`` -> ``1500`` and, unless
            ``next`` or ``neste`` is present, ``next`` -> ``520``. The dict
            is copied, so the caller's object is never modified.

    Returns:
        The decoded JSON body of the response.
    """
    # Copy so that the defaults filled in below do not leak into the caller's dict.
    query = {} if metadata is None else dict(metadata)
    query['urns'] = urns
    query.setdefault('words', [])
    # 'neste' is accepted as an alternative spelling of 'next'.
    if 'next' not in query and 'neste' not in query:
        query['next'] = 520
    query.setdefault('year', 1500)
    r = requests.post('https://api.nb.no/ngram/refineurn', json=query)
    return r.json()

def get_urns_from_text(document):
    """Return every run of 13 consecutive digits found in a text file.

    The file at path *document* is read in full using the platform default
    encoding. Matches are returned as strings, in order of appearance.
    """
    import re

    urn_pattern = re.compile("[0-9]{13}")
    with open(document) as handle:
        contents = handle.read()
    return urn_pattern.findall(contents)

def code_toggle(button_text = "klikk"):
    """Display a small button that shows or hides the notebook's input cells.

    The emitted markup defines a JavaScript ``code_toggle`` function that
    hides or shows every ``div.input`` element, and runs it once when the
    document is ready, so the code starts out hidden. *button_text* is
    used, wrapped in double quotes, as the button's ``value`` attribute.
    """
    from IPython.display import HTML, display

    # Everything up to the (unquoted) value attribute of the submit button.
    markup_head = '''<div>
                <style>
                 .mybutton {
                    background-color: white;
                    border: none;
                    color: white;
                    padding: 1px 1px;
                    text-align: center;
                    text-decoration: none;
                    display: inline-block;
                    font-size: 8px;
                    margin: 2px 1px;
                    cursor: pointer;
                }
            </style>
            <script>
                code_show=true; 
                function code_toggle() {
                 if (code_show){
                 $('div.input').hide();
                 } else {
                 $('div.input').show();
                 }
                 code_show = !code_show
                } 
                $( document ).ready(code_toggle);
            </script>
            <form  action="javascript:code_toggle()">
                <input class='mybutton' type="submit" value='''
    # Closes the input tag, the form and the wrapping div.
    markup_tail = '''>
            </form>
        </div>'''
    quoted_label = '"' + button_text + '"'

    display(HTML(markup_head + quoted_label + markup_tail))

In [2]:
# Show the toggle button; its script hides the input cells once the page is ready.
code_toggle()

In [3]:
# Results keyed by search word; konk() stores each fetched concordance here.
res = {}

In [4]:
# 13-digit URN candidates found in the main NAOB corpus list.
naob = get_urns_from_text("./naob_korpus.txt")
# 13-digit URN candidates found in the supplementary source list.
naobutvid = get_urns_from_text("./Litterære kilder tilleggsliste til UBB og NB 20190328.csv")
# Combined list (main list first) used as the URN pool for searches.
NAOB = [*naob, *naobutvid]

In [5]:
def konk(søkeord='titt-tei', forfatter ='', tittel ='', 
         periode =widgets.IntRangeSlider(min=1920, max= 2015, value=[1950, 2000]), 
         før = (0,12), etter = (0,12),  grense = (0,10)):
    """Fetch concordances for *søkeord* within the NAOB URN pool.

    The pool ``NAOB`` is first narrowed with ``refine_book_urn`` using the
    search word, the author and title patterns and the period; concordances
    are then fetched with ``get_urnkonk`` for the remaining URNs. The result
    is stored in ``res`` under the search word and returned.

    An empty *forfatter* or *tittel* is replaced by ``"%"``, and each value
    is wrapped in ``%`` on both sides before being passed on. *før*, *etter*
    and *grense* are forwarded as ``before``, ``after`` and ``size``.
    """
    author_part = "%" if forfatter == '' else forfatter
    title_part = "%" if tittel == '' else tittel

    matches = refine_book_urn(
        urns=NAOB,
        words=[søkeord],
        author="%{f}%".format(f=author_part),
        title="%{t}%".format(t=title_part),
        period=periode,
    )

    # Each refined row starts with the URN itself.
    options = {
        'urns': [row[0] for row in matches],
        'corpus': 'bok',
        'before': før,
        'after': etter,
        'size': grense,
    }
    hits = get_urnkonk(søkeord, options)
    res[søkeord] = hits
    return hits


# Build the widget UI for konk; with interact_manual the function only runs
# when the user presses the button.
im = interact_manual(konk);
# Relabel the ninth... NOTE(review): children[7] is the eighth child of the
# widget container; konk has seven parameters, so this is presumably the run
# button -- confirm if konk's signature changes.
im.widget.children[7].description = 'Hent konkordanser!'

interactive(children=(Text(value='titt-tei', description='søkeord'), Text(value='', description='forfatter'), …