In [1]:
import dhlab as dh
import requests
import pandas as pd

In [2]:
base_url = "https://api.nb.no/dhlab/similarity"

In [3]:
import re
from IPython.display import HTML

def display_finds(r, num_rows, num_columns, width=500):
    """A list of urls in r is displayed in a grid layout with specified number of rows and columns."""
    base = "https://www.nb.no/items/"
    # Initialize the rows list which will contain HTML string for each row
    rows_html = []
    # Calculate total number of items to display, based on the specified rows and columns
    total_items = num_rows * num_columns
    # Ensure we don't try to display more items than we have
    r = r[:total_items]
    
    # Split the list into rows with the specified number of columns
    for row_start in range(0, len(r), num_columns):
        row_items = r[row_start:row_start+num_columns]
        # For each row, create a list of cell HTML strings
        cells_html = []
        for i, item in enumerate(row_items):
            urnstring = re.findall("URN[^/]*", item)[0]
            prefix, doctyp, urn, page = urnstring.split('_')
            cell_html = f"<td><a href='{base}{prefix}_{doctyp}_{urn}?page={int(page) + 1}' target='_'><img src='{item}' width={width}></a>{row_start+i}</td>"
            cells_html.append(cell_html)
        # Join the cell HTML strings into a row and add it to the rows list
        rows_html.append(f"<tr>{' '.join(cells_html)}</tr>")

    # Join all rows into the final HTML table
    html_table = f"<table>{' '.join(rows_html)}</table>"
    return HTML(f"""<html><head></head><body>{html_table}</body></html>""")
import numpy as np

In [4]:
def collections():
    r = requests.get(f"{base_url}/collections")
    if r.status_code == 200:
        res = r.text
    else:
        res = ""
    return res

In [5]:
def words(word=None, collection_name=None):
    params = locals()
    r = requests.get(f"{base_url}/sim_words", params=params)
    if r.status_code == 200:
        res = pd.DataFrame(r.json(), columns=['word','score'])
    else:
        res = pd.DataFrame()
    return res

In [6]:
def image(search=None, hits=10):
    params = locals()
    r = requests.get(f"{base_url}/images", params=params)
    if r.status_code == 200:
        res = r.json()
    else:
        print(r.status_code)
        res = ""
    return res

In [7]:
def sim_image(image_url=None, limit=20):
    params = locals()
    r = requests.get(f"{base_url}/sim_images", params=params)
    if r.status_code == 200:
        res = r.json()
    else:
        print(r.status_code)
        res = ""
    return res

In [8]:
ims = image("eskimo", hits=20)

In [9]:
display_finds([x[0] for x in ims.values()],10,4)

0,1,2,3
0,1,2,3
4,5,6,7
8,9,10,11
12,13,14,15
16,17,18,19


In [10]:
sim_image(image_url='https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2009060503005_0292/462,925,1263,1383/315,/0/default.jpg', limit=10)

[['https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2009060503005_0292/462,925,1263,1383/315,/0/default.jpg',
  0.99999994],
 ['https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2013040824075_0645/358,874,1280,1403/320,/0/default.jpg',
  0.97438085],
 ['https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2009101212001_0459/360,784,1259,1395/314,/0/default.jpg',
  0.9618212],
 ['https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2009101910001_0639/457,722,1266,1384/316,/0/default.jpg',
  0.95577365],
 ['https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2009101212001_0457/262,923,1445,1193/361,298/0/default.jpg',
  0.88135064],
 ['https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2016052707018_0453/429,594,1426,1995/356,/0/default.jpg',
  0.88058984],
 ['https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2015032508049_0087/432,846,1091,1410/272,/0/default.jpg',
  0.8784076],
 ['https://www.nb.n

In [11]:
collections()

'vss_1850_cosvss_dewey_codevss_1800_cosvss_deweyimages_1900_cos'

In [13]:
words(word='Hottentotterne', collection_name="vss_1850_cos", limit=5)

TypeError: words() got an unexpected keyword argument 'limit'

In [20]:
for x in "Hottentotterne Kystbyen".split():
    print(x,
', '.join([x['word'] for _, x in words(x, collection_name='vss_1850_cos').iterrows()]))

Hottentotterne Hottentotterne, Negerne, Kafferne, Malayerne, Buskmændene, Eskimoerne, Kirgiserne, Eskimoer, Hottentotter, Baskerne, Negrene, Mongoler, Hinduerne, Arabere, Samojederne, Beduinerne, Indierne, Kalmukkerne, Indianerne, Slaverne
Kystbyen Kystbyen, Sabinerbjærgene, Nanterre, Syracusa, NyMexiko, Pueblo, Mowa, Torres-Strædet, Launceston, Port-Said, Sjenin, Gabes, Tripolitza, Sciacca, Aigues-Mortes, Manyanga, Miramichi, Minatitlan, Kajrasol, Gabon
