In [None]:
import os
import pandas as pd

from annif_client import AnnifClient

# Shared client object; every later cell talks to the API through `annif`.
annif = AnnifClient(
    api_base='https://api.annif.org/v1/',
)


In [None]:
# Set paths
# All notebook directories are derived from `path`, which is also made the
# working directory.
path = "."
os.chdir(path)
# Build sub-directory paths with os.path.join so the separator matches the
# platform; a hard-coded "\\" only yields a valid path on Windows.
data_path = os.path.join(path, "data")
output_path = os.path.join(path, "outputs")
fig_path = os.path.join(path, "figs")

In [None]:
# Look at languages
# Print one line per project exposed by the client: id, language and name.
project_line = "Project id: {:<16} lang: {}  name: {}"
for project in annif.projects:
    print(project_line.format(project['project_id'], project['language'], project['name']))

In [None]:
# Set project id
# Project used by the exploratory suggest calls in the following cells.
project_id = "yso-en"

In [None]:
# Try on text
# Send a short sample sentence to the selected project.
text = 'The quick brown fox jumped over the lazy dog'
results = annif.suggest(text=text, project_id=project_id)

In [None]:
# Show results
# Tab-separated table: URI in angle brackets, score with 4 decimals, label.
header = "URI\t\t\t\t\tscore\tlabel"
row_template = "<{}>\t{:.4f}\t{}"
print(header)
for result in results:
    print(row_template.format(result['uri'], result['score'], result['label']))

In [None]:
# Import data
# Input CSV is read from data_path; its first column becomes the index.
filename = "working_data_sans_dewey.csv"
output_filename = "working_data_sans_dewey_with_annif.csv"
input_file = os.path.join(data_path, filename)
df = pd.read_csv(input_file, index_col=0)
print("Dimension of dataframe: ", df.shape)

In [None]:
# Try on 1 notice
# Pull the DESCR value of the row labelled 2 and display it as the cell output.
sample_label = 2
text = df.loc[sample_label, "DESCR"]
text

In [None]:
# Query the selected project with the current `text` and print the suggestions
# as a tab-separated table (URI, score with 4 decimals, label).
results = annif.suggest(text=text, project_id=project_id)
header = "URI\t\t\t\t\tscore\tlabel"
row_template = "<{}>\t{:.4f}\t{}"
print(header)
for result in results:
    print(row_template.format(result['uri'], result['score'], result['label']))

In [None]:
def store_annif_results(results):
    """Split suggestion records into parallel label and score lists.

    Args:
        results: iterable of mappings that each provide a 'label' and a
            'score' entry.

    Returns:
        A ``(labels, scores)`` tuple of two lists in input order. Scores are
        returned as strings formatted with four decimal places.
    """
    # Single pass over `results`, so one-shot iterables are handled as well.
    pairs = [(hit['label'], "{:.4f}".format(hit['score'])) for hit in results]
    label = [name for name, _ in pairs]
    score = [value for _, value in pairs]
    return label, score

In [None]:
def get_annif_results(text, project_id='yso-en'):
    """Request suggestions for ``text`` from the module-level ``annif`` client.

    Args:
        text: text to send to the suggest endpoint.
        project_id: id of the project to query (defaults to 'yso-en').

    Returns:
        Whatever ``annif.suggest`` returns for this text and project.
    """
    return annif.suggest(text=text, project_id=project_id)

In [None]:
def predict_from_annif(text, project_id='yso-en', output="labels"):
    """Return the suggested labels or scores for one piece of text.

    Args:
        text: text to annotate. Values that are not a non-blank string
            (e.g. ``None``, NaN from a missing DataFrame cell, ``""``) yield
            an empty list without contacting the API.
        project_id: id of the project to query (defaults to 'yso-en').
        output: ``'labels'`` to get the label list; any other value returns
            the score list (scores are 4-decimal strings).

    Returns:
        A list of labels or a list of formatted scores, possibly empty.
    """
    # A missing cell in a pandas column is a float NaN, not a string; sending
    # it on would waste a request on meaningless input, so short-circuit here.
    if not isinstance(text, str) or not text.strip():
        return []

    results = get_annif_results(text, project_id)
    labels, scores = store_annif_results(results)
    return labels if output == 'labels' else scores

In [None]:
# Annotate every description and keep labels and scores side by side.
df_annif = df.copy()
# Query the API once per row and split the (labels, scores) pair afterwards.
# Fetching labels and scores in two separate passes would send every
# description to the service twice.
annif_pairs = df_annif["DESCR"].apply(
    lambda x: store_annif_results(get_annif_results(x))
)
df_annif["annif_labels"] = annif_pairs.apply(lambda pair: pair[0])
df_annif["annif_scores"] = annif_pairs.apply(lambda pair: pair[1])

In [None]:
# Write the annotated frame to CSV under data_path.
# NOTE(review): output_path is defined earlier but not used here — confirm
# that data_path is the intended destination.
annif_csv = os.path.join(data_path, output_filename)
df_annif.to_csv(annif_csv)