%run ../ipynb_util_tars.py

In [1]:
import os

from qdrant_client import QdrantClient, models

client = QdrantClient(
    os.getenv("QDRANT_API_URL"),
    port=os.getenv("QDRANT_API_PORT"),
    api_key=os.getenv("QDRANT_API_KEY"),
)



In [20]:
# scroll over points that HAVE labels ("labels" field is not null)
scroll_result = client.scroll(
    collection_name="publications",
    limit=500,
    with_payload=["id", "labels", "xai", "description"],
    scroll_filter=models.Filter(
        must_not=[
            models.IsNullCondition(
                is_null=models.PayloadField(key="labels")
            )
        ]
    )
)
print(len(scroll_result[0]))

for point in scroll_result[0]:
    if len(point.payload["xai"]) != 1:
        print("huh", point.payload["id"])

    shap_values = point.payload["xai"][0]["xai_values"]["token_scores"]
    # check if each value is in the range [-1, 1]
    # the shap_values are of size [len(tokens), 17]
    for token_scores in shap_values:
        for score in token_scores:
            if score < -0.1 or score > 0.1:
                print(point.payload["id"])
                break

print()
res = scroll_result[0][5]
print(res.payload["id"])
print(res.payload["labels"])
print(res.payload["xai"][0]["predicted_label"])
print(res.payload["xai"][0]["probs"])
print(res.payload["description"])

384
oai:www.zora.uzh.ch:125412
oai:www.zora.uzh.ch:125412
oai:www.zora.uzh.ch:143565
oai:www.zora.uzh.ch:148179
oai:www.zora.uzh.ch:148179
oai:www.zora.uzh.ch:151857

oai:www.zora.uzh.ch:87341
{'sdg12': {'ts': 1703092276, 'user': 'expert', 'val': 1}}
16
[[0.007794328965246677, 0.007933328859508038, 0.011707817204296587, 0.003974159713834524, 0.015394791960716248, 0.00841787550598383, 0.00809470098465681, 0.030228832736611366, 0.02986086718738079, 0.1111711859703064, 0.02253444492816925, 0.05184902995824814, 0.020818442106246948, 0.012820121832191944, 0.016828157007694244, 0.5926969647407532, 0.04787500575184822]]
Transnational governance schemes (TGSs) are interorganizational networks of public and/or private actors that jointly regulate global public policy issues, such as the prevention of human rights violations and the protection of ecosystems. Considering that TGSs mainly address issues of public concern, the general public represents a major source of legitimacy in transnational 

In [None]:
xai_out_dict = []

for point in scroll_result[0]:
    xai_out_dict.append(
        {
            "id": point.payload["id"],
            "xai": point.payload["xai"],
        }
    )

In [None]:
# save xai_out_dict on disk as json
# import json

# with open("xai_out_dict.json", "w") as f:
#    json.dump(xai_out_dict, f)


In [3]:
points_with_labels_count = client.count(
    collection_name="publications",
    count_filter=models.Filter(
        must_not=[
            models.IsNullCondition(
                is_null=models.PayloadField(key="labels")
            )
        ]
    ),
    exact=True
)
print(points_with_labels_count)

sdgs = [f"sdg{i}" for i in range(1, 18)]
sdg_counts = {}

for sdg in sdgs:
    result = client.count(
        collection_name="publications",
        count_filter=models.Filter(
            must_not=[
                models.IsEmptyCondition(
                    is_empty=models.PayloadField(key=f"labels.{sdg}")
                )
            ]
        ),
        exact=True
    )
    sdg_counts[sdg] = result.count

print(sdg_counts)
print(sum(sdg_counts.values()))

count=384
{'sdg1': 6, 'sdg2': 13, 'sdg3': 43, 'sdg4': 2, 'sdg5': 22, 'sdg6': 3, 'sdg7': 17, 'sdg8': 26, 'sdg9': 13, 'sdg10': 31, 'sdg11': 3, 'sdg12': 30, 'sdg13': 41, 'sdg14': 14, 'sdg15': 79, 'sdg16': 34, 'sdg17': 7}
384


In [None]:
assert sum(sdg_counts.values()) == points_with_labels_count.count

In [None]:
client.count(
    collection_name="publications",
    count_filter=models.Filter(
        must_not=[
            models.IsEmptyCondition(
                is_empty=models.PayloadField(key=f"xai")
            )
        ]
    ),
    exact=True
)

CountResult(count=384)