In [5]:
from pymongo import MongoClient


MONGO_HOST = 'localhost:27017'
MONGO_USER = ''
MONGO_PASS = ''


# Example usage:
client = MongoClient(MONGO_HOST)
db = client.cache
# collection = db.cache

In [None]:
from functools import wraps
from typing import Callable
from pymongo.collection import Collection


class MongoCache:
    def __init__(self, collection, max_size: int):
        assert isinstance(collection, Collection)
        self.collection = collection
        self.max_size = max_size

    def get(self, key):
        return self.collection.find_one({'key': key})

    def set(self, key, value):
        if self.collection.count_documents({}) >= self.max_size:
            self._remove_oldest()
        self.collection.insert_one({'key': key, 'value': value})

    def delete(self, key):
        self.collection.delete_one({'key': key})

    def _remove_oldest(self):
        doc = self.collection.find_one(sort=[('_id', -1)])
        if doc:
            self.collection.delete_one({'_id': doc['_id']})


def mongo_cache(cache: MongoCache):

    def decorator(func: Callable) -> Callable:

        @wraps(func)
        def wrapper(*args, **kwargs):
            key = str(args) + str(kwargs)
            result = cache.get(key)
            if result is not None:
                return result['value']

            result = func(*args, **kwargs)
            cache.set(key, result)
            return result

        def _set_cache(key, value):
            cache.set(key, value)

        def _get_cache(key):
            return cache.get(key)

        def _delete_cache(key):
            cache.delete(key)

        wrapper._cache = cache
        wrapper._set_cache = _set_cache
        wrapper._get_cache = _get_cache
        wrapper._delete_cache = _delete_cache
        return wrapper

    return decorator

In [103]:
from inspect import signature
from functools import wraps

from pymongo.collection import Collection


class MongoCache:
    def __init__(self, collection, max_size: int=1024):
        assert isinstance(collection, Collection)
        assert isinstance(max_size, int)
        assert max_size >= 0
        self.collection = collection
        self.max_size = max_size

    def get(self, key: str):
        return self.collection.find_one({ 'key': key })

    def set(self, key: str, value):
        if self.max_size and self.collection.count_documents({}) >= self.max_size:
            self._remove_oldest()
        self.collection.insert_one({ 'key': key, 'value': value })

    def delete(self, key: str):
        self.collection.delete_one({ 'key': key })

    def _remove_oldest(self):
        doc = self.collection.find_one(sort=[('_id', 1)])
        if doc:
            self.collection.delete_one({ '_id': doc['_id'] })

    def fifo_cache(self, using=None):
        def decorator(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                sign = signature(func)
                bound_args = sign.bind(*args, **kwargs)
                bound_args.apply_defaults()
                
                key = []
                for name, value in bound_args.arguments.items():
                    if not using or name in using:
                        key.append(f'{name}={value}')
                key = f"{func.__name__}({', '.join(key)})"

                result = self.get(key)
                if result is not None:
                    return result['value']
                
                result = func(*args, **kwargs)
                self.set(key, result)
                
                return result

            return wrapper

        return decorator

In [104]:
collection.drop()

In [105]:
cache = MongoCache(collection, 10)

In [106]:
@cache.fifo_cache(using={'x'})
def my_function(x, z=0):
    return x * x

for i in range(20):
    my_function(i)

In [21]:
from inspect import signature

signature(my_function)

<Signature (x, z=0)>

In [97]:
db = client.dynbench
collection = db.cache

In [37]:
collection.drop()

In [22]:
db.list_collection_names()

[]

In [38]:
for i in collection.find({}):
    print(i)

{'_id': ObjectId('698673a7dd2bb68e11e3a02e'), 'key': 'execute(query=SELECT DISTINCT ?p ?o WHERE { VALUES ?p { wdt:P31 wdt:P279 } wd:Q8502 ?p ?o })', 'value': {'head': {'vars': ['p', 'o']}, 'results': {'bindings': [{'p': {'type': 'uri', 'value': 'http://www.wikidata.org/prop/direct/P31'}, 'o': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q618123'}}, {'p': {'type': 'uri', 'value': 'http://www.wikidata.org/prop/direct/P31'}, 'o': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q12046615'}}, {'p': {'type': 'uri', 'value': 'http://www.wikidata.org/prop/direct/P279'}, 'o': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q35145263'}}, {'p': {'type': 'uri', 'value': 'http://www.wikidata.org/prop/direct/P279'}, 'o': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q106589819'}}]}}}
{'_id': ObjectId('698673a8dd2bb68e11e3a02f'), 'key': 'execute(query=SELECT DISTINCT ?p ?o WHERE { VALUES ?p { wdt:P31 wdt:P279 } wd:Q183 ?p ?o })', 'value': {'head': {'vars': ['p

In [64]:
collection.drop()

In [34]:
import inspect
import functools


def my_wrapper(using=None):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Get the function signature
            signature = inspect.signature(func)
            
            # Bind the arguments to the function signature
            bound_args = signature.bind(*args, **kwargs)
            
            # Apply default values
            bound_args.apply_defaults()
            
            key = []
            # Now you can access the arguments by name
            for param_name, param_value in bound_args.arguments.items():
                if not using or param_name in using:
                    key.append(f'{param_name}={param_value}')
                    # print(f"Important argument {param_name} = {param_value}")
            key = ', '.join(key)
            key = f'{func.__name__}({key})'
            print(key)
            
            # Call the original function
            return func(*args, **kwargs)
        
        return wrapper
    
    return decorator

# Example usage:

@my_wrapper(['a', 'c'])
def example_function(a, b, c):
    pass

example_function(1, 2, c=3)

example_function(a=1, c=3)


In [14]:
from dynutils import get_wikidata_label


WIKIDATA_AGENT='wiki_parser_online/0.17.1 (https://deeppavlov.ai; info@deeppavlov.ai) deeppavlov/0.17.1'
WIKIDATA_ENDPOINT='https://query.wikidata.org/bigdata/namespace/wdq/sparql'


get_wikidata_label('wd:Q183', WIKIDATA_ENDPOINT, WIKIDATA_AGENT, lang='en')

'Germany'

In [None]:
import requests

prompt = 'There is a question:\nWhat is the highest mountain in Germany?\nReplace "Germany" with "Nauru" in the question.\nProvide no other information.\nLanguare of the question is English.'

data = {
    'model': 'gpt-4o',
    # 'prompt': 'There is a question:\nWhat is the highest mountain in Germany?\nReplace "Germany" with "Nauru" in the question.\nProvide no other information.\nLanguare of the question is English.',
    "messages": [
        { 'role': 'user', 'content': prompt }
    ],
    'stream': False,
    'options': {
        'temperature': 0.0,
        'num_predict': 1000,
    }
}

LLM_URL = 'http://demos.swe.htwk-leipzig.de:40139/v1/chat/completions'
KEY = 'hgJ6WXSMpCu0nqYVhWzVq5BzrX0y5B'

LLM_URL = 'http://localhost:27017/v1/chat/completions'
KEY = ''

response = requests.post(
    LLM_URL, 
    json=data, 
    headers={ 'content-type': 'application/json', 'Authorization': f'Bearer {KEY}' },
    timeout=30.0
)


# model=, prompt=There is a question:\nWhat is the highest mountain in Germany?\nReplace "Germany" with "Nauru" in the question.\nProvide no other information.\nLanguare of the question is English., temp=0.0, max_tokens=1000)'

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [36]:
response.status_code, response.text

(200, '"What is the highest mountain in Nauru?"')

In [39]:
response.json()

'What is the highest mountain in Nauru?'

In [53]:
from enum import Enum


class Strategy(Enum):
    FIFO = 1
    LRU  = 2


In [54]:
Strategy.FIFO

<Strategy.FIFO: 1>

In [57]:
dynmongo_url = 'mongodb://demos.swe.htwk-leipzig.de:40129'
DYN_USER = 'admin'
DYN_PASS = 'koh6zNirFF#d'

dynmongo = MongoClient(dynmongo_url, username=DYN_USER, password=DYN_PASS)

In [65]:
dynmongo.list_database_names() # list_collection_names()

['admin', 'config', 'local']

In [64]:
dynmongo.drop_database('wikidata')

In [66]:
dynmongo.close()

In [67]:
from decouple import config

In [68]:
config('LLM_URL')

'http://localhost:11434/api/generate'

In [74]:
from datetime import datetime as dt

dt.now()

datetime.datetime(2026, 2, 10, 12, 44, 7, 520179)

In [86]:
collection.aggregate([ { '$sample': { 'size': 1 } } ])

<pymongo.synchronous.command_cursor.CommandCursor at 0x7852231709a0>

In [92]:
from functools import wraps


def _get_cache_key(func, args: tuple, kwargs: dict) -> str:
    # Generate a cache key based on the function and arguments
    return f"{func.__name__}({', '.join(map(str, args))}, {', '.join(f'{k}={v}' for k, v in kwargs.items())})"

def cache():
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            key = _get_cache_key(func, args, kwargs)
            print(key)
            result = func(*args, **kwargs)
            return result
        return wrapper
    return decorator

@cache()
def func(a, b, c=10):
    pass

func(6, 4)


func(6, 4, )


In [119]:
import threading
from inspect import signature
from functools import wraps
from typing import Callable, Any
from enum import Enum


class Policy(Enum):
    RR   = 1
    FIFO = 2
    LIFO = 3
    LRU  = 4
    MRU  = 5


class MongoCache:
    def __init__(self, collection, max_size: int=1024, policy=Policy.FIFO):
        assert isinstance(max_size, int)

        self.collection = collection
        self.max_size = max(0, max_size)
        self.policy = policy
        self.lock = threading.Lock()

        # enshure that collection is properly indexed
        with self.lock:
            collection.create_index('order', unique=True)

    def get_min_order(self):
        doc = self.collection.find_one({}, sort={ 'order': 1 })
        return doc['order']  if doc and 'order' in doc else  1

    def get_max_order(self):
        doc = self.collection.find_one({}, sort={ 'order': -1 })
        return doc['order']  if doc and 'order' in doc else  0

    def get(self, key: str):
        with self.lock:
            doc = self.collection.find_one({ 'key': key })
            if doc:
                order = self.get_max_order()
                self.collection.update_one({ '_id': doc['_id'] }, { '$set': { 'order': order + 1 } })
                return doc
            else:
                return None

    def set(self, key: str, value, policy = None):
        if not policy:
            policy = self.policy

        with self.lock:
            if self.max_size and self.collection.count_documents({}) >= self.max_size:
                match policy:
                    case Policy.RR:
                        # there is always at least one document
                        doc = next(self.collection.aggregate([ { '$sample': { 'size': 1 } } ]))
                    case Policy.FIFO:
                        doc = self.collection.find_one(sort=[('_id', 1)])
                    case Policy.LIFO:
                        doc = self.collection.find_one(sort=[('_id', -1)])
                    case Policy.LRU:
                        doc = self.collection.find_one(sort=[('order', 1)])
                    case Policy.MRU:
                        doc = self.collection.find_one(sort=[('order', -1)])
                    case _:
                        doc = None

                if doc:
                    self.collection.delete_one({ '_id': doc['_id'] })

            self.collection.insert_one({ 'key': key, 'order': self.get_max_order() + 1, 'value': value })

    def cache(self, using=None, policy=None):
        def decorator(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                sign = signature(func)
                bound_args = sign.bind(*args, **kwargs)
                bound_args.apply_defaults()
                
                key = []
                for name, value in bound_args.arguments.items():
                    if not using or name in using:
                        key.append(f'{name}={value}')
                key = f"{func.__name__}({', '.join(key)})"

                result = self.get(key)
                if result is not None:
                    return result['value']
                
                result = func(*args, **kwargs)
                self.set(key, result, policy)
                
                return result

            return wrapper

        return decorator


In [124]:
test_collection = db.test
test_collection.drop()


test_cache = MongoCache(test_collection, max_size=10)


@test_cache.cache
def func(a, b, c = 1):
    return (a + b) * c

In [122]:
for i in range(20):
    func(i, i, i)

In [123]:
for i in test_collection.find():
    print(i)

{'_id': ObjectId('698b8446d8115b04ed7688a3'), 'key': 'func(a=10, b=10, c=10)', 'order': 11, 'value': 200}
{'_id': ObjectId('698b8446d8115b04ed7688a4'), 'key': 'func(a=11, b=11, c=11)', 'order': 12, 'value': 242}
{'_id': ObjectId('698b8446d8115b04ed7688a5'), 'key': 'func(a=12, b=12, c=12)', 'order': 13, 'value': 288}
{'_id': ObjectId('698b8446d8115b04ed7688a6'), 'key': 'func(a=13, b=13, c=13)', 'order': 14, 'value': 338}
{'_id': ObjectId('698b8446d8115b04ed7688a7'), 'key': 'func(a=14, b=14, c=14)', 'order': 15, 'value': 392}
{'_id': ObjectId('698b8446d8115b04ed7688a8'), 'key': 'func(a=15, b=15, c=15)', 'order': 16, 'value': 450}
{'_id': ObjectId('698b8446d8115b04ed7688a9'), 'key': 'func(a=16, b=16, c=16)', 'order': 17, 'value': 512}
{'_id': ObjectId('698b8446d8115b04ed7688aa'), 'key': 'func(a=17, b=17, c=17)', 'order': 18, 'value': 578}
{'_id': ObjectId('698b8446d8115b04ed7688ab'), 'key': 'func(a=18, b=18, c=18)', 'order': 19, 'value': 648}
{'_id': ObjectId('698b8446d8115b04ed7688ac'), 

In [126]:
import bz2


for line in bz2.BZ2File('pagerank/2025-11-05.allwiki.links.rank.bz2', "r"):
    print(line)
    break

b'Q565\t77254.89626902606687509\n'


In [139]:
from math import log, log2


with open('pagerank/2025-11-05.allwiki.links.rank', 'r') as f:
    for x, line in enumerate(f):
        if x > 10:
            break

        entity, freq = line.split('\t')
        print(entity, int(float(freq)*100))

Q565 7725489
Q22664 7359690
Q30 5095667
Q1860 5022129
Q25670 4444602
Q33057 4084663
Q118455746 4058930
Q648266 3920825
Q4048908 3771328
Q36578 3479226
Q11466 3353436


In [208]:
from string import digits, ascii_lowercase, ascii_uppercase


def int_to_base(n, base):
    """
    Return base representation for int n.
    """
    assert 0 < base < 65

    base_digits = digits + ascii_lowercase + ascii_uppercase + '@#'

    if n == 0:
        return '0'
    else:
        result = ''

    q = abs(n)

    while q > 0:
        q, r = divmod(q, base)
        result += base_digits[r]

    return '-' if n < 0 else '' + ''.join(reversed(result))

In [223]:
lst = []
nom = None

with open('pagerank/2025-11-05.allwiki.links.rank', 'r') as f:
    with open('pagerank/allwiki.rank', 'w') as g:
        for x, line in enumerate(f):
            entity, freq = line.split('\t')

            # print(x, entity, float(freq)) # , end='\r')

            if float(freq) < 1.5:
                continue

            # entity = int(entity[1:])
            freq = int(float(freq)*10000) # shift down to decrease number of entries. Now all entities not included in file have pagerenk 0

            g.write(f'{entity}\t{freq/10000}\n')

            # print(x, entity, freq) # , end='\r')

             
            # entity = int_to_base(entity, 64)
            # freq = int_to_base(freq, 64)

            # if freq == '1do':
            #     nom = freq
            #     g.write(f'== {freq} ==\n')
            #     g.write(f'{entity}\n')
            # elif nom:
            #     if freq != nom:
            #         nom = freq
            #         g.write(f'== {freq} ==\n')
            #     g.write(f'{entity}\n')
            # else:
            #     g.write(f'{entity} {freq}\n')

            # # lst.append(freq)

In [209]:
base_digits = digits + ascii_lowercase + ascii_uppercase + '@#'

base_dict = {a: x for x, a in enumerate(base_digits)}

def baseX_to_decimal(n, base):
    mult = 1
    result = 0
    while n:
        result += mult * base_dict[n[-1]]
        mult *= base
        n = n[:-1]
    return result

number = int_to_base(3264563, 64)
baseX_to_decimal(number, 64)


3264563

In [None]:
baseX_to_decimal('1hS', 64)

4952

In [None]:
for i in range(10000000, 100000000):
    if baseX_to_decimal(int_to_base(i, 64), 64) != i:
        raise ValueError

KeyboardInterrupt: 

In [212]:
last = None

for a, b, c, d in zip(lst, lst[1:], lst[2:], lst[3:]):
    if a != b and d != c:
        last = a

In [213]:
last

'1do'