# Reward mechanism demo

## Imports + Setup

In [61]:
from chunking.protocol import chunkSynapse
import bittensor as bt
import requests
from nltk.tokenize import sent_tokenize, word_tokenize

### Downloading tokenizer

In [62]:
import nltk
# Fetch the 'punkt' tokenizer data; presumably what the sent_tokenize /
# word_tokenize imports from nltk.tokenize rely on — TODO confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/daniel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [63]:
!pip3 install python-dotenv tabulate -q


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: pip install --upgrade pip


### OpenAI API key necessary for validation

In [64]:
from dotenv import load_dotenv
import os

from openai import OpenAI

# Load the .env file so the key check below can see its variables.
load_dotenv()

# Fail fast when the key is unset or empty, before any validation code runs.
if not os.getenv('OPENAI_API_KEY'):
    raise Exception("Make sure to set OPENAI_API_KEY in your .env file")

In [46]:
netuid = 40
network = "ws://subvortex.info:9944" # or 'finney'

# Build the metagraph for `netuid` using the network endpoint chosen above.
metagraph = bt.metagraph(netuid, network)

# Display the per-UID `I` array (presumably incentive — TODO confirm); the
# later cells use its largest entries to pick the top miners.
metagraph.I

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.15556572, 0.        ,
       0.        , 0.        , 0.        , 0.5013809 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

## Wallet setup

In [65]:
wallet_name = "" # TODO: set wallet name
hotkey_name = "" # TODO: set hotkey name

# Wallet identified by the two names above; the dendrite created from it is
# what issues the queries to miner axons in the later cells.
validator_wallet = bt.wallet(name=wallet_name, hotkey=hotkey_name)
validator_dendrite = bt.dendrite(wallet=validator_wallet)

## Query the top miner

In [57]:
from chunking.validator.task_api import generate_synthetic_synapse

In [66]:
page = 33653136 # fixed article id

# The UID holding the largest value in metagraph.I is treated as the top miner.
top_miner_uid = metagraph.I.argmax().item()

print(f"Top miner uid: {top_miner_uid}")

top_miner_axon = metagraph.axons[top_miner_uid]

# Generate the 'synthetic' query: a featured article from wikipedia.
synapse, pageid = generate_synthetic_synapse(None, pageid=page, timeout=20)


print(f"Document: {synapse.document[:100]} ...")

# Send the synapse to the single top-miner axon; deserialize=False keeps the
# full synapse objects so .chunks and .dendrite.process_time stay accessible.
responses: list[chunkSynapse] = validator_dendrite.query(
    axons=[top_miner_axon],
    synapse=synapse,
    deserialize=False
)

top_miner_response = responses[0]

# Nothing can be scored without chunks, so stop here.
if not top_miner_response or not top_miner_response.chunks:
    raise Exception("No response from top miner")

print(f"Received {len(top_miner_response.chunks)} chunks from top miner, process time: {top_miner_response.dendrite.process_time}")

# Preview the third chunk when there is one; otherwise fall back to the last
# chunk so a response with fewer than three chunks does not raise IndexError.
preview_index = min(2, len(top_miner_response.chunks) - 1)
print(f"Chunk {preview_index + 1}: {top_miner_response.chunks[preview_index][:50]} ...")

Top miner uid: 18
Document: ? (also written Tanda Tanya, meaning Question Mark) is a 2011 Indonesian drama film directed by Hanu ...
Received 5 chunks from top miner, process time: 0.7442488670349121
Chunk 3: The director of Mahaka Pictures, Erick Thohir, sta ...


In [72]:
from tabulate import tabulate

def print_extra_info_dict(extra_info_dicts: list[dict], uids: list[int]):
    """Print a grid table of reward components, one row per miner UID.

    `extra_info_dicts` and `uids` are paired positionally and must have the
    same length. A component key missing from a dict is shown as 0.
    """
    assert len(extra_info_dicts) == len(uids)

    headers = ["UID", "Embedding reward", "Size penalty", "Quantity penalty", "Time Penalty"]
    # Dict keys in the same order as the component columns in `headers`.
    component_keys = ("embedding_reward", "size_penalty", "qty_penalty", "time_penalty")

    rows = [
        [uid, *(info.get(key, 0) for key in component_keys)]
        for info, uid in zip(extra_info_dicts, uids)
    ]

    print(tabulate(rows, headers=headers, tablefmt="grid"))


## Reward the top miner's response

### Reward Mechanism

1) Chunks must not have missing words/messed up ordering
    - every contiguous three-word sequence from the source document should be in at least 1 chunk
    - every word in a single chunk should be in the same order as in the source document
2) Penalties (all penalties are passed into an exponential function, and these results are multiplied with the "embedding reward")
    - There is a penalty for chunks that are too long (> `chunk_size` characters)
        - Snippet:
        ```py
        size_penalty += ((chunk_length / chunk_size) - 1) * 10
        ```        
    - There is a penalty for too many chunks (> `chunk_qty` chunks)
        - `chunk_qty` is currently calculated as:
        ```py
        ceil(ceil(len(document) / chunk_size) * 1.5)
        ```        
    - There is a time penalty (> `time_soft_max` seconds of process time)
        - `time_soft_max` is currently `0.75 * synapse.timeout`; `timeout` defaults to 20 seconds, so the soft maximum is 15 seconds
3) Intra-chunk embeddings should ideally have high cosine similarity and inter-chunk embeddings should ideally have low cosine similarity    
    - Snippet:
    ```py
    for i in range(len(testChunks) - 1):
        j = i + 1
        while j < len(testChunks):
            if testChunks[i].sourceChunk == testChunks[j].sourceChunk:
                intrachunk_similarities.append(np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j])))
            else:
                interchunk_similarities.append(np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j])))
            j += 1            
    
    reward = (
        (np.mean(intrachunk_similarities) if len(intrachunk_similarities) > 0 else 0)
        - (np.mean(interchunk_similarities) if len(interchunk_similarities) > 0 else 0)
    )
    ```

    - `testChunks` are three sentence chunks formed from each of the returned miner chunks
    - `embeddings` are the sampled embeddings of the three sentence chunks, currently created via OpenAI's `text-embedding-ada-002` model
    - `reward` is the "embedding reward" for the miner
    - This reward is exponentiated to ensure it is positive, then the penalties above are applied to get the final reward

In [73]:
from openai import OpenAI
from chunking.validator.reward import reward

# Presumably switches bittensor logging to debug level — TODO confirm.
bt.debug()

print(synapse.chunk_qty)

# Score the top miner's response. reward() hands back the final reward plus a
# dict of its components, which is tabulated right after.
top_miner_reward, extra_info_dict = reward(
    self=None,
    document=synapse.document,
    chunk_size=synapse.chunk_size,
    chunk_qty=synapse.chunk_qty,
    response=top_miner_response,
    override_client=OpenAI(),
    override_num_embeddings=50,
    verbose=True,
)

print_extra_info_dict([extra_info_dict], [top_miner_uid])

print(f"Top miner reward: {top_miner_reward}")

8
Chunk 0 has 33 sentences. Added 11 test segments
Chunk 1 has 41 sentences. Added 14 test segments
Chunk 2 has 33 sentences. Added 11 test segments
Chunk 3 has 33 sentences. Added 11 test segments
Chunk 4 has 8 sentences. Added 3 test segments
Every set of 3 adjacent words from the document appears in the chunks
Using 50 test segments for evaluation
Calculated embeddings for 50 test segments
Embedding reward: 0.013477152416154436
Size penalty: 0
Quantity penalty: 0
Ensuring reward is positive (e ** reward):
1.0135683785971434
+-------+--------------------+----------------+--------------------+----------------+
|   UID |   Embedding reward |   Size penalty |   Quantity penalty |   Time Penalty |
|    18 |          0.0134772 |              0 |                  0 |              0 |
+-------+--------------------+----------------+--------------------+----------------+
Top miner reward: 1.0135683785971434


## Group tournament ranking

Generally, a miner group is queried by a validator at request time. The default group size is `min(metagraph.n, 25)`. The validator will then rank the miners in this group relative to each other based on the incentive mechanism. These local ranks will then be translated into global ranks. Having a lower overall rank is better.

Here's an example of querying a group of 4 miners (assuming the `group_size` is 4):

In [74]:
from chunking.validator.reward import rank_responses
import numpy as np


# UIDs of the four largest entries of metagraph.I, highest first.
top_4_miners_uids = metagraph.I.argsort()[-4:][::-1]

print(f"Top 4 miners uids: {top_4_miners_uids}")

axons = [metagraph.axons[uid] for uid in top_4_miners_uids]

# Same synapse as before, now sent to all four axons at once.
responses: list[chunkSynapse] = validator_dendrite.query(
    axons=axons,
    synapse=synapse,
    deserialize=False
)

# One client shared by every reward() call, instead of constructing a new
# client for each response inside the loop.
openai_client = OpenAI()

rewards = []
extra_info_dicts = []

for response in responses:
    num_chunks = len(response.chunks) if response.chunks else 0
    hotkey_str = response.axon.hotkey[:10] if response.axon.hotkey else "No hotkey found"
    process_time = response.dendrite.process_time if response.dendrite.process_time else "No process time found"

    print(f"Received {num_chunks} chunks from hotkey: {hotkey_str}, process time: {process_time}")

    if not response.chunks:
        # No chunks: record a zero reward and an empty breakdown so both
        # lists stay positionally aligned with top_4_miners_uids.
        rewards.append(0)
        extra_info_dicts.append({})
        continue

    reward_value, extra_info_dict = reward(
        self=None,
        document=synapse.document,
        chunk_size=synapse.chunk_size,
        chunk_qty=synapse.chunk_qty,
        response=response,
        override_client=openai_client,
        override_num_embeddings=50,
        verbose=True,
    )
    rewards.append(reward_value)
    extra_info_dicts.append(extra_info_dict)

rewards = np.array(rewards)

print(f"Rewards: {rewards}")

# Convert the rewards into ranks relative to the other miners in this group.
response_ranks = rank_responses(rewards)

print(f"Response ranks: {response_ranks}")

Top 4 miners uids: [ 18 116  13 100]
Received 5 chunks from hotkey: 5F2LKK2qEt, process time: 0.7486388683319092
Chunk 0 has 33 sentences. Added 11 test segments
Chunk 1 has 41 sentences. Added 14 test segments
Chunk 2 has 33 sentences. Added 11 test segments
Chunk 3 has 33 sentences. Added 11 test segments
Chunk 4 has 8 sentences. Added 3 test segments
Every set of 3 adjacent words from the document appears in the chunks
Using 50 test segments for evaluation
Calculated embeddings for 50 test segments
Embedding reward: 0.01348732129546315
Size penalty: 0
Quantity penalty: 0
Ensuring reward is positive (e ** reward):
1.0135786855040612
Received 5 chunks from hotkey: 5G7HVdSrYj, process time: 0.7470688819885254
Chunk 0 has 33 sentences. Added 11 test segments
Chunk 1 has 41 sentences. Added 14 test segments
Chunk 2 has 33 sentences. Added 11 test segments
Chunk 3 has 33 sentences. Added 11 test segments
Chunk 4 has 8 sentences. Added 3 test segments
Every set of 3 adjacent words from the

In [75]:
# Tabulate the reward components collected for each of the four queried miners.
print_extra_info_dict(extra_info_dicts, top_4_miners_uids)

+-------+--------------------+----------------+--------------------+----------------+
|   UID |   Embedding reward |   Size penalty |   Quantity penalty |   Time Penalty |
|    18 |          0.0134873 |              0 |                  0 |              0 |
+-------+--------------------+----------------+--------------------+----------------+
|   116 |          0.0134873 |              0 |                  0 |              0 |
+-------+--------------------+----------------+--------------------+----------------+
|    13 |          0.0134984 |              0 |                  0 |              0 |
+-------+--------------------+----------------+--------------------+----------------+
|   100 |          0.0134711 |              0 |                  0 |              0 |
+-------+--------------------+----------------+--------------------+----------------+


These response ranks would then be translated into global ranks (no change in this case as they are the top 4 miners) and combined with the previous `scores` as a moving average (therefore a lower `score` is better, even if a higher `reward` is better). 

Weights are then determined based on the global `scores` (which is basically just ranks as a moving average for all miner UIDs)

So, if the ranks are [2, 0, 3, 1] (0-indexed):
- the old scores might be something like [2.0393, 0.55, 2.539, 1.3940] (as it is the moving average of ranks)
- the new scores might be something like [2.0373, 0.5225, 2.562, 1.3743] (with an `alpha` of 0.05)
    - the moving average calculation is:
    ```py
    new_score = global_rank * alpha + old_score * (1 - alpha)
    ```
- the weights would be [0.25, 1, 0.125, 0.5], where each index is a UID.