# Reward mechanism demo

## Imports + Setup

In [1]:
from chunking.protocol import chunkSynapse
import bittensor as bt
import requests
from nltk.tokenize import sent_tokenize, word_tokenize

### Downloading tokenizer

In [2]:
import nltk
# Fetch the 'punkt' tokenizer data; presumably required by the sent_tokenize/word_tokenize helpers imported earlier — confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/daniel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [25]:
!pip3 install python-dotenv tabulate -q


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: pip install --upgrade pip


### OpenAI API key necessary for validation

In [4]:
from dotenv import load_dotenv
import os

from openai import OpenAI

# Read a local .env file (if any) into the process environment.
load_dotenv()

# Later cells build an OpenAI client for the reward computation, so stop early
# when the key is missing or empty.
if not os.getenv('OPENAI_API_KEY'):
    raise Exception("Make sure to set OPENAI_API_KEY in your .env file")

In [5]:
# Subnet id to inspect and the network endpoint to read it from.
netuid = 40
network = "ws://subvortex.info:9944" # or 'finney'

# Build the metagraph object for this subnet using the endpoint above.
metagraph = bt.metagraph(netuid, network)

# Display the `I` array. Later cells rank miners by it (argmax / argsort), so it
# presumably holds one incentive value per UID — confirm against the bittensor docs.
metagraph.I

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.12974747, 0.        ,
       0.        , 0.        , 0.        , 0.51534295, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

## Wallet setup

In [6]:
# Names of the local wallet and hotkey to use; replace with your own before running.
wallet_name = "owner" # TODO: set wallet name
hotkey_name = "validator-1" # TODO: set hotkey name

# The dendrite built from this wallet is what the later cells use to query miner axons.
validator_wallet = bt.wallet(name=wallet_name, hotkey=hotkey_name)
validator_dendrite = bt.dendrite(wallet=validator_wallet)

## Query the top miner

In [16]:
import importlib

import chunking

# Re-execute the already-imported `chunking` package so local edits are picked up
# without restarting the kernel.
# NOTE(review): importlib.reload() reloads only the module object passed in, not
# submodules (e.g. `chunking.validator.*`) that were imported earlier — confirm
# this is sufficient for the edits you are iterating on.
importlib.reload(chunking)

import chunking

In [17]:
from chunking.validator.task_api import generate_synthetic_synapse

In [18]:
page = 33653136 # fixed article id

# The UID with the largest `metagraph.I` value is treated as the top miner in this demo.
top_miner_uid = metagraph.I.argmax().item()

print(f"Top miner uid: {top_miner_uid}")

top_miner_axon = metagraph.axons[top_miner_uid]

# Generate the 'synthetic' query: a featured article from wikipedia.
synapse, pageid = generate_synthetic_synapse(None, pageid=page, timeout=20)


print(f"Document: {synapse.document[:100]} ...")

# Query only the top miner; deserialize=False keeps the full synapse objects
# so the dendrite timing info can be read below.
responses: list[chunkSynapse] = validator_dendrite.query(
    axons=[top_miner_axon],
    synapse=synapse,
    deserialize=False
)

top_miner_response = responses[0]

# A miner can come back without chunks (the group-ranking cell guards against the
# same case), so normalise to a list instead of calling len()/indexing blindly.
top_miner_chunks = top_miner_response.chunks or []

print(f"Received {len(top_miner_chunks)} chunks from top miner, process time: {top_miner_response.dendrite.process_time}")

# Preview the third chunk only when the miner actually returned that many.
preview_index = 2
if len(top_miner_chunks) > preview_index:
    print(f"Chunk {preview_index + 1}: {top_miner_chunks[preview_index][:50]} ...")
else:
    print(f"Top miner returned fewer than {preview_index + 1} chunks; nothing to preview")

Top miner uid: 18
Document: ? (also written Tanda Tanya, meaning Question Mark) is a 2011 Indonesian drama film directed by Hanu ...
Received 5 chunks from top miner, process time: 0.835676908493042
Chunk 3: The director of Mahaka Pictures, Erick Thohir, sta ...


## Reward the top miner's response

### Reward Mechanism

1) Chunks must not have missing words/messed up ordering
    - every contiguous three-word sequence from the source document should appear in at least 1 chunk
    - every word in a single chunk should be in the same order as in the source document
2) There is a penalty for chunks that are too long (> `chunk_size` characters)
    - Snippet:
     ```py
     size_penalty += ((chunk_length / chunk_size) - 1) * 10
     ```
3) Intra-chunk embeddings should have high cosine similarity, inter-chunk embeddings should have low cosine similarity
    
    - Snippet:
    ```py
    for i in range(len(testChunks) - 1):
        j = i + 1
        while j < len(testChunks):
            if testChunks[i].sourceChunk == testChunks[j].sourceChunk:
                reward += np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j]))
            else:
                reward -= np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j]))
            j += 1
    ```

    - `testChunks` are three-sentence chunks formed from each of the returned miner chunks
    - `embeddings` are the embeddings of the three sentence chunks, via OpenAI's `text-embedding-ada-002` model

In [35]:
from tabulate import tabulate

def print_extra_info_dict(extra_info_dicts: list[dict], uids: list[int]):
    """Print a grid-formatted table of reward components, one row per miner.

    Args:
        extra_info_dicts: Per-miner info dicts. The keys "embedding_reward",
            "size_penalty" and "qty_penalty" are read; each falls back to 0
            when absent.
        uids: Miner UIDs, positionally aligned with `extra_info_dicts`.

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    assert len(extra_info_dicts) == len(uids)

    # Column order here must match the header order passed to tabulate below.
    column_keys = ("embedding_reward", "size_penalty", "qty_penalty")
    rows = [
        [uid, *(info.get(key, 0) for key in column_keys)]
        for info, uid in zip(extra_info_dicts, uids)
    ]

    print(
        tabulate(
            rows,
            headers=["UID", "Embedding reward", "Size penalty", "Quantity penalty"],
            tablefmt="grid",
        )
    )


In [30]:
from openai import OpenAI
from chunking.validator.reward import reward

# Turn on bittensor's debug mode (presumably so the verbose reward trace is shown — confirm).
bt.debug()

print(synapse.chunk_qty)

# Score the top miner's response against the document and limits carried by the synapse.
top_miner_reward, extra_info_dict = reward(
    self=None,
    document=synapse.document,
    chunk_size=synapse.chunk_size,
    chunk_qty=synapse.chunk_qty,
    response=top_miner_response,
    override_client=OpenAI(),
    override_num_embeddings=50,
    verbose=True,
)

# Single-row table: the reward components for the top miner only.
print_extra_info_dict([extra_info_dict], [top_miner_uid])

print(f"Top miner reward: {top_miner_reward}")

8
Chunk 0 has 33 sentences. Added 11 test segments
Chunk 1 has 41 sentences. Added 14 test segments
Chunk 2 has 33 sentences. Added 11 test segments
Chunk 3 has 33 sentences. Added 11 test segments
Chunk 4 has 8 sentences. Added 3 test segments
Every set of 3 adjacent words from the document appears in the chunks
Using 50 test segments for evaluation
Calculated embeddings for 50 test segments
Embedding reward: 0.01349250084228093
Size penalty: 0
Quantity penalty: 0
Ensuring reward is positive (e ** reward):
1.0135839353959122
  UID    Embedding reward    Size penalty    Quantity penalty
-----  ------------------  --------------  ------------------
   18           0.0134925               0                   0
Top miner reward: 1.0135839353959122


## Group tournament ranking

Generally, a validator queries a group of miners at request time. The default group size is `min(metagraph.n, 25)`. The validator then ranks the miners in this group relative to each other based on the incentive mechanism. These local ranks are then translated into global ranks. A lower overall rank is better.

Here's an example of querying a group of 4 miners (assuming the `group_size` is 4):

In [33]:
from chunking.validator.reward import rank_responses
import numpy as np


# Take the four UIDs with the largest `metagraph.I` values, highest first.
top_4_miners_uids = metagraph.I.argsort()[-4:][::-1]

print(f"Top 4 miners uids: {top_4_miners_uids}")

axons = [metagraph.axons[uid] for uid in top_4_miners_uids]

# Send the same synapse to every miner in the group.
responses: list[chunkSynapse] = validator_dendrite.query(
    axons=axons,
    synapse=synapse,
    deserialize=False
)

# One client serves the whole group; constructing it inside the loop only
# repeats the same setup for every response.
openai_client = OpenAI()

rewards = []
extra_info_dicts = []

for response in responses:
    # Fall back to placeholders so the summary line prints even for a failed query.
    num_chunks = len(response.chunks) if response.chunks else 0
    hotkey_str = response.axon.hotkey[:10] if response.axon.hotkey else "No hotkey found"
    process_time = response.dendrite.process_time if response.dendrite.process_time else "No process time found"

    print(f"Received {num_chunks} chunks from hotkey: {hotkey_str}, process time: {process_time}")

    if not response.chunks:
        # No usable answer: record a zero reward and an empty info dict so both
        # lists stay positionally aligned with `top_4_miners_uids`.
        rewards.append(0)
        extra_info_dicts.append({})
        continue

    reward_value, extra_info_dict = reward(
        self=None,
        document=synapse.document,
        chunk_size=synapse.chunk_size,
        chunk_qty=synapse.chunk_qty,
        response=response,
        override_client=openai_client,
        override_num_embeddings=50,
        verbose=True,
    )
    rewards.append(reward_value)
    extra_info_dicts.append(extra_info_dict)

rewards = np.array(rewards)

print(f"Rewards: {rewards}")

# Convert the raw rewards into ranks within this group.
response_ranks = rank_responses(rewards)

print(f"Response ranks: {response_ranks}")

Top 4 miners uids: [ 18 116  13 100]
Received 5 chunks from hotkey: 5F2LKK2qEt, process time: 0.7323508262634277
Chunk 0 has 33 sentences. Added 11 test segments
Chunk 1 has 41 sentences. Added 14 test segments
Chunk 2 has 33 sentences. Added 11 test segments
Chunk 3 has 33 sentences. Added 11 test segments
Chunk 4 has 8 sentences. Added 3 test segments
Every set of 3 adjacent words from the document appears in the chunks
Using 50 test segments for evaluation
Calculated embeddings for 50 test segments
Embedding reward: 0.013467973409598932
Size penalty: 0
Quantity penalty: 0
Ensuring reward is positive (e ** reward):
1.0135590750890504
Received 5 chunks from hotkey: 5G7HVdSrYj, process time: 0.819551944732666
Chunk 0 has 33 sentences. Added 11 test segments
Chunk 1 has 41 sentences. Added 14 test segments
Chunk 2 has 33 sentences. Added 11 test segments
Chunk 3 has 33 sentences. Added 11 test segments
Chunk 4 has 8 sentences. Added 3 test segments
Every set of 3 adjacent words from the

In [36]:
# Tabulate the reward components collected in the loop above, one row per queried UID.
print_extra_info_dict(extra_info_dicts, top_4_miners_uids)

+-------+--------------------+----------------+--------------------+
|   UID |   Embedding reward |   Size penalty |   Quantity penalty |
|    18 |          0.013468  |              0 |                  0 |
+-------+--------------------+----------------+--------------------+
|   116 |          0.0134939 |              0 |                  0 |
+-------+--------------------+----------------+--------------------+
|    13 |          0         |              0 |                  0 |
+-------+--------------------+----------------+--------------------+
|   100 |          0.0134873 |              0 |                  0 |
+-------+--------------------+----------------+--------------------+


- These response ranks would then be translated into global ranks (no change in this case, as they are the top 4 miners) and combined with the previous `scores` as a moving average (therefore a lower `score` is better, even though a higher `reward` is better).

- Weights are then determined based on the global `scores` (which is basically just ranks as a moving average for all miner UIDs)

- So, if the ranks are [2, 0, 3, 1] (0-indexed), the scores might be something like [2.0393, .4593, 2.539, 1.3940] (as it is the moving average of ranks), and the weights would be [0.5, 1, 0.25, 0.75], where each index is a UID.