# Reward mechanism demo

## Imports + Setup

In [35]:
from chunking.protocol import chunkSynapse
import bittensor as bt
import requests
from nltk.tokenize import sent_tokenize, word_tokenize

In [None]:
import nltk
# Fetch the 'punkt' resource through NLTK's downloader. Presumably this is the
# tokenizer data needed by sent_tokenize/word_tokenize imported above — TODO confirm.
nltk.download('punkt')

In [37]:
!pip3 install python-dotenv -q


[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: pip install --upgrade pip


### OpenAI API key necessary for validation

In [38]:
from dotenv import load_dotenv
import os

from openai import OpenAI

# Load settings from the local .env file into the process environment.
load_dotenv()

# Fail fast when the key is absent or empty, before any later cell tries to use it.
if not os.getenv('OPENAI_API_KEY'):
    raise Exception("Make sure to set OPENAI_API_KEY in your .env file")

In [39]:
netuid = 1 # TODO: set netuid
network = 'ws://localhost:9946' # TODO: set network

# Build the metagraph for the subnet `netuid` on the chosen network endpoint.
metagraph = bt.metagraph(netuid, network)

# Display the per-UID `I` vector; later cells use its argmax/argsort to pick the
# "top" miners (presumably the incentive values — TODO confirm).
metagraph.I

array([0.27187   , 0.31795225, 0.1597467 , 0.14297703, 0.09549096,
       0.00793469, 0.00399786], dtype=float32)

## Wallet setup

In [40]:
wallet_name = "owner" # TODO: set wallet name
hotkey_name = "validator-1" # TODO: set hotkey name

# Wallet selected by the wallet/hotkey names configured above.
validator_wallet = bt.wallet(name=wallet_name, hotkey=hotkey_name)
# Dendrite created from that wallet; later cells use it to query miner axons.
validator_dendrite = bt.dendrite(wallet=validator_wallet)

## Query the top miner

In [41]:
page = 33653136 # hard article id

def generate_synthetic_synapse(page_id: int = page, timeout: float = 10.0) -> chunkSynapse:
    """Build a chunking request from the plain-text extract of a Wikipedia article.

    Args:
        page_id: Wikipedia page id to fetch. Defaults to the `page` constant above.
        timeout: Seconds to wait for the Wikipedia API before giving up.

    Returns:
        A `chunkSynapse` whose `document` is the article text with all runs of
        whitespace collapsed to single spaces, with `time_soft_max=5.0` and
        `chunk_size=4096`.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not answer within `timeout` seconds.
        KeyError: If the API response contains no extract for `page_id`.
    """
    api_response = requests.get(
        'https://en.wikipedia.org/w/api.php',
        params={
            'action': 'query',
            'format': 'json',
            'pageids': page_id,
            'prop': 'extracts',
            'explaintext': True,
            'exsectionformat': 'plain',
        },
        # Without a timeout, requests may block forever on a stalled connection.
        timeout=timeout,
    )
    # Surface HTTP errors here instead of failing later on an unexpected JSON body.
    api_response.raise_for_status()

    page_entry = api_response.json()['query']['pages'][str(page_id)]
    if 'extract' not in page_entry:
        raise KeyError(f"Wikipedia returned no extract for page id {page_id}")

    # str.split() with no arguments splits on any whitespace (newlines and tabs
    # included), so joining on a single space normalizes the whole document.
    document = ' '.join(page_entry['extract'].split())
    return chunkSynapse(document=document, time_soft_max=5.0, chunk_size=4096)


# Treat the UID with the largest `metagraph.I` value as the top miner.
top_miner_uid = metagraph.I.argmax().item()

print(f"Top miner uid: {top_miner_uid}")

top_miner_axon = metagraph.axons[top_miner_uid]

# Generate the 'synthetic' query from the Wikipedia article configured above.
synapse = generate_synthetic_synapse()

print(f"Document: {synapse.document[:100]} ...")

# deserialize=False because the code below reads `.chunks` and `.dendrite`
# from the returned synapse objects.
responses: list[chunkSynapse] = validator_dendrite.query(
    axons=[top_miner_axon],
    synapse=synapse,
    deserialize=False,
)

top_miner_response = responses[0]

# Guard against a miner that returned nothing (or fewer than three chunks) so the
# preview below cannot raise TypeError/IndexError.
top_miner_chunks = top_miner_response.chunks or []

print(f"Received {len(top_miner_chunks)} chunks from top miner, process time: {top_miner_response.dendrite.process_time}")

if top_miner_chunks:
    # Preview the third chunk when available, otherwise the last one returned.
    preview_index = min(2, len(top_miner_chunks) - 1)
    print(f"Chunk {preview_index + 1}: {top_miner_chunks[preview_index][:50]} ...")
else:
    print("Top miner returned no chunks")

Top miner uid: 1
Document: ? (also written Tanda Tanya, meaning Question Mark) is a 2011 Indonesian drama film directed by Hanu ...
Received 5 chunks from top miner, process time: 0.04235506057739258
Chunk 3: The director of Mahaka Pictures, Erick Thohir, sta ...


## Reward the top miner's response

### Reward Mechanism

1) Chunks must not have missing words or scrambled word order
    - every contiguous three-word sequence from the source document should appear in at least one chunk
    - the words in each chunk should appear in the same order as in the source document
2) There is a penalty for chunks that are too long (> `chunk_size` characters)
    - Snippet:
     ```py
     size_penalty += ((chunk_length / chunk_size) - 1) * 10
     ```
3) Intra-chunk embeddings should have high cosine similarity, inter-chunk embeddings should have low cosine similarity
    
    - Snippet:
    ```py
    for i in range(len(testChunks) - 1):
        j = i + 1
        while j < len(testChunks):
            if testChunks[i].sourceChunk == testChunks[j].sourceChunk:
                reward += np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j]))
            else:
                reward -= np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j]))
            j += 1
    ```

    - `testChunks` are three-sentence chunks formed from each of the returned miner chunks
    - `embeddings` are the embeddings of those three-sentence chunks, via OpenAI's `text-embedding-ada-002` model

In [42]:
from openai import OpenAI
from chunking.validator.reward import reward

# Presumably switches bittensor logging to debug level so the verbose scoring
# output below is shown — TODO confirm.
bt.debug()

# Score the top miner's response against the original document. `self=None`
# because `reward` is called here outside of a validator instance.
top_miner_reward = reward(
    self=None,
    document=synapse.document,
    chunk_size=synapse.chunk_size,
    response=top_miner_response,
    override_client=OpenAI(),
    override_num_embeddings=50,
    verbose=True,
)

print(f"Top miner reward: {top_miner_reward}")

Top miner reward: 0.0039050038673031375


## Group tournament ranking

Generally, a validator queries a group of miners at request time. The default group size is `min(metagraph.n, 25)`. The validator ranks the miners in the group using the reward mechanism to produce local ranks, which are then translated into global ranks. A lower rank is better.

Here's an example of querying a group of 4 miners (assuming the `group_size` is 4):

In [43]:
from chunking.validator.reward import rank_responses
import numpy as np


# Size of the demo group; the slice below takes this many UIDs.
group_size = 4

# UIDs of the `group_size` largest `metagraph.I` values, highest first.
top_4_miners_uids = metagraph.I.argsort()[-group_size:][::-1]

print(f"Top 4 miners uids: {top_4_miners_uids}")

axons = [metagraph.axons[uid] for uid in top_4_miners_uids]

responses: list[chunkSynapse] = validator_dendrite.query(
    axons=axons,
    synapse=synapse,
    deserialize=False,
)

for response in responses:
    # `or []` keeps the summary printable even if a miner returned no chunks.
    print(f"Received {len(response.chunks or [])} chunks from {response.axon.hotkey[:10]}, process time: {response.dendrite.process_time}")

# One client is enough for the whole group; building it once avoids creating a
# new OpenAI client for every response being scored.
openai_client = OpenAI()

rewards = np.array([
    reward(
        self=None,
        document=synapse.document,
        chunk_size=synapse.chunk_size,
        response=response,
        override_client=openai_client,
        override_num_embeddings=50,
        verbose=False,
    )
    for response in responses
])

print(f"Rewards: {rewards}")

# Convert rewards into local ranks within this group (in the output shown below,
# the highest reward receives rank 0).
response_ranks = rank_responses(rewards)

print(f"Response ranks: {response_ranks}")

Top 4 miners uids: [1 0 2 3]
Received 5 chunks from 5FZWySZzkt, process time: 0.11609911918640137
Received 5 chunks from 5DMQN4xwHm, process time: 0.1407170295715332
Received 5 chunks from 5CDEHMHzvr, process time: 0.14405298233032227
Received 5 chunks from 5CzASa8NMS, process time: 0.08191990852355957
Rewards: [0.00392112 0.00406185 0.00398069 0.00400092]
Response ranks: [3. 0. 2. 1.]


- These response ranks would then be translated into global ranks (no change in this case, as they are the top 4 miners) and combined with the previous `scores` as a moving average (therefore a lower `score` is better, even though a higher `reward` is better).

- Weights are then determined based on the global `scores` (which is basically just ranks as a moving average for all miner UIDs)

- So, if the ranks are [2, 0, 3, 1] (0-indexed), the scores might be something like [2.0393, 0.4593, 2.539, 1.3940] (as they are moving averages of the ranks), and the weights would be [0.5, 1, 0.25, 0.75], where each index is a UID: the UID with the lowest score receives the highest weight.