# Reward mechanism demo

## Imports + Setup

In [13]:
from chunking.protocol import chunkSynapse
import bittensor as bt
import requests
from nltk.tokenize import sent_tokenize, word_tokenize

In [14]:
import nltk
# Download the 'punkt' resource into the local nltk data directory.
# Presumably this is what the sent_tokenize / word_tokenize helpers imported
# in the first cell rely on — TODO confirm.
# As the last expression of the cell, the call's return value is what the
# notebook displays as the cell result.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/daniel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [15]:
# Subnet id and chain endpoint passed to bt.metagraph below. Both are
# placeholders that must be edited for the environment being demoed.
netuid = 1 # TODO: set netuid
network = 'ws://localhost:9946' # TODO: set network

# Build the metagraph for the configured subnet / endpoint.
metagraph = bt.metagraph(netuid, network)

# Display the metagraph's `I` array (presumably the per-UID incentive —
# verify against the bittensor docs). A later cell takes the argmax of this
# array to choose the "top miner".
metagraph.I

array([0., 0., 0., 0., 0., 0., 0.], dtype=float32)

## Wallet setup

In [11]:
# Names of the wallet and hotkey to load. Both are placeholders that must be
# edited to match a wallet available on this machine.
wallet_name = "owner" # TODO: set wallet name
hotkey_name = "validator-1" # TODO: set hotkey name

# Wallet identified by the names above, and a dendrite built on that wallet.
# The dendrite is what a later cell uses to send the query to the miner.
validator_wallet = bt.wallet(name=wallet_name, hotkey=hotkey_name)
validator_dendrite = bt.dendrite(wallet=validator_wallet)

## Query the top miner

In [None]:
page = 33653136 # hard article id (Wikipedia page id, sent as `pageids` below)

def generate_synthetic_synapse(page_id=None, timeout: float = 10.0) -> chunkSynapse:
    """Build a chunking request from the plain-text extract of a Wikipedia page.

    Args:
        page_id: Wikipedia page id to fetch. When omitted, the module-level
            `page` is used (looked up at call time).
        timeout: Seconds to wait for the Wikipedia API before `requests`
            gives up, so an unresponsive endpoint cannot hang the notebook.

    Returns:
        A `chunkSynapse` whose `document` is the page extract with every run
        of whitespace collapsed to a single space, created with
        `time_soft_max=5.0` and `chunk_size=4096`.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status.
        KeyError: If the API response carries no extract for the page.
    """
    if page_id is None:
        page_id = page

    response = requests.get(
        'https://en.wikipedia.org/w/api.php',
        params={
            'action': 'query',
            'format': 'json',
            'pageids': page_id,
            'prop': 'extracts',
            'explaintext': True,
            'exsectionformat': 'plain',
        },
        timeout=timeout,
    )
    # Surface HTTP errors here instead of as a confusing KeyError/JSON error below.
    response.raise_for_status()

    # The API keys the `pages` mapping by the page id rendered as a string.
    document = response.json()['query']['pages'][str(page_id)]['extract']

    # str.split() with no arguments splits on any whitespace run (spaces,
    # newlines, tabs), so re-joining with single spaces normalizes all of them.
    document = ' '.join(document.split())

    return chunkSynapse(document=document, time_soft_max=5.0, chunk_size=4096)


# UID holding the largest value in metagraph.I.
# NOTE(review): argmax returns the first maximal index, so when every entry is
# equal (e.g. the all-zeros array shown in the setup cell's output) this is
# simply UID 0 rather than a meaningful "top" miner.
top_miner_uid = metagraph.I.argmax().item()

# Axon entry the metagraph holds for that UID; this is the query target.
top_miner_axon = metagraph.axons[top_miner_uid]

# Generate the 'synthetic' query: the plain-text extract of the Wikipedia
# page configured above, wrapped in a chunkSynapse.
synapse = generate_synthetic_synapse()

# Send the query to the selected miner only (a single-element axon list).
top_miner_response = validator_dendrite.query(
    axons=[top_miner_axon],    
    synapse=synapse,
)

## Reward the top miner's response

### Reward Mechanism

1) Chunks must not have missing or out-of-order words
    - every contiguous three-word sequence from the source document should appear in at least one chunk
    - the words within a single chunk should appear in the same order as in the source document
2) There is a penalty for chunks that are too long (> `chunk_size` characters)
    - Snippet:
     ```py
     size_penalty += ((chunk_length / chunk_size) - 1) * 10
     ```
3) Intra-chunk embeddings should have high cosine similarity, inter-chunk embeddings should have low cosine similarity
    
    - Snippet:
    ```py
    for i in range(len(testChunks) - 1):
        j = i + 1
        while j < len(testChunks):
            if testChunks[i].sourceChunk == testChunks[j].sourceChunk:
                reward += np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j]))
            else:
                reward -= np.dot(np.asarray(embeddings[i]), np.asarray(embeddings[j]))
            j += 1
    ```

    - `testChunks` are three-sentence chunks formed from each of the returned miner chunks
    - `embeddings` are the embeddings of those three-sentence chunks, computed with OpenAI's `text-embedding-ada-002` model

In [None]:
from openai import OpenAI
from chunking.validator.reward import reward
import os

# Stop early with an actionable message when the OPENAI_API_KEY environment
# variable is missing or empty.
if not os.environ.get('OPENAI_API_KEY'):
    raise Exception("Make sure to set the OPENAI_API_KEY environment variable. `export OPENAI_API_KEY=your-key`")


# Score using the document and chunk size taken from the request synapse, a
# fresh OpenAI client, and `override_num_embeddings=50`.
# NOTE(review): the fourth argument is the request `synapse` built in the query
# cell, and `top_miner_response` is never used anywhere in this notebook. If
# `reward` expects the miner's response in that position, this scores the
# request rather than the miner's reply — confirm against
# chunking.validator.reward before relying on the number.
top_miner_reward = reward(None, synapse.document, synapse.chunk_size, synapse, OpenAI(), override_num_embeddings=50)

# Display the computed reward as the cell result.
top_miner_reward