# Preparations 
Use `Shift+Enter` to execute the current cell. A number in the square brackets (e.g. In [23]:) means the cell has been executed. The In [\*] status means the cell is still running.  

In [None]:
import os

In [None]:
! pip install tqdm

In [None]:
! pip install plotly

In [None]:
! pip install pymongo

In [None]:
from pymongo import MongoClient

In [None]:
import pandas as pd

In [None]:
from tqdm import tqdm_notebook

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
import plotly.graph_objects as go

In [None]:
import numpy as np

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib

In [None]:
from converter import base64_to_bech32
import requests

In [None]:
from itertools import product

In [None]:
# Print the working directory; the relative CSV paths used below resolve against it.
! pwd

# Forming euler_validators_partial.csv file with validators precommits 

In [None]:
# Connect to MongoDB with the driver defaults and select the "validators"
# collection of the "cyberd" database.
client = MongoClient()
collection = client["cyberd"]["validators"]

In [None]:
# Fetch every document of the collection (no filter) into memory.
validators = list(collection.find())

In [None]:
# Tabulate the fetched documents, one row per document.
validators_df = pd.DataFrame(validators)

In [None]:
# Block numbers below the highest synced block that are absent from the data
# (blocks 0 and 1 are excluded from the check). A non-empty result is normal,
# but make sure it contains no block below the MAX_BLOCK chosen further down.
set(range(validators_df["block"].max())) - set(validators_df["block"].unique()) - {0, 1}

In [None]:
# Highest block number present in the synced data
validators_df["block"].max()

In [None]:
# Last block (inclusive) taken into account by the calculations below
MAX_BLOCK = 7 * 10 ** 6

In [None]:
# Echo the cutoff so the value in effect is visible in the cell output
MAX_BLOCK

In [None]:
# Snapshot the blocks up to MAX_BLOCK (block number and validator list only),
# ordered by block, into a CSV file next to the notebook.
(
    validators_df
    .loc[validators_df["block"] <= MAX_BLOCK, ["block", "validators"]]
    .sort_values("block")
    .to_csv("./euler_validators_partial.csv")
)

In [None]:
# Reload the snapshot written above; from here on `validators_df` holds the CSV
# contents (later cells parse the "validators" column back from its text form).
validators_df = pd.read_csv("./euler_validators_partial.csv")

# Visualisation of validators work

In [None]:
from ast import literal_eval

# After the CSV round-trip each validator list is stored as text. Parse it with
# literal_eval, which accepts only Python literals, instead of eval, which
# would execute any code that found its way into the file.
validators_df["validators"] = validators_df["validators"].apply(literal_eval)

In [None]:
# Turn each block's validator collection into a set for fast membership tests.
validators_df["validators"] = validators_df["validators"].apply(set)

In [None]:
# Every address that appears in at least one block's validator set.
all_validators = {
    address
    for block_validators in validators_df["validators"].tolist()
    for address in block_validators
}

In [None]:
# Bucket the blocks for plotting: round(-4) rounds to the nearest 10,000.
# NOTE: "rounded_block" is overwritten with a coarser bucket in the rewards section.
validators_df["rounded_block"] = validators_df["block"].round(-4)

In [None]:
# Number of distinct block buckets
validators_df["rounded_block"].nunique()

In [None]:
# Bucket labels for the x axis. groupby("rounded_block") returns its groups in
# ascending key order, so sort explicitly: this keeps x aligned with the
# per-bucket sums even when the rows of validators_df are not ordered by block.
x = sorted(validators_df["rounded_block"].unique().tolist())

In [None]:
# For every validator, count per block bucket the blocks it took part in.
validator_traces = {}
labels = []
all_y = []
for validator in tqdm_notebook(all_validators):
    # 1.0 where the validator is in the block's validator set, 0.0 otherwise
    validators_df["validator_mask"] = (
        validators_df["validators"]
        .apply(lambda members: validator in members)
        .astype(float)
    )
    y = validators_df.groupby("rounded_block")["validator_mask"].sum().tolist()
    validator_traces[validator] = y
    labels.append(validator)
    all_y.append(y)

In [None]:
# matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap returns
# the same colormap and is available in old and new releases alike.
cmap = plt.get_cmap('jet')

In [None]:
# Colour each validator by the number of blocks it took part in, relative to
# MAX_BLOCK. The colormap returns (r, g, b, a) floats in [0, 1]; an "rgba()"
# colour string takes the colour channels as 0-255 integers but the alpha
# channel in [0, 1], so only the first three components are scaled.
validator_colors = {
    validator: "rgba({}, {}, {}, {})".format(
        *[int(channel * 255) for channel in rgba[:3]], rgba[3]
    )
    for validator, rgba in (
        (address, cmap(np.sum(trace) / MAX_BLOCK))
        for address, trace in validator_traces.items()
    )
}

In [None]:
# Stacked area chart normalised to 100%, one layer per validator.
fig = go.Figure()

for validator, y in validator_traces.items():
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            name=validator[0:5] + "...",
            mode='lines',
            line={"width": 0.5, "color": validator_colors[validator]},
            stackgroup='one',
            groupnorm='percent',
        )
    )

fig.update_layout(
    showlegend=True,
    xaxis_type='linear',
    yaxis={"type": 'linear', "range": [1, 100], "ticksuffix": '%'},
)

fig.show()

# Calculating rewards per each validator

In [None]:
# Total number of tokens distributed between validators as lifetime rewards
AMOUNT_OF_TOKENS = 27 * 10 ** 11

In [None]:
# Size of each block's validator set
validators_df["total_validators"] = validators_df["validators"].apply(len)

In [None]:
# Ten evenly spaced intercepts B for the reward line, from 0 to AMOUNT_OF_TOKENS
b_step_simple_range = np.linspace(0, 1, 10) * AMOUNT_OF_TOKENS

In [None]:
def linear(block, k, b):
    """Evaluate the linear reward curve ``k * block + b`` at the given block."""
    slope_term = k * block
    return slope_term + b

In [None]:
# Re-bucket to the nearest 100,000 blocks for the reward charts; this replaces
# the finer "rounded_block" values assigned in the visualisation section.
validators_df["rounded_block"] = validators_df["block"].round(-5)

In [None]:
# Reward tables: intercept B -> validator -> block bucket -> reward in tokens.
all_rewards = {}

for b in tqdm_notebook(b_step_simple_range):
    # Total reward of every block on the line with slope 1 and intercept b.
    validators_df["total_reward"] = validators_df["block"].apply(
        lambda height: linear(height, 1, b)
    )
    # Split each block's reward evenly between its validators and normalise by
    # the sum of all block rewards.
    validators_df["validator_reward"] = (
        validators_df["total_reward"]
        / validators_df["total_validators"]
        / validators_df["total_reward"].sum()
    )

    rewards = {}
    for validator in tqdm_notebook(all_validators):
        # Rows of the blocks this validator took part in, summed per bucket.
        validator_reward = (
            validators_df
            .loc[validators_df["validators"].apply(lambda members: validator in members)]
            .groupby("rounded_block")["validator_reward"]
            .sum()
        )
        rewards[validator] = (validator_reward * AMOUNT_OF_TOKENS).to_dict()

    all_rewards[b] = rewards

In [None]:
# Order validators by their total reward for the intercept B = 0, ascending.
sorted_validators = sorted(
    all_validators,
    key=lambda address: sum(all_rewards[0][address].values()),
)

In [None]:
def show_rewards_by_block(all_rewards, margin=0, width=0.08):
    """Draw one stacked horizontal bar per validator on the current pyplot axes.

    Each bar is split into one segment per "rounded_block" bucket of
    validators_df, in the order the buckets appear in that column; later
    segments get darker shades of red.

    Args:
        all_rewards: mapping validator -> {block bucket -> reward} for a single
            reward curve. Buckets missing for a validator count as 0.
        margin: number of bar widths by which the bars are shifted down, so
            several reward curves can be drawn next to each other.
        width: thickness of a bar; defaults to the previously hard-coded 0.08.
    """
    # One row of per-validator values for every bucket.
    values = [
        [all_rewards[validator].get(block_range, 0) for validator in sorted_validators]
        for block_range in validators_df["rounded_block"].unique()
    ]

    # matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap is
    # available in old and new releases alike.
    cmap = plt.get_cmap('Reds')
    ind = np.arange(len(all_validators))
    blocks_number = validators_df["rounded_block"].nunique()

    # Sized from sorted_validators rather than values[0], so an empty bucket
    # list draws nothing instead of raising IndexError.
    bottom_values = np.zeros(len(sorted_validators))
    for index, values_row in enumerate(values):
        plt.barh(ind - margin * width, values_row, width, color=cmap(index / blocks_number), left=bottom_values)
        # The next segment starts where this one ended.
        bottom_values += values_row

In [None]:
plt.figure(figsize=(20, 20))

# One group of bars per intercept B, each shifted down by one more bar width.
for i, b in enumerate(b_step_simple_range):
    show_rewards_by_block(all_rewards[b], i)

# The bars are horizontal: validators run along the y axis and the reward
# amount along the x axis, so label the axes accordingly.
plt.xlabel('Reward (tokens)')
plt.ylabel('Validators')
plt.title('Reward for each validator')
# Put each validator's tick label roughly in the middle of its group of bars.
indices = np.arange(len(all_validators)) - 0.08 * 0.5 * len(b_step_simple_range)
plt.yticks(indices, sorted_validators)
plt.show()

For each validator:
- A darker color represents the part of the reward earned in more recent blocks.
- From top to bottom, the bars show how the reward changes as the coefficient $\frac{B}{K}$ increases.

### Conclusion

If $reward = K \cdot height + B$, then as $\frac{B}{K}$ increases:
- new validators get less;
- old validators get more.

Rewards depend only on the ratio $\frac{B}{K}$, not on $B$ and $K$ separately.

# Visualization of balances

In [None]:
# Slope K and intercept B of the reward line used for the final balances
K, B = 1, 0

In [None]:
# Reward of every block under reward = K * block + B, split evenly between the
# validators of that block and normalised by the sum of all block rewards.
validators_df["total_reward"] = validators_df["block"].apply(
    lambda height: linear(height, K, B)
)
validators_df["validator_reward"] = (
    validators_df["total_reward"]
    / validators_df["total_validators"]
    / validators_df["total_reward"].sum()
)

# Total share collected by each validator over the blocks it took part in.
selected_rewards = {}
for validator in tqdm_notebook(all_validators):
    validator_reward = validators_df.loc[
        validators_df["validators"].apply(lambda members: validator in members),
        "validator_reward",
    ].sum()
    selected_rewards[validator] = validator_reward

In [None]:
# Order validators by their reward share, smallest first.
sorted_validators = sorted(
    all_validators,
    key=lambda address: selected_rewards[address],
)

In [None]:
# Reward shares in the same (ascending) order as sorted_validators.
data = [
    selected_rewards[address]
    for address in sorted_validators
]

In [None]:
# Optional display names keyed by validator address. Addresses without an entry
# are shown abbreviated; the only entry here names the validator with the
# largest reward share (the last element of the ascending sorted_validators).
validator_names = {
    sorted_validators[-1]: "That guy"
}

In [None]:
def get_name(i):
    """Build the chart caption for the i-th entry of sorted_validators.

    The caption is the custom name from validator_names when one is set,
    otherwise the first five characters of the address followed by "...";
    the validator's reward share is appended as a percentage.
    """
    address = sorted_validators[i]
    if address in validator_names:
        label = validator_names[address]
    else:
        label = address[0:5] + "..."
    share_percent = selected_rewards[address] * 100
    return label + " ({0:.2f}%)".format(share_percent)

In [None]:
# One caption per validator, in the order of sorted_validators.
captions = list(map(get_name, range(len(sorted_validators))))

In [None]:
# Merge the OMITTED validators with the smallest reward shares into a single
# wedge; change OMITTED to control how many are combined.
OMITTED = 15
# Rebuild both lists from their sources instead of slicing `data`/`captions`
# in place, so re-running this cell does not merge a further OMITTED entries.
all_shares = [selected_rewards[v] for v in sorted_validators]
all_captions = [get_name(i) for i in range(len(sorted_validators))]
data = [sum(all_shares[:OMITTED])] + all_shares[OMITTED:]
captions = ["\n".join(all_captions[:OMITTED])] + all_captions[OMITTED:]

In [None]:
# Donut chart of the reward shares, each wedge annotated with its caption.
fig, ax = plt.subplots(figsize=(20, 20), subplot_kw=dict(aspect="equal"))

wedges, texts = ax.pie(data, wedgeprops=dict(width=0.3), startangle=-40)

bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72)
kw = dict(arrowprops=dict(arrowstyle="-"),
          bbox=bbox_props, zorder=0, va="center")

for i, p in enumerate(wedges):
    # Direction of the wedge's bisector on the unit circle. The coordinates get
    # their own names so that the global `x` holding the x axis of the area
    # chart is not overwritten by this cell.
    ang = (p.theta2 - p.theta1)/2. + p.theta1
    label_y = np.sin(np.deg2rad(ang))
    label_x = np.cos(np.deg2rad(ang))
    # Labels go to the left or right of the chart; a wedge centred exactly on
    # the vertical axis is treated as right-hand side instead of raising
    # KeyError on sign 0.
    side = 1 if label_x >= 0 else -1
    horizontalalignment = "left" if side == 1 else "right"
    connectionstyle = "angle,angleA=0,angleB={}".format(ang)
    kw["arrowprops"].update({"connectionstyle": connectionstyle})
    ax.annotate(captions[i], xy=(label_x, label_y), xytext=(1.35*side, 1.4*label_y),
                horizontalalignment=horizontalalignment, fontsize=20, **kw)

ax.set_title("Validators distribution", fontsize=30)

plt.show()

### Building the table of public keys

In [None]:
# Host and port of the node queried over HTTP below, taken from the environment
# (os.environ[...] raises KeyError when a variable is not set).
NODE_HOST = os.environ["NODE_HOST"]
NODE_PORT = os.environ["NODE_PORT"]

In [None]:
# Sanity check of the converter against one known base64 / bech32 pair
assert (
    base64_to_bech32("7GglL8LOiwNrYIiyGzsrEAaIvYn5iVqYLKG05TG5RXk=")
    == "cybervalconspub1zcjduepqa35z2t7ze69sx6mq3zepkwetzqrg30vflxy44xpv5x6w2vdeg4usdyhrzw"
)

In [None]:
# Look up the base64 public key of every validator, asking the node for the
# validator set at the first block in which that validator appears.
validators_base64 = {}

for validator in tqdm_notebook(all_validators):
    first_block = validators_df[validators_df["validators"].apply(lambda x: validator in x)]["block"].min()
    # A timeout keeps an unresponsive node from hanging the notebook forever.
    http_response = requests.get(
        "http://{}:{}/validators?height={}".format(NODE_HOST, NODE_PORT, first_block),
        timeout=30,
    )
    # Fail on an HTTP error here rather than on a confusing KeyError below.
    http_response.raise_for_status()
    response = http_response.json()
    block_validators = response["result"]["validators"]
    pub_key = next(
        (v["pub_key"]["value"] for v in block_validators if v["address"] == validator),
        None,
    )
    if pub_key is None:
        # NOTE(review): if the node paginates this endpoint, the validator may
        # be on a later page - confirm against the node's API.
        raise LookupError(
            "validator {} not found in the validator set at height {}".format(validator, first_block)
        )
    validators_base64[validator] = pub_key

In [None]:
# Inspect the collected address -> base64 public key mapping
validators_base64

In [None]:
# Same mapping with every base64 public key converted to its bech32 form.
validators_bech32 = {
    address: base64_to_bech32(base64_key)
    for address, base64_key in validators_base64.items()
}

In [None]:
# Inspect the address -> bech32 public key mapping
validators_bech32

In [None]:
# Ask the node for its staking validators; the timeout and status check make a
# dead or failing node raise immediately instead of hanging or producing an
# unrelated KeyError further down.
http_response = requests.get(
    "http://{}:{}/staking/validators".format(NODE_HOST, NODE_PORT),
    timeout=30,
)
http_response.raise_for_status()
operator_addresses_response = http_response.json()

# Keep only the two fields needed to link a consensus key to its operator.
operator_addresses = [{
    "operator_address": address["operator_address"],
    "consensus_pubkey": address["consensus_pubkey"]
} for address in operator_addresses_response['result']]

operator_addresses_df = pd.DataFrame(operator_addresses)

In [None]:
# Inspect the operator address / consensus public key pairs returned by the node
operator_addresses

In [None]:
# One row per validator: bech32 public key ("pub") and its reward in tokens
# ("balance"), i.e. the reward share scaled by AMOUNT_OF_TOKENS.
balances_df = pd.DataFrame(
    list({
        validators_bech32[address]: AMOUNT_OF_TOKENS * share
        for address, share in selected_rewards.items()
    }.items()),
    columns=["pub", "balance"],
)

In [None]:
# Attach the operator address to every balance by matching public keys. It is a
# left join, so a balance without a matching operator keeps a missing address.
merged_balances_df = pd.merge(
    balances_df,
    operator_addresses_df,
    how="left",
    left_on="pub",
    right_on="consensus_pubkey",
)[["operator_address", "balance"]]

In [None]:
# Write "operator_address,balance" rows without a header line or index column
merged_balances_df.to_csv("./balances.csv", index=False, header=False)

The result is saved to `validators-investigation/data/notebook/balances.csv`.