In [1]:
from helius import NFTAPI, NameAPI, BalancesAPI, WebhooksAPI, TransactionsAPI
from dotenv import load_dotenv
from pathlib import Path
import requests
import pandas as pd
import numpy as np
import os
import time
import json

In [2]:
HELIUS_API_KEY = os.getenv('HELIUM_API_KEY')


In [3]:
VALIDATORS_API_KEY = os.getenv('VALIDATORS_API_KEY')

In [4]:
pd.set_option('display.max_colwidth', None)  
pd.set_option('display.max_columns', None)   
pd.set_option('display.width', 200)    
pd.set_option('display.float_format', '{:.8f}'.format)

In [5]:
# transactions_api  = TransactionsAPI(HELIUS_API_KEY)

# parsed_transaction_history = transactions_api.get_parsed_transaction_history(address="HtXa1PH33GGvH3giqMqatndHcKnzeSkwMaW46DTzDfLd")

# print(parsed_transaction_history)

In [6]:
import requests

# Replace this with your Helius API key
#API_KEY = "YOUR_HELIUS_API_KEY"
BASE_URL = f"https://mainnet.helius-rpc.com/?api-key={HELIUS_API_KEY}"

HEADERS = {"Content-Type": "application/json"}

def get_vote_accounts():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getVoteAccounts"
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        current_validators = data["result"]["current"]
        delinquent_validators = data["result"]["delinquent"]
        return current_validators, delinquent_validators
    else:
        print("Error fetching vote accounts:", response.text)
        return [], []

def get_stake_accounts_by_pubkey(pubkey):
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getProgramAccounts",
        "params": [
            "Stake11111111111111111111111111111111111111",  # Stake Program
            {
                "encoding": "jsonParsed",
                "filters": [
                    {
                        "memcmp": {
                            "offset": 12,
                            "bytes": pubkey
                        }
                    }
                ]
            }
        ]
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        return response.json()["result"]
    else:
        print("Error fetching stake accounts:", response.text)
        return []



In [7]:
# # --- Example Usage ---

# # Fetch validators
# current, delinquent = get_vote_accounts()
# print(f"Total active validators: {len(current)}")
# print(f"Total delinquent validators: {len(delinquent)}")

# # Optionally fetch stake accounts by a delegator pubkey
# delegator_pubkey = "HtXa1PH33GGvH3giqMqatndHcKnzeSkwMaW46DTzDfLd"  # Replace this
# stakes = get_stake_accounts_by_pubkey(delegator_pubkey)
# print(f"Stake accounts for {delegator_pubkey}:")
# for s in stakes:
#     print(s["pubkey"], s["account"]["data"]["parsed"]["info"]["stake"])


# Fetching Validator + Stake Data via Helius API

### Vote Data

In [8]:
# JSON-RPC Payload to fetch validator vote accounts
payload = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "getVoteAccounts"
}

response = requests.post(BASE_URL, json=payload)
data = response.json()

# Combine current + delinquent validators
validators = data["result"]["current"] + data["result"]["delinquent"]

# Convert to DataFrame
df = pd.DataFrame(validators)
df.head(n=1)

Unnamed: 0,activatedStake,commission,epochCredits,epochVoteAccount,lastVote,nodePubkey,rootSlot,votePubkey
0,2166168164002,0,"[[778, 110335901, 103442134], [779, 117227467, 110335901], [780, 124110747, 117227467], [781, 130944902, 124110747], [782, 134056430, 130944902]]",True,338018997,7yk4vhSMYNrs5GP2xd6ZrWgtazA6eXTS3p68f2qsGxQo,338018966,EHmh8op1wzbKeuRBk8UdWWB3vegwZWSZ5nnzWRKbDVoY


In [9]:
# Add additional fields (can be joined with price data or slashing reports)
df["timestamp"] = pd.Timestamp.utcnow()
df["epoch"] = None  # You can get this via 'getEpochInfo' method

# Preview important fields
# print(df[[
#     "nodePubkey", "commission", "activatedStake", "lastVote", "rootSlot", 
#     "epochCredits", "epochVoteAccount"
# ]])

df = df[[
    "timestamp","nodePubkey", "commission", "activatedStake", "lastVote", "rootSlot", 
    "epochCredits", "epochVoteAccount","votePubkey"
]]


In [10]:
print(df.columns.tolist())


['timestamp', 'nodePubkey', 'commission', 'activatedStake', 'lastVote', 'rootSlot', 'epochCredits', 'epochVoteAccount', 'votePubkey']


In [11]:
expanded_rows = []

for idx, row in df.iterrows():
    node_pubkey = row['nodePubkey']
    votePubkey = row['votePubkey']
    for epoch_info in row['epochCredits']:
        epoch, current_credits, previous_credits = epoch_info
        credits_earned = current_credits - previous_credits
        expanded_rows.append({
            'timestamp': row['timestamp'],
            'nodePubkey': node_pubkey,
            'commission': row['commission'],
            'activatedStake': row['activatedStake'],
            'lastVote': row['lastVote'],
            'rootSlot': row['rootSlot'],
            'epochVoteAccount': row['epochVoteAccount'],
            'epoch': epoch,
            'credits': current_credits,
            'previous_credits': previous_credits,
            'credits_earned': credits_earned,
            'votePubkey' : votePubkey
        })

df_expanded = pd.DataFrame(expanded_rows)
df_expanded.sort_values(by=['nodePubkey', 'epoch'], inplace=True)
print(df_expanded.head())


                          timestamp                                    nodePubkey  commission  activatedStake   lastVote   rootSlot  epochVoteAccount  epoch    credits  previous_credits  \
75 2025-05-05 15:23:55.829223+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  338018997  338018966              True    778  635925863         629035066   
76 2025-05-05 15:23:55.829223+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  338018997  338018966              True    779  642817650         635925863   
77 2025-05-05 15:23:55.829223+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  338018997  338018966              True    780  649701256         642817650   
78 2025-05-05 15:23:55.829223+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  338018997  338018966              True    781  656598333         649701256   
79 2025-05-05 15:23:55.829223+00:00  138KHwTqKNWGLoo8fK

In [12]:
df_expanded["epoch"].unique() #.sum()

array([778, 779, 780, 781, 782, 777, 774, 775, 776, 773, 767, 768, 770,
       649, 744, 745, 746, 761, 769, 603, 604, 605, 719])

### Annual Inflation Rate
Purpose: Use this to contextualize staking APY and understand network reward distribution.





In [13]:
def get_inflation_rate():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getInflationRate"
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        #return data["result"]
        result = data["result"]
        result["timestamp"] = pd.Timestamp.utcnow()  # Add timestamp
        return result 
    else:
        print("Error fetching inflation rate:", response.text)
        return {}

# Fetch and convert to DataFrame
inflation_data = get_inflation_rate()
df_inflation = pd.DataFrame([inflation_data])  # Single row
print(df_inflation.head(n=5))

   epoch  foundation      total  validator                        timestamp
0    782  0.00000000 0.04554517 0.04554517 2025-05-05 15:23:56.732060+00:00


### Staking Rewards - Inflation Rate
This is critical for calculating APY


- amount: Rewards in lamports (e.g., 1,863,991,600 lamports ≈ 1.863 SOL).

- effectiveSlot: Slot when rewards were applied.

- epoch: Epoch number.

- postBalance: Account balance after rewards (in lamports).

- commission: Validator’s commission rate




In [14]:
def get_inflation_reward(vote_accounts, epoch=None):
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getInflationReward",
        "params": [vote_accounts, {"commitment": "finalized"}]
    }
    if epoch is not None:
        payload["params"].append({"epoch": epoch})
        
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    
    if response.status_code == 200:
        data = response.json()
        result = data["result"]
        
        # Add timestamp to each reward entry
        timestamp = pd.Timestamp.utcnow()
        for entry in result:
            if entry is not None:  # Some entries may be None
                entry["timestamp"] = timestamp
        
        return result
    else:
        print("Error fetching inflation reward:", response.text)
        return []


# Get validator-specific inflation rewards
validator_addresses = df_expanded["votePubkey"].tolist()
rewards = get_inflation_reward(validator_addresses[:10])  # Max 50 addresses per call

inflation_reward = pd.DataFrame(rewards)
#inflation_reward = inflation_reward[inflation_reward["amount"] > 0]
print(inflation_reward)

   amount  commission  effectiveSlot  epoch  postBalance                        timestamp
0       0           0      337824000    781    171285600 2025-05-05 15:23:58.177808+00:00
1       0           0      337824000    781    171285600 2025-05-05 15:23:58.177808+00:00
2       0           0      337824000    781    171285600 2025-05-05 15:23:58.177808+00:00
3       0           0      337824000    781    171285600 2025-05-05 15:23:58.177808+00:00
4       0           0      337824000    781    171285600 2025-05-05 15:23:58.177808+00:00
5       0           0      337824000    781  16992674467 2025-05-05 15:23:58.177808+00:00
6       0           0      337824000    781  16992674467 2025-05-05 15:23:58.177808+00:00
7       0           0      337824000    781  16992674467 2025-05-05 15:23:58.177808+00:00
8       0           0      337824000    781  16992674467 2025-05-05 15:23:58.177808+00:00
9       0           0      337824000    781  16992674467 2025-05-05 15:23:58.177808+00:00


### Total Network Credits

Use total network credits to normalize your validator’s credits_earned and assess relative performance. Also provides epoch context.



In [15]:
def get_epoch_info():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getEpochInfo"
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        result = data["result"]
        result["timestamp"] = pd.Timestamp.utcnow()  # Add timestamp
        return result 
    else:
        print("Error fetching epoch info:", response.text)
        return {}

# Fetch and convert to DataFrame
epoch_info = get_epoch_info()
df_epoch = pd.DataFrame([epoch_info])
print(df_epoch.head(n=1))

   absoluteSlot  blockHeight  epoch  slotIndex  slotsInEpoch  transactionCount                        timestamp
0     338019007    316244397    782     195007        432000      402519814870 2025-05-05 15:23:58.759655+00:00


## Circulating SOL Supply



In [16]:
def get_supply():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getSupply",
        "params": [{"commitment": "finalized"}]
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        return data["result"]["value"]
    else:
        print("Error fetching supply:", response.text)
        return {}

# Fetch and convert to DataFrame
supply_data = get_supply()
df_supply = pd.DataFrame([supply_data])
print(df_supply.head(n=1))

          circulating     nonCirculating  \
0  518189314601749874  81871866811026266   

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               

In [17]:
import pandas as pd

# If df_supply is already created:
df_supply['circulating'] = df_supply['circulating'].astype(float)
df_supply['nonCirculating'] = df_supply['nonCirculating'].astype(float)
df_supply['total'] = df_supply['total'].astype(float)
# Optionally convert to billions for readability:
df_supply['circulating_sol'] = df_supply['circulating'] / 1e9
df_supply['nonCirculating_sol'] = df_supply['nonCirculating'] / 1e9
df_supply['total_sol'] = df_supply['total'] / 1e9
# If you want a DataFrame where each nonCirculatingAccount is its own row:
df_accounts = pd.DataFrame(df_supply['nonCirculatingAccounts'][0], columns=['nonCirculatingAccount'])

# Resulting outputs:
df_supply = df_supply[['circulating_sol', 'nonCirculating_sol', 'total_sol']]
print(df_supply)
#print(df_supply[['circulating_sol', 'nonCirculating_sol', 'total_sol']])
#print(df_accounts.head())


     circulating_sol  nonCirculating_sol          total_sol
0 518189314.60174990   81871866.81102628 600061181.41277623


###  Transactions Per Second (TPS)



In [18]:
def get_tps():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getRecentPerformanceSamples",
        "params": [1]  # Fetch 1 sample
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        sample = data["result"][0]
        tps = sample["numTransactions"] / sample["samplePeriodSecs"]
        return tps
    else:
        print("Error fetching TPS:", response.text)
        return 0

# Fetch TPS
tps = get_tps()
df_tps = pd.DataFrame({"timestamp": [pd.Timestamp.utcnow()], "tps": [tps]})
print("TPS DataFrame:")
print(df_tps.head())

TPS DataFrame:
                         timestamp           tps
0 2025-05-05 15:24:05.898690+00:00 4435.96666667


### SOL Price (Using CoinGecko)




In [19]:
def get_sol_price():
    url = "https://api.coingecko.com/api/v3/simple/price?ids=solana&vs_currencies=usd"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        return data["solana"]["usd"]
    else:
        print("Error fetching SOL price:", response.text)
        return 0

# Fetch SOL price
sol_price = get_sol_price()
df_price = pd.DataFrame({"timestamp": [pd.Timestamp.utcnow()], "sol_price_usd": [sol_price]})
print("SOL Price DataFrame:")
print(df_price.head())

SOL Price DataFrame:
                         timestamp  sol_price_usd
0 2025-05-05 15:24:06.938760+00:00   144.52000000


In [20]:
def get_recent_priority_fees():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getRecentPrioritizationFees",
        "params": [[]]  # Empty accounts for global fees
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        fees = [f["prioritizationFee"] for f in data["result"]]
        avg_fee = sum(fees) / len(fees) if fees else 0  # Micro-lamports
        return avg_fee / 1e6  # Convert to lamports
    else:
        print("Error fetching priority fees:", response.text)
        return 0

# Fetch average priority fee
avg_priority_fee = get_recent_priority_fees()
base_fee = 5000  # Static base fee per signature
avg_fee = base_fee + avg_priority_fee  # Total average fee
df_fees = pd.DataFrame({
    "timestamp": [pd.Timestamp.utcnow()],
    "avg_fee_lamports": [avg_fee],
    "avg_fee_sol": [avg_fee / 1e9],
    "avg_fee_usd": [avg_fee / 1e9 * sol_price]
})
print("Network Fees DataFrame:")
print(df_fees)

Network Fees DataFrame:
                         timestamp  avg_fee_lamports  avg_fee_sol  avg_fee_usd
0 2025-05-05 15:24:07.854286+00:00     5000.00000000   0.00000500   0.00072260


In [21]:


# Assuming VALIDATORS is already defined
# Example: VALIDATORS = "your-secret-api-token"

NETWORK = "mainnet"  # Change to 'testnet' or 'pythnet' as needed
BASE_URL = f"https://www.validators.app/api/v1/validators/{NETWORK}.json"

HEADERS = {
    "Token": VALIDATORS_API_KEY
}

def fetch_all_validators(with_history=False):
    params = {}
    if with_history:
        params["with_history"] = "true"
    
    response = requests.get(BASE_URL, headers=HEADERS, params=params)
    
    if response.status_code == 200:
        validators = response.json()
        print(f"Fetched {len(validators)} validators.")
        return pd.DataFrame(validators)
    else:
        print(f"Error fetching validator data: {response.status_code} - {response.text}")
        return pd.DataFrame()

# Usage
df_validators = fetch_all_validators(with_history=True)

# Display the first few rows
print(df_validators.head())


Fetched 1289 validators.
   network                                       account                                             name keybase_id                     www_url  \
0  mainnet   Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re  YYDS Validator 🚀 Performant, Secure and NO Fees             https://site-et7.pages.dev   
1  mainnet   Tri1F8B6YtjkBztGCwBNSLEZib1EAqMUEUM7dTT7ZG3          Trillium: Stake, Earn, and Fuel Solana!                    https://trillium.so   
2  mainnet  BeSovDCzhEAfgwDyXBuhmCFKsu5WQ3PaX61GEfteNzXM                     Yonta Labs - LST 0%Fee MEV -              https://www.yontalabs.io/   
3  mainnet   hykfH9jUQqe2yqv3VqVAK5AmMYqrmMWmdwDcbfsm6My                                            Hayek             https://hayek.fi/validator   
4  mainnet  B8m79Xf3kp19suGMJkfXZDDHCmMP5vWHuYAdirtswEzD                    Palidator [Paladin validator]       reff      https://stake.z64x.com   

                                                                                      

In [22]:
df_validators.columns.to_list()

['network',
 'account',
 'name',
 'keybase_id',
 'www_url',
 'details',
 'avatar_url',
 'created_at',
 'updated_at',
 'jito',
 'jito_commission',
 'stake_pools_list',
 'is_active',
 'avatar_file_url',
 'active_stake',
 'authorized_withdrawer_score',
 'commission',
 'data_center_concentration_score',
 'delinquent',
 'published_information_score',
 'root_distance_score',
 'security_report_score',
 'skipped_slot_score',
 'skipped_after_score',
 'software_version',
 'software_version_score',
 'stake_concentration_score',
 'consensus_mods_score',
 'vote_latency_score',
 'total_score',
 'vote_distance_score',
 'ip',
 'data_center_key',
 'autonomous_system_number',
 'latitude',
 'longitude',
 'data_center_host',
 'vote_account',
 'epoch_credits',
 'epoch',
 'skipped_slots',
 'skipped_slot_percent',
 'ping_time',
 'url']

# Epochs

In [46]:
BASE_URL = 'https://www.validators.app/api/v1/epochs/mainnet.json'  # Replace with the desired network ('mainnet', 'testnet', 'pythnet')

# Headers with the API token for authentication
HEADERS = {
    'Token': VALIDATORS_API_KEY
}

# Make the API request
def get_epoch_data():
    params = {'per': 50, 'page': 1}  # Fetch up to 50 epochs, page 1 (adjust as needed)
    response = requests.get(BASE_URL, headers=HEADERS, params=params)
    
    if response.status_code == 200:
        data = response.json()
        return data['epochs']
    else:
        print(f"Error fetching epoch data: {response.text}")
        return []

# Get epoch data
epoch_data = get_epoch_data()

# Convert the data to a pandas DataFrame
df_epochs = pd.DataFrame(epoch_data)

# Display the first few rows of the DataFrame
print(df_epochs.head())

   epoch  starting_slot  slots_in_epoch  network                created_at            total_rewards          total_active_stake
0    782      337824002          432000  mainnet  2025-05-04T18:04:09.000Z                      NaN                         NaN
1    781      337392000          432000  mainnet  2025-05-02T18:48:09.000Z 149600376018593.00000000 391208956635595392.00000000
2    780      336960000          432000  mainnet  2025-04-30T19:23:26.000Z 149328698516297.00000000 392498296158841984.00000000
3    779      336528001          432000  mainnet  2025-04-28T19:56:18.000Z 149221322367074.00000000 389876708020589120.00000000
4    778      336096000          432000  mainnet  2025-04-26T20:24:27.000Z 149812284369850.00000000 389896186286022464.00000000


# Exploratory Data Analysis

In [47]:
df_expanded.head(n=1)


Unnamed: 0,timestamp,nodePubkey,commission,activatedStake,lastVote,rootSlot,epochVoteAccount,epoch,credits,previous_credits,credits_earned,votePubkey,activatedStake_SOL,activatedStake_USD
75,2025-05-05 15:23:55.829223+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,338018997,338018966,True,778,635925863,629035066,6890797,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,35818.57902257,5176501.04034138


In [48]:
df_epochs.head()

Unnamed: 0,epoch,starting_slot,slots_in_epoch,network,created_at,total_rewards,total_active_stake
0,782,337824002,432000,mainnet,2025-05-04T18:04:09.000Z,,
1,781,337392000,432000,mainnet,2025-05-02T18:48:09.000Z,149600376018593.0,3.912089566355954e+17
2,780,336960000,432000,mainnet,2025-04-30T19:23:26.000Z,149328698516297.0,3.92498296158842e+17
3,779,336528001,432000,mainnet,2025-04-28T19:56:18.000Z,149221322367074.0,3.898767080205891e+17
4,778,336096000,432000,mainnet,2025-04-26T20:24:27.000Z,149812284369850.0,3.8989618628602246e+17


In [49]:
print(df_epochs["epoch"].dtype)

int64


In [50]:
print(df_expanded["epoch"].dtype)

int64


In [51]:
df_expanded["activatedStake_SOL"] = df_expanded["activatedStake"] / 1e9
df_expanded["activatedStake_USD"] = df_expanded["activatedStake_SOL"] * sol_price
df_expanded.head(n=1)

Unnamed: 0,timestamp,nodePubkey,commission,activatedStake,lastVote,rootSlot,epochVoteAccount,epoch,credits,previous_credits,credits_earned,votePubkey,activatedStake_SOL,activatedStake_USD
75,2025-05-05 15:23:55.829223+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,338018997,338018966,True,778,635925863,629035066,6890797,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,35818.57902257,5176501.04034138


In [52]:
df_merged = pd.merge(df_epochs, df_expanded, on="epoch", how="inner")
df_merged["vote_account"] = df_merged["votePubkey"]
df_merged.head()

Unnamed: 0,epoch,starting_slot,slots_in_epoch,network,created_at,total_rewards,total_active_stake,timestamp,nodePubkey,commission,activatedStake,lastVote,rootSlot,epochVoteAccount,credits,previous_credits,credits_earned,votePubkey,activatedStake_SOL,activatedStake_USD,vote_account
0,782,337824002,432000,mainnet,2025-05-04T18:04:09.000Z,,,2025-05-05 15:23:55.829223+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,338018997,338018966,True,659702336,656598333,3104003,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,35818.57902257,5176501.04034138,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz
1,782,337824002,432000,mainnet,2025-05-04T18:04:09.000Z,,,2025-05-05 15:23:55.829223+00:00,13cm6z7ajighVFYN1aR2hPQ3Rhp4QJenDbHGRmps9P1n,0,208209172493583,338018997,338018966,True,637412032,634297618,3114414,F82nmpcZMdHtMVsLtAGByPavdN5WuEX1hjNwzs3UFuwq,208209.17249358,30090389.60877262,F82nmpcZMdHtMVsLtAGByPavdN5WuEX1hjNwzs3UFuwq
2,782,337824002,432000,mainnet,2025-05-04T18:04:09.000Z,,,2025-05-05 15:23:55.829223+00:00,1EWZm7aZYxfZHbyiELXtTgN1yT2vU1HF9d8DWswX2Tp,5,35679485062836,338018997,338018966,True,758413060,755298583,3114477,HG7a8fgjTkQhGFTPukbTdf5FCwxVVjKzkbo6ToNswTXH,35679.48506284,5156399.18128106,HG7a8fgjTkQhGFTPukbTdf5FCwxVVjKzkbo6ToNswTXH
3,782,337824002,432000,mainnet,2025-05-04T18:04:09.000Z,,,2025-05-05 15:23:55.829223+00:00,1KXvrkPXwkGF6NK1zyzVuJqbXfpenPVPP6hoiK9bsK3,0,277688701811061,338018997,338018966,True,632659227,629544726,3114501,1KXz4xKV2viJCGpxqnQqdf2J45vQr5USdmtcJLTaHkm,277688.70181106,40131571.18573454,1KXz4xKV2viJCGpxqnQqdf2J45vQr5USdmtcJLTaHkm
4,782,337824002,432000,mainnet,2025-05-04T18:04:09.000Z,,,2025-05-05 15:23:55.829223+00:00,1MuaDGhuN7KRqvsupUcYmq9u1YRh1pp38hu1WV2WC6S,0,83147234519010,338018997,338018966,True,575191514,572077074,3114440,4z9rbspUBsnZmTQbWSSPETkXmWHfhzQXXc289Z3m6XcJ,83147.23451901,12016438.33268733,4z9rbspUBsnZmTQbWSSPETkXmWHfhzQXXc289Z3m6XcJ


In [60]:
df_merged.columns.tolist()

['epoch',
 'starting_slot',
 'slots_in_epoch',
 'network',
 'created_at',
 'total_rewards',
 'total_active_stake',
 'timestamp',
 'nodePubkey',
 'commission',
 'activatedStake',
 'lastVote',
 'rootSlot',
 'epochVoteAccount',
 'credits',
 'previous_credits',
 'credits_earned',
 'votePubkey',
 'activatedStake_SOL',
 'activatedStake_USD',
 'vote_account']

In [66]:
df_final = pd.merge(
    df_validators,
    df_merged,
    on='vote_account',
    #right_on='vote_account',
    how='left',
    suffixes=('_val', '_merged')
)

df_final.head()

Unnamed: 0,network_val,account,name,keybase_id,www_url,details,avatar_url,created_at_val,updated_at,admin_warning,jito,jito_commission,stake_pools_list,is_active,avatar_file_url,active_stake,authorized_withdrawer_score,commission_val,data_center_concentration_score,delinquent,published_information_score,root_distance_score,security_report_score,skipped_slot_score,skipped_after_score,software_version,software_version_score,stake_concentration_score,consensus_mods_score,vote_latency_score,total_score,vote_distance_score,ip,data_center_key,autonomous_system_number,latitude,longitude,data_center_host,vote_account,epoch_credits,epoch_val,skipped_slots,skipped_slot_percent,ping_time,url,safety_score,epoch_merged,starting_slot,slots_in_epoch,network_merged,created_at_merged,total_rewards,total_active_stake,timestamp,nodePubkey,commission_merged,activatedStake,lastVote,rootSlot,epochVoteAccount,credits,previous_credits,credits_earned,votePubkey,activatedStake_SOL,activatedStake_USD
0,mainnet,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,"YYDS Validator 🚀 Performant, Secure and NO Fees",,https://site-et7.pages.dev,Join YYDS Validator for 0% commission and enjoy top-tier security and performance for your assets,https://site-et7.pages.dev/logo.png,2024-10-14 01:54:49 UTC,2025-01-01 03:40:05 UTC,,True,0.0,[BlazeStake],True,https://prod-validators.nyc3.digitaloceanspaces.com/3qpnfid358b10t2sbaophi1x2tev,20785386413605,0,0,0,False,2,2,1,2,2,2.1.21,2,0,0,2.0,13,2,135.125.119.131,16276-FR-Gravelines,16276.0,48.8582,2.3387,be103.gra-g2-nc5.fr.eu,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,3101396.0,782.0,0.0,0.0,,https://www.validators.app/api/v1/validators/mainnet/Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,,782.0,337824002.0,432000.0,mainnet,2025-05-04T18:04:09.000Z,,,2025-05-05 15:23:55.829223+00:00,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,0.0,20785386413605.0,338018997.0,338018966.0,True,441539509.0,438427378.0,3112131.0,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,20785.3864136,3003904.04449419
1,mainnet,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,"YYDS Validator 🚀 Performant, Secure and NO Fees",,https://site-et7.pages.dev,Join YYDS Validator for 0% commission and enjoy top-tier security and performance for your assets,https://site-et7.pages.dev/logo.png,2024-10-14 01:54:49 UTC,2025-01-01 03:40:05 UTC,,True,0.0,[BlazeStake],True,https://prod-validators.nyc3.digitaloceanspaces.com/3qpnfid358b10t2sbaophi1x2tev,20785386413605,0,0,0,False,2,2,1,2,2,2.1.21,2,0,0,2.0,13,2,135.125.119.131,16276-FR-Gravelines,16276.0,48.8582,2.3387,be103.gra-g2-nc5.fr.eu,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,3101396.0,782.0,0.0,0.0,,https://www.validators.app/api/v1/validators/mainnet/Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,,781.0,337392000.0,432000.0,mainnet,2025-05-02T18:48:09.000Z,149600376018593.0,3.912089566355954e+17,2025-05-05 15:23:55.829223+00:00,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,0.0,20785386413605.0,338018997.0,338018966.0,True,438427378.0,431536663.0,6890715.0,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,20785.3864136,3003904.04449419
2,mainnet,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,"YYDS Validator 🚀 Performant, Secure and NO Fees",,https://site-et7.pages.dev,Join YYDS Validator for 0% commission and enjoy top-tier security and performance for your assets,https://site-et7.pages.dev/logo.png,2024-10-14 01:54:49 UTC,2025-01-01 03:40:05 UTC,,True,0.0,[BlazeStake],True,https://prod-validators.nyc3.digitaloceanspaces.com/3qpnfid358b10t2sbaophi1x2tev,20785386413605,0,0,0,False,2,2,1,2,2,2.1.21,2,0,0,2.0,13,2,135.125.119.131,16276-FR-Gravelines,16276.0,48.8582,2.3387,be103.gra-g2-nc5.fr.eu,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,3101396.0,782.0,0.0,0.0,,https://www.validators.app/api/v1/validators/mainnet/Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,,780.0,336960000.0,432000.0,mainnet,2025-04-30T19:23:26.000Z,149328698516297.0,3.92498296158842e+17,2025-05-05 15:23:55.829223+00:00,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,0.0,20785386413605.0,338018997.0,338018966.0,True,431536663.0,424665488.0,6871175.0,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,20785.3864136,3003904.04449419
3,mainnet,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,"YYDS Validator 🚀 Performant, Secure and NO Fees",,https://site-et7.pages.dev,Join YYDS Validator for 0% commission and enjoy top-tier security and performance for your assets,https://site-et7.pages.dev/logo.png,2024-10-14 01:54:49 UTC,2025-01-01 03:40:05 UTC,,True,0.0,[BlazeStake],True,https://prod-validators.nyc3.digitaloceanspaces.com/3qpnfid358b10t2sbaophi1x2tev,20785386413605,0,0,0,False,2,2,1,2,2,2.1.21,2,0,0,2.0,13,2,135.125.119.131,16276-FR-Gravelines,16276.0,48.8582,2.3387,be103.gra-g2-nc5.fr.eu,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,3101396.0,782.0,0.0,0.0,,https://www.validators.app/api/v1/validators/mainnet/Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,,779.0,336528001.0,432000.0,mainnet,2025-04-28T19:56:18.000Z,149221322367074.0,3.898767080205891e+17,2025-05-05 15:23:55.829223+00:00,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,0.0,20785386413605.0,338018997.0,338018966.0,True,424665488.0,417782155.0,6883333.0,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,20785.3864136,3003904.04449419
4,mainnet,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,"YYDS Validator 🚀 Performant, Secure and NO Fees",,https://site-et7.pages.dev,Join YYDS Validator for 0% commission and enjoy top-tier security and performance for your assets,https://site-et7.pages.dev/logo.png,2024-10-14 01:54:49 UTC,2025-01-01 03:40:05 UTC,,True,0.0,[BlazeStake],True,https://prod-validators.nyc3.digitaloceanspaces.com/3qpnfid358b10t2sbaophi1x2tev,20785386413605,0,0,0,False,2,2,1,2,2,2.1.21,2,0,0,2.0,13,2,135.125.119.131,16276-FR-Gravelines,16276.0,48.8582,2.3387,be103.gra-g2-nc5.fr.eu,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,3101396.0,782.0,0.0,0.0,,https://www.validators.app/api/v1/validators/mainnet/Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,,778.0,336096000.0,432000.0,mainnet,2025-04-26T20:24:27.000Z,149812284369850.0,3.8989618628602246e+17,2025-05-05 15:23:55.829223+00:00,Xoir1BnQX9TbEvon9HRbD8tkjcD9dorsxmNjZAV64Re,0.0,20785386413605.0,338018997.0,338018966.0,True,417782155.0,410896024.0,6886131.0,DzPT1ZWDeURdTj38QBSceWnrpYFxZRBLPRXmUgHVDAGR,20785.3864136,3003904.04449419


In [65]:
# df_final = pd.merge(
#     df_merged,
#     df_validators[["vote_account", "name"]],
#     on="vote_account",
#     how="left"

# )

# df_final

In [31]:
df_final.columns.tolist()

['epoch',
 'starting_slot',
 'slots_in_epoch',
 'network',
 'created_at',
 'total_rewards',
 'total_active_stake',
 'timestamp',
 'nodePubkey',
 'commission',
 'activatedStake',
 'lastVote',
 'rootSlot',
 'epochVoteAccount',
 'credits',
 'previous_credits',
 'credits_earned',
 'votePubkey',
 'activatedStake_SOL',
 'activatedStake_USD',
 'vote_account',
 'name']

In [32]:
validators_metetrcs = df_validators[["name", "commission", "active_stake", "skipped_slot_percent", "jito", "total_score","ping_time", "skipped_slots", "skipped_after_score", "root_distance_score", "vote_distance_score", "epoch", "epoch_credits", "skipped_slots", "skipped_slot_percent" ]]
validators_metetrcs.head()

Unnamed: 0,name,commission,active_stake,skipped_slot_percent,jito,total_score,ping_time,skipped_slots,skipped_after_score,root_distance_score,vote_distance_score,epoch,epoch_credits,skipped_slots.1,skipped_slot_percent.1
0,"YYDS Validator 🚀 Performant, Secure and NO Fees",0,20785386413605,0.0,True,13,,0.0,2,2,2,782.0,3101396.0,0.0,0.0
1,"Trillium: Stake, Earn, and Fuel Solana!",5,239874967809218,0.0,True,13,,0.0,2,2,2,782.0,3100588.0,0.0,0.0
2,Yonta Labs - LST 0%Fee MEV -,0,178771190703120,0.0,True,13,,0.0,2,2,2,782.0,3100336.0,0.0,0.0
3,Hayek,0,14710170959218,,True,13,,,2,2,2,782.0,3100208.0,,
4,Palidator [Paladin validator],5,39380182380465,0.0,True,13,,0.0,2,2,2,782.0,3095960.0,0.0,0.0


In Solana, **EPOCHS\_PER\_YEAR** is a constant that depends on the number of slots per epoch and the length of each slot.

### Step-by-Step Calculation:

1. **Length of an epoch**:
   Solana's epoch typically consists of **432,000 slots** (this number can slightly vary).

2. **Duration of a slot**:
   Each slot lasts roughly **400 milliseconds** (0.4 seconds).

3. **Total seconds in a year**:
   A year has **365.25 days** (accounting for leap years), so the total seconds in a year is:

   $$
   365.25 \times 24 \times 60 \times 60 = 31,557,600 \text{ seconds}
   $$

4. **Epoch length in seconds**:
   The length of an epoch in seconds can be calculated as:

   $$
   \text{Epoch length} = 432,000 \times 0.4 \text{ seconds} = 172,800 \text{ seconds per epoch}
   $$

5. **Number of epochs per year**:
   To find the number of epochs per year, divide the total seconds in a year by the seconds in one epoch:

   $$
   \text{Epochs per year} = \frac{31,557,600}{172,800} \approx 182.5
   $$



### Important Notes:

* The exact value of **EPOCHS\_PER\_YEAR** can fluctuate slightly based on actual slot times or network conditions.
* In practical use, you might want to round **EPOCHS\_PER\_YEAR** to **183** or another convenient approximation for clarity.

Would you like to run this with some example values, or do you need further adjustments?


## APY (Annual Percentage Yield)

APY estimates staking returns for a validator, considering how many credits they earn.

# Activated Stake

Calculation:

- Represented in lamports (1 SOL = 1e9 lamports).

- Track stake changes over epochs to detect delegation trends.

In [33]:
# df_expanded['activated_stake_sol'] = df_expanded['activatedStake'] / 1e9
# print(df_expanded)

### Uptime

 Solana has ~432,000 slots per epoch (2.5 slots/sec).

In [34]:
# slots_per_epoch = 432000
# df_expanded['uptime_pct'] = (df_expanded["credits_earned"] / slots_per_epoch) * 100



#### Vote Success Rate

If credits_earned > slots, use lastVote and rootSlot instead.



In [35]:
# successful_votes = df_expanded['lastVote'] - df_expanded['rootSlot']
# df_expanded['uptime_pct'] = (successful_votes / slots_per_epoch) * 100

#### Vote Lag
vote_lag shows how many slots the validator lags behind.

#### Lagging 
A consistent high lag can suggest downtime or poor performance

In [36]:
# threshold = 100
# df_expanded['vote_lag'] = df_expanded['rootSlot'] - df_expanded['lastVote']
# df_expanded['lagging'] = df_expanded['vote_lag'] > threshold

# df_expanded.head()


In [37]:
df_expanded['vote_lag'].unique().tolist()

KeyError: 'vote_lag'