In [1]:
from helius import NFTAPI, NameAPI, BalancesAPI, WebhooksAPI, TransactionsAPI
from dotenv import load_dotenv
from pathlib import Path
import requests
import pandas as pd
import numpy as np
import os
import time
import json

In [2]:
HELIUS_API_KEY = os.getenv('HELIUM_API_KEY')


In [3]:
VALIDATORS_API_KEY = os.getenv('VALIDATORS_API_KEY')

In [4]:
pd.set_option('display.max_colwidth', None)  
pd.set_option('display.max_columns', None)   
pd.set_option('display.width', 200)    
pd.set_option('display.float_format', '{:.8f}'.format)

In [5]:
# transactions_api  = TransactionsAPI(HELIUS_API_KEY)

# parsed_transaction_history = transactions_api.get_parsed_transaction_history(address="HtXa1PH33GGvH3giqMqatndHcKnzeSkwMaW46DTzDfLd")

# print(parsed_transaction_history)

In [6]:
import requests

# Replace this with your Helius API key
#API_KEY = "YOUR_HELIUS_API_KEY"
BASE_URL = f"https://mainnet.helius-rpc.com/?api-key={HELIUS_API_KEY}"

HEADERS = {"Content-Type": "application/json"}

def get_vote_accounts():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getVoteAccounts"
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        current_validators = data["result"]["current"]
        delinquent_validators = data["result"]["delinquent"]
        return current_validators, delinquent_validators
    else:
        print("Error fetching vote accounts:", response.text)
        return [], []

def get_stake_accounts_by_pubkey(pubkey):
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getProgramAccounts",
        "params": [
            "Stake11111111111111111111111111111111111111",  # Stake Program
            {
                "encoding": "jsonParsed",
                "filters": [
                    {
                        "memcmp": {
                            "offset": 12,
                            "bytes": pubkey
                        }
                    }
                ]
            }
        ]
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        return response.json()["result"]
    else:
        print("Error fetching stake accounts:", response.text)
        return []



In [7]:
# --- Example Usage ---

# Fetch validators
current, delinquent = get_vote_accounts()
print(f"Total active validators: {len(current)}")
print(f"Total delinquent validators: {len(delinquent)}")

# Optionally fetch stake accounts by a delegator pubkey
delegator_pubkey = "HtXa1PH33GGvH3giqMqatndHcKnzeSkwMaW46DTzDfLd"  # Replace this
stakes = get_stake_accounts_by_pubkey(delegator_pubkey)
print(f"Stake accounts for {delegator_pubkey}:")
for s in stakes:
    print(s["pubkey"], s["account"]["data"]["parsed"]["info"]["stake"])


Total active validators: 1288
Total delinquent validators: 77
Stake accounts for HtXa1PH33GGvH3giqMqatndHcKnzeSkwMaW46DTzDfLd:


# Fetching Validator + Stake Data via Helius API

### Vote Data

In [8]:
# JSON-RPC Payload to fetch validator vote accounts
payload = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "getVoteAccounts"
}

response = requests.post(BASE_URL, json=payload)
data = response.json()

# Combine current + delinquent validators
validators = data["result"]["current"] + data["result"]["delinquent"]

# Convert to DataFrame
df = pd.DataFrame(validators)
df.head(n=1)

Unnamed: 0,activatedStake,commission,epochCredits,epochVoteAccount,lastVote,nodePubkey,rootSlot,votePubkey
0,22843122853697,5,"[[778, 545176380, 538290606], [779, 552059588, 545176380], [780, 558930653, 552059588], [781, 565821316, 558930653], [782, 566472173, 565821316]]",True,337864773,1so1ctTM24PdU7RLZJzJKYYVYri3gjNeCd8nmHbpdXg,337864742,scanjszMg2p4pXCZJWNUY8gEN6sCqY2oThXV58NPbWd


In [9]:
# Add additional fields (can be joined with price data or slashing reports)
df["timestamp"] = pd.Timestamp.utcnow()
df["epoch"] = None  # You can get this via 'getEpochInfo' method

# Preview important fields
# print(df[[
#     "nodePubkey", "commission", "activatedStake", "lastVote", "rootSlot", 
#     "epochCredits", "epochVoteAccount"
# ]])

df = df[[
    "timestamp","nodePubkey", "commission", "activatedStake", "lastVote", "rootSlot", 
    "epochCredits", "epochVoteAccount","votePubkey"
]]


In [10]:
print(df.columns.tolist())


['timestamp', 'nodePubkey', 'commission', 'activatedStake', 'lastVote', 'rootSlot', 'epochCredits', 'epochVoteAccount', 'votePubkey']


In [11]:
expanded_rows = []

for idx, row in df.iterrows():
    node_pubkey = row['nodePubkey']
    votePubkey = row['votePubkey']
    for epoch_info in row['epochCredits']:
        epoch, current_credits, previous_credits = epoch_info
        credits_earned = current_credits - previous_credits
        expanded_rows.append({
            'timestamp': row['timestamp'],
            'nodePubkey': node_pubkey,
            'commission': row['commission'],
            'activatedStake': row['activatedStake'],
            'lastVote': row['lastVote'],
            'rootSlot': row['rootSlot'],
            'epochVoteAccount': row['epochVoteAccount'],
            'epoch': epoch,
            'credits': current_credits,
            'previous_credits': previous_credits,
            'credits_earned': credits_earned,
            'votePubkey' : votePubkey
        })

df_expanded = pd.DataFrame(expanded_rows)
df_expanded.sort_values(by=['nodePubkey', 'epoch'], inplace=True)
print(df_expanded.head())


                            timestamp                                    nodePubkey  commission  activatedStake   lastVote   rootSlot  epochVoteAccount  epoch    credits  previous_credits  \
3638 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    778  635925863         629035066   
3639 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    779  642817650         635925863   
3640 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    780  649701256         642817650   
3641 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    781  656598333         649701256   
3642 2025-05-04 22:32:36.496945+00:00  138KHw

### Annual Inflation Rate
Purpose: Use this to contextualize staking APY and understand network reward distribution.





In [12]:
def get_inflation_rate():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getInflationRate"
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        #return data["result"]
        result = data["result"]
        result["timestamp"] = pd.Timestamp.utcnow()  # Add timestamp
        return result 
    else:
        print("Error fetching inflation rate:", response.text)
        return {}

# Fetch and convert to DataFrame
inflation_data = get_inflation_rate()
df_inflation = pd.DataFrame([inflation_data])  # Single row
print(df_inflation.head(n=1))

   epoch  foundation      total  validator                        timestamp
0    782  0.00000000 0.04554517 0.04554517 2025-05-04 22:32:37.664215+00:00


### Staking Rewards - Inflation Rate
This is critical for calculating APY


- amount: Rewards in lamports (e.g., 1,863,991,600 lamports ≈ 1.863 SOL).

- effectiveSlot: Slot when rewards were applied.

- epoch: Epoch number.

- postBalance: Account balance after rewards (in lamports).

- commission: Validator’s commission rate




In [13]:
def get_inflation_reward(vote_accounts, epoch=None):
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getInflationReward",
        "params": [vote_accounts, {"commitment": "finalized"}]
    }
    if epoch is not None:
        payload["params"].append({"epoch": epoch})
        
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    
    if response.status_code == 200:
        data = response.json()
        result = data["result"]
        
        # Add timestamp to each reward entry
        timestamp = pd.Timestamp.utcnow()
        for entry in result:
            if entry is not None:  # Some entries may be None
                entry["timestamp"] = timestamp
        
        return result
    else:
        print("Error fetching inflation reward:", response.text)
        return []


# Get validator-specific inflation rewards
validator_addresses = df_expanded["votePubkey"].tolist()
rewards = get_inflation_reward(validator_addresses[:10])  # Max 50 addresses per call

inflation_reward = pd.DataFrame(rewards)
print(inflation_reward.head(n=1))

   amount  commission  effectiveSlot  epoch  postBalance                        timestamp
0       0           0      337824000    781    171285600 2025-05-04 22:33:53.718058+00:00


### Total Network Credits

Use total network credits to normalize your validator’s credits_earned and assess relative performance. Also provides epoch context.



In [14]:
def get_epoch_info():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getEpochInfo"
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        result = data["result"]
        result["timestamp"] = pd.Timestamp.utcnow()  # Add timestamp
        return result 
    else:
        print("Error fetching epoch info:", response.text)
        return {}

# Fetch and convert to DataFrame
epoch_info = get_epoch_info()
df_epoch = pd.DataFrame([epoch_info])
print(df_epoch.head(n=1))

   absoluteSlot  blockHeight  epoch  slotIndex  slotsInEpoch  transactionCount                        timestamp
0     337864974    316090625    782      40974        432000      402255642241 2025-05-04 22:33:54.497805+00:00


## Circulating SOL Supply



In [15]:
def get_supply():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getSupply",
        "params": [{"commitment": "finalized"}]
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        return data["result"]["value"]
    else:
        print("Error fetching supply:", response.text)
        return {}

# Fetch and convert to DataFrame
supply_data = get_supply()
df_supply = pd.DataFrame([supply_data])
print(df_supply.head(n=1))

          circulating     nonCirculating  \
0  518193300520267163  81868553321360705   

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               

In [16]:
import pandas as pd

# If df_supply is already created:
df_supply['circulating'] = df_supply['circulating'].astype(float)
df_supply['nonCirculating'] = df_supply['nonCirculating'].astype(float)
df_supply['total'] = df_supply['total'].astype(float)
# Optionally convert to billions for readability:
df_supply['circulating_sol'] = df_supply['circulating'] / 1e9
df_supply['nonCirculating_sol'] = df_supply['nonCirculating'] / 1e9
df_supply['total_sol'] = df_supply['total'] / 1e9
# If you want a DataFrame where each nonCirculatingAccount is its own row:
df_accounts = pd.DataFrame(df_supply['nonCirculatingAccounts'][0], columns=['nonCirculatingAccount'])

# Resulting outputs:
df_supply = df_supply[['circulating_sol', 'nonCirculating_sol', 'total_sol']]
print(df_supply)
#print(df_supply[['circulating_sol', 'nonCirculating_sol', 'total_sol']])
#print(df_accounts.head())


     circulating_sol  nonCirculating_sol          total_sol
0 518193300.52026713   81868553.32136071 600061853.84162796


###  Transactions Per Second (TPS)



In [17]:
def get_tps():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getRecentPerformanceSamples",
        "params": [1]  # Fetch 1 sample
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        sample = data["result"][0]
        tps = sample["numTransactions"] / sample["samplePeriodSecs"]
        return tps
    else:
        print("Error fetching TPS:", response.text)
        return 0

# Fetch TPS
tps = get_tps()
df_tps = pd.DataFrame({"timestamp": [pd.Timestamp.utcnow()], "tps": [tps]})
print("TPS DataFrame:")
print(df_tps.head())

TPS DataFrame:
                         timestamp           tps
0 2025-05-04 22:34:02.847892+00:00 4385.11666667


### SOL Price (Using CoinGecko)



In [18]:
def get_sol_price():
    url = "https://api.coingecko.com/api/v3/simple/price?ids=solana&vs_currencies=usd"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        return data["solana"]["usd"]
    else:
        print("Error fetching SOL price:", response.text)
        return 0

# Fetch SOL price
sol_price = get_sol_price()
df_price = pd.DataFrame({"timestamp": [pd.Timestamp.utcnow()], "sol_price_usd": [sol_price]})
print("SOL Price DataFrame:")
print(df_price.head())

SOL Price DataFrame:
                         timestamp  sol_price_usd
0 2025-05-04 22:34:03.610374+00:00   146.07000000


In [19]:
def get_recent_priority_fees():
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getRecentPrioritizationFees",
        "params": [[]]  # Empty accounts for global fees
    }
    response = requests.post(BASE_URL, json=payload, headers=HEADERS)
    if response.status_code == 200:
        data = response.json()
        fees = [f["prioritizationFee"] for f in data["result"]]
        avg_fee = sum(fees) / len(fees) if fees else 0  # Micro-lamports
        return avg_fee / 1e6  # Convert to lamports
    else:
        print("Error fetching priority fees:", response.text)
        return 0

# Fetch average priority fee
avg_priority_fee = get_recent_priority_fees()
base_fee = 5000  # Static base fee per signature
avg_fee = base_fee + avg_priority_fee  # Total average fee
df_fees = pd.DataFrame({
    "timestamp": [pd.Timestamp.utcnow()],
    "avg_fee_lamports": [avg_fee],
    "avg_fee_sol": [avg_fee / 1e9],
    "avg_fee_usd": [avg_fee / 1e9 * sol_price]
})
print("Network Fees DataFrame:")
print(df_fees.head())

Network Fees DataFrame:
                         timestamp  avg_fee_lamports  avg_fee_sol  avg_fee_usd
0 2025-05-04 22:35:19.150059+00:00     5000.00000000   0.00000500   0.00073035


In [20]:


# Assuming VALIDATORS is already defined
# Example: VALIDATORS = "your-secret-api-token"

NETWORK = "mainnet"  # Change to 'testnet' or 'pythnet' as needed
BASE_URL = f"https://www.validators.app/api/v1/validators/{NETWORK}.json"

HEADERS = {
    "Token": VALIDATORS_API_KEY
}

def fetch_all_validators(with_history=False):
    params = {}
    if with_history:
        params["with_history"] = "true"
    
    response = requests.get(BASE_URL, headers=HEADERS, params=params)
    
    if response.status_code == 200:
        validators = response.json()
        print(f"Fetched {len(validators)} validators.")
        return pd.DataFrame(validators)
    else:
        print(f"Error fetching validator data: {response.status_code} - {response.text}")
        return pd.DataFrame()

# Usage
df_validators = fetch_all_validators(with_history=True)

# Display the first few rows
print(df_validators.head())


Fetched 1291 validators.
   network                                       account                                            name keybase_id                      www_url  \
0  mainnet   Tri1F8B6YtjkBztGCwBNSLEZib1EAqMUEUM7dTT7ZG3         Trillium: Stake, Earn, and Fuel Solana!                     https://trillium.so   
1  mainnet  5NiHw5LZn1FiL848XzbEBxuygbNvMJ7CsPvXNC8VmCLN                                        Bukashka   bukashka  https://keybase.io/bukashka   
2  mainnet   LodeuWMHPiPj2PUHUyca2bkpFv9HyzR3gaDBmGJ9TSS                             The Lode (Sentries)   the_lode          https://sentries.io   
3  mainnet  FYWeJ3uyJRHPUYj84ebhPAa7ZTeq9WU8LHQaKAgDcF74  StakeRocket 🚀 0% Fee + Jito MEV 💰 Boosted APY!                 https://stakerocket.org   
4  mainnet   NLMSHTjmSiRxGJPs3uaqtsFBC2dTGYwK41U18Nmw5kH                                 T-STAKE Systems                     https://t-stake.com   

                                                                                      

In [21]:
df_validators.columns.to_list()

['network',
 'account',
 'name',
 'keybase_id',
 'www_url',
 'details',
 'avatar_url',
 'created_at',
 'updated_at',
 'jito',
 'jito_commission',
 'stake_pools_list',
 'is_active',
 'avatar_file_url',
 'active_stake',
 'authorized_withdrawer_score',
 'commission',
 'data_center_concentration_score',
 'delinquent',
 'published_information_score',
 'root_distance_score',
 'security_report_score',
 'skipped_slot_score',
 'skipped_after_score',
 'software_version',
 'software_version_score',
 'stake_concentration_score',
 'consensus_mods_score',
 'vote_latency_score',
 'total_score',
 'vote_distance_score',
 'ip',
 'data_center_key',
 'autonomous_system_number',
 'latitude',
 'longitude',
 'data_center_host',
 'vote_account',
 'epoch_credits',
 'epoch',
 'skipped_slots',
 'skipped_slot_percent',
 'ping_time',
 'url']

# Exploratory Data Analysis

In Solana, **EPOCHS\_PER\_YEAR** is a constant that depends on the number of slots per epoch and the length of each slot.

### Step-by-Step Calculation:

1. **Length of an epoch**:
   Solana's epoch typically consists of **432,000 slots** (this number can slightly vary).

2. **Duration of a slot**:
   Each slot lasts roughly **400 milliseconds** (0.4 seconds).

3. **Total seconds in a year**:
   A year has **365.25 days** (accounting for leap years), so the total seconds in a year is:

   $$
   365.25 \times 24 \times 60 \times 60 = 31,557,600 \text{ seconds}
   $$

4. **Epoch length in seconds**:
   The length of an epoch in seconds can be calculated as:

   $$
   \text{Epoch length} = 432,000 \times 0.4 \text{ seconds} = 172,800 \text{ seconds per epoch}
   $$

5. **Number of epochs per year**:
   To find the number of epochs per year, divide the total seconds in a year by the seconds in one epoch:

   $$
   \text{Epochs per year} = \frac{31,557,600}{172,800} \approx 182.5
   $$



### Important Notes:

* The exact value of **EPOCHS\_PER\_YEAR** can fluctuate slightly based on actual slot times or network conditions.
* In practical use, you might want to round **EPOCHS\_PER\_YEAR** to **183** or another convenient approximation for clarity.

Would you like to run this with some example values, or do you need further adjustments?


## APY (Annual Percentage Yield)

APY estimates staking returns for a validator, considering how many credits they earn.

In [22]:
EPOCHS_PER_YEAR = 182.5

# Avoid divide-by-zero issues
df_expanded = df_expanded[df_expanded['activatedStake'] > 0]

# Compute per-epoch return
df_expanded['epoch_return'] = df_expanded['credits_earned'] / df_expanded['activatedStake']

# Compute APY
df_expanded['APY'] = (1 + df_expanded['epoch_return']) ** EPOCHS_PER_YEAR - 1

df_expanded.head()

Unnamed: 0,timestamp,nodePubkey,commission,activatedStake,lastVote,rootSlot,epochVoteAccount,epoch,credits,previous_credits,credits_earned,votePubkey,epoch_return,APY
3638,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,778,635925863,629035066,6890797,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.511e-05
3639,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,779,642817650,635925863,6891787,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.512e-05
3640,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,780,649701256,642817650,6883606,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.507e-05
3641,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,781,656598333,649701256,6897077,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.514e-05
3642,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,782,657249655,656598333,651322,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,2e-08,3.32e-06


# Activated Stake

Calculation:

- Represented in lamports (1 SOL = 1e9 lamports).

- Track stake changes over epochs to detect delegation trends.

In [23]:
df_expanded['activated_stake_sol'] = df_expanded['activatedStake'] / 1e9
print(df_expanded)

                            timestamp                                    nodePubkey  commission  activatedStake   lastVote   rootSlot  epochVoteAccount  epoch    credits  previous_credits  \
3638 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    778  635925863         629035066   
3639 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    779  642817650         635925863   
3640 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    780  649701256         642817650   
3641 2025-05-04 22:32:36.496945+00:00  138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT           0  35818579022567  337864773  337864742              True    781  656598333         649701256   
3642 2025-05-04 22:32:36.496945+00:00  138KHw

### Uptime

 Solana has ~432,000 slots per epoch (2.5 slots/sec).

In [24]:
slots_per_epoch = 432000
df_expanded['uptime_pct'] = (df_expanded["credits_earned"] / slots_per_epoch) * 100



#### Vote Success Rate

If credits_earned > slots, use lastVote and rootSlot instead.



In [25]:
successful_votes = df_expanded['lastVote'] - df_expanded['rootSlot']
df_expanded['uptime_pct'] = (successful_votes / slots_per_epoch) * 100

#### Vote Lag
vote_lag shows how many slots the validator lags behind.

#### Lagging 
A consistent high lag can suggest downtime or poor performance

In [26]:
threshold = 100
df_expanded['vote_lag'] = df_expanded['rootSlot'] - df_expanded['lastVote']
df_expanded['lagging'] = df_expanded['vote_lag'] > threshold

df_expanded.head()


Unnamed: 0,timestamp,nodePubkey,commission,activatedStake,lastVote,rootSlot,epochVoteAccount,epoch,credits,previous_credits,credits_earned,votePubkey,epoch_return,APY,activated_stake_sol,uptime_pct,vote_lag,lagging
3638,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,778,635925863,629035066,6890797,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.511e-05,35818.57902257,0.00717593,-31,False
3639,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,779,642817650,635925863,6891787,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.512e-05,35818.57902257,0.00717593,-31,False
3640,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,780,649701256,642817650,6883606,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.507e-05,35818.57902257,0.00717593,-31,False
3641,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,781,656598333,649701256,6897077,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,1.9e-07,3.514e-05,35818.57902257,0.00717593,-31,False
3642,2025-05-04 22:32:36.496945+00:00,138KHwTqKNWGLoo8fK5i8UxYtwoC5tC8o7M9rY1CDEjT,0,35818579022567,337864773,337864742,True,782,657249655,656598333,651322,ASfKFAKz6fH4eip1jdLGt5Ym954kU9KYnwq2Csn9ogSz,2e-08,3.32e-06,35818.57902257,0.00717593,-31,False


In [27]:
df_expanded['vote_lag'].unique().tolist()

[-31, -32, -35]