In [924]:
import plotly.graph_objects as go
import numpy as np

import pandas as pd
import numpy as np
from datetime import datetime, timezone
import os
import logging
from typing import List, Dict
from sqlalchemy import create_engine, text
from sqlalchemy.engine.base import Engine
from dotenv import load_dotenv

import plotly.express as px
from plotly.subplots import make_subplots

import json
import jinja2

import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
from datetime import datetime, timezone  # Changed this line
import json
from sqlalchemy import create_engine, text
from typing import Dict

# Inputs

In [925]:
# Database configuration.
# Every value falls back to the local development default used so far, but can
# be overridden with an ALLORA_DB_* environment variable so that real
# credentials do not have to be written into the notebook.
db_config = {
    'dbname': os.environ.get('ALLORA_DB_NAME', 'allora'),
    'user': os.environ.get('ALLORA_DB_USER', 'postgres'),
    'password': os.environ.get('ALLORA_DB_PASSWORD', 'postgres'),
    'host': os.environ.get('ALLORA_DB_HOST', 'localhost'),
    'port': os.environ.get('ALLORA_DB_PORT', '5433')  # kept as a string, as before
}

# Inputs a reader is expected to tune.
topic_id = 1
lookback_epochs = 10000 # will grab data from this many epochs ago
lookback_blocks = 10000 # will grab data from this many blocks ago (only used for tokenomics)



# Queries and db scripts

In [926]:
class DatabaseManager:
    """Owns a read engine and a write engine and the `research_metrics` table.

    Source data is queried through the read engine (`execute_query`). The
    `research_metrics` table, including the sentinel row used as a global
    lock, is created, written, checked and deleted through the write engine.
    """

    def __init__(self, read_config: Dict[str, str], write_config: Dict[str, str]):
        """Store the two connection configs; engines are created by `connect()`.

        Each config must provide the keys 'user', 'password', 'host', 'port'
        and 'dbname'.
        """
        self.read_config = read_config
        self.write_config = write_config
        self.read_engine: Engine = None
        self.write_engine: Engine = None

    @staticmethod
    def _connection_string(config: Dict[str, str]) -> str:
        """Build a postgresql:// URL from a config dict.

        User and password are URL-escaped so that characters such as '@',
        ':' or '/' in the credentials cannot corrupt the URL.
        """
        from urllib.parse import quote_plus

        user = quote_plus(str(config['user']))
        password = quote_plus(str(config['password']))
        return f"postgresql://{user}:{password}@{config['host']}:{config['port']}/{config['dbname']}"

    def connect(self):
        """Create the read and write engines; re-raises any creation error."""
        try:
            self.read_engine = create_engine(self._connection_string(self.read_config), future=True)
            print("Connected to the read database")

            self.write_engine = create_engine(self._connection_string(self.write_config), future=True)
            print("Connected to the write database")
        except Exception as e:
            print(f"Error connecting to databases: {e}")
            raise

    def execute_query(self, query: str, params: Dict = None) -> pd.DataFrame:
        """Run `query` on the read engine and return all rows as a DataFrame.

        Args:
            query: SQL text. May contain `:name` placeholders.
            params: Optional mapping of placeholder names to values. When
                omitted the statement is executed exactly as before.

        Raises:
            Exception: whatever the driver raises, after printing it.
        """
        try:
            with self.read_engine.connect() as connection:
                statement = text(query)
                if params:
                    result = connection.execute(statement, params)
                else:
                    result = connection.execute(statement)
                return pd.DataFrame(result.fetchall(), columns=result.keys())
        except Exception as e:
            print(f"Error executing query: {e}")
            raise

    def create_metrics_table(self):
        """Create `research_metrics` in the write database if it is missing."""
        print("Creating or verifying the research metrics table in the write database...")
        create_table_query = text("""
        CREATE TABLE IF NOT EXISTS research_metrics (
            id SERIAL PRIMARY KEY,
            topic_id INTEGER NOT NULL,
            epoch INTEGER NOT NULL,
            address VARCHAR(255),
            metric_value DOUBLE PRECISION,
            metric_name VARCHAR(255) NOT NULL,
            updated_at TIMESTAMP WITH TIME ZONE NOT NULL
        );
        """)

        try:
            with self.write_engine.connect() as connection:
                connection.execute(create_table_query)
                connection.commit()
            print("Research metrics table created or verified in the write database.")
        except Exception as e:
            print(f"Error creating or verifying research metrics table: {e}")
            raise

    def insert_metrics(self, df: pd.DataFrame):
        """Append the rows of `df` to `research_metrics`.

        Missing expected columns are filled with None, 'updated_at' is
        converted to datetime and 'address' is cast to str (so a missing
        address is stored as the text 'None', as before). All of this is done
        on a copy: the caller's DataFrame is left untouched.
        """
        try:
            # Work on a copy so normalisation never leaks back to the caller.
            df = df.copy()

            # Ensure all expected columns are present
            expected_columns = ['topic_id', 'epoch', 'address', 'metric_value', 'metric_name', 'updated_at']
            for col in expected_columns:
                if col not in df.columns:
                    df[col] = None  # Add missing columns with None values

            # Convert 'updated_at' to datetime if it's not already
            if df['updated_at'].dtype != 'datetime64[ns]':
                df['updated_at'] = pd.to_datetime(df['updated_at'])

            # Convert 'address' column to string type
            df['address'] = df['address'].astype(str)

            # Insert data into the database
            df.to_sql('research_metrics', self.write_engine, if_exists='append', index=False)
            print(f"Inserted {len(df)} rows into research metrics table")
        except Exception as e:
            print(f"Error inserting metrics into database: {e}")
            raise

    def close(self):
        """Dispose of whichever engines were created."""
        if self.read_engine:
            self.read_engine.dispose()
        if self.write_engine:
            self.write_engine.dispose()
        print("Database connections closed")

    def set_global_lock(self):
        """Insert the sentinel row (topic 0, epoch -1, 'global_lock').

        NOTE(review): check-then-set is not atomic; two processes can both
        pass `check_global_lock()` before either inserts the row.
        """
        try:
            lock_entry = pd.DataFrame({
                'topic_id': [0],  # Use 0 as a special topic_id for global lock
                'epoch': [-1],
                'metric_value': [0.0],
                'metric_name': ['global_lock'],
                'updated_at': [datetime.now(timezone.utc)]
            })
            self.insert_metrics(lock_entry)
            print("Global lock set")
        except Exception as e:
            print(f"Error setting global lock: {e}")
            raise

    def check_global_lock(self) -> bool:
        """Return True when the sentinel lock row exists.

        The lock row is inserted and deleted through the write engine, so it
        is looked up there as well; reading it through the read engine would
        miss it whenever the two engines point at different databases.
        """
        query = """
        SELECT EXISTS (
            SELECT 1 FROM research_metrics 
            WHERE topic_id = 0 AND epoch = -1 AND metric_name = 'global_lock'
        ) AS is_locked;
        """
        try:
            with self.write_engine.connect() as connection:
                result = connection.execute(text(query))
                return bool(result.scalar())
        except Exception as e:
            print(f"Error checking global lock: {e}")
            raise

    def remove_global_lock(self):
        """Delete the sentinel lock row from the write database."""
        query = """
        DELETE FROM research_metrics 
        WHERE topic_id = 0 AND epoch = -1 AND metric_name = 'global_lock';
        """
        try:
            with self.write_engine.connect() as connection:
                connection.execute(text(query))
                connection.commit()
            print("Global lock removed")
        except Exception as e:
            print(f"Error removing global lock: {e}")
            raise

class Metric:
    """Common base for all metrics: holds the metric name, topic and epoch length."""

    def __init__(self, name: str, topic_id: int, epoch_length: int):
        """Remember the label stored as `metric_name`, the topic, and the divisor
        subclasses apply to block heights to obtain an epoch number."""
        self.name, self.topic_id, self.epoch_length = name, topic_id, epoch_length

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Compute the metric; the base class has no implementation."""
        raise NotImplementedError("Subclasses must implement this method")
class ValidatorRewardMetric(Metric):
    """Per-block entropy-based metric over how rewards are spread across validators."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Return one row per block height at or above `min_block_height`.

        Rows use `topic_id` 0 and store the block height in the `epoch`
        column. An empty, column-less DataFrame is returned when the query
        yields no blocks.
        """
        query = f"""
        WITH block_rewards AS (
            SELECT 
                height_tx,
                validator,
                amount::numeric / (10^18) as reward
            FROM 
                validator_rewards
            WHERE 
                height_tx >= {min_block_height}
        )
        SELECT 
            height_tx,
            ARRAY_AGG(reward) as rewards
        FROM 
            block_rewards
        GROUP BY 
            height_tx
        ORDER BY 
            height_tx ASC;
        """

        df = db_manager.execute_query(query)

        def compute_validator_reward_metric(rewards):
            """Map one block's reward list to H_v = 10**(C_v * (E_v / ln(n_v) - 1))."""
            # Parameters
            C_v = 10
            beta = 0.25

            # Force a float array so Decimal/object inputs cannot break np.log.
            rewards = np.asarray(rewards, dtype=float)

            # Normalize rewards (P_iv)
            total_rewards = np.sum(rewards)
            if total_rewards == 0:
                return 0

            n_v = len(rewards)
            if n_v < 2:
                # ln(1) == 0, so the normalised entropy is undefined. The
                # original expression evaluated 0/0 here and produced NaN
                # with runtime warnings; return the same value explicitly.
                return float('nan')

            P_iv = rewards / total_rewards

            # Compute effective number of validators
            n_v_eff = 1 / np.sum(P_iv**2)

            # Compute scaled entropy
            # Add small epsilon to avoid log(0)
            epsilon = 1e-10
            P_iv = np.clip(P_iv, epsilon, 1)
            E_v = -np.sum(P_iv * np.log(P_iv) * (n_v_eff/n_v)**beta)

            # Normalize and scale entropy
            return 10**(C_v * (E_v/np.log(n_v) - 1))

        # One timestamp per row, taken while the rows are built (as before).
        results = [
            {
                'epoch': height,
                'metric_value': compute_validator_reward_metric(block_rewards),
                'topic_id': 0,
                'metric_name': self.name,
                'updated_at': datetime.now(timezone.utc),
                'address': None
            }
            for height, block_rewards in zip(df['height_tx'], df['rewards'])
        ]

        if not results:
            return pd.DataFrame()

        return pd.DataFrame(results)
class ForecastHealthMetric(Metric):
    """Sum of forecasting scores per epoch for the metric's topic."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Query `topic_forecasting_scores` and tag the per-epoch sums with
        the metric name, a UTC timestamp and an empty address."""
        sql = f"""
        SELECT
            {self.topic_id} AS topic_id,
            FLOOR(height_tx / {self.epoch_length}) AS epoch,
            SUM(CAST(score AS FLOAT)) AS metric_value
        FROM 
            topic_forecasting_scores
        WHERE 
            topic_id = {self.topic_id} AND
            height_tx >= {min_block_height}
        GROUP BY
            epoch
        ORDER BY 
            epoch DESC;
        """
        epoch_sums = db_manager.execute_query(sql)
        return epoch_sums.assign(
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
            address=None,
        )

class ReputerScoreMetric(Metric):
    """Per-epoch summary of reputer scores: log10(mean / (std**2 + epsilon))."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Return one row per epoch for the metric's topic.

        Per-address score totals are taken from `scores` for reputers that
        have an active `ema_scores` row in the same epoch. The metric is 0
        when the mean total is not positive, log10(mean) when only one
        reputer is present (std is NaN), and log10(mean / (std**2 + 1e-6))
        otherwise. An empty frame with the expected columns is returned when
        the query yields nothing.
        """
        # The CTE is restricted to this topic, like every other ema_scores
        # query in the file; without it, activity recorded for another topic
        # would qualify an address here.
        query = f"""
        WITH active_inferers_per_epoch AS (
            SELECT 
                FLOOR(height / {self.epoch_length}) AS epoch,  
                address
            FROM 
                ema_scores
            WHERE 
                is_active = true  
                AND topic_id = {self.topic_id}
                AND height >= {min_block_height}  
                AND type = 'ACTOR_TYPE_REPUTER'
            GROUP BY 
                epoch, address
        )
        SELECT 
            a.epoch, 
            s.address, 
            SUM(s.value) AS average_score
        FROM 
            scores s
        JOIN 
            active_inferers_per_epoch a
            ON FLOOR(s.height / {self.epoch_length}) = a.epoch
            AND s.address = a.address
        WHERE 
            s.topic_id = {self.topic_id}
        AND
            s.type = 'ACTOR_TYPE_REPUTER'
        AND
            s.height >= {min_block_height}
        GROUP BY 
            a.epoch, 
            s.address
        ORDER BY 
            a.epoch DESC, 
            s.address;
        """

        raw_reputer_scores = db_manager.execute_query(query)

        if raw_reputer_scores.empty:
            # A row-wise apply on an empty frame does not yield a column, so
            # short-circuit with an empty, correctly shaped result.
            return pd.DataFrame(
                columns=['epoch', 'metric_value', 'topic_id', 'metric_name', 'updated_at', 'address']
            )

        epsilon = 10**-6  # keeps the variance term in the denominator away from zero

        reputer_counts = raw_reputer_scores.groupby('epoch')['address'].nunique().reset_index()
        reputer_counts.columns = ['epoch', 'num_reputers']
        reputer_stats = raw_reputer_scores.groupby('epoch')['average_score'].agg(['mean', 'std']).reset_index()
        reputer_stats.columns = ['epoch', 'reputer_mean_score', 'reputer_std_score']
        reputer_summary = reputer_counts.merge(reputer_stats, on='epoch', how='outer')

        def calculate_reputer_score_metric(row):
            """Score one epoch row from its mean and standard deviation."""
            if row['reputer_mean_score'] <= 0:
                return 0
            if pd.isna(row['reputer_std_score']):  # Case of a single reputer
                return np.log10(row['reputer_mean_score'])
            result = np.log10(row['reputer_mean_score'] / (row['reputer_std_score']**2 + epsilon))
            return 0 if np.isinf(result) else result

        reputer_summary['metric_value'] = reputer_summary.apply(calculate_reputer_score_metric, axis=1)

        # Create a new DataFrame with all required columns
        result_df = pd.DataFrame({
            'epoch': reputer_summary['epoch'],
            'metric_value': reputer_summary['metric_value'],
            'topic_id': self.topic_id,
            'metric_name': self.name,
            'updated_at': datetime.now(timezone.utc)
        })

        result_df['address'] = None
        return result_df

class SortitionScoreMetric(Metric):
    """Per-epoch gap between the average score of inferers that just became
    active and the average previous score of those that just became inactive,
    divided by the number of deactivations (0 when undefined)."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Run the query on `ema_scores` and tag each epoch row with topic,
        metric name, a UTC timestamp and an empty address."""
        sql = f"""
        WITH ranked_scores AS (
            SELECT
                address,
                FLOOR(height / {self.epoch_length}) AS epoch,
                is_active,
                score,
                LAG(is_active) OVER (PARTITION BY address ORDER BY height) AS prev_is_active,
                LAG(score) OVER (PARTITION BY address ORDER BY height) AS prev_score
            FROM
                ema_scores
            WHERE
                topic_id = {self.topic_id}
                AND type = 'ACTOR_TYPE_INFERER_UNSPECIFIED'
                AND height >= {min_block_height}
        )
        SELECT
            epoch,
            COALESCE((AVG(CASE WHEN prev_is_active = false AND is_active = true THEN score END) -
                      AVG(CASE WHEN prev_is_active = true AND is_active = false THEN prev_score END)) / 
                      NULLIF(COUNT(CASE WHEN prev_is_active = true AND is_active = false THEN address END), 0), 0) 
            AS metric_value
        FROM
            ranked_scores
        GROUP BY
            epoch
        ORDER BY
            epoch DESC;
        """

        per_epoch = db_manager.execute_query(sql)
        return per_epoch.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
            address=None,
        )

class SortitionTimeMetric(Metric):
    """Per-epoch ratio of distinct inferer addresses to newly activated ones."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Run the query on `ema_scores` (the denominator is offset by 1e-6 in
        SQL) and tag each epoch row with topic, metric name, a UTC timestamp
        and an empty address."""
        sql = f"""
        WITH ranked_scores AS (
            SELECT
                address,
                FLOOR(height / {self.epoch_length}) AS epoch,
                is_active,
                LAG(is_active) OVER (PARTITION BY address ORDER BY height) AS prev_is_active
            FROM
                ema_scores
            WHERE
                topic_id = {self.topic_id}
                AND type = 'ACTOR_TYPE_INFERER_UNSPECIFIED'
                AND height >= {min_block_height}
        ),
        epoch_metrics AS (
            SELECT
                epoch,
                COUNT(DISTINCT address) as total_addresses,
                COUNT(CASE WHEN prev_is_active = false AND is_active = true THEN address END) as new_active
            FROM ranked_scores
            WHERE prev_is_active IS NOT NULL
            GROUP BY epoch
        )
        SELECT
            epoch,
            total_addresses::float / (new_active + 1e-6) AS metric_value
        FROM epoch_metrics
        ORDER BY epoch DESC;
        """

        per_epoch = db_manager.execute_query(sql)
        return per_epoch.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
            address=None,
        )

class LifetimeMetric(Metric):
    """Per-epoch ratio of active inferer rows to newly activated ones."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Run the query on `ema_scores`, keeping only epochs with at least
        one active inferer, and tag each row with topic, metric name, a UTC
        timestamp and an empty address."""
        sql = f"""
        WITH ranked_scores AS (
            SELECT
                address,
                FLOOR(height / {self.epoch_length}) AS epoch,
                is_active,
                LAG(is_active) OVER (PARTITION BY address ORDER BY height) AS prev_is_active
            FROM
                ema_scores
            WHERE
                topic_id = {self.topic_id}
                AND type = 'ACTOR_TYPE_INFERER_UNSPECIFIED'
                AND height >= {min_block_height}
        ),
        epoch_metrics AS (
            SELECT
                epoch,
                COUNT(CASE WHEN prev_is_active = false AND is_active = true THEN address END) as new_active,
                COUNT(CASE WHEN is_active = true THEN address END) as active_inferrers
            FROM ranked_scores
            WHERE prev_is_active IS NOT NULL
            GROUP BY epoch
        ),
        filled_metrics AS (
            SELECT
                epoch,
                new_active,
                COALESCE(
                    active_inferrers,
                    (SELECT active_inferrers 
                     FROM epoch_metrics e2 
                     WHERE e2.epoch < epoch_metrics.epoch 
                       AND e2.active_inferrers IS NOT NULL 
                     ORDER BY e2.epoch DESC 
                     LIMIT 1)
                ) as active_inferrers
            FROM epoch_metrics
        )
        SELECT
            epoch,
            active_inferrers::float / (new_active + 1e-6) AS metric_value
        FROM filled_metrics
        WHERE active_inferrers > 0
        ORDER BY epoch DESC;
        """

        per_epoch = db_manager.execute_query(sql)
        return per_epoch.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
            address=None,
        )

class InfererHealthMetric(Metric):
    """Emits two metric families for a topic: per-epoch 'infererhealth' and
    per-epoch, per-inferer 'raw_inference'."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Return the concatenated 'infererhealth' and 'raw_inference' rows.

        'infererhealth' is log10(mean absolute distance of inferences from
        the epoch mean) minus the summed combined network loss of the epoch.
        'raw_inference' is each active inferer's mean inference per epoch.
        Rows whose metric value is null are dropped. Both metric names are
        fixed strings; `self.name` is not used.
        """
        log = logging.getLogger(__name__)

        losses_query = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            SUM(CAST(combined_value AS FLOAT)) AS total_combined_value_per_epoch
        FROM 
            networklosses
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
        GROUP BY
            epoch
        ORDER BY 
            epoch DESC;
        """

        inferences_query = f"""
        WITH active_inferers AS (
            SELECT DISTINCT
                address,
                FLOOR(height / {self.epoch_length}) AS epoch
            FROM 
                ema_scores
            WHERE 
                topic_id = {self.topic_id}
                AND is_active = true
                AND height >= {min_block_height}
        )
        SELECT
            FLOOR(i.block_height / {self.epoch_length}) AS epoch,
            i.value,
            i.inferer
        FROM 
            inferences i
        JOIN 
            active_inferers a ON i.inferer = a.address AND FLOOR(i.block_height / {self.epoch_length}) = a.epoch
        WHERE 
            i.topic_id = {self.topic_id}
            AND i.block_height >= {min_block_height}
        ORDER BY
            epoch DESC;
        """

        losses_df = db_manager.execute_query(losses_query)
        inferences_df = db_manager.execute_query(inferences_query)
        log.debug("Inferences DataFrame shape: %s", inferences_df.shape)

        # Mean absolute distance of each inference from its epoch mean
        inferences_df['value'] = inferences_df['value'].astype(float)
        mean_inference = inferences_df.groupby('epoch')['value'].transform('mean')
        inferences_df['distance'] = (inferences_df['value'] - mean_inference).abs()
        mean_distance_df = inferences_df.groupby('epoch')['distance'].mean().reset_index()
        mean_distance_df.columns = ['epoch', 'mean_distance_to_mean_inference']

        # Mean inference per (epoch, inferer); the inferer becomes the address
        raw_inference_df = inferences_df.groupby(['epoch', 'inferer'])['value'].mean().reset_index()
        raw_inference_df.columns = ['epoch', 'address', 'raw_inference']
        log.debug("Raw inference DataFrame shape: %s", raw_inference_df.shape)

        # Merge the dataframes on 'epoch'
        merged_df = pd.merge(mean_distance_df, losses_df, on='epoch', how='outer')

        # log10(d / 10**L) rewritten as log10(d) - L: algebraically the same,
        # but it never forms 10**L, which overflows to inf (or underflows to
        # 0) for large |L| and turned the metric into -inf / +inf.
        merged_df['inferer_health'] = (
            np.log10(merged_df['mean_distance_to_mean_inference'])
            - merged_df['total_combined_value_per_epoch']
        )

        # A single timestamp so both families carry the same updated_at
        now_utc = datetime.now(timezone.utc)

        # Prepare the inferer health DataFrame
        inferer_health_df = pd.DataFrame({
            'epoch': merged_df['epoch'],
            'metric_value': merged_df['inferer_health'],
            'topic_id': self.topic_id,
            'metric_name': 'infererhealth',
            'updated_at': now_utc,
            'address': None
        })

        # Prepare the raw inference DataFrame
        raw_inference_df = pd.DataFrame({
            'epoch': raw_inference_df['epoch'],
            'address': raw_inference_df['address'],
            'metric_value': raw_inference_df['raw_inference'],
            'topic_id': self.topic_id,
            'metric_name': 'raw_inference',
            'updated_at': now_utc
        })
        log.debug("Inferer health DataFrame shape: %s", inferer_health_df.shape)

        # Concatenate both DataFrames
        result_df = pd.concat([inferer_health_df, raw_inference_df], ignore_index=True)

        # Remove rows with null metric values
        result_df = result_df.dropna(subset=['metric_value'])
        log.debug("Final result DataFrame shape: %s", result_df.shape)

        return result_df

class InfererLossesMetric(Metric):
    """Per-worker 'InfererValues' entries from network-loss bundles."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Join bundle values to `networklosses` for the topic and tag each
        row with topic, metric name and a UTC timestamp; the address comes
        from the query."""
        sql = f"""
        SELECT 
            FLOOR(nl.height / {self.epoch_length}) AS epoch,
            bv.worker AS address,
            CAST(bv.value AS FLOAT) AS metric_value
        FROM 
            networkloss_bundle_values bv
        JOIN 
            networklosses nl 
            ON bv.bundle_id = nl.id              
        WHERE 
            nl.topic_id = {self.topic_id}          
            AND nl.height >= {min_block_height}  
            AND bv.reputer_value_type = 'InfererValues'  
        ORDER BY 
            nl.height DESC;
        """

        bundle_rows = db_manager.execute_query(sql)
        return bundle_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class ForecasterLossesMetric(Metric):
    """Per-worker 'ForecasterValues' entries from network-loss bundles."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Join bundle values to `networklosses` for the topic and tag each
        row with topic, metric name and a UTC timestamp; the address comes
        from the query."""
        sql = f"""
        SELECT 
            FLOOR(nl.height / {self.epoch_length}) AS epoch,
            bv.worker AS address,
            CAST(bv.value AS FLOAT) AS metric_value
        FROM 
            networkloss_bundle_values bv
        JOIN 
            networklosses nl 
            ON bv.bundle_id = nl.id              
        WHERE 
            nl.topic_id = {self.topic_id}          
            AND nl.height >= {min_block_height}  
            AND bv.reputer_value_type = 'ForecasterValues'  
        ORDER BY 
            nl.height DESC;
        """
        bundle_rows = db_manager.execute_query(sql)
        return bundle_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class NetworkLossesMetric(Metric):
    """Per-epoch sums of combined and naive network losses, stored as two
    separate metrics named 'combined_losses' and 'naive_losses'."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Query `networklosses` for the topic and stack the two loss columns
        into long form; `self.name` is not used for the metric names."""
        sql = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            SUM(CAST(combined_value AS FLOAT)) AS combined_losses,
            SUM(CAST(naive_value AS FLOAT)) AS naive_losses
        FROM 
            networklosses
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
        GROUP BY
            epoch
        ORDER BY 
            epoch DESC;
        """

        losses = db_manager.execute_query(sql)

        def _as_metric_rows(loss_column: str) -> pd.DataFrame:
            # The source column name doubles as the stored metric name.
            rows = losses[['epoch', loss_column]].rename(columns={loss_column: 'metric_value'})
            rows['metric_name'] = loss_column
            return rows

        # Combined rows first, naive rows second, with a fresh index.
        stacked = pd.concat(
            [_as_metric_rows('combined_losses'), _as_metric_rows('naive_losses')],
            ignore_index=True,
        )
        stacked['topic_id'] = self.topic_id
        stacked['updated_at'] = datetime.now(timezone.utc)
        stacked['address'] = None

        return stacked

class RawReputerScoreMetric(Metric):
    """Unaggregated reputer rows from `scores` for the metric's topic."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Fetch epoch, address and value for 'ACTOR_TYPE_REPUTER' rows and
        tag them with topic, metric name and a UTC timestamp."""
        sql = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            address,
            CAST(value AS FLOAT) AS metric_value
        FROM 
            scores
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
            AND type = 'ACTOR_TYPE_REPUTER'
        ORDER BY 
            epoch DESC, address;
        """

        score_rows = db_manager.execute_query(sql)
        return score_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class RawInfererScoreMetric(Metric):
    """Unaggregated inferer rows from `scores` for the metric's topic."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Fetch epoch, address and value for 'ACTOR_TYPE_INFERER_UNSPECIFIED'
        rows and tag them with topic, metric name and a UTC timestamp."""
        sql = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            address,
            CAST(value AS FLOAT) AS metric_value
        FROM 
            scores
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
            AND type = 'ACTOR_TYPE_INFERER_UNSPECIFIED'
        ORDER BY 
            epoch DESC, address;
        """

        score_rows = db_manager.execute_query(sql)
        return score_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class RawForecasterScoreMetric(Metric):
    """Unaggregated forecaster rows from `scores` for the metric's topic."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Fetch epoch, address and value for 'ACTOR_TYPE_FORECASTER' rows
        and tag them with topic, metric name and a UTC timestamp."""
        sql = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            address,
            CAST(value AS FLOAT) AS metric_value
        FROM 
            scores
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
            AND type = 'ACTOR_TYPE_FORECASTER'
        ORDER BY 
            epoch DESC, address;
        """

        score_rows = db_manager.execute_query(sql)
        return score_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class RawInfererRewardMetric(Metric):
    """Unaggregated inferer rows from `rewards` for the metric's topic."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Fetch epoch, address and value for 'ACTOR_TYPE_INFERER_UNSPECIFIED'
        rows and tag them with topic, metric name and a UTC timestamp."""
        sql = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            address,
            CAST(value AS FLOAT) AS metric_value
        FROM 
            rewards
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
            AND type = 'ACTOR_TYPE_INFERER_UNSPECIFIED'
        ORDER BY 
            epoch DESC, address;
        """

        reward_rows = db_manager.execute_query(sql)
        return reward_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class RawForecasterRewardMetric(Metric):
    """Unaggregated forecaster rows from `rewards` for the metric's topic."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Fetch epoch, address and value for 'ACTOR_TYPE_FORECASTER' rows
        and tag them with topic, metric name and a UTC timestamp."""
        sql = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            address,
            CAST(value AS FLOAT) AS metric_value
        FROM 
            rewards
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
            AND type = 'ACTOR_TYPE_FORECASTER'
        ORDER BY 
            epoch DESC, address;
        """

        reward_rows = db_manager.execute_query(sql)
        return reward_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class RawReputerRewardMetric(Metric):
    """Unaggregated reputer rows from `rewards` for the metric's topic."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Fetch epoch, address and value for 'ACTOR_TYPE_REPUTER' rows and
        tag them with topic, metric name and a UTC timestamp."""
        sql = f"""
        SELECT
            FLOOR(height / {self.epoch_length}) AS epoch,
            address,
            CAST(value AS FLOAT) AS metric_value
        FROM 
            rewards
        WHERE 
            topic_id = {self.topic_id}
            AND height >= {min_block_height}
            AND type = 'ACTOR_TYPE_REPUTER'
        ORDER BY 
            epoch DESC, address;
        """

        reward_rows = db_manager.execute_query(sql)
        return reward_rows.assign(
            topic_id=self.topic_id,
            metric_name=self.name,
            updated_at=datetime.now(timezone.utc),
        )

class RawForecastsMetric(Metric):
    """Individual forecast values for the metric's topic, one row per
    forecast value, with the forecasted inferer stored as the address."""

    def calculate(self, db_manager: DatabaseManager, min_block_height: int) -> pd.DataFrame:
        """Join `forecasts` to `forecast_values` and keep only the columns of
        the metrics table; block height and forecast id are discarded."""
        sql = f"""
        SELECT
            FLOOR(f.block_height / {self.epoch_length}) AS epoch,
            f.block_height,
            f.id AS forecast_id,
            fv.value AS metric_value,
            fv.inferer AS address
        FROM 
            forecasts f
        JOIN 
            forecast_values fv ON f.id = fv.forecast_id
        WHERE 
            f.topic_id = {self.topic_id}
            AND f.block_height >= {min_block_height}
        ORDER BY 
            f.block_height DESC, f.id, fv.inferer;
        """

        forecast_rows = db_manager.execute_query(sql)

        # Scalars are broadcast along the index of the queried columns.
        table_columns = {
            'topic_id': self.topic_id,
            'epoch': forecast_rows['epoch'],
            'address': forecast_rows['address'],
            'metric_value': forecast_rows['metric_value'],
            'metric_name': self.name,
            'updated_at': datetime.now(timezone.utc),
        }
        return pd.DataFrame(table_columns)

In [927]:
def update_specific_topic(db_manager: DatabaseManager, topic_id: int):
    """Incrementally compute and store every research metric for one topic.

    For each metric class the function looks up the highest ``epoch`` already
    stored in ``research_metrics``, asks the metric to calculate everything
    after that point, coerces the result to the table's column types and
    inserts the rows that have a non-null ``metric_value``.

    The whole run is guarded by the global lock exposed by ``db_manager``:
    if the lock is already held the function returns immediately, otherwise
    the lock is taken and always released in the ``finally`` clause.

    Args:
        db_manager: Provides ``check_global_lock``, ``set_global_lock``,
            ``remove_global_lock``, ``execute_query`` and ``insert_metrics``.
        topic_id: Id of the topic to update. ``ValidatorRewardMetric`` is
            always stored under topic 0 regardless of this value.

    Returns:
        None. Any exception raised while processing is printed and swallowed,
        so callers cannot tell success from failure by the return value.
    """
    if db_manager.check_global_lock():
        print(f"Global lock is active. Skipping update for topic {topic_id}.")
        return

    # Column -> dtype of the rows handed to ``insert_metrics``. ``address`` is
    # coerced with ``str`` so every value becomes a plain Python string.
    expected_columns = {
        'topic_id': 'int64',
        'epoch': 'int64',
        'address': str,
        'metric_value': 'float64',
        'metric_name': 'object',
        'updated_at': 'datetime64[ns, UTC]'
    }
    # Starting height used when a metric has no stored rows yet (full backfill).
    min_block_height = 0

    try:
        db_manager.set_global_lock()

        # Fetch the specific topic
        topic_query = f"SELECT id AS topic_id, epoch_length FROM topics WHERE id = {topic_id};"
        topic_df = db_manager.execute_query(topic_query)

        if topic_df.empty:
            print(f"Topic {topic_id} not found.")
            return

        epoch_length = int(topic_df.iloc[0]['epoch_length'])

        print(f"Processing topic {topic_id} with epoch length {epoch_length}")

        metric_classes = [
            ValidatorRewardMetric, InfererLossesMetric, ForecastHealthMetric, ReputerScoreMetric,
            SortitionScoreMetric, SortitionTimeMetric, LifetimeMetric,
            InfererHealthMetric, ForecasterLossesMetric, NetworkLossesMetric,
            RawForecastsMetric, RawReputerScoreMetric, RawInfererScoreMetric,
            RawForecasterScoreMetric, RawInfererRewardMetric, RawForecasterRewardMetric,
            RawReputerRewardMetric
        ]
        total_metrics = len(metric_classes)

        for i, MetricClass in enumerate(metric_classes, 1):
            metric_name = MetricClass.__name__.lower().replace('metric', '')
            # The validator metric is stored under topic 0 and keeps raw block
            # heights in its ``epoch`` column instead of epoch numbers.
            is_validator = MetricClass is ValidatorRewardMetric
            metric_topic_id = 0 if is_validator else topic_id
            print(f"\nProcessing metric [{i}/{total_metrics}]: {metric_name}")

            # Get the last processed epoch/height for this specific metric
            last_epoch_query = f"""
            SELECT MAX(epoch) as last_processed_epoch
            FROM research_metrics
            WHERE topic_id = {metric_topic_id}
              AND metric_name = '{metric_name}'
              AND epoch >= 0;  -- Exclude special entries like the global lock
            """
            last_epoch_df = db_manager.execute_query(last_epoch_query)
            last_processed_epoch = last_epoch_df.iloc[0]['last_processed_epoch']

            # MAX() over zero rows is SQL NULL, which can surface as None, NaN
            # or pd.NA depending on the inferred dtype; pd.isna covers all three.
            if pd.isna(last_processed_epoch):
                metric_min_block_height = min_block_height
                print(f"  [{i}/{total_metrics}] No previous data for {metric_name}")
            else:
                last_processed_epoch = int(last_processed_epoch)
                if is_validator:
                    metric_min_block_height = last_processed_epoch + 1  # Use direct block height
                else:
                    metric_min_block_height = (last_processed_epoch + 1) * epoch_length
                print(f"  [{i}/{total_metrics}] {metric_name}:")
                print(f"    Last processed epoch: {last_processed_epoch}")
                print(f"    Starting calculation from block height: {metric_min_block_height}")

            # Create the metric instance with the appropriate topic id and calculate
            metric = MetricClass(metric_name, metric_topic_id, epoch_length)
            metric_data = metric.calculate(db_manager, metric_min_block_height)

            if not metric_data.empty:
                first_epoch = metric_data['epoch'].min()
                final_epoch = metric_data['epoch'].max()
                # Validator rows already hold block heights, so do not rescale them.
                height_scale = 1 if is_validator else epoch_length
                print(f"  [{i}/{total_metrics}] {metric_name} calculation complete:")
                print(f"    Epochs: {first_epoch} to {final_epoch}")
                print(f"    Block heights: {first_epoch * height_scale} to {final_epoch * height_scale}")
                print(f"    Number of new records: {len(metric_data)}")

                # Keep only the expected columns and convert them in one call;
                # astype returns a new frame, so no slice of the metric's own
                # result is written to.
                metric_data = metric_data[list(expected_columns)].astype(expected_columns)

                # Remove any remaining null values
                metric_data = metric_data.dropna(subset=['metric_value'])

                if not metric_data.empty:
                    db_manager.insert_metrics(metric_data)
                    print(f"  [{i}/{total_metrics}] Successfully inserted new data for {metric_name}")
                else:
                    print(f"  [{i}/{total_metrics}] No new non-null data to insert for {metric_name}")
            else:
                print(f"  [{i}/{total_metrics}] No new data calculated for {metric_name}")

            print(f"Completed processing metric [{i}/{total_metrics}]: {metric_name}")
            print("-------------------------------------------")

    except Exception as e:
        # Best-effort: report and continue so one failing topic does not stop a batch run.
        print(f"An error occurred: {e}")
    finally:
        db_manager.remove_global_lock()

In [None]:
# Driver cell: refresh the research metrics of every topic found in the database.
# The same configuration is used for both the read and the write connection.
db_manager = DatabaseManager(db_config, db_config)
db_manager.connect()

try:
    # Ensure the table is created in the write database before any operations
    db_manager.create_metrics_table()
    
    # Get all topics
    topics_query = "SELECT id FROM topics ORDER BY id;"
    topics_df = db_manager.execute_query(topics_query)
    
    if topics_df.empty:
        print("No topics found in the database.")
    else:
        total_topics = len(topics_df)
        print(f"Found {total_topics} topics to process")
        
        # Process each topic
        # `index + 1` is used as a 1-based progress counter; this assumes
        # topics_df carries the default 0..n-1 index — TODO confirm.
        for index, row in topics_df.iterrows():
            topic_id_ = int(row['id'])
            print(f"\nProcessing topic {topic_id_} [{index + 1}/{total_topics}]")
            print("=" * 50)
            
            try:
                # NOTE(review): update_specific_topic prints and swallows its own
                # exceptions and returns silently when the global lock is held,
                # so the success message below is printed in those cases too.
                update_specific_topic(db_manager, topic_id_)
                print(f"Successfully processed topic {topic_id_}")
            except Exception as e:
                print(f"Error processing topic {topic_id_}: {e}")
                continue
            
            print("=" * 50)
            print(f"Completed topic {topic_id_} [{index + 1}/{total_topics}]")
        
        print("\nAll topics have been processed!")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # Always release the database connections, even after a failure.
    db_manager.close()


In [929]:
import json as _hex_json



In [930]:
import json as _hex_json



In [931]:
import json as _hex_json



## ALL of the queries!

### info queries

In [932]:
# Info queries: collect the scalar context (latest epoch, chain tip, epoch length,
# earliest stored epoch, topic metadata) that the later query cells interpolate.
# Each template is rendered from vars(), so {{name}} resolves to the notebook
# global of the same name at the time the cell runs.

# Create a SQLAlchemy engine for the analysis queries, built from db_config
engine = create_engine(f"postgresql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['dbname']}")

# Most recent research_metrics row (any metric) for the topic: its epoch and update time
raw_query = """
    SELECT epoch, updated_at
    FROM research_metrics
    WHERE topic_id = {{topic_id}}
    ORDER BY epoch DESC
    LIMIT 1;
"""
sql_query = jinja2.Template(raw_query).render(vars())
last_epoch_df = pd.read_sql_query(sql_query, engine)

# Highest block recorded in block_info and its block time
raw_query = """
    SELECT height, block_time
    FROM block_info
    WHERE height = (
        SELECT MAX(height)
        FROM block_info
    );
"""
sql_query = jinja2.Template(raw_query).render(vars())
max_height_df = pd.read_sql_query(sql_query, engine)

# Epoch length configured for the topic
raw_query = """
    SELECT epoch_length
    FROM topics
    WHERE id={{topic_id}}
"""
sql_query = jinja2.Template(raw_query).render(vars())
epoch_length_df = pd.read_sql_query(sql_query, engine)

# Earliest epoch stored in research_metrics for the topic
raw_query = """
    SELECT min(epoch)
    FROM research_metrics
    WHERE topic_id={{topic_id}}
"""
sql_query = jinja2.Template(raw_query).render(vars())
min_epoch_df = pd.read_sql_query(sql_query, engine)

# Metadata column of the topic
raw_query = """
    SELECT metadata
    FROM topics
    WHERE id={{topic_id}}
"""
sql_query = jinja2.Template(raw_query).render(vars())
metadata_df = pd.read_sql_query(sql_query, engine)

# Calculate derived values.
# Every lookup reads row 0 of its result, so an empty result raises IndexError,
# and int() fails if min(epoch) came back NULL (no stored metrics for the topic).
last_epoch = last_epoch_df.values[0][0]
last_timestamp = last_epoch_df.values[0][1]
max_height = max_height_df.values[0][0]
max_timestamp = max_height_df.values[0][1]
epoch_length = int(epoch_length_df.values[0][0])
min_epoch = int(min_epoch_df.values[0][0])
topic_metadata = metadata_df.values[0][0]
# Height lookback_epochs epochs below the chain tip (may be negative on a short chain)
lookback_height = max_height - lookback_epochs * epoch_length
# Never look back further than the first epoch that has stored metrics
min_height_to_lookback = max(lookback_height, min_epoch*epoch_length)


In [933]:
# Derive the scalar context values from the info-query result frames.
# NOTE(review): these assignments repeat the "Calculate derived values" section at
# the end of the info-queries cell verbatim; this cell looks redundant — confirm
# before removing.
last_epoch=last_epoch_df.values[0][0]
last_timestamp=last_epoch_df.values[0][1]
max_height = max_height_df.values[0][0]
max_timestamp = max_height_df.values[0][1]
epoch_length = int(epoch_length_df.values[0][0])
min_epoch = int(min_epoch_df.values[0][0])
topic_metadata = metadata_df.values[0][0]
# Height lookback_epochs epochs below the chain tip
lookback_height = max_height - lookback_epochs * epoch_length
# Clamp the lookback to the first epoch that has stored metrics
min_height_to_lookback = max(lookback_height, min_epoch*epoch_length)

In [None]:
# Show the lower block-height bound that the height-based queries use.
print(min_height_to_lookback)

In [935]:
# Count the 'infererlosses' rows stored per epoch for the topic and keep the
# lookback_epochs most recent epochs (epoch_rank 1 = newest). The count column
# is exposed as `inferrerlosses_count` (spelled with a double "r").
import jinja2
raw_query = """
    WITH ranked_epochs AS (
        SELECT 
            topic_id,
            epoch,
            COUNT(*) as inferrerlosses_count,
            ROW_NUMBER() OVER (PARTITION BY topic_id ORDER BY epoch DESC) as epoch_rank
        FROM 
            research_metrics
        WHERE 
            topic_id = {{topic_id}}
            AND metric_name = 'infererlosses'
        GROUP BY
            topic_id, epoch
    )
    SELECT 
        topic_id,
        epoch,
        inferrerlosses_count
    FROM 
        ranked_epochs
    WHERE 
        epoch_rank <= {{lookback_epochs}}
    ORDER BY
        epoch DESC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
num_active_df = pd.read_sql_query(sql_query, engine)

In [936]:
# Load the 'naive_losses' rows for the topic, limited to epochs newer than
# (latest 'infererlosses' epoch - lookback_epochs). Result: dataframe_5.
# NOTE(review): 'naive_losses' does not follow the class-derived metric naming
# (e.g. 'infererlosses'); presumably written by one of the metric classes under
# its own name — confirm it exists in research_metrics.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_value,
        metric_name
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}} 
        AND metric_name = 'naive_losses'
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}} AND metric_name = 'infererlosses'
        )
    ORDER BY
        epoch DESC;
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
dataframe_5 = pd.read_sql_query(sql_query, engine)

In [937]:
# Load the 'combined_losses' rows for the topic, limited to epochs newer than
# (latest 'infererlosses' epoch - lookback_epochs). Result: dataframe_6.
# NOTE(review): 'combined_losses' does not follow the class-derived metric naming;
# presumably written by one of the metric classes under its own name — confirm.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_value,
        metric_name
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}} 
        AND metric_name = 'combined_losses'
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}} AND metric_name = 'infererlosses'
        )
    ORDER BY
        epoch DESC;
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
dataframe_6 = pd.read_sql_query(sql_query, engine)

In [938]:
# Load the 'infererlosses' values (epoch, metric_value) for the topic over the
# last lookback_epochs epochs of that same metric. Result: dataframe_3.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_value
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}} 
        AND metric_name = 'infererlosses'
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}} AND metric_name = 'infererlosses'
        )
    ORDER BY
        epoch DESC;
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
dataframe_3 = pd.read_sql_query(sql_query, engine)

In [939]:
# Load the 'forecasterlosses' values (epoch, metric_value) for the topic over the
# last lookback_epochs epochs of that same metric. Result: dataframe_4.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_value
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}} 
        AND metric_name = 'forecasterlosses'
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}} AND metric_name = 'forecasterlosses'
        )
    ORDER BY
        epoch DESC;
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
dataframe_4 = pd.read_sql_query(sql_query, engine)

In [940]:
# Load per-address raw rewards for inferers, reputers and forecasters in one frame.
# The epoch window is anchored on the latest 'rawreputerreward' epoch, so the
# other two reward metrics are cut at the same lower bound.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_name,
        address,
        metric_value
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}} 
        AND metric_name IN ('rawinfererreward', 'rawreputerreward', 'rawforecasterreward')  -- List of metric names
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}} AND metric_name = 'rawreputerreward'
        )
    ORDER BY
        epoch DESC, 
        metric_name;  -- Order by epoch and metric_name
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
raw_rewards_df = pd.read_sql_query(sql_query, engine)

In [941]:
# Load per-address raw scores for inferers, reputers and forecasters in one frame.
# The epoch window is anchored on the latest 'infererlosses' epoch.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_name,
        address,
        metric_value
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}}
        AND metric_name IN ('rawinfererscore', 'rawreputerscore', 'rawforecasterscore')  -- List of metric names
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}}  AND metric_name = 'infererlosses'
        )
    ORDER BY
        epoch DESC, 
        metric_name;  -- Order by epoch and metric_name
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
raw_score_df = pd.read_sql_query(sql_query, engine)

In [942]:
# Load raw forecast values together with inferer losses (per address) so they can
# be compared epoch by epoch. Window anchored on the latest 'infererlosses' epoch.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_name,
        address,
        metric_value
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}} 
        AND metric_name IN ('rawforecasts', 'infererlosses')  -- List of metric names
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}} AND metric_name = 'infererlosses'
        )
    ORDER BY
        epoch DESC, 
        metric_name;  -- Order by epoch and metric_name
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
raw_fore_inf_df = pd.read_sql_query(sql_query, engine)

In [943]:
# Load 'raw_inference' and 'infererlosses' rows (per address) for the topic.
# Window anchored on the latest 'infererlosses' epoch.
# NOTE(review): 'raw_inference' does not follow the class-derived metric naming;
# presumably written by one of the metric classes under its own name — confirm.
import jinja2
raw_query = """
    SELECT 
        epoch,
        metric_name,
        address,
        metric_value
    FROM 
        research_metrics
    WHERE 
        topic_id = {{topic_id}} 
        AND metric_name IN ('raw_inference', 'infererlosses')  -- List of metric names
        AND epoch > (
            SELECT MAX(epoch) - {{lookback_epochs}}
            FROM research_metrics
            WHERE topic_id = {{topic_id}} AND metric_name = 'infererlosses'
        )
    ORDER BY
        epoch DESC, 
        metric_name;  -- Order by epoch and metric_name
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
raw_inf_loss_df = pd.read_sql_query(sql_query, engine)

In [944]:
# Upper block-height bound for the network-inference queries: the start height of
# the newest epoch present in raw_inf_loss_df.
# NOTE(review): if raw_inf_loss_df is empty, .max() is NaN and the rendered SQL
# would contain "nan" — confirm the frame is never empty here.
# print(raw_inf_loss_df['epoch'].max()*epoch_length)
# print(raw_inf_loss_df['epoch'])
max_height_for_nw_inference = raw_inf_loss_df['epoch'].max()*epoch_length

In [945]:
# Network inferences per epoch: from query_results rows of type
# 'network_inferences' for the topic, keep the earliest block of each epoch
# (rn = 1, ordered by block height ascending) within the window
# (min_height_to_lookback, max_height_for_nw_inference].
import jinja2
raw_query = """
    WITH RankedInferences AS (
        SELECT 
            (metadata->>'block_height')::int AS block_height,
            (metadata->>'block_height')::int / {{epoch_length}} AS epoch,
            (value->'network_inferences'->>'naive_value')::numeric AS naive_value,
            (value->'network_inferences'->>'combined_value')::numeric AS combined_value,
            value->'network_inferences'->'inferer_values' as inferer_values,
            ROW_NUMBER() OVER (PARTITION BY (metadata->>'block_height')::int / {{epoch_length}} ORDER BY (metadata->>'block_height')::int ASC) as rn
        FROM 
            query_results
        WHERE 
            query_type = 'network_inferences'
            AND (metadata->>'topic_id')::int = {{topic_id}}
            AND (metadata->>'block_height')::int > {{min_height_to_lookback}}
            AND (metadata->>'block_height')::int <= {{max_height_for_nw_inference}}
    )
    SELECT 
        block_height,
        epoch,
        naive_value,
        combined_value,
        inferer_values
    FROM 
        RankedInferences
    WHERE 
        rn = 1
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
nw_inference_df = pd.read_sql_query(sql_query, engine)

In [946]:
# Outlier-resistant network inferences per epoch: same shape as the plain
# network-inference query, but rows are selected by the key
# 'latest_network_inferences_outlier_resistant_<topic_id>' (built in SQL with ||).
# Keeps the earliest block of each epoch inside the same height window.
import jinja2
raw_query = """
    WITH RankedInferences AS (
        SELECT 
            (metadata->>'block_height')::int AS block_height,
            (metadata->>'block_height')::int / {{epoch_length}} AS epoch,
            (value->'network_inferences'->>'naive_value')::numeric AS naive_value,
            (value->'network_inferences'->>'combined_value')::numeric AS combined_value,
            value->'network_inferences'->'inferer_values' as inferer_values,
            ROW_NUMBER() OVER (PARTITION BY (metadata->>'block_height')::int / {{epoch_length}} ORDER BY (metadata->>'block_height')::int ASC) as rn
        FROM 
            query_results
        WHERE 
            key = 'latest_network_inferences_outlier_resistant_' || {{topic_id}}
            AND (metadata->>'block_height')::int > {{min_height_to_lookback}}
            AND (metadata->>'block_height')::int <= {{max_height_for_nw_inference}}
    )
    SELECT 
        block_height,
        epoch,
        naive_value,
        combined_value,
        inferer_values
    FROM 
        RankedInferences
    WHERE 
        rn = 1
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
nw_inference_outlier_res_df = pd.read_sql_query(sql_query, engine)

In [947]:
# Reputer listening coefficients per epoch: earliest listening_coefficients row
# (actor_type = 'ACTOR_TYPE_REPUTER') of each epoch above min_height_to_lookback.
# `addresses` and `coefficients` are returned as stored; presumably parallel
# arrays — verify against the table schema.
import jinja2
raw_query = """
    WITH RankedCoefficients AS (
        SELECT 
            block_height,
            block_height / {{epoch_length}} as epoch,
            addresses,
            coefficients,
            ROW_NUMBER() OVER (PARTITION BY block_height / {{epoch_length}} ORDER BY block_height ASC) as rn
        FROM 
            listening_coefficients
        WHERE 
            topic_id = {{topic_id}}
            AND actor_type = 'ACTOR_TYPE_REPUTER'
            AND block_height > {{min_height_to_lookback}}
    )
    SELECT 
        block_height,
        epoch,
        addresses,
        coefficients
    FROM 
        RankedCoefficients
    WHERE 
        rn = 1
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
listening_df = pd.read_sql_query(sql_query, engine)

In [948]:
# Inferer participation per epoch from ema_scores
# (type = 'ACTOR_TYPE_INFERER_UNSPECIFIED', height > min_height_to_lookback):
#   EpochData    - per height: total rows, active rows, distinct active addresses
#   EpochSummary - per epoch: first height, max counts, and the address arrays of
#                  this epoch and (via LAG) the previous epoch
#   final SELECT - new_addresses = addresses present now but not in the previous epoch
# NOTE(review): the CASE without ELSE yields NULL for inactive rows, so
# active_addresses can contain a NULL element; and array_agg over arrays needs
# every per-height array in an epoch to have the same length — confirm both are
# acceptable for the data.
import jinja2
raw_query = """
    WITH EpochData AS (
        SELECT 
            height,
            height / {{epoch_length}} as epoch,
            COUNT(*) as total_participants,
            COUNT(CASE WHEN is_active = true THEN 1 END) as active_participants,
            array_agg(DISTINCT CASE WHEN is_active = true THEN address END) as active_addresses
        FROM ema_scores
        WHERE topic_id = {{topic_id}}
        AND type = 'ACTOR_TYPE_INFERER_UNSPECIFIED'
        AND height > {{min_height_to_lookback}}
        GROUP BY height
    ),
    EpochSummary AS (
        SELECT 
            epoch,
            MIN(height) as epoch_height,
            MAX(total_participants) as total_participants,
            MAX(active_participants) as active_participants,
            LAG(array_agg(active_addresses)) OVER (ORDER BY epoch) as prev_addresses,
            array_agg(active_addresses) as current_addresses
        FROM EpochData
        GROUP BY epoch
    )
    SELECT 
        epoch,
        epoch_height as height,
        total_participants,
        active_participants,
        COALESCE(
            CARDINALITY(
                ARRAY(
                    SELECT UNNEST(current_addresses) 
                    EXCEPT 
                    SELECT UNNEST(prev_addresses)
                )
            ),
            CARDINALITY(ARRAY(SELECT UNNEST(current_addresses)))
        ) as new_addresses
    FROM EpochSummary
    ORDER BY epoch ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
new_ema_scores_df = pd.read_sql_query(sql_query, engine)

In [949]:
# Network inferences with confidence intervals per epoch: like the plain
# network-inference query, plus `confidence_interval_values` and
# `confidence_interval_raw_percentiles` parsed as jsonb. Keeps the earliest block
# of each epoch within (min_height_to_lookback, max_height_for_nw_inference].
import jinja2
raw_query = """
    WITH RankedInferences AS (
        SELECT 
            (metadata->>'block_height')::int AS block_height,
            (metadata->>'block_height')::int / {{epoch_length}} AS epoch,
            (value->'network_inferences'->>'naive_value')::numeric AS naive_value,
            (value->'network_inferences'->'inferer_values')::jsonb AS inferer_values,  -- Changed to jsonb
            (value->'network_inferences'->>'combined_value')::numeric AS combined_value,
            (value->>'confidence_interval_values')::jsonb AS confidence_interval_values,
            (value->>'confidence_interval_raw_percentiles')::jsonb AS confidence_interval_percentiles,
            ROW_NUMBER() OVER (PARTITION BY (metadata->>'block_height')::int / {{epoch_length}} ORDER BY (metadata->>'block_height')::int ASC) as rn
        FROM 
            query_results
        WHERE 
            query_type = 'network_inferences'
            AND (metadata->>'topic_id')::int = {{topic_id}}
            AND (metadata->>'block_height')::int > {{min_height_to_lookback}}
            AND (metadata->>'block_height')::int <= {{max_height_for_nw_inference}}
    )
    SELECT 
        block_height,
        epoch,
        naive_value,
        inferer_values,
        combined_value,
        confidence_interval_values,
        confidence_interval_percentiles
    FROM 
        RankedInferences
    WHERE 
        rn = 1
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
ci_df = pd.read_sql_query(sql_query, engine)

In [950]:
# Active reputer stakes per epoch: earliest stored snapshot of each epoch above
# lookback_height. The key is templated with topic_id (it used to be hard-coded
# to topic 1), so the cell follows the topic chosen in the inputs cell like every
# other query; for topic_id = 1 the rendered SQL is unchanged.
# NOTE(review): assumes keys follow 'active_reputer_stakes_<topic_id>' for every
# topic — confirm against the writer of query_results.
import jinja2
raw_query = """
    WITH RankedStakes AS (
        SELECT 
            (metadata->>'block_height')::int AS block_height,
            (metadata->>'block_height')::int / {{epoch_length}} AS epoch,
            value::jsonb AS stakes_array,
            ROW_NUMBER() OVER (PARTITION BY (metadata->>'block_height')::int / {{epoch_length}} ORDER BY (metadata->>'block_height')::int ASC) as rn
        FROM 
            query_results
        WHERE 
            key = 'active_reputer_stakes_{{topic_id}}'
            AND (metadata->>'block_height')::int > {{lookback_height}}
    )
    SELECT 
        block_height,
        epoch,
        stakes_array
    FROM 
        RankedStakes
    WHERE 
        rn = 1
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
stakes_df = pd.read_sql_query(sql_query, engine)

In [951]:
# Number of reputer payloads per nonce block height for the topic, above
# min_height_to_lookback, newest first. `epoch` is the nonce height divided by
# the epoch length.
import jinja2
raw_query = """
    WITH PayloadCount AS (
        SELECT 
            reputer_nonce_block_height,
            reputer_nonce_block_height / {{epoch_length}} AS epoch,
            COUNT(*) AS payload_count
        FROM 
            reputer_payload
        WHERE 
            topic_id = {{topic_id}}
            AND reputer_nonce_block_height > {{min_height_to_lookback}}
        GROUP BY 
            reputer_nonce_block_height
    )
    SELECT 
        reputer_nonce_block_height,
        epoch,
        payload_count
    FROM 
        PayloadCount
    ORDER BY 
        reputer_nonce_block_height DESC;
    
"""
sql_query = jinja2.Template(raw_query).render(vars())
reputer_payload_df = pd.read_sql_query(sql_query, engine)

In [952]:
# Inferer and forecaster weights per epoch: from 'network_inferences' rows of the
# topic above lookback_height, keep the earliest block of each epoch and parse
# `inferer_weights` / `forecaster_weights` as jsonb.
# Note this window uses lookback_height, not min_height_to_lookback.
import jinja2
raw_query = """
    WITH RankedInferences AS (
        SELECT 
            (metadata->>'block_height')::int AS block_height,
            (metadata->>'block_height')::int / {{epoch_length}} AS epoch,
            (value->>'inferer_weights')::jsonb AS inferer_weights,
            (value->>'forecaster_weights')::jsonb AS forecaster_weights,
            ROW_NUMBER() OVER (PARTITION BY (metadata->>'block_height')::int / {{epoch_length}} ORDER BY (metadata->>'block_height')::int ASC) as rn
        FROM 
            query_results
        WHERE 
            query_type = 'network_inferences'
            AND (metadata->>'topic_id')::int = {{topic_id}}
            AND (metadata->>'block_height')::int > {{lookback_height}}
    )
    SELECT 
        block_height,
        epoch,
        inferer_weights,
        forecaster_weights
    FROM 
        RankedInferences
    WHERE 
        rn = 1
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
weights_df = pd.read_sql_query(sql_query, engine)

In [953]:
# Distinct naive inferer network regret rows for the topic above lookback_height,
# newest block first. `addresses` and `regrets` are returned as stored.
import jinja2
raw_query = """
    SELECT DISTINCT
        block_height,
        block_height / {{epoch_length}} as epoch,
        addresses,
        regrets
    FROM 
        naive_inferer_network_regret
    WHERE 
        topic_id = {{topic_id}}
        AND block_height > {{lookback_height}}
    ORDER BY 
        block_height DESC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
naive_regret_df = pd.read_sql_query(sql_query, engine)

In [954]:
# Distinct inferer network regret rows for the topic above lookback_height,
# newest block first. `addresses` and `regrets` are returned as stored.
import jinja2
raw_query = """
    SELECT DISTINCT
        block_height,
        block_height / {{epoch_length}} as epoch,
        addresses,
        regrets
    FROM 
        inferer_network_regret
    WHERE 
        topic_id = {{topic_id}}
        AND block_height > {{lookback_height}}
    ORDER BY 
        block_height DESC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
nw_inf_regret_df = pd.read_sql_query(sql_query, engine)

In [955]:
# Distinct forecaster network regret rows for the topic above lookback_height,
# newest block first. `addresses` and `regrets` are returned as stored.
import jinja2
raw_query = """
    SELECT DISTINCT
        block_height,
        block_height / {{epoch_length}} as epoch,
        addresses,
        regrets
    FROM 
        forecaster_network_regret
    WHERE 
        topic_id = {{topic_id}}
        AND block_height > {{lookback_height}}
    ORDER BY 
        block_height DESC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
forecast_regret_df = pd.read_sql_query(sql_query, engine)

In [956]:
# Topic initial regret per block for the topic above lookback_height, oldest
# block first (the sibling regret queries sort newest first).
import jinja2
raw_query = """
    SELECT 
        block_height,
        block_height / {{epoch_length}} as epoch,
        regret
    FROM 
        topic_initial_regret
    WHERE 
        topic_id = {{topic_id}}
        AND block_height > {{lookback_height}}
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
init_regret_df = pd.read_sql_query(sql_query, engine)

In [957]:
# Latest tokenomics snapshot: the 'emission_info' row(s) at the highest stored
# block height, with the JSON fields cast to numbers. Supply and locked amounts
# are divided by 10^18.
# NOTE(review): network_staked_tokens and emission_per_unit_staked_token are NOT
# divided by 10^18 here, while the historical emission query in this notebook
# (new_supply_df) does scale them — confirm which unit is intended.
import jinja2
raw_query = """
    SELECT 
        CAST(value->>'block_height_target_e_i_last_calculated' AS DOUBLE PRECISION) AS last_height_update,
        CAST(value->>'ecosystem_mint_supply_remaining' AS NUMERIC) / POWER(10, 18) AS ecosystem_mint_supply_remaining,
        (metadata->>'block_height')::int AS block_height,
        CAST(value->>'block_height_target_e_i_next_calculated' AS DOUBLE PRECISION) AS next_height_update,
        CAST(value->>'previous_block_emission' AS DOUBLE PRECISION) / POWER(10, 18) AS previous_block_emission,
        CAST(value->>'previous_reward_emission_per_unit_staked_token' AS DOUBLE PRECISION) AS previous_reward_emission_per_unit_staked_token,
        CAST(value->>'blocks_per_month' AS DOUBLE PRECISION) AS blocks_per_month,
        CAST(value->>'target_emission_rate_per_unit_staked_token' AS DOUBLE PRECISION) AS target_emission_rate_per_unit_staked_token,
        CAST(value->>'network_staked_tokens' AS DOUBLE PRECISION) AS network_staked,
        CAST(value->>'emission_per_unit_staked_token' AS DOUBLE PRECISION) AS emission_per_unit_staked_token,
        CAST(value->>'locked_vesting_tokens_investors_preseed' AS NUMERIC) / POWER(10, 18) AS investors_preseed_locked,
        CAST(value->>'ecosystem_locked' AS NUMERIC) / POWER(10, 18) AS ecosystem_locked,
        CAST(value->>'locked_vesting_tokens_total' AS NUMERIC) / POWER(10, 18) AS total_locked,
        CAST(value->>'locked_vesting_tokens_team' AS NUMERIC) / POWER(10, 18) AS team_locked,
        CAST(value->>'circulating_supply' AS NUMERIC) / POWER(10, 18) AS circulating_supply,
        CAST(value->>'max_supply' AS NUMERIC) / POWER(10, 18) AS max_supply
    
    
    
        
    FROM 
        query_results
    WHERE 
        query_type = 'emission_info'
        AND (metadata->>'block_height')::int = (
            SELECT MAX((metadata->>'block_height')::int)
            FROM query_results
            WHERE query_type = 'emission_info'
        );
"""
sql_query = jinja2.Template(raw_query).render(vars())
current_token_details = pd.read_sql_query(sql_query, engine)

In [958]:
# Emission and supply history: every 'emission_info' row above
# (max_height - lookback_blocks), oldest first. The subtraction is evaluated by
# Jinja2 while rendering, so the SQL receives a single literal height.
import jinja2
raw_query = """
    SELECT 
        (metadata->>'block_height')::int AS block_height,
        CAST(value->>'ecosystem_mint_supply_remaining' AS NUMERIC) / POWER(10, 18) AS ecosystem_mint_supply_remaining,
        CAST(value->>'ecosystem_balance' AS NUMERIC) / POWER(10, 18) AS ecosystem_balance,
        CAST(value->>'network_staked_tokens' AS NUMERIC) / POWER(10, 18) AS network_staked,
        CAST(value->>'circulating_supply' AS NUMERIC) / POWER(10, 18) AS circulating_supply,
        CAST(value->>'target_reward_emission_per_unit_staked_token' AS NUMERIC) AS target_reward_emission_per_unit_staked_token,
        CAST(value->>'block_emission' AS NUMERIC)/ POWER(10, 18) AS block_emission,
        CAST(value->>'emission_per_unit_staked_token' AS NUMERIC)/ POWER(10, 18) AS emission_per_unit_staked_token,
        CAST(value->>'previous_block_emission' AS DOUBLE PRECISION)/ POWER(10, 18) AS previous_emission,
        CAST(value->>'previous_reward_emission_per_unit_staked_token' AS DOUBLE PRECISION) AS previous_reward_emission_per_unit_staked_token,
        CAST(value->>'max_supply' AS DOUBLE PRECISION) / POWER(10, 18) AS max_supply    
    FROM 
        query_results
    WHERE 
        query_type = 'emission_info'
        AND (metadata->>'block_height')::int > {{max_height-lookback_blocks}}
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
new_supply_df = pd.read_sql_query(sql_query, engine)

In [959]:
# Locked / vesting token history: the locked-token fields of every
# 'emission_info' row above (max_height - lookback_blocks), divided by 10^18,
# oldest first.
import jinja2
raw_query = """
    SELECT 
        (metadata->>'block_height')::int AS block_height,
        CAST(value->>'locked_vesting_tokens_investors_seed' AS NUMERIC) / POWER(10, 18) AS investors_seed_locked,
        CAST(value->>'locked_vesting_tokens_investors_preseed' AS NUMERIC) / POWER(10, 18) AS investors_preseed_locked,
        CAST(value->>'ecosystem_locked' AS NUMERIC) / POWER(10, 18) AS ecosystem_locked,
        CAST(value->>'locked_vesting_tokens_total' AS NUMERIC) / POWER(10, 18) AS total_locked,
        CAST(value->>'locked_vesting_tokens_team' AS NUMERIC) / POWER(10, 18) AS team_locked
    FROM 
        query_results
    WHERE 
        query_type = 'emission_info'
        AND (metadata->>'block_height')::int > {{max_height-lookback_blocks}}
    ORDER BY 
        block_height ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
locked_df = pd.read_sql_query(sql_query, engine)

In [960]:
# Validator reward history from research_metrics. For the 'validatorreward'
# metric the `epoch` column is read back as a block height (see the alias), and
# the query is not filtered by topic_id.
import jinja2
raw_query = """
    SELECT 
        epoch AS block_height,
        metric_value as normalized_amount
    FROM 
        research_metrics
    WHERE
        epoch >= {{max_height-lookback_blocks}} AND metric_name='validatorreward'
    ORDER BY 
        epoch ASC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
validator_df = pd.read_sql_query(sql_query, engine)

In [961]:
# Total rewards per validator over the last lookback_blocks blocks (amount
# divided by 10^18), plus each validator's share of the grand total
# (`normalized_rewards`), largest first.
import jinja2
raw_query = """
    WITH summed_rewards AS (
        SELECT 
            validator,
            SUM(amount::decimal / POWER(10, 18)) as total_rewards
        FROM 
            validator_rewards
        WHERE 
            height_tx >= {{max_height-lookback_blocks}}
        GROUP BY 
            validator
    )
    SELECT 
        validator,
        total_rewards,
        total_rewards / SUM(total_rewards) OVER () as normalized_rewards
    FROM 
        summed_rewards
    ORDER BY 
        total_rewards DESC;
"""
sql_query = jinja2.Template(raw_query).render(vars())
v_rewards_df = pd.read_sql_query(sql_query, engine)

In [962]:
# Block time for the most recent `lookback_epochs` distinct epochs of the
# topic. Each epoch is mapped to block height epoch * epoch_length and
# LEFT JOINed to block_info, so an epoch without a matching block row is kept
# with a NULL block_time. Rows are ordered by block height, newest first.
# jinja2 is imported once in the notebook's first cell, so it is not
# re-imported here.
raw_query = """
    WITH recent_epochs AS (
        SELECT DISTINCT epoch
        FROM research_metrics
        WHERE topic_id = {{topic_id}}
        ORDER BY epoch DESC
        LIMIT {{lookback_epochs}}
    ),
    epoch_blocks AS (
        SELECT epoch, epoch * {{epoch_length}} AS block_height
        FROM recent_epochs
    )
    SELECT 
        eb.epoch,
        eb.block_height,
        bi.block_time
    FROM epoch_blocks eb
    LEFT JOIN block_info bi ON bi.height = eb.block_height
    ORDER BY eb.block_height DESC;
    
"""
# Render with only the names the template references instead of the whole
# notebook namespace (vars()). With vars(), a missing name would silently
# render as empty text inside the SQL; now it fails here with a NameError.
sql_query = jinja2.Template(raw_query).render(
    topic_id=topic_id,
    lookback_epochs=lookback_epochs,
    epoch_length=epoch_length,
)
time_df = pd.read_sql_query(sql_query, engine)

In [963]:
# Block time of every block_info row at or above min_height_to_lookback, with
# the epoch derived as FLOOR(height / epoch_length). Newest block first.
# jinja2 is imported once in the notebook's first cell, so it is not
# re-imported here.
raw_query = """
    SELECT 
        FLOOR(height / {{epoch_length}}) as epoch,
        height as block_height,
        block_time
    FROM block_info
    WHERE height >= {{min_height_to_lookback}}
    ORDER BY height DESC;
"""
# Render with only the names the template references instead of the whole
# notebook namespace (vars()). With vars(), a missing name would silently
# render as empty text inside the SQL; now it fails here with a NameError.
sql_query = jinja2.Template(raw_query).render(
    epoch_length=epoch_length,
    min_height_to_lookback=min_height_to_lookback,
)
new_time_df = pd.read_sql_query(sql_query, engine)

In [964]:
# Every research_metrics row of the topic that falls in its most recent
# `lookback_epochs` distinct epochs (all metric names), newest epoch first.
# The result feeds the per-metric filters further down (e.g. 'forecasthealth',
# 'infererhealth').
# jinja2 is imported once in the notebook's first cell, so it is not
# re-imported here.
raw_query = """
    WITH recent_epochs AS (
        SELECT DISTINCT epoch
        FROM research_metrics
        WHERE topic_id = {{topic_id}}
        ORDER BY epoch DESC
        LIMIT {{lookback_epochs}}
    )
    SELECT 
        rm.topic_id,
        rm.metric_name,
        rm.epoch,
        rm.address,
        rm.metric_value
    FROM 
        research_metrics rm
    JOIN 
        recent_epochs re
    ON 
        rm.epoch = re.epoch
    WHERE 
        rm.topic_id = {{topic_id}}
    ORDER BY 
        rm.epoch DESC;
    
"""
# Render with only the names the template references instead of the whole
# notebook namespace (vars()). With vars(), a missing name would silently
# render as empty text inside the SQL; now it fails here with a NameError.
sql_query = jinja2.Template(raw_query).render(
    topic_id=topic_id,
    lookback_epochs=lookback_epochs,
)
df = pd.read_sql_query(sql_query, engine)

In [965]:
def extract_weights(row):
    """Return the ``inferer_weights`` entry of a weights_df row as a DataFrame.

    The value is handed straight to ``pd.DataFrame``; no JSON decoding is
    done because the data is expected to arrive as a list already.
    """
    return pd.DataFrame(row['inferer_weights'])


# Expand each weights_df row into one row per weight entry, tagged with the
# epoch and block height it came from. The per-row frames are collected in a
# list and concatenated once; concatenating inside the loop copies the
# accumulated frame on every iteration.
weight_frames = []
for _, row in weights_df.iterrows():
    df_temp = extract_weights(row)
    df_temp['epoch'] = row['epoch']
    df_temp['block_height'] = row['block_height']
    weight_frames.append(df_temp)

if weight_frames:
    weights_expanded = pd.concat(weight_frames)
else:
    # No weight rows at all: keep the columns the steps below rely on so the
    # numeric conversion and the merge still work on an empty frame.
    weights_expanded = pd.DataFrame(columns=['worker', 'weight', 'epoch', 'block_height'])

# Convert weight column to numeric
weights_expanded['weight'] = pd.to_numeric(weights_expanded['weight'])

# One record per (epoch, address, regret); `addresses` and `regrets` are
# paired positionally with zip.
regrets_expanded = []
for _, row in nw_inf_regret_df.iterrows():
    # Skip rows where addresses or regrets are None
    if row['addresses'] is not None and row['regrets'] is not None:
        try:
            for addr, regret in zip(row['addresses'], row['regrets']):
                regrets_expanded.append({
                    'epoch': row['epoch'],
                    'addresses': addr,
                    'regrets': regret
                })
        except Exception as e:
            # Best effort: report the offending row and keep going.
            print(f"Error processing row with epoch {row['epoch']}: {e}")
            print("Row data:", row)
            continue

# Explicit columns keep the merge keys present even when no record was built.
regrets_expanded = pd.DataFrame(regrets_expanded, columns=['epoch', 'addresses', 'regrets'])


# Merge weights and regrets on epoch and address; the inner join keeps only
# workers that have both a weight and a regret in the same epoch.
worker_performance = pd.merge(
    weights_expanded,
    regrets_expanded,
    left_on=['epoch', 'worker'],
    right_on=['epoch', 'addresses'],
    how='inner'
)

# Sort by epoch (ascending), then weight (descending)
worker_performance = worker_performance.sort_values(['epoch', 'weight'], ascending=[True, False])

# Ground truth interpolation

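Assuming the inferer losses are squared errors (MSE), so that `inference ± sqrt(loss)` gives the candidate ground-truth values for each epoch.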

In [966]:
# Reshape the long metric table to one row per (epoch, address): each
# metric_name becomes its own column, then the two metrics of interest get
# friendlier names.
_metric_renames = {
    'raw_inference': 'inference',
    'infererlosses': 'loss',
}
restructured_df = (
    raw_inf_loss_df
    .pivot(index=['epoch', 'address'], columns='metric_name', values='metric_value')
    .reset_index()
    .rename(columns=_metric_renames)
)

# Keep only the identifying keys and the two renamed metrics, in this order
restructured_df = restructured_df[['epoch', 'address', 'inference', 'loss']]


In [None]:
# Convert values to numeric; anything unparsable becomes NaN (errors='coerce')
restructured_df['inference'] = pd.to_numeric(restructured_df['inference'], errors='coerce')
restructured_df['loss'] = pd.to_numeric(restructured_df['loss'], errors='coerce')

# Drop rows with NaN values. The explicit copy makes clean_df an independent
# frame, so adding columns below neither triggers SettingWithCopyWarning nor
# depends on whether dropna returned a view of restructured_df.
clean_df = restructured_df.dropna(subset=['inference', 'loss']).copy()

# Candidate ground truths: inference ± sqrt(loss)
loss_root = np.sqrt(clean_df['loss'])
clean_df['inference_plus_loss'] = clean_df['inference'] + loss_root
clean_df['inference_minus_loss'] = clean_df['inference'] - loss_root

# Identify ground truth per epoch: pool both candidates of every address
# (rounded to 5 decimals) and take the value that occurs most often.
ground_truths = []
for epoch, group in clean_df.groupby('epoch'):
    all_values = np.concatenate([
        group['inference_plus_loss'].round(5),
        group['inference_minus_loss'].round(5)
    ])
    unique, counts = np.unique(all_values, return_counts=True)
    ground_truth = unique[np.argmax(counts)]
    ground_truths.append((epoch, ground_truth))

# Create DataFrame of ground truths
ground_truths_df = pd.DataFrame(ground_truths, columns=['epoch', 'ground_truth'])
# NOTE(review): the recovered value is negated here; the reason is not visible
# in this part of the notebook — confirm the sign convention of the inputs.
ground_truths_df['ground_truth'] = -ground_truths_df['ground_truth']

# Print some info about the data cleaning
print(f"Original number of rows: {len(restructured_df)}")
print(f"Number of rows after dropping NaN: {len(clean_df)}")
print(f"Number of epochs with ground truth: {len(ground_truths_df)}")
# plot the ground truths
fig = go.Figure()
fig.add_trace(go.Scatter(x=ground_truths_df['epoch'], y=ground_truths_df['ground_truth'], mode='lines+markers', name='Ground Truth'))
fig.show()


# Metrics

## Forecaster Health

In [None]:
# Forecast-health rows only, ordered by epoch so the EMA runs forward in time
forecast_health_df = (
    df.loc[df['metric_name'] == 'forecasthealth']
    .copy()
    .sort_values(by='epoch')
)

# Exponential moving average of the metric; alpha is the smoothing factor
alpha = 0.1
forecast_health_df['ema'] = forecast_health_df['metric_value'].ewm(alpha=alpha).mean()

# Line colours: a fixed purple for the raw metric, red for the EMA
forecast_health_color = '#440154'
ema_color = 'red'

# Both axes share the same spike / grid styling
_axis_style = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash="dot",
    spikecolor="grey",
)

_raw_trace = go.Scatter(
    x=forecast_health_df['epoch'],
    y=forecast_health_df['metric_value'],
    mode='lines+markers',
    name='Forecast Health',
    line=dict(color=forecast_health_color),
    marker=dict(color=forecast_health_color),
    hovertemplate='Forecast Health: %{y:.2f}<extra></extra>',
)
_ema_trace = go.Scatter(
    x=forecast_health_df['epoch'],
    y=forecast_health_df['ema'],
    mode='lines',
    name='EMA',
    line=dict(color=ema_color),
    hovertemplate='EMA: %{y:.2f}<extra></extra>',
)

# Raw metric first, EMA drawn on top of it
fig = go.Figure(data=[_raw_trace, _ema_trace])

# The title takes the topic id from the first row of the filtered frame
fig.update_layout(
    title=f"Forecast Health Metric for Topic {forecast_health_df['topic_id'].iloc[0]}",
    xaxis_title="Epoch",
    yaxis_title="Forecast Health Value",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1,
    ),
)

fig.show()


- We are plotting the raw score of the entire forecasting task $$T_i$$
    - Equation 42 of the white paper
- A **large/positive** value is **healthy** and indicates that forecasters are helping topic {{topic_id}}! 😁✅
- A **small/negative** value is **unhealthy** and indicates that forecasters aren't really helping topic {{topic_id}}! 😢😷

In [None]:
# log10 of the weights; epsilon keeps zero weights finite
epsilon = 1e-10
log_weights = np.log10(worker_performance['weight'] + epsilon)

# Hover text shows address, regret, log weight and epoch for each point
_hover_template = (
    '<b>Address:</b> %{text}<br>'
    '<b>Regret:</b> %{x:.4f}<br>'
    '<b>Log Weight:</b> %{y:.4f}<br>'
    '<b>Epoch:</b> %{marker.color}<br>'
    '<extra></extra>'
)

# One marker per (worker, epoch) pair, coloured by epoch
_regret_trace = go.Scatter(
    x=worker_performance['regrets'],
    y=log_weights,
    mode='markers',
    marker=dict(
        size=8,
        opacity=0.6,
        color=worker_performance['epoch'],
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(title='Epoch'),
    ),
    hovertemplate=_hover_template,
    text=worker_performance['worker'],
)

fig = go.Figure(data=[_regret_trace])

fig.update_layout(
    title='Inferer Regret vs Log Weight',
    xaxis_title='Combined Regret',
    yaxis_title='Log(Weight)',
    template='plotly_white',
    height=600,
    width=800,
)

fig.show()

# # Print some statistics about the weights
# print("\nWeight statistics:")
# print(f"Min weight: {combined_performance['weight'].min()}")
# print(f"Max weight: {combined_performance['weight'].max()}")
# print(f"Number of zero weights: {(combined_performance['weight'] == 0).sum()}")

### query to grab from the research table

In [970]:
# # Convert timestamps to datetime UTC
# gt_df['timestamp'] = pd.to_datetime(gt_df['timestamp'])

# # Merge using nearest timestamp matching
# ground_truths_df = pd.merge_asof(
#     gt_df.sort_values('timestamp'),
#     new_time_df[['block_time', 'epoch']].sort_values('block_time'),
#     left_on='timestamp',
#     right_on='block_time',
#     direction='nearest'
# )

# # Drop duplicates keeping first occurrence of each epoch
# ground_truths_df = ground_truths_df.drop_duplicates(subset=['epoch'], keep='first')
# ground_truths_df

In [None]:
fig = go.Figure()

# Ground truth is the only series drawn with an explicit line colour
fig.add_trace(
    go.Scatter(
        x=ground_truths_df['epoch'],
        y=ground_truths_df['ground_truth'],
        mode='lines+markers',
        name='Ground Truth',
        line=dict(color='#3b528b'),
        marker=dict(size=4),
        hovertemplate='Ground Truth: %{y}<extra></extra>',
    )
)

# (source frame, value column, legend label) for each network series,
# listed in the order the traces are added.
_network_series = [
    (nw_inference_df, 'naive_value', 'Naive Value'),
    (nw_inference_df, 'combined_value', 'Combined Value'),
    (nw_inference_outlier_res_df, 'combined_value', 'Outlier Resistant Combined Value'),
    (nw_inference_outlier_res_df, 'naive_value', 'Outlier Resistant Naive Value'),
]
for _frame, _column, _label in _network_series:
    fig.add_trace(
        go.Scatter(
            x=_frame['epoch'],
            y=_frame[_column],
            mode='lines+markers',
            name=_label,
            marker=dict(size=4),
            hovertemplate=f'{_label}: %{{y}}<extra></extra>',
        )
    )

# Both axes share the same spike / grid styling
_axis_style = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash='dot',
    spikecolor='grey',
)

# The legend is placed to the right of the plotting area (x=1.05)
fig.update_layout(
    title='Network Inferences and Ground Truth by Epoch',
    xaxis_title='Epoch',
    yaxis_title='Value',
    height=500,
    width=800,
    template='plotly_white',
    hovermode='x unified',
    spikedistance=-1,
    showlegend=True,
    legend=dict(yanchor='top', y=1, xanchor='left', x=1.05),
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    hoverlabel=dict(
        bgcolor='rgba(255, 255, 255, 0.7)',
        bordercolor='rgba(0, 0, 0, 0)',
        font_size=12,
        font_family='Arial',
        namelength=-1,
    ),
)

fig.show()

In [None]:
def _as_inferer_list(raw_values):
    """Return the inferer values as a list, decoding JSON text if needed."""
    return raw_values if isinstance(raw_values, list) else json.loads(raw_values)


# One record per (epoch, worker) inference
flattened_inferences = [
    {
        'epoch': row['epoch'],
        'value': float(inferer['value']),
        'worker': inferer['worker'],
    }
    for _, row in nw_inference_df.iterrows()
    for inferer in _as_inferer_list(row['inferer_values'])
]

flattened_df = pd.DataFrame(flattened_inferences)

fig = go.Figure()

# Every individual worker inference as a faint grey dot, excluded from hover
fig.add_trace(
    go.Scatter(
        x=flattened_df['epoch'],
        y=flattened_df['value'],
        mode='markers',
        marker=dict(size=3, color='gray', opacity=0.2),
        name='Worker Inferences',
        hoverinfo='skip',
    )
)

# Ground truth, plotted one epoch earlier than the epoch it is recorded under
fig.add_trace(
    go.Scatter(
        x=ground_truths_df['epoch'] - 1,
        y=ground_truths_df['ground_truth'],
        mode='lines+markers',
        name='Ground Truth',
        line=dict(color='#3b528b'),
        marker=dict(size=4),
        hovertemplate='Ground Truth: %{y}<extra></extra>',
    )
)

# (source frame, value column, legend label) for each network series,
# listed in the order the traces are added.
_network_series = [
    (nw_inference_df, 'naive_value', 'Naive Value'),
    (nw_inference_df, 'combined_value', 'Combined Value'),
    (nw_inference_outlier_res_df, 'combined_value', 'Outlier Resistant Combined Value'),
    (nw_inference_outlier_res_df, 'naive_value', 'Outlier Resistant Naive Value'),
]
for _frame, _column, _label in _network_series:
    fig.add_trace(
        go.Scatter(
            x=_frame['epoch'],
            y=_frame[_column],
            mode='lines+markers',
            name=_label,
            marker=dict(size=4),
            hovertemplate=f'{_label}: %{{y}}<extra></extra>',
        )
    )

# Both axes share the same spike / grid styling
_axis_style = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash='dot',
    spikecolor='grey',
)

# The legend is placed to the right of the plotting area (x=1.05)
fig.update_layout(
    title='Network Inferences and Ground Truth by Epoch',
    xaxis_title='Epoch',
    yaxis_title='Value',
    height=500,
    width=800,
    template='plotly_white',
    hovermode='x unified',
    spikedistance=-1,
    showlegend=True,
    legend=dict(yanchor='top', y=1, xanchor='left', x=1.05),
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    hoverlabel=dict(
        bgcolor='rgba(255, 255, 255, 0.7)',
        bordercolor='rgba(0, 0, 0, 0)',
        font_size=12,
        font_family='Arial',
        namelength=-1,
    ),
)

fig.show()

In [973]:
# merged_df = pd.merge(
#     ground_truths_df,
#     nw_inference_df[['epoch', 'naive_value', 'combined_value']],
#     on='epoch',
#     how='inner'
# )

# # Create the scatter plot
# fig = go.Figure()

# # Add scatter points for naive value vs ground truth
# fig.add_trace(go.Scatter(
#     x=merged_df['ground_truth'], 
#     y=merged_df['naive_value'], 
#     mode='markers',
#     marker=dict(size=8, color='#21918c'),
#     name='Naive Value',
#     text=merged_df['epoch'],
#     hovertemplate='<b>Naive Value:</b> %{y}<br><b>Ground Truth:</b> %{x}<br><b>Epoch:</b> %{text}<extra></extra>'
# ))

# # Add scatter points for combined value vs ground truth
# fig.add_trace(go.Scatter(
#     x=merged_df['ground_truth'], 
#     y=merged_df['combined_value'], 
#     mode='markers',
#     marker=dict(size=8, color='#3b528b'),
#     name='Combined Value',
#     text=merged_df['epoch'],
#     hovertemplate='<b>Combined Value:</b> %{y}<br><b>Ground Truth:</b> %{x}<br><b>Epoch:</b> %{text}<extra></extra>'
# ))

# # Calculate min and max values from merged data
# min_value = min(
#     merged_df['ground_truth'].min(),
#     merged_df['naive_value'].min(),
#     merged_df['combined_value'].min()
# )
# max_value = max(
#     merged_df['ground_truth'].max(),
#     merged_df['naive_value'].max(),
#     merged_df['combined_value'].max()
# )

# # Add padding
# padding = (max_value - min_value) * 0.05
# min_value_with_padding = min_value - padding
# max_value_with_padding = max_value + padding

# # Add y=x reference line
# fig.add_trace(go.Scatter(
#     x=[min_value_with_padding, max_value_with_padding],
#     y=[min_value_with_padding, max_value_with_padding],
#     mode='lines',
#     name='y=x',
#     line=dict(color='red', dash='dash')
# ))

# # Update layout
# fig.update_layout(
#     xaxis_title='Ground Truth',
#     yaxis_title='Network Inference',
#     title='Network Inferences vs Ground Truth',
#     title_x=0.5,
#     height=800,
#     width=1000,
#     template="plotly_white",
#     hovermode="closest",
#     spikedistance=-1,
#     xaxis=dict(
#         showspikes=True,
#         spikemode='across',
#         spikesnap='cursor',
#         spikethickness=1,
#         showline=True,
#         showgrid=True,
#         spikedash="dot",
#         spikecolor="grey",
#         range=[min_value_with_padding, max_value_with_padding],
#         scaleanchor='y',
#         constrain='domain',
#     ),
#     yaxis=dict(
#         showspikes=True,
#         spikemode='across',
#         spikesnap='cursor',
#         spikethickness=1,
#         showline=True,
#         showgrid=True,
#         spikedash="dot",
#         spikecolor="grey",
#         range=[min_value_with_padding, max_value_with_padding],
#         scaleratio=1
#     ),
#     hoverlabel=dict(
#         bgcolor="rgba(255, 255, 255, 0.7)",
#         bordercolor="rgba(0, 0, 0, 0)",
#         font=dict(
#             color='black',
#             size=12,
#             family="Arial"
#         )
#     ),
#     showlegend=True,
#     legend=dict(
#         yanchor="top",
#         y=0.99,
#         xanchor="left",
#         x=1.05
#     )
# )

# fig.show()

In [None]:
# Percentile levels are read from the first row (already a list).
# NOTE(review): this assumes every row of ci_df uses the same percentiles —
# confirm against the query that builds ci_df.
percentiles = ci_df['confidence_interval_percentiles'].iloc[0]
percentile_labels = [f'{p}th percentile' for p in percentiles]

# Create new figure for confidence intervals
fig_ci = go.Figure()

# The values are already lists, no need for conversion
ci_values = ci_df['confidence_interval_values']

# Report rows whose number of values does not match the percentile count
expected_length = len(percentiles)
for idx, values in enumerate(ci_values):
    if len(values) != expected_length:
        print(f"Row {idx} has {len(values)} values, expected {expected_length}")

# Marker colours for the percentile series; reused cyclically below
colors = ['red', 'orange', 'green', 'blue', 'purple']

# Individual inferer values as blue X's. All points go into a single trace
# instead of one trace per row: the figure stays small, and the legend entry
# no longer depends on ci_df having a row labelled 0.
individual_epochs = []
individual_values = []
for _, row in ci_df.iterrows():
    # inferer_values is already a list of dicts
    row_values = [float(val['value']) for val in row['inferer_values']]
    individual_epochs.extend([row['epoch']] * len(row_values))
    individual_values.extend(row_values)

fig_ci.add_trace(
    go.Scatter(
        x=individual_epochs,
        y=individual_values,
        mode='markers',
        name='Individual Inferences',
        marker=dict(symbol='x', size=8, color='blue', opacity=0.6)
    )
)

# One series per percentile level
for i in range(expected_length):
    fig_ci.add_trace(
        go.Scatter(
            x=ci_df['epoch'],
            # A row reported above as too short contributes a gap (None)
            # instead of raising IndexError.
            y=[values[i] if i < len(values) else None for values in ci_values],
            mode='markers',
            name=percentile_labels[i],
            # Cycle through the palette so more than len(colors) percentiles
            # do not raise IndexError.
            marker=dict(color=colors[i % len(colors)], size=6)
        )
    )

# Plot network inference as green plus
fig_ci.add_trace(
    go.Scatter(
        x=ci_df['epoch'],
        y=ci_df['combined_value'],
        mode='markers',
        name='Network Inference',
        marker=dict(symbol='cross', size=10, color='green', opacity=1)
    )
)

# Update layout
fig_ci.update_layout(
    title='Inferences and Percentiles per Epoch',
    xaxis_title='Epoch',
    yaxis_title='Values',
    showlegend=True,
    legend=dict(yanchor="top", y=1, xanchor="left", x=1.05),
    template='plotly_white',
    height=800,
    width=1000
)

# Add grid
fig_ci.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.2)')

# Show plot
fig_ci.show()

# Inferer Health

- We plot
    - $$\log\left(\frac{\text{MDM}}{\text{MAE}}\right)$$
    - where $$\text{MDM}$$ is the mean absolute distance to the mean inference and $$\text{MAE}$$ is the mean absolute error in the network combined inference
- A **large/positive** value is **healthy** and corresponds to the active set consisting of heterogeneous inferers 😁✅
- A **small/negative** value is **unhealthy** and corresponds to the active set consisting of homogeneous inferers  😢😷

In [None]:
# Inferer-health rows only, ordered by epoch so the EMA runs forward in time
inferer_health_df = (
    df[df['metric_name'] == 'infererhealth']
    .sort_values(by='epoch')
)

# Exponential moving average of the metric; alpha is the smoothing factor
alpha = 0.1
ema_values = inferer_health_df['metric_value'].ewm(alpha=alpha).mean()

# Colour of the Inferer Health line (a fixed yellow). The variable keeps the
# name it has always had in this cell even though it styles the inferer trace.
forecast_health_color = '#fde725'

# Both axes share the same spike / grid styling
_axis_style = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash="dot",
    spikecolor="grey",
)

_raw_trace = go.Scatter(
    x=inferer_health_df['epoch'],
    y=inferer_health_df['metric_value'],
    mode='lines+markers',
    name='Inferer Health',
    line=dict(color=forecast_health_color),
    marker=dict(color=forecast_health_color),
    hovertemplate='Inferer Health: %{y:.2f}<extra></extra>',
)
_ema_trace = go.Scatter(
    x=inferer_health_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA: %{y:.2f}<extra></extra>',
)

# Raw metric first, EMA drawn on top of it
fig = go.Figure(data=[_raw_trace, _ema_trace])

# The title takes the topic id from the first row of the filtered frame
fig.update_layout(
    title=f"Inferer Health Metric for Topic {inferer_health_df['topic_id'].iloc[0]}",
    xaxis_title="Epoch",
    yaxis_title="Inferer Health Value",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1,
    ),
)

fig.show()


In [None]:
def symlog_transform(value, linthresh=1e-2, linscale=1):
    """Apply an approximate symmetric-log (symlog) transform.

    Magnitudes up to ``linthresh`` are scaled linearly (``linscale * value``).
    Larger magnitudes continue logarithmically from the edge of the linear
    region::

        sign(value) * (linscale * linthresh + log10(|value| / linthresh))

    The result therefore keeps the sign of the input and is continuous at
    ``±linthresh``. A bare ``log10(|value|)`` in the log region would instead
    jump at the threshold and flip the sign of inputs with
    ``linthresh < |value| < 1``.

    Args:
        value: Scalar, NumPy array or pandas Series of numbers.
        linthresh: Positive magnitude at which the scale switches from
            linear to logarithmic.
        linscale: Slope of the linear region.

    Returns:
        The transformed value(s), with the same shape as ``value``.

    Raises:
        ValueError: If ``linthresh`` is not strictly positive.
    """
    if linthresh <= 0:
        raise ValueError("linthresh must be positive")

    sign = np.sign(value)
    abs_value = np.abs(value)
    in_log_region = abs_value > linthresh

    # np.where evaluates both branches, so magnitudes inside the linear region
    # (zeros included) are replaced by linthresh before taking the log; this
    # avoids log10(0) warnings and the placeholder result is discarded below.
    clamped = np.where(in_log_region, abs_value, linthresh)
    log_region = linscale * linthresh + np.log10(clamped / linthresh)

    # Linear region scaling for small values
    linear_region = linscale * abs_value

    return sign * np.where(in_log_region, log_region, linear_region)


# Split the metrics into forecasted losses and observed inferer losses
forecasts = raw_fore_inf_df[raw_fore_inf_df['metric_name'] == 'rawforecasts']
losses = raw_fore_inf_df[raw_fore_inf_df['metric_name'] == 'infererlosses']

# Mean value per (epoch, address) on each side, then pair the two means up;
# the merge keeps only pairs present on both sides.
_group_keys = ['epoch', 'address']
_mean_forecasts = forecasts.groupby(_group_keys)['metric_value'].mean().reset_index()
_mean_losses = losses.groupby(_group_keys)['metric_value'].mean().reset_index()
grouped = _mean_forecasts.merge(
    _mean_losses,
    on=_group_keys,
    suffixes=('_forecast', '_loss'),
)

# Symlog-transform both columns; linthresh is the edge of the linear region
linthresh = 0.25
grouped['symlog_forecast'] = symlog_transform(grouped['metric_value_forecast'], linthresh=linthresh)
grouped['symlog_loss'] = symlog_transform(grouped['metric_value_loss'], linthresh=linthresh)

# Common axis range: overall min/max of both transformed columns plus 5% padding
min_value = min(grouped['symlog_loss'].min(), grouped['symlog_forecast'].min())
max_value = max(grouped['symlog_loss'].max(), grouped['symlog_forecast'].max())
padding = (max_value - min_value) * 0.05
min_value_with_padding = min_value - padding
max_value_with_padding = max_value + padding
_padded_range = [min_value_with_padding, max_value_with_padding]

# All points share one teal colour
teal_color = '#21918c'

_points_trace = go.Scatter(
    x=grouped['symlog_loss'],
    y=grouped['symlog_forecast'],
    mode='markers',
    marker=dict(size=8, color=teal_color),
    name='',
    text=grouped['epoch'],  # epoch is shown in the hover box
    hovertemplate='<b>Symlog Loss:</b> %{x}<br><b>Symlog Forecast:</b> %{y}<br><b>Epoch:</b> %{text}<extra></extra>',
)

# y = x reference line across the padded range
_diagonal_trace = go.Scatter(
    x=[min_value_with_padding, max_value_with_padding],
    y=[min_value_with_padding, max_value_with_padding],
    mode='lines',
    name='y=x',
    line=dict(color='red', dash='dash'),
)

fig = go.Figure(data=[_points_trace, _diagonal_trace])


def _threshold_line(x0, y0, x1, y1):
    """Return a dotted, half-transparent grey line shape from (x0, y0) to (x1, y1)."""
    return dict(
        type="line",
        x0=x0,
        y0=y0,
        x1=x1,
        y1=y1,
        line=dict(color="grey", dash="dot"),
        opacity=0.5,
    )


# Spike / grid styling shared by both axes
_axis_style = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash="dot",
    spikecolor="grey",
)

fig.update_layout(
    xaxis_title='Observed Losses (Symlog)',
    yaxis_title='Forecasted Losses (Symlog)',
    title='Forecasters (All data, Symlog Scale)',
    title_x=0.5,
    height=800,
    width=1000,
    template="plotly_white",
    hovermode="closest",
    spikedistance=-1,
    # x axis is anchored to the y axis scale and constrained to its domain
    xaxis=dict(
        _axis_style,
        range=_padded_range,
        scaleanchor='y',
        constrain='domain',
    ),
    yaxis=dict(
        _axis_style,
        range=_padded_range,
        scaleratio=1,
    ),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font=dict(color='black', size=12, family="Arial"),
    ),
    # Guide lines at +/- linthresh: two vertical, then two horizontal
    shapes=[
        _threshold_line(linthresh, min_value_with_padding, linthresh, max_value_with_padding),
        _threshold_line(-linthresh, min_value_with_padding, -linthresh, max_value_with_padding),
        _threshold_line(min_value_with_padding, linthresh, max_value_with_padding, linthresh),
        _threshold_line(min_value_with_padding, -linthresh, max_value_with_padding, -linthresh),
    ],
    showlegend=False,
)

fig.show()


In [None]:
# First, process the weights and forecaster regrets
regrets_expanded = []
for _, row in forecast_regret_df.iterrows():
    # Skip rows where addresses or regrets are None
    if row['addresses'] is not None and row['regrets'] is not None:
        try:
            for addr, regret in zip(row['addresses'], row['regrets']):
                regrets_expanded.append({
                    'epoch': row['epoch'],
                    'addresses': addr,
                    'regrets': regret
                })
        except Exception as e:
            print(f"Error processing row with epoch {row['epoch']}: {e}")
            print("Row data:", row)
            continue

forecast_regrets_expanded = pd.DataFrame(regrets_expanded)

def extract_forecaster_weights(row):
    """Return the ``forecaster_weights`` entry of ``row`` as a DataFrame.

    The value is handed to ``pd.DataFrame`` unchanged (no JSON decoding is
    performed here).
    """
    return pd.DataFrame(row['forecaster_weights'])

# Build one small frame per weights_df row, tag it with that row's epoch and
# block height, and concatenate ONCE at the end. Concatenating inside the loop
# re-copies everything accumulated so far on every iteration.
weight_frames = []
for _, row in weights_df.iterrows():
    df_temp = extract_forecaster_weights(row)
    df_temp['epoch'] = row['epoch']
    df_temp['block_height'] = row['block_height']
    weight_frames.append(df_temp)

# pd.concat refuses an empty list, so fall back to an empty frame in that case.
forecaster_weights_expanded = pd.concat(weight_frames) if weight_frames else pd.DataFrame()

# Convert weight column to numeric
forecaster_weights_expanded['weight'] = pd.to_numeric(forecaster_weights_expanded['weight'])

# Join each forecaster's weight to its regret for the same epoch. The inner
# join keeps only (epoch, address) pairs present in both frames; rows are then
# ordered by epoch, largest weight first.
forecaster_performance = (
    forecaster_weights_expanded
    .merge(
        forecast_regrets_expanded,
        left_on=['epoch', 'worker'],
        right_on=['epoch', 'addresses'],
        how='inner',
    )
    .sort_values(['epoch', 'weight'], ascending=[True, False])
)

# Offset added to the weights so that a zero weight never reaches log10(0).
epsilon = 1e-10

# Marker styling: colour encodes the epoch and a labelled colour bar is shown.
epoch_markers = dict(
    size=8,
    opacity=0.6,
    color=forecaster_performance['epoch'],
    colorscale='Viridis',
    showscale=True,
    colorbar=dict(title='Epoch'),
)

# Hover box; %{text} is filled from the worker address passed as `text`.
regret_hover = (
    '<b>Address:</b> %{text}<br>'
    '<b>Regret:</b> %{x:.4f}<br>'
    '<b>Log Weight:</b> %{y:.4f}<br>'
    '<b>Epoch:</b> %{marker.color}<br>'
    '<extra></extra>'
)

# Scatter of regret (x) against log10 of the weight (y).
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=forecaster_performance['regrets'],
        y=np.log10(forecaster_performance['weight'] + epsilon),
        mode='markers',
        marker=epoch_markers,
        hovertemplate=regret_hover,
        text=forecaster_performance['worker'],
    )
)

fig.update_layout(
    title='Forecaster Regret vs Log Weight',
    xaxis_title='Forecaster Regret',
    yaxis_title='Log10(Weight + ε)',
    template='plotly_white',
    height=600,
    width=800,
)

fig.show()

## Reputers

In [None]:
# Flatten listening_df: every source row holds parallel `addresses` and
# `coefficients` sequences; produce one record per (row, address) pair.
rows = [
    {
        'block_height': row['block_height'],
        'epoch': row['epoch'],
        'address': addr,
        'coefficient': float(coef),
    }
    for _, row in listening_df.iterrows()
    for addr, coef in zip(row['addresses'], row['coefficients'])
]

df_expanded = pd.DataFrame(rows)

fig = go.Figure()

# One line per address, in order of first appearance; the legend shows the
# first 10 characters of the address.
for address in df_expanded['address'].unique():
    address_rows = df_expanded.loc[df_expanded['address'] == address]
    fig.add_trace(
        go.Scatter(
            x=address_rows['epoch'],
            y=address_rows['coefficient'],
            mode='lines+markers',
            name=address[:10] + '...',
            marker=dict(size=4),
        )
    )

# Titles, plus a legend anchored just outside the right edge of the plot area.
fig.update_layout(
    title='Listening Coefficients by Epoch',
    xaxis_title='Epoch',
    yaxis_title='Coefficient Value',
    showlegend=True,
    legend=dict(yanchor="top", y=1, xanchor="left", x=1.05),
    template='plotly_white',
)

# Same grid styling on both axes.
grid_style = dict(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.2)')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)

fig.show()

In [None]:
# Reputer reward rows only. `.copy()` keeps the edits below off raw_rewards_df.
reputer_rewards = raw_rewards_df[raw_rewards_df['metric_name'] == 'rawreputerreward'].copy()

# Scale the raw reward values down by 10**18.
reputer_rewards['metric_value'] = reputer_rewards['metric_value'] / 10**18

# Shortened address shown in the hover box (first 10 characters + '...').
reputer_rewards['address_short'] = reputer_rewards['address'].apply(lambda x: x[:10] + '...')

# One trace per address (color='address'). `custom_data` makes Plotly Express
# attach the shortened address to each trace row-for-row; assigning the whole
# column to every trace through update_traces(customdata=...) would pair
# points with addresses belonging to other traces.
fig = px.scatter(
    reputer_rewards,
    x='epoch',
    y='metric_value',
    color='address',
    custom_data=['address_short'],
    title='Individual Reputer Rewards Over Time',
    labels={'metric_value': 'Reward Value', 'epoch': 'Epoch'},
    template='plotly_white'
)

# Spike-line settings shared by both axes.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash='dot',
    spikecolor='grey'
)

# Axis styling, hover mode, and a legend placed to the right of the plot area.
fig.update_layout(
    xaxis=dict(spike_axis, dtick=200),  # x tick every 200 epochs
    yaxis=dict(spike_axis),
    hovermode='closest',
    legend_title_text='Reputer Address',
    legend=dict(
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.05
    )
)

# Hover template; customdata[0] is the per-point shortened address.
fig.update_traces(
    hovertemplate="<br>".join([
        "Epoch: %{x}",
        "Reward: %{y:.6f}",
        "Address: %{customdata[0]}",
        "<extra></extra>"
    ])
)

# Show the plot
fig.show()

In [None]:
# Reputer score rows only; `.copy()` so the helper column added below does not
# touch raw_score_df. (plotly.express is already imported as `px` in the
# notebook's import cell.)
reputer_scores = raw_score_df[raw_score_df['metric_name'] == 'rawreputerscore'].copy()

# Shortened address shown in the hover box (first 10 characters + '...').
reputer_scores['address_short'] = reputer_scores['address'].apply(lambda x: x[:10] + '...')

# One trace per address (color='address'). `custom_data` makes Plotly Express
# attach the shortened address to each trace row-for-row; assigning the whole
# column to every trace through update_traces(customdata=...) would pair
# points with addresses belonging to other traces.
fig = px.scatter(
    reputer_scores,
    x='epoch',
    y='metric_value',
    color='address',
    custom_data=['address_short'],
    title='Individual Reputer Scores Over Time',
    labels={
        'metric_value': 'Score Value',
        'epoch': 'Epoch'
    },
    template='plotly_white'
)

# Spike-line settings shared by both axes.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash='dot',
    spikecolor='grey'
)

# Axis styling, hover mode, and a legend placed to the right of the plot area.
# NOTE(review): the y-axis is logarithmic, so zero or negative scores (if any
# occur) will not be drawn - confirm that is intended.
fig.update_layout(
    xaxis=dict(spike_axis, dtick=200),  # x tick every 200 epochs
    yaxis=dict(spike_axis, type='log'),  # Use log scale for y-axis
    hovermode='closest',
    legend_title_text='Reputer Address',
    legend=dict(
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.05
    )
)

# Hover template; customdata[0] is the per-point shortened address.
fig.update_traces(
    hovertemplate="<br>".join([
        "Epoch: %{x}",
        "Score: %{y:.6f}",
        "Address: %{customdata[0]}",
        "<extra></extra>"
    ])
)

# Show the plot
fig.show()

In [None]:
# Flatten stakes_df into one record per (epoch, address). `stakes_array` is
# decoded with json.loads when it arrives as a string and used as-is otherwise.
flattened_stakes = []
for _, row in stakes_df.iterrows():
    epoch = row['epoch']
    raw_stakes = row['stakes_array']
    stakes = json.loads(raw_stakes) if isinstance(raw_stakes, str) else raw_stakes
    flattened_stakes.extend(
        {
            'epoch': epoch,
            'stake': float(stake_info['stake']) / 1e18,  # scaled down by 10^18
            'address': stake_info['address'],
        }
        for stake_info in stakes
    )

flat_stakes_df = pd.DataFrame(flattened_stakes)

# The plot shows log10 of the scaled stake.
flat_stakes_df['log_stake'] = np.log10(flat_stakes_df['stake'])

fig = go.Figure()

# One line per address; the legend shows a shortened address while the hover
# box shows the full address together with the un-logged stake (customdata).
for address in flat_stakes_df['address'].unique():
    address_data = flat_stakes_df.loc[flat_stakes_df['address'] == address]
    stake_hover = (
        f'<b>Address:</b> {address}<br>' +
        '<b>Epoch:</b> %{x}<br>' +
        '<b>log₁₀(stake):</b> %{y:.4f}<br>' +
        '<b>Stake:</b> %{customdata:.4f}<extra></extra>'
    )
    fig.add_trace(
        go.Scatter(
            x=address_data['epoch'],
            y=address_data['log_stake'],
            mode='lines+markers',
            name=address[:10] + '...',
            hovertemplate=stake_hover,
            customdata=address_data['stake'],
        )
    )

# Spike-line settings applied to both axes.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash='dot',
    spikecolor='grey',
)

fig.update_layout(
    title='Log of Reputer Stakes Over Time',
    xaxis_title='Epoch',
    yaxis_title='log₁₀(Stake)',
    height=600,
    width=1000,
    template='plotly_white',
    hovermode='x unified',
    showlegend=True,
    # Legend sits just to the right of the plot area.
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=1.02),
    xaxis=dict(spike_axis),
    yaxis=dict(spike_axis),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font=dict(size=12, family="Arial"),
    ),
)

fig.show()

In [None]:
import pandas as pd

# Every epoch between the first and last one present in reputer_payload_df,
# inclusive, so that gaps show up on the plot.
first_epoch = reputer_payload_df['epoch'].min()
last_epoch = reputer_payload_df['epoch'].max()
all_epochs = pd.DataFrame({'epoch': range(first_epoch, last_epoch + 1)})

# Left-join the payload counts onto the full epoch range; epochs without a
# matching row get payload_count = 0.
reputer_payload_df_full = (
    all_epochs
    .merge(reputer_payload_df[['epoch', 'payload_count']], on='epoch', how='left')
    .fillna({'payload_count': 0})
)

# Single line+marker trace of payload count per epoch.
payload_trace = go.Scatter(
    x=reputer_payload_df_full['epoch'],
    y=reputer_payload_df_full['payload_count'],
    mode='lines+markers',
    name='Number of Payloads',
    marker=dict(size=6),
    line=dict(width=2),
    hovertemplate='<b>Epoch:</b> %{x}<br><b>Number of Payloads:</b> %{y}<extra></extra>',
)

fig = go.Figure()
fig.add_trace(payload_trace)

# Spike-line settings common to both axes.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
)

fig.update_layout(
    title='Number of Payloads Over Time',
    xaxis_title='Epoch',
    yaxis_title='Number of Payloads',
    height=600,
    width=1000,
    template='plotly_white',
    hovermode='x unified',
    showlegend=True,
    xaxis=dict(spike_axis),
    # rangemode='nonnegative' keeps the y-axis from extending below zero.
    yaxis=dict(spike_axis, rangemode='nonnegative'),
)

fig.show()


## Tokenomics

In [None]:
# Bare expression: renders current_token_details as this cell's output.
# NOTE(review): the variable is not assigned in this section - presumably it
# is loaded by an earlier cell; confirm it exists on a fresh kernel run.
current_token_details

In [984]:
# actual_current_block_emission = current_token_details['previous_block_emission'].values[0]
# last_height_update = current_token_details['last_height_update'].values[0]
# next_height_update = current_token_details['next_height_update'].values[0]
# current_height = current_token_details['block_height'].values[0]
# ecosystem_remaining = current_token_details['ecosystem_mint_supply_remaining'].values[0]
# blocks_per_month = current_token_details['blocks_per_month'].values[0]


# target_emission_rate_per_unit_staked_token = current_token_details['target_emission_rate_per_unit_staked_token'].values[0]
# network_staked = current_token_details['network_staked'].values[0]
# emission_per_unit_staked_token = current_token_details['emission_per_unit_staked_token'].values[0]

# circulating_supply = current_token_details['circulating_supply'].values[0]
# total_locked = current_token_details['total_locked'].values[0]
# max_supply = current_token_details['max_supply'].values[0]

# months_remaining = ecosystem_remaining /(actual_current_block_emission*blocks_per_month)
# years_remaining= months_remaining /12


In [None]:
# Line+marker trace of `previous_emission` against block height.
emission_trace = go.Scatter(
    x=new_supply_df['block_height'],
    y=new_supply_df['previous_emission'],
    mode='lines+markers',
    name='Actual Emission',
    line=dict(color='blue', width=2),
    marker=dict(size=6, opacity=0.7),
)

fig = go.Figure()
fig.add_trace(emission_trace)

# The y-axis ticks use scientific notation with two decimals.
emission_axis = dict(
    tickformat='.2e',
    showexponent='all',
    exponentformat='e',
    showgrid=True,
)

fig.update_layout(
    title='Actual Emission per Block Values',
    xaxis_title='Block Height',
    yaxis_title='Actual Emission',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis=emission_axis,
)

# Light-grey grid lines on both axes.
grid_style = dict(showgrid=True, gridwidth=1, gridcolor='LightGray')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)

fig.show()

## Sortition

In [None]:
# Rows of the metrics frame holding the 'reputerscore' metric, in epoch order.
reputer_score_df = df[df['metric_name'] == 'reputerscore'].sort_values(by='epoch')

# Exponential moving average of the score with smoothing factor alpha = 0.1.
alpha = 0.1
ema_values = reputer_score_df['metric_value'].ewm(alpha=alpha).mean()

# Teal from Viridis; used for both the line and its markers.
reputer_color = '#21918c'

# Raw score per epoch.
score_trace = go.Scatter(
    x=reputer_score_df['epoch'],
    y=reputer_score_df['metric_value'],
    mode='lines+markers',
    name='Reputer Health',
    line=dict(color=reputer_color),
    marker=dict(color=reputer_color),
    hovertemplate='Reputer Score: %{y:.2f}<extra></extra>',
)

# Smoothed score drawn as a solid red line over the raw values.
ema_trace = go.Scatter(
    x=reputer_score_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA: %{y:.2f}<extra></extra>',
)

fig = go.Figure()
fig.add_trace(score_trace)
fig.add_trace(ema_trace)

# Spike-line settings applied identically to both axes.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash="dot",
    spikecolor="grey",
)

# The title takes the topic id from the first row of the filtered frame.
fig.update_layout(
    title=f"Reputer Score Metric for Topic {reputer_score_df['topic_id'].iloc[0]}",
    xaxis_title="Epoch",
    yaxis_title="Metric",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(spike_axis),
    yaxis=dict(spike_axis),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1,
    ),
)

fig.show()


In [None]:
# Rows of the metrics frame holding the 'sortitionscore' metric, in epoch order.
sortition_score_df = df[df['metric_name'] == 'sortitionscore'].sort_values(by='epoch')

# Exponential moving average of the score with smoothing factor alpha = 0.1.
alpha = 0.1
ema_values = sortition_score_df['metric_value'].ewm(alpha=alpha).mean()

# Dark purple from Viridis; used for both the line and its markers.
score_color = '#440154'

# Raw sortition score per epoch.
score_trace = go.Scatter(
    x=sortition_score_df['epoch'],
    y=sortition_score_df['metric_value'],
    mode='lines+markers',
    name='Sortition Score',
    line=dict(color=score_color),
    marker=dict(color=score_color),
    hovertemplate='Sortition Score: %{y:.3f}<extra></extra>',
)

# Smoothed score drawn as a solid red line over the raw values.
ema_trace = go.Scatter(
    x=sortition_score_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA: %{y:.3f}<extra></extra>',
)

fig = go.Figure()
fig.add_trace(score_trace)
fig.add_trace(ema_trace)

# Spike-line settings applied identically to both axes.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash="dot",
    spikecolor="grey",
)

# The title takes the topic id from the first row of the filtered frame.
fig.update_layout(
    title=f"Sortition Score Metric for Topic {sortition_score_df['topic_id'].iloc[0]}",
    xaxis_title="Epoch",
    yaxis_title="Metric",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(spike_axis),
    yaxis=dict(spike_axis),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1,
    ),
)

fig.show()


## Cycle time estimate

- We plot
    - $$\frac{\text{avg}(Q_{i}^{add}) - \text{avg}(Q_{i}^{rem})}{N_i}$$
    - where $$Q_i$$ is the ema score and $$N_i$$ is the number of replacements in the active set at epoch $$i$$
- A **large/positive** value is **healthy** and corresponds to increasing the quality of the active set😁✅
- A **small/negative** value is **unhealthy** and corresponds to decreasing the quality of the active set😢😷

## Average quality difference

# Merit-based sortition health

The metrics below cover inferers only. (TODO: add a filter for worker type.)

## Reputer scores metric (WIP)

- We plot
    - $$\log\left(\frac{\text{mean}(\text{scores})}{\text{std}(\text{scores})^2+\epsilon}\right)$$
    - with $$\epsilon =10^{-6}$$
- A **large/positive** value is **healthy** and corresponds to the reputers having large and similar scores 😁✅
- A **small/negative** value is **unhealthy** and corresponds to the reputers having small and varying scores 😢😷

- We plot
    - $$\frac{P_i}{N_i+1}$$
    - where $$P_i$$ is the number of participants and $$N_i$$ is the number of replacements in the active set at epoch $$i$$
- This metric estimates the number of epochs it will take to cycle through all participants on topic {{topic_id}}
    - A **small** value is **healthy** and means we cycle through the participants quickly 😁✅
    - A **large** value is **unhealthy** and means it will take several epochs to cycle through all participants 😢😷


In [None]:
# 'sortitiontime' rows whose value is neither 0 nor 1, copied so that a column
# can be added, then ordered by epoch.
is_sortition_time = df['metric_name'] == 'sortitiontime'
not_zero_or_one = df['metric_value'].ne(0) & df['metric_value'].ne(1)
sortition_time_df = df[is_sortition_time & not_zero_or_one].copy()
sortition_time_df = sortition_time_df.sort_values(by='epoch')

# The plot works on log10 of the metric.
sortition_time_df['log_metric_value'] = np.log10(sortition_time_df['metric_value'])

# Exponential moving average of the log values with smoothing factor 0.1.
alpha = 0.1
ema_values = sortition_time_df['log_metric_value'].ewm(alpha=alpha).mean()

# Mid blue from Viridis; used for both the line and its markers.
sortition_color = '#3b528b'

# Log-transformed metric per epoch; the hover box shows the log value.
log_trace = go.Scatter(
    x=sortition_time_df['epoch'],
    y=sortition_time_df['log_metric_value'],
    mode='lines+markers',
    name='Sortition Time (Log)',
    line=dict(color=sortition_color),
    marker=dict(color=sortition_color),
    hovertemplate='Log Sortition Time: %{y:.2f}<extra></extra>',
)

# Smoothed log values drawn as a solid red line on top.
ema_trace = go.Scatter(
    x=sortition_time_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA (Log)',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA (Log): %{y:.2f}<extra></extra>',
)

fig = go.Figure()
fig.add_trace(log_trace)
fig.add_trace(ema_trace)

# Spike-line settings applied identically to both axes. No axis type is set:
# the values are already log-transformed.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash="dot",
    spikecolor="grey",
)

# The title takes the topic id from the first row of the filtered frame.
fig.update_layout(
    title=f"Log Sortition Time Metric for Topic {sortition_time_df['topic_id'].iloc[0]}",
    xaxis_title="Epoch",
    yaxis_title="Log(Metric)",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(spike_axis),
    yaxis=dict(spike_axis),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1,
    ),
)

fig.show()


## Inferer lifetime estimate

- We plot
    - $$\frac{A_i}{N_i+\epsilon}$$
    - where $$A_i$$ is the number of active inferrers, $$N_i$$ is the number of replacements in the active set at epoch $$i$$, and $$\epsilon=10^{-6}$$
- This metric estimates how many epochs an inferrer typically stays active in the active set on topic {{topic_id}}
    - A **very small** value (close to 1) is **unhealthy** and indicates excessive churn in the active set 😢
    - A **moderate** value is **healthy** as it indicates appropriate turnover in the active set 😁✅
    - A **very large** value is **unhealthy** as it suggests the active set is stagnant with insufficient rotation of participants 😷

In [None]:
# 'lifetime' metric rows, copied so that a column can be added, ordered by
# epoch. No rows are dropped here (values of 1 are kept).
life_time_df = df[(df['metric_name'] == 'lifetime')].copy()
life_time_df = life_time_df.sort_values(by='epoch')

# log10 is undefined for values <= 0: log10(0) evaluates to -inf (with a
# runtime warning), and a single -inf drags every later EMA value to -inf.
# Mask non-positive values to NaN first; they then show as gaps in the line
# and are skipped by the EMA.
positive_lifetime = life_time_df['metric_value'].where(life_time_df['metric_value'] > 0)
life_time_df['log_metric_value'] = np.log10(positive_lifetime)

# Calculate EMA on the log-transformed values with alpha = 0.1
alpha = 0.1
ema_values = life_time_df['log_metric_value'].ewm(alpha=alpha).mean()

# Create the plot
fig = go.Figure()

# Teal from Viridis; used for both the line and its markers.
lifetime_color = '#21918c'

# Log-transformed lifetime per epoch; the hover box shows the log value.
fig.add_trace(go.Scatter(
    x=life_time_df['epoch'],
    y=life_time_df['log_metric_value'],
    mode='lines+markers',
    name='Lifetime (Log)',
    line=dict(color=lifetime_color),
    marker=dict(color=lifetime_color),
    hovertemplate='Log Lifetime: %{y:.2f}<extra></extra>'
))

# EMA line (red) on top of the log-transformed values
fig.add_trace(go.Scatter(
    x=life_time_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA (Log)',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA (Log): %{y:.2f}<extra></extra>'
))

# Layout. No axis type is set for y because the values are already
# log-transformed. The title takes the topic id from the first row.
fig.update_layout(
    title=f"Log Lifetime Metric for Topic {life_time_df['topic_id'].iloc[0]}",
    xaxis_title="Epoch",
    yaxis_title="Log(Lifetime Metric)",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        showline=True,
        showgrid=True,
        spikedash="dot",
        spikecolor="grey"
    ),
    yaxis=dict(
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        showline=True,
        showgrid=True,
        spikedash="dot",
        spikecolor="grey"
    ),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1
    )
)

# Show the plot
fig.show()

Number of active inferers, should be 32!

In [None]:
# Order the active-inferer counts by epoch.
num_active_df = num_active_df.sort_values(by='epoch')

# Exponential moving average of the count with smoothing factor alpha = 0.1.
alpha = 0.1
ema_values = num_active_df['inferrerlosses_count'].ewm(alpha=alpha).mean()

# Mid blue from Viridis; used for both the line and its markers.
count_color = '#3b528b'

# Dotted grey reference line at y = 32 spanning the plotted epoch range.
epoch_span = [num_active_df['epoch'].min(), num_active_df['epoch'].max()]
expected_trace = go.Scatter(
    x=epoch_span,
    y=[32, 32],
    mode='lines',
    name='Expected',
    line=dict(color='gray', dash='dot', width=1),
    hovertemplate='Expected: 32<extra></extra>',
)

# Observed count per epoch.
active_trace = go.Scatter(
    x=num_active_df['epoch'],
    y=num_active_df['inferrerlosses_count'],
    mode='lines+markers',
    name='Active Inferers',
    line=dict(color=count_color),
    marker=dict(color=count_color),
    hovertemplate='Active Inferers: %{y}<extra></extra>',
)

# Smoothed count drawn as a solid red line.
ema_trace = go.Scatter(
    x=num_active_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA: %{y:.2f}<extra></extra>',
)

# Traces are added in this order: reference line, observed counts, EMA.
fig = go.Figure()
for trace in (expected_trace, active_trace, ema_trace):
    fig.add_trace(trace)

# Spike-line settings applied identically to both axes.
spike_axis = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    showline=True,
    showgrid=True,
    spikedash="dot",
    spikecolor="grey",
)

# The title takes the topic id from the first row of the frame.
fig.update_layout(
    title=f"Number of Active Inferers for Topic {num_active_df['topic_id'].iloc[0]}",
    xaxis_title="Epoch",
    yaxis_title="Number of Active Inferers",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(spike_axis),
    yaxis=dict(spike_axis),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1,
    ),
)

fig.show()

In [None]:
# Cumulative rewards per worker type, one subplot each, ordered
# Reputer -> Inferer -> Forecaster.

# Trace colours. `reputer_color` is not assigned in this cell; it is the value
# set in the reputer-score cell of the Sortition section.
inferer_color = '#3b528b'  # Mid blue from Viridis (for inferers)
forecaster_color = '#fde725'  # Bright yellow from Viridis (for forecasters)

# Scale metric_value down by 10**18 into a NEW frame. A plain
# `dataframe_metrics = raw_rewards_df` is only an alias, so dividing through it
# would rewrite raw_rewards_df itself: re-running this cell, or any other cell
# that scales raw_rewards_df, would then divide the rewards a second time.
dataframe_metrics = raw_rewards_df.assign(
    metric_value=raw_rewards_df['metric_value'] / 10**18
)

# Metric keys as stored in `metric_name`, and the matching display labels.
original_metrics = ['rawreputerreward', 'rawinfererreward', 'rawforecasterreward']
renamed_metrics = ['Reputer rewards', 'Inferer rewards', 'Forecaster rewards']

# One colour per metric, in the same order as original_metrics.
colors = [reputer_color, inferer_color, forecaster_color]

# One row of subplots, one column per metric, sharing the y-axis.
num_metrics = len(original_metrics)
fig = make_subplots(rows=1, cols=num_metrics,
                    subplot_titles=[f"Cumulative {renamed_metrics[i]}" for i in range(num_metrics)],
                    horizontal_spacing=0.05,
                    shared_yaxes=True)

for i, metric_name in enumerate(original_metrics):
    metric_data = dataframe_metrics[dataframe_metrics['metric_name'] == metric_name]

    # Nothing to draw for this metric; its subplot stays empty.
    if metric_data.empty:
        print(f"No data found for {metric_name}, skipping.")
        continue

    # Total reward per epoch, then the running total across epochs.
    sum_per_epoch = metric_data.groupby('epoch')['metric_value'].sum().reset_index()
    cumulative_data = sum_per_epoch['metric_value'].cumsum()

    # log10 applied by hand; zeros become NaN first so log10(0) is never evaluated.
    cumulative_data_log = np.log10(cumulative_data.replace(0, np.nan))

    fig.add_trace(go.Scatter(
        x=sum_per_epoch['epoch'],
        y=cumulative_data_log,
        mode='lines+markers',
        line=dict(color=colors[i], width=2),
        marker=dict(color=colors[i]),
        name=f"Cumulative {renamed_metrics[i]}",
        hovertemplate='%{y:.2f}<extra></extra>'
    ), row=1, col=i+1)

# Figure-wide styling: no legend, and a wider left margin for the y-axis label.
fig.update_layout(
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    font=dict(size=12),
    showlegend=False,
    margin=dict(l=70, r=30, t=30, b=30),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial"
    )
)

# Same x-axis styling on every subplot.
fig.update_xaxes(
    title_text="Epoch",
    title_font=dict(size=12),
    tickfont=dict(size=12),
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    spikedash="dot",
    spikecolor="grey",
    dtick=50,  # tick every 50 epochs
    matches="x"  # all subplots follow the first x-axis
)

# Same y-axis styling on every subplot; only the first column gets a title.
for i in range(1, num_metrics + 1):
    fig.update_yaxes(
        title_text="Log(Cumulative Rewards)" if i == 1 else None,
        type="linear",  # linear axis: the data is already log-transformed
        title_font=dict(size=12),
        tickfont=dict(size=12),
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        spikedash="dot",
        spikecolor="grey",
        showticklabels=True,
        row=1, col=i
    )

# Show the plot
fig.show()


In [None]:
# Mean reward per (epoch, metric). This cell relies on dataframe_metrics,
# original_metrics, renamed_metrics, colors and num_metrics from the
# cumulative-rewards cell above.
average_rewards = (
    dataframe_metrics
    .groupby(['epoch', 'metric_name'])['metric_value']
    .mean()
    .reset_index()
)

# One row of subplots, one column per metric, sharing the y-axis.
average_titles = [f"Average {renamed_metrics[i]} per Worker" for i in range(num_metrics)]
fig = make_subplots(
    rows=1,
    cols=num_metrics,
    subplot_titles=average_titles,
    horizontal_spacing=0.05,
    shared_yaxes=True,
)

for i, metric_name in enumerate(original_metrics):
    metric_data = average_rewards[average_rewards['metric_name'] == metric_name]

    # Nothing to draw for this metric; its subplot stays empty.
    if metric_data.empty:
        print(f"No data found for {metric_name}, skipping.")
        continue

    average_trace = go.Scatter(
        x=metric_data['epoch'],
        y=metric_data['metric_value'],
        mode='lines+markers',
        line=dict(color=colors[i], width=2),
        marker=dict(color=colors[i]),
        name=f"Average {renamed_metrics[i]} per Worker",
        hovertemplate='%{y:.2f}<extra></extra>',
    )
    fig.add_trace(average_trace, row=1, col=i + 1)

# Figure-wide styling: no legend, and a wider left margin for the y-axis label.
fig.update_layout(
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    font=dict(size=12),
    showlegend=False,
    margin=dict(l=70, r=30, t=30, b=30),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
    ),
)

# Spike-line settings used on every axis of the figure.
spike_style = dict(
    showspikes=True,
    spikemode='across',
    spikesnap='cursor',
    spikethickness=1,
    spikedash="dot",
    spikecolor="grey",
)

# Same x-axis styling on every subplot: a tick every 50 epochs, and all
# subplots follow the first x-axis.
fig.update_xaxes(
    title_text="Epoch",
    title_font=dict(size=12),
    tickfont=dict(size=12),
    dtick=50,
    matches="x",
    **spike_style,
)

# Same y-axis styling on every subplot; only the first column gets a title.
for i in range(1, num_metrics + 1):
    y_title = "Average Rewards" if i == 1 else None
    fig.update_yaxes(
        title_text=y_title,
        type="linear",
        title_font=dict(size=12),
        tickfont=dict(size=12),
        showticklabels=True,
        row=1,
        col=i,
        **spike_style,
    )

fig.show()


In [None]:
# Mean reward per (epoch, worker type), plotted on a log10 scale in a single figure.
average_rewards = dataframe_metrics.groupby(['epoch', 'metric_name'])['metric_value'].mean().reset_index()

# Create a single plot for all metrics
fig = go.Figure()

for i, metric_name in enumerate(original_metrics):
    # Work on an independent copy: the log transform below overwrites a column, and
    # doing that on a filtered slice of average_rewards triggers SettingWithCopyWarning.
    metric_data = average_rewards[average_rewards['metric_name'] == metric_name].copy()

    # Check if there is data for this metric
    if metric_data.empty:
        print(f"No data found for {metric_name}, skipping.")
        continue

    # log10 is undefined for zero or negative averages; mask them to NaN so they appear
    # as gaps in the line instead of producing -inf/NaN with RuntimeWarnings.
    positive_values = metric_data['metric_value'].where(metric_data['metric_value'] > 0)
    metric_data['metric_value'] = np.log10(positive_values)

    # Add a line for each metric
    fig.add_trace(go.Scatter(
        x=metric_data['epoch'],
        y=metric_data['metric_value'],  # log10 of the average rewards
        mode='lines+markers',
        line=dict(color=colors[i], width=2),  # colors is indexed by metric position here
        marker=dict(color=colors[i]),
        name=f"Average {renamed_metrics[i]} per Worker",
        hovertemplate='%{y:.2f}<extra></extra>'  # Two-decimal value; <extra></extra> hides the trace-name box
    ))

# Figure-wide styling: white template, unified hover along x, legend enabled
fig.update_layout(
    title="Average Rewards by Worker Type",
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    font=dict(size=12),  # Set consistent font size for axis labels and ticks
    showlegend=True,  # Enable the legend to distinguish metrics
    margin=dict(l=70, r=30, t=30, b=30),  # Increase left margin to allow y-axis label
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial"
    ),
    xaxis=dict(
        title="Epoch",
        title_font=dict(size=12),
        tickfont=dict(size=12),
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        spikedash="dot",
        spikecolor="grey",
        dtick=50  # Place a tick every 50 epochs
    ),
    yaxis=dict(
        # The plotted values are already log10-transformed, so say so on the axis
        title="Log10(Average Rewards)",
        title_font=dict(size=12),
        tickfont=dict(size=12),
        type="linear",  # Linear axis: the log transform was applied to the data itself
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        spikedash="dot",
        spikecolor="grey"
    )
)

# Show the plot
fig.show()


In [None]:
# Number of distinct worker addresses for every (epoch, metric type) pair
worker_counts = (
    dataframe_metrics
    .groupby(['epoch', 'metric_name'])['address']
    .nunique()
    .reset_index()
)

# All worker types share one figure
fig_counts = go.Figure()

# Settings reused by both axes below
_spikes = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'spikedash': "dot",
    'spikecolor': "grey",
}
_axis_fonts = {'title_font': {'size': 12}, 'tickfont': {'size': 12}}

for i, metric_name in enumerate(original_metrics):
    metric_data = worker_counts[worker_counts['metric_name'] == metric_name]

    # Nothing recorded for this metric: report it and move on
    if metric_data.empty:
        print(f"No data found for {metric_name}, skipping.")
        continue

    # One line per worker type; 'address' now holds the unique-worker count
    _count_trace = go.Scatter(
        x=metric_data['epoch'],
        y=metric_data['address'],
        mode='lines+markers',
        line={'color': colors[i], 'width': 2},
        marker={'color': colors[i]},
        name=f"Number of {renamed_metrics[i]}",
        hovertemplate='%{y}<extra></extra>',
    )
    fig_counts.add_trace(_count_trace)

# Figure-wide styling: white template, unified hover along x, legend enabled
fig_counts.update_layout(
    title="Number of Workers per Type by Epoch",
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    font={'size': 12},
    showlegend=True,
    margin={'l': 70, 'r': 30, 't': 30, 'b': 30},  # wider left margin leaves room for the y-axis title
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
    },
    xaxis={
        'title': "Epoch",
        **_axis_fonts,
        **_spikes,
        'dtick': 50,  # a tick every 50 epochs
    },
    yaxis={
        'title': "Number of Workers",
        **_axis_fonts,
        'type': "linear",
        **_spikes,
    },
)

# Render the figure
fig_counts.show()


In [None]:

# Epoch number as a function of block time
fig = go.Figure()

# Standard plotly blue, used for both the line and its markers
line_color = '#1f77b4'

_epoch_trace = go.Scatter(
    x=time_df['block_time'],
    y=time_df['epoch'],
    mode='lines+markers',
    name='Epoch Progress',
    line={'color': line_color},
    marker={'color': line_color},
    hovertemplate='Epoch: %{y}<br>Time: %{x}<extra></extra>',
)
fig.add_trace(_epoch_trace)

# Both axes get the same spike-line, axis-line and grid settings
_axis_style = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'showline': True,
    'showgrid': True,
    'spikedash': "dot",
    'spikecolor': "grey",
}

# Fixed-size figure with the notebook's usual hover styling
fig.update_layout(
    title="Epoch Progress Over Time",
    xaxis_title="Time",
    yaxis_title="Epoch",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
        'namelength': -1,
    },
)

# Render the figure
fig.show()

In [None]:
# Block height as a function of block time
fig = go.Figure()

# Purple from the dark end of the Viridis palette, used for the line and its markers
line_color = '#440154'

_height_trace = go.Scatter(
    x=time_df['block_time'],
    y=time_df['block_height'],
    mode='lines+markers',
    name='Block Height Progress',
    line={'color': line_color},
    marker={'color': line_color},
    hovertemplate='Block Height: %{y}<br>Time: %{x}<extra></extra>',
)
fig.add_trace(_height_trace)

# Both axes get the same spike-line, axis-line and grid settings
_axis_style = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'showline': True,
    'showgrid': True,
    'spikedash': "dot",
    'spikecolor': "grey",
}

# Fixed-size figure with the notebook's usual hover styling
fig.update_layout(
    title="Block Height Progress Over Time",
    xaxis_title="Time",
    yaxis_title="Block Height",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
        'namelength': -1,
    },
)

# Render the figure
fig.show()


In [None]:
# Make sure block_time is a datetime column before differencing it
time_df['block_time'] = pd.to_datetime(time_df['block_time'])

# Elapsed seconds and elapsed blocks between consecutive rows of time_df
time_df['time_diff'] = time_df['block_time'].diff().dt.total_seconds()
time_df['block_diff'] = time_df['block_height'].diff()

# Average block duration over each interval (the first row has no predecessor, so it is NaN)
time_df['seconds_per_block'] = time_df['time_diff'] / time_df['block_diff']

fig = go.Figure()

# Purple from the dark end of the Viridis palette
line_color = '#440154'

# Main series: log10 of the block duration; the untransformed value is carried in
# `text` so the hover box can show it alongside the log value.
_duration_trace = go.Scatter(
    x=time_df['block_time'],
    y=np.log10(time_df['seconds_per_block']),
    mode='lines+markers',
    name='Seconds per Block (log10)',
    line={'color': line_color},
    marker={'color': line_color},
    hovertemplate='Seconds per Block: %{text:.2f}<br>Log10: %{y:.2f}<br>Time: %{x}<extra></extra>',
    text=time_df['seconds_per_block'],
)
fig.add_trace(_duration_trace)

# Reference line: the mean block duration, drawn flat across the whole time range
mean_seconds = time_df['seconds_per_block'].mean()
_log_mean = np.log10(mean_seconds)
_mean_trace = go.Scatter(
    x=time_df['block_time'],
    y=[_log_mean] * len(time_df),
    mode='lines',
    name=f'Mean ({mean_seconds:.2f}s)',
    line={'color': 'red', 'dash': 'dash'},
    # Doubled braces keep plotly's %{y:.2f} placeholder literal inside the f-string
    hovertemplate=f'Mean: {mean_seconds:.2f}s<br>Log10: %{{y:.2f}}<extra></extra>',
)
fig.add_trace(_mean_trace)

# Both axes get the same spike-line, axis-line and grid settings
_axis_style = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'showline': True,
    'showgrid': True,
    'spikedash': "dot",
    'spikecolor': "grey",
}

# Fixed-size figure with the notebook's usual hover styling
fig.update_layout(
    title="Log of Seconds per Block Over Time",
    xaxis_title="Time",
    yaxis_title="Log(Seconds per Block)",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
        'namelength': -1,
    },
)

# Render the figure
fig.show()

In [None]:
# Mean reward for every (epoch, worker address, metric type) combination
worker_rewards = (
    dataframe_metrics
    .groupby(['epoch', 'address', 'metric_name'])['metric_value']
    .mean()
    .reset_index()
)

# One subplot column per metric, all sharing a y-axis
_panel_titles = [f"Worker-Specific {renamed_metrics[i]}" for i in range(num_metrics)]
fig = make_subplots(
    rows=1,
    cols=num_metrics,
    subplot_titles=_panel_titles,
    horizontal_spacing=0.05,
    shared_yaxes=True,
)

# Spike-line settings applied to every axis
_spikes = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'spikedash': "dot",
    'spikecolor': "grey",
}

for i, metric_name in enumerate(original_metrics):
    metric_data = worker_rewards[worker_rewards['metric_name'] == metric_name]

    # Nothing recorded for this metric: report it and leave its column empty
    if metric_data.empty:
        print(f"No data found for {metric_name}, skipping.")
        continue

    # A thin line per worker, all placed in this metric's column
    for worker in metric_data['address'].unique():
        worker_data = metric_data.loc[metric_data['address'] == worker]
        _worker_trace = go.Scatter(
            x=worker_data['epoch'],
            y=worker_data['metric_value'],
            mode='lines',
            line={'width': 1},
            name=f"{renamed_metrics[i]} - {worker}",
            legendgroup=f"{metric_name}",
            showlegend=(i == 0),  # legend entries only for the first metric
            hovertemplate='%{y:.2f}<extra></extra>',
        )
        fig.add_trace(_worker_trace, row=1, col=i+1)

# Figure-wide styling; the legend is switched off for the whole figure
fig.update_layout(
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    font={'size': 12},
    showlegend=False,
    margin={'l': 70, 'r': 30, 't': 30, 'b': 30},  # wider left margin leaves room for the y-axis title
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
    },
)

# Identical x-axis styling on every subplot, with linked ranges and a tick every 50 epochs
fig.update_xaxes(
    title_text="Epoch",
    title_font={'size': 12},
    tickfont={'size': 12},
    **_spikes,
    dtick=50,
    matches="x",
)

# Identical y-axis styling per column; only the first column carries an axis title
for i in range(1, num_metrics + 1):
    fig.update_yaxes(
        title_text="Worker-Specific Rewards" if i == 1 else None,
        type="linear",
        title_font={'size': 12},
        tickfont={'size': 12},
        **_spikes,
        showticklabels=True,
        row=1,
        col=i,
    )

# Render the figure
fig.show()


In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# raw_rewards_df must already exist. Order it by epoch and divide the reward values by 10^18.
dataframe_metrics = (
    raw_rewards_df
    .sort_values(by='epoch')
    .assign(metric_value=lambda frame: frame['metric_value'] / 10**18)
)

# Total reward per (epoch, worker type)
summed_rewards = (
    dataframe_metrics
    .groupby(['epoch', 'metric_name'])['metric_value']
    .sum()
    .reset_index()
)

# Total reward per epoch across all worker types
total_rewards_per_epoch = (
    summed_rewards
    .groupby('epoch')['metric_value']
    .sum()
    .reset_index()
    .rename(columns={'metric_value': 'total_rewards'})
)

# Attach the epoch total to each row, then express each type's reward as a share of it
summed_rewards = summed_rewards.merge(total_rewards_per_epoch, on='epoch')
summed_rewards['fraction'] = summed_rewards['metric_value'] / summed_rewards['total_rewards']

# Line colour per reward metric.
# NOTE(review): this rebinds `colors` to a dict keyed by metric name, while other cells
# index `colors[i]` by position - re-running those after this cell would presumably fail; confirm.
colors = {
    'rawreputerreward': '#21918c',     # Teal
    'rawinfererreward': '#3b528b',     # Mid blue
    'rawforecasterreward': '#fde725',  # Bright yellow
}

fig = go.Figure()

# One line per worker type; the legend label is the metric name without 'raw' and 'reward'
for metric_name, color in colors.items():
    metric_data = summed_rewards[summed_rewards['metric_name'] == metric_name]
    _fraction_trace = go.Scatter(
        x=metric_data['epoch'],
        y=metric_data['fraction'],
        mode='lines+markers',
        line={'color': color, 'width': 2},
        marker={'color': color, 'size': 4},
        name=metric_name.replace('raw', '').replace('reward', ''),
        hovertemplate='Epoch: %{x}<br>Fraction: %{y:.3f}<extra></extra>',
    )
    fig.add_trace(_fraction_trace)

# Dashed reference line at an equal three-way split
fig.add_hline(
    y=1/3,
    line_dash="dash",
    line_color="gray",
    annotation_text="Equal Split (1/3)",
    annotation_position="right",
)

# Spike-line, axis-line and grid settings common to both axes
_axis_style = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'showline': True,
    'showgrid': True,
    'spikedash': 'dot',
    'spikecolor': 'grey',
}

fig.update_layout(
    title='Fraction of Rewards per Worker Type',
    xaxis_title='Epoch',
    yaxis_title='Fraction of Total Rewards',
    template='plotly_white',
    hovermode='x unified',
    spikedistance=-1,
    showlegend=True,
    # Legend sits just outside the plot area, top right
    legend={'yanchor': 'top', 'y': 1, 'xanchor': 'left', 'x': 1.05},
    xaxis={**_axis_style, 'dtick': 50},       # a tick every 50 epochs
    yaxis={**_axis_style, 'range': [0, 1]},   # fractions live in [0, 1]
    hoverlabel={
        'bgcolor': 'rgba(255, 255, 255, 0.7)',
        'bordercolor': 'rgba(0, 0, 0, 0)',
        'font_size': 12,
        'font_family': 'Arial',
    },
)

# Render the figure
fig.show()

In [None]:
# numpy, pandas, plotly.graph_objects and make_subplots are expected to be imported already.

# Rows of raw_score_df for each score type, ordered by epoch
reputer_scores = (
    raw_score_df[raw_score_df['metric_name'] == 'rawreputerscore']
    .copy()
    .sort_values(by='epoch')
)
inferer_scores = (
    raw_score_df[raw_score_df['metric_name'] == 'rawinfererscore']
    .copy()
    .sort_values(by='epoch')
)
forecaster_scores = (
    raw_score_df[raw_score_df['metric_name'] == 'rawforecasterscore']
    .copy()
    .sort_values(by='epoch')
)

# Mean score per epoch for each type
reputer_avg = reputer_scores.groupby('epoch')['metric_value'].mean().reset_index()
inferer_avg = inferer_scores.groupby('epoch')['metric_value'].mean().reset_index()
forecaster_avg = forecaster_scores.groupby('epoch')['metric_value'].mean().reset_index()

# Reputer averages are shown as log10; zeros become NaN first so the log is defined
reputer_avg['log_metric_value'] = np.log10(reputer_avg['metric_value'].replace(0, np.nan))

# Exponential moving average per type (on the log values for reputers)
alpha = 0.1  # EMA smoothing factor
reputer_avg['ema'] = reputer_avg['log_metric_value'].ewm(alpha=alpha).mean()
inferer_avg['ema'] = inferer_avg['metric_value'].ewm(alpha=alpha).mean()
forecaster_avg['ema'] = forecaster_avg['metric_value'].ewm(alpha=alpha).mean()

# Three side-by-side panels, each with its own y-axis scale
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=(
        "Average Reputer Score",
        "Average Inferer Score",
        "Average Forecaster Score",
    ),
    horizontal_spacing=0.05,
    shared_yaxes=False,
)

# Per-panel description: column, data, y column, colour, trace name, hover text, EMA trace name
_panels = [
    (1, reputer_avg, 'log_metric_value', reputer_color,
     'Average of reputer scores (Log)', 'Reputer Avg: %{y:.2f}<extra></extra>', 'EMA (Reputer)'),
    (2, inferer_avg, 'metric_value', inferer_color,
     'Average of inferer scores', 'Inferer Avg: %{y:.2f}<extra></extra>', 'EMA (Inferer)'),
    (3, forecaster_avg, 'metric_value', forecaster_color,
     'Average of forecaster scores', 'Forecaster Avg: %{y:.2f}<extra></extra>', 'EMA (Forecaster)'),
]

for _col, _avg, _value_column, _color, _label, _hover, _ema_label in _panels:
    # Per-epoch average for this panel
    fig.add_trace(go.Scatter(
        x=_avg['epoch'],
        y=_avg[_value_column],
        mode='lines+markers',
        line={'color': _color, 'width': 2},
        marker={'color': _color},
        name=_label,
        hovertemplate=_hover,
    ), row=1, col=_col)

    # Its EMA, drawn on top as a solid red line
    fig.add_trace(go.Scatter(
        x=_avg['epoch'],
        y=_avg['ema'],
        mode='lines',
        line={'color': 'red', 'width': 2, 'dash': 'solid'},
        name=_ema_label,
        hovertemplate='EMA: %{y:.2f}<extra></extra>',
    ), row=1, col=_col)

# Figure-wide styling; the legend is hidden
fig.update_layout(
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    font={'size': 12},
    showlegend=False,
    margin={'l': 70, 'r': 30, 't': 30, 'b': 30},  # wider left margin leaves room for the y-axis title
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
    },
)

# Spike-line settings applied to every axis
_spikes = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'spikedash': "dot",
    'spikecolor': "grey",
}

# Identical x-axis styling on all panels, with linked ranges
fig.update_xaxes(
    title_text="Epoch",
    title_font={'size': 12},
    tickfont={'size': 12},
    **_spikes,
    matches='x',
)

# Reputer panel: the data is already log10, so the axis itself stays linear
fig.update_yaxes(
    title_text="Log(Average Score)",
    type="linear",
    title_font={'size': 12},
    tickfont={'size': 12},
    **_spikes,
    showticklabels=True,
    row=1, col=1,
)

# Inferer panel
fig.update_yaxes(
    title_text="Average Score",
    title_font={'size': 12},
    tickfont={'size': 12},
    **_spikes,
    showticklabels=True,
    row=1, col=2,
)

# Forecaster panel (no explicit title/tick font is set for this axis)
fig.update_yaxes(
    title_text="Average Score",
    **_spikes,
    showticklabels=True,
    row=1, col=3,
)

# Render the figure
fig.show()


In [None]:
# Block emission against block height
fig = go.Figure()

_emission_trace = go.Scatter(
    x=new_supply_df['block_height'],
    y=new_supply_df['block_emission'],
    mode='lines+markers',
    name='Recomputed Emission',
    line={'color': 'blue', 'width': 2},
    marker={'size': 6, 'opacity': 0.7},
)
fig.add_trace(_emission_trace)

# Titles, unified hover and scientific-notation ticks on the y-axis
fig.update_layout(
    title='Recomputed Emission per Block Values',
    xaxis_title='Block Height',
    yaxis_title='Actual Emission',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
    },
)

# Light grey grid lines on both axes
_grid = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'LightGray'}
fig.update_xaxes(**_grid)
fig.update_yaxes(**_grid)

# Render the figure
fig.show()

Here I plot the monthly emission per unit staked. Note that this does consider the actual current staked value; this is the value that was stored in the emissions module and was used when performing the emission update:

In [None]:
# Stored monthly emission per unit staked against block height
fig = go.Figure()

_monthly_trace = go.Scatter(
    x=new_supply_df['block_height'],
    y=new_supply_df['previous_reward_emission_per_unit_staked_token'],
    mode='lines+markers',
    name='Monthly Emission',
    line={'color': 'orange', 'width': 2},
    marker={'size': 6, 'opacity': 0.7},
)
fig.add_trace(_monthly_trace)

# Titles, unified hover and scientific-notation ticks on the y-axis
fig.update_layout(
    title='Monthly Emission Per unit Staked',
    xaxis_title='Block Height',
    yaxis_title='Monthly Emission',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
    },
)

# Light grey grid lines on both axes
_grid = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'LightGray'}
fig.update_xaxes(**_grid)
fig.update_yaxes(**_grid)

# Render the figure
fig.show()

Here I plot what the new monthly emission would be if we were to compute it at block $i$:

In [None]:
# Monthly emission per unit staked, recomputed from the block emission at each block
fig = go.Figure()

# Block emission scaled by 864000.0 (presumably the blocks-per-month figure - confirm)
# and divided by the staked amount recorded for the same row.
_recomputed_monthly = new_supply_df['block_emission']*864000.0/new_supply_df['network_staked']

_recomputed_trace = go.Scatter(
    x=new_supply_df['block_height'],
    y=_recomputed_monthly,
    mode='lines+markers',
    name='Recomputed Monthly Emission',
    line={'color': 'green', 'width': 2},
    marker={'size': 6, 'opacity': 0.7},
)
fig.add_trace(_recomputed_trace)

# Titles, unified hover and scientific-notation ticks on the y-axis
fig.update_layout(
    title='Recomputed Monthly emission per unit staked',
    xaxis_title='Block Height',
    yaxis_title='Monthly Emission',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
    },
)

# Light grey grid lines on both axes
_grid = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'LightGray'}
fig.update_xaxes(**_grid)
fig.update_yaxes(**_grid)

# Render the figure
fig.show()

Here we plot the target emission $\hat{e}_i$ if we were to update the emission at block $i$:

In [None]:
# Target emission per unit staked against block height
fig = go.Figure()

_target_trace = go.Scatter(
    x=new_supply_df['block_height'],
    y=new_supply_df['target_reward_emission_per_unit_staked_token'],
    mode='lines+markers',
    name='Target Emission',
    line={'color': 'purple', 'width': 2},
    marker={'size': 6, 'opacity': 0.7},
)
fig.add_trace(_target_trace)

# Titles, unified hover and scientific-notation ticks on the y-axis
fig.update_layout(
    title='Target Emission per unit staked',
    xaxis_title='Block Height',
    yaxis_title='Monthly Emission',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
    },
)

# Light grey grid lines on both axes
_grid = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'LightGray'}
fig.update_xaxes(**_grid)
fig.update_yaxes(**_grid)

# Render the figure
fig.show()

## Research Metrics

### Fundamental metric

We implement a calculation to verify the emission process. This fundamental metric checks the correctness of emissions at each epoch.

- A value of 1 is healthy and indicates that the emission process is functioning correctly! 😁✅
- Any deviation from 1 is unhealthy and suggests potential issues with the emission process! 😢😷

In [None]:
# Weight given to the target emission when blending it with the previous emission
alpha = 0.1

fig = go.Figure()

# Numerator: blend of target and previous emission per unit staked, weighted by alpha
_blended_emission = (
    alpha*new_supply_df['target_reward_emission_per_unit_staked_token']
    + (1-alpha)*new_supply_df['previous_reward_emission_per_unit_staked_token']
)
# Denominator: block emission scaled by 864000.0 and divided by the staked amount
_recomputed_monthly = new_supply_df['block_emission']*864000.0/new_supply_df['network_staked']

_metric_trace = go.Scatter(
    x=new_supply_df['block_height'],
    y=_blended_emission/_recomputed_monthly,
    mode='lines+markers',
    name='Fundamental Metric',
    line={'color': 'pink', 'width': 2},
    marker={'size': 6, 'opacity': 0.7},
)
fig.add_trace(_metric_trace)

# Titles, unified hover, scientific-notation ticks and a y-range fixed to [0.9, 1.1]
fig.update_layout(
    title='Fundamental metric (recomputed EMA vs actual emission)',
    xaxis_title='Block Height',
    yaxis_title='Fundamental Metric',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
        'range': [0.9, 1.1],
    },
)

# Light grey grid lines on both axes
_grid = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'LightGray'}
fig.update_xaxes(**_grid)
fig.update_yaxes(**_grid)

# Render the figure
fig.show()

In [None]:
# Align each row's previous_emission with the block_emission of the row before it,
# then take their ratio (the first row has no predecessor, so its ratio is NaN).
new_supply_df['shifted_block_emission'] = new_supply_df['block_emission'].shift(1)
new_supply_df['emission_ratio'] = new_supply_df['shifted_block_emission'].div(new_supply_df['previous_emission'])

# Alternative kept for reference (per-unit-staked variant of the same comparison):
# new_supply_df['shifted_block_emission'] = new_supply_df['emission_per_unit_staked_token'].shift(1)
# new_supply_df['emission_ratio'] = new_supply_df['shifted_block_emission'] / new_supply_df['previous_reward_emission_per_unit_staked_token']

fig = go.Figure()

_ratio_trace = go.Scatter(
    x=new_supply_df['block_height'],
    y=new_supply_df['emission_ratio'],
    mode='lines+markers',
    name='Ratio to Previous Emission',
    line={'color': 'blue', 'width': 2},
    marker={'size': 6, 'opacity': 0.7},
)
fig.add_trace(_ratio_trace)

# Unannotated dashed reference line at a ratio of 1
fig.add_hline(y=1, line_dash="dash", line_color="red", opacity=0.7)

# Titles, unified hover and scientific-notation ticks on the y-axis
fig.update_layout(
    title='Ratio of Recomputed Block Emission to Previous Block Emission Value',
    xaxis_title='Block Height',
    yaxis_title='Ratio (Block Emission/Previous)',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
    },
)

# Light grey grid lines on both axes
_grid = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'LightGray'}
fig.update_xaxes(**_grid)
fig.update_yaxes(**_grid)

# Render the figure
fig.show()

Here I compare the change in circulating supply (or inflation) to the total emission. A negative value implies that the circulating supply increased less than we thought it would. These measurements are taken every few blocks, so there is some noise.

In [None]:
import numpy as np

# Row-to-row change in circulating supply and in block height.
new_supply_df['total_supply_change'] = new_supply_df['circulating_supply'].diff()
new_supply_df['block_diff'] = new_supply_df['block_height'].diff()

# Supply change per block, expressed as a multiple of previous_emission.
# Disabled variant normalised by actual_current_block_emission instead:
# new_supply_df['change_per_block'] = (new_supply_df['total_supply_change'] / new_supply_df['block_diff']) / actual_current_block_emission
new_supply_df['change_per_block'] = (
    (new_supply_df['total_supply_change'] / new_supply_df['block_diff'])
    / new_supply_df['previous_emission']
)

# Base-10 log of that multiple; 0 means the two quantities are equal.
new_supply_df['log_ratio'] = np.log10(new_supply_df['change_per_block'])

# Single-trace figure: log ratio against block height.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_supply_df['block_height'],
            y=new_supply_df['log_ratio'],
            mode='lines+markers',
            name='Log10 Ratio to Current Emission',
            line={'color': 'purple', 'width': 2},
            marker={'size': 6, 'opacity': 0.7},
        )
    ]
)

# Dashed red guide at 0, i.e. log10(1).
fig.add_hline(y=0, line_dash="dash", line_color="red", opacity=0.7)

# Titles, hover behaviour and scientific-notation ticks on the y axis.
fig.update_layout(
    title='Log10 Ratio of Actual Supply Change to Current Block Emission',
    xaxis_title='Block Height',
    yaxis_title='Log10(Actual/Current)',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
    },
)

# Light-gray grid lines on both axes.
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray').update_yaxes(
    showgrid=True, gridwidth=1, gridcolor='LightGray'
)

fig.show()

Here I compare the decrease in the remaining ecosystem mint supply to the total emission. A value of $$<1$$ implies that the treasury supply decreased less than we thought it would.

In [None]:
# Row-to-row change in the remaining ecosystem mint supply and in block height.
new_supply_df['ecosystem_change'] = new_supply_df['ecosystem_mint_supply_remaining'].diff()
new_supply_df['block_diff'] = new_supply_df['block_height'].diff()

# Negated per-block change (so a shrinking supply is positive), expressed as a
# multiple of previous_emission.
new_supply_df['ecosystem_change_per_block'] = (
    -(new_supply_df['ecosystem_change'] / new_supply_df['block_diff'])
    / new_supply_df['previous_emission']
)

# Single-trace figure: normalised ecosystem supply decrease against block height.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_supply_df['block_height'],
            y=new_supply_df['ecosystem_change_per_block'],
            mode='lines+markers',
            name='Ratio to Current Emission',
            line={'color': 'green', 'width': 2},
            marker={'size': 6, 'opacity': 0.7},
        )
    ]
)

# Dashed red guide at a ratio of 1.
fig.add_hline(y=1, line_dash="dash", line_color="red", opacity=0.7)

# Titles, hover behaviour and y-axis tick formatting.
fig.update_layout(
    title='Ratio of Ecosystem Supply Change per Block to Current Block Emission',
    xaxis_title='Block Height',
    yaxis_title='Ratio (Ecosystem Change/Current Emission)',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.10e',  # scientific notation with 10 decimal places
        'showexponent': 'all',
        'exponentformat': 'e',
        'showgrid': True,
    },
)

# Light-gray grid lines on both axes.
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray').update_yaxes(
    showgrid=True, gridwidth=1, gridcolor='LightGray'
)

fig.show()

Below are plots to monitor the total tokens staked, tokens circulating, and the remaining ecosystem mint balance:

In [None]:
alpha = 0.1  # not read anywhere in this cell

# Single-trace figure: circulating supply (divided by 1e6) against block height.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_supply_df['block_height'],
            y=new_supply_df['circulating_supply'] / 1_000_000,  # plotted in millions
            mode='lines+markers',
            name='Circulating Supply',
            line={'color': 'red', 'width': 2},
            marker={'size': 6, 'opacity': 0.7},
        )
    ]
)

# Titles, hover behaviour and y-axis tick formatting.
fig.update_layout(
    title='Circulating Supply',
    xaxis_title='Block Height',
    yaxis_title='Circulating Supply (Millions)',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': ',',   # thousands separators
        'dtick': 0.01,       # one tick every 0.01 axis units (the axis is in millions)
        'showgrid': True,
        # Disabled: explicit axis range with 1% padding around the data.
        # 'range': [
        #     (new_supply_df['circulating_supply'].min() / 1_000_000) * 0.99,
        #     (new_supply_df['circulating_supply'].max() / 1_000_000) * 1.01
        # ]
    },
)

# Light-gray grid lines on both axes.
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray').update_yaxes(
    showgrid=True, gridwidth=1, gridcolor='LightGray'
)

fig.show()

# Supply monitor

### Inflation rate and treasury decrease rate

### Normalized Emission per Unit Staked

In [None]:
# Single-trace figure: network stake (divided by 1e6) against block height.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_supply_df['block_height'],
            y=new_supply_df['network_staked'] / 1_000_000,  # plotted in millions
            mode='lines+markers',
            name='Network Staked',
            line={'color': 'blue', 'width': 2},
            marker={'size': 6, 'opacity': 0.7},
        )
    ]
)

# Titles, hover behaviour and y-axis tick formatting.
fig.update_layout(
    title='Network Staked',
    xaxis_title='Block Height',
    yaxis_title='Staked Tokens (Millions)',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': ',',   # thousands separators
        'dtick': 0.1,        # one tick every 0.1 axis units (the axis is in millions)
        'showgrid': True,
        # Disabled: explicit axis range with 1% padding around the data.
        # 'range': [
        #     (new_supply_df['network_staked'].min() / 1_000_000) * 0.99,
        #     (new_supply_df['network_staked'].max() / 1_000_000) * 1.01
        # ]
    },
)

# Light-gray grid lines on both axes.
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray').update_yaxes(
    showgrid=True, gridwidth=1, gridcolor='LightGray'
)

fig.show()

In [None]:
# Single-trace figure: ecosystem balance against block height.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_supply_df['block_height'],
            y=new_supply_df['ecosystem_balance'],
            mode='lines+markers',
            name='Ecosystem Balance',
            line={'color': 'purple', 'width': 2},
            marker={'size': 6, 'opacity': 0.7},
        )
    ]
)

# Titles, hover behaviour and scientific-notation ticks on the y axis.
fig.update_layout(
    title='Ecosystem Balance Over Time',
    xaxis_title='Block Height',
    yaxis_title='Ecosystem Balance',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
    },
)

# Light-gray grid lines on both axes.
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray').update_yaxes(
    showgrid=True, gridwidth=1, gridcolor='LightGray'
)

fig.show()

In [None]:
# One line trace per locked-token column of locked_df, all against block height.
# Each tuple is (column, legend label, line colour); the order fixes the trace order.
fig = go.Figure(
    data=[
        go.Scatter(
            x=locked_df['block_height'],
            y=locked_df[column],
            mode='lines',
            name=label,
            line={'color': colour, 'width': 2},
        )
        for column, label, colour in (
            ('investors_seed_locked', 'Investors Seed Locked', 'blue'),
            ('investors_preseed_locked', 'Investors Preseed Locked', 'green'),
            ('ecosystem_locked', 'Ecosystem Locked', 'red'),
            ('total_locked', 'Total Locked', 'purple'),
            ('team_locked', 'Team Locked', 'orange'),
        )
    ]
)

# Titles, hover behaviour and scientific-notation ticks on the y axis.
fig.update_layout(
    title='Locked Token Categories Over Time',
    xaxis_title='Block Height',
    yaxis_title='Amount Locked',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': '.2e',
        'showexponent': 'all',
        'exponentformat': 'e',
    },
)

# Light-gray grid lines on both axes.
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray').update_yaxes(
    showgrid=True, gridwidth=1, gridcolor='LightGray'
)

fig.show()

In [None]:
alpha = 0.1  # not read anywhere in this cell

# Single-trace figure: remaining ecosystem mint supply (divided by 1e6) against block height.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_supply_df['block_height'],
            y=new_supply_df['ecosystem_mint_supply_remaining'] / 1_000_000,  # plotted in millions
            mode='lines+markers',
            name='Ecosystem Mint Supply Remaining',
            line={'color': 'green', 'width': 2},
            marker={'size': 6, 'opacity': 0.7},
        )
    ]
)

# Titles, hover behaviour and y-axis tick formatting.
fig.update_layout(
    title='Ecosystem Mint Supply Remaining',
    xaxis_title='Block Height',
    yaxis_title='Supply Remaining (Millions)',
    showlegend=True,
    hovermode='x unified',
    template='plotly_white',
    yaxis={
        'tickformat': ',',   # thousands separators
        'dtick': 0.01,       # one tick every 0.01 axis units (the axis is in millions)
        'showgrid': True,
        # Disabled: explicit axis range with 1% padding around the data.
        # 'range': [
        #     (new_supply_df['ecosystem_mint_supply_remaining'].min() / 1_000_000) * 0.99,
        #     (new_supply_df['ecosystem_mint_supply_remaining'].max() / 1_000_000) * 1.01
        # ]
    },
)

# Light-gray grid lines on both axes.
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray').update_yaxes(
    showgrid=True, gridwidth=1, gridcolor='LightGray'
)

fig.show()

- Let $$P_i$$ be the validator reward distribution at epoch $$i$$
- First we define the entropy as
    - $$E_i = -\sum_{m=1}^{n_r} P_{im} \ln\left( P_{im}\right) \left( \frac{n_{r,eff}}{n_r}\right)^\beta$$
    - where $$n_{r,eff} =  \frac{1}{\sum_{m=1}^{n_r} P_{im}^2},$$ and $$\beta = 0.25$$
- Then, we plot
    - $$    H_i = 10^{C_r(E_i / \ln(n_r) - 1)}$$
    - with $$C_r = 1$$
- This metric is bounded on the interval $$[0,1]$$
- A **large/close to 1** value is **healthy** and corresponds to the validators having similar rewards 😁✅
- A **small/close to 0** value is **unhealthy** and corresponds to the validators having varying rewards 😢😷

## Validator Rewards

In [None]:
# Order the validator rows by block height (working on a copy) before smoothing.
validator_df = validator_df.sort_values(by='block_height').copy()

# Exponentially weighted mean of normalized_amount with smoothing factor 0.1.
alpha = 0.1
ema_values = validator_df['normalized_amount'].ewm(alpha=alpha).mean()

# Viridis mid blue, used for both the line and its markers.
validator_color = '#3b528b'

# Spike-line and grid settings applied identically to the x and y axes.
spike_axis_style = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'showline': True,
    'showgrid': True,
    'spikedash': "dot",
    'spikecolor': "grey",
}

# Raw metric first, then the red EMA line drawn on top of it.
fig = go.Figure(
    data=[
        go.Scatter(
            x=validator_df['block_height'],
            y=validator_df['normalized_amount'],
            mode='lines+markers',
            name='Validator Reward Metric',
            line={'color': validator_color},
            marker={'color': validator_color},
            hovertemplate='Distribution Metric: %{y:.2f}<extra></extra>',
        ),
        go.Scatter(
            x=validator_df['block_height'],
            y=ema_values,
            mode='lines',
            name='EMA',
            line={'color': 'red', 'dash': 'solid', 'width': 2},
            hovertemplate='EMA: %{y:.2f}<extra></extra>',
        ),
    ]
)

# Fixed-size figure with unified hover, spike lines and a log-scaled y axis.
fig.update_layout(
    title="Validator Reward Distribution Metric over Block Heights",
    xaxis_title="Block Height",
    yaxis_title="Distribution Metric",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    yaxis_type="log",
    xaxis=dict(spike_axis_style),
    yaxis=dict(spike_axis_style),
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
        'namelength': -1,
    },
)

fig.show()

In [None]:
import plotly.graph_objects as go

# Epoch-ordered view of new_ema_scores_df, bound to new_ema_score_df.
new_ema_score_df = new_ema_scores_df.sort_values(by='epoch')

# Exponentially weighted mean of active_participants with smoothing factor 0.1.
alpha = 0.1
ema_values = new_ema_score_df['active_participants'].ewm(alpha=alpha).mean()

# Viridis bright yellow, used for both the line and its markers.
active_inferers_color = '#fde725'

# Spike-line and grid settings applied identically to the x and y axes.
spike_axis_style = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'showline': True,
    'showgrid': True,
    'spikedash': "dot",
    'spikecolor': "grey",
}

# Raw per-epoch counts first, then the red EMA line drawn on top.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_ema_score_df['epoch'],
            y=new_ema_score_df['active_participants'],
            mode='lines+markers',
            name='Active Inferers',
            line={'color': active_inferers_color},
            marker={'color': active_inferers_color},
            hovertemplate='Active Inferers: %{y}<extra></extra>',
        ),
        go.Scatter(
            x=new_ema_score_df['epoch'],
            y=ema_values,
            mode='lines',
            name='EMA',
            line={'color': 'red', 'dash': 'solid', 'width': 2},
            hovertemplate='EMA: %{y:.2f}<extra></extra>',
        ),
    ]
)

# Fixed-size figure with unified hover and spike lines on both axes.
fig.update_layout(
    title="Active Inferers Over Time",
    xaxis_title="Epoch",
    yaxis_title="Number of Active Inferers",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(spike_axis_style),
    yaxis=dict(spike_axis_style),
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
        'namelength': -1,
    },
)

fig.show()

In [None]:
import plotly.graph_objects as go

# Plot the number of new inferer addresses per epoch together with its
# exponentially weighted moving average (EMA).

# Sort DataFrame by epoch
new_ema_score_df = new_ema_score_df.sort_values(by='epoch')

# Calculate EMA with alpha = 0.1
alpha = 0.1
ema_values = new_ema_score_df['new_addresses'].ewm(alpha=alpha).mean()

# Create the plot
fig = go.Figure()

# Define color for the New Inferers line
new_inferers_color = '#21918c'  # Teal from Viridis

# Plot New Inferers line
fig.add_trace(go.Scatter(
    x=new_ema_score_df['epoch'],
    y=new_ema_score_df['new_addresses'],
    mode='lines+markers',
    name='New Inferers',
    line=dict(color=new_inferers_color),
    marker=dict(color=new_inferers_color),
    hovertemplate='New Inferers: %{y}<extra></extra>'
))

# Add EMA line with red color.
# The EMA is plotted unscaled so that the trace named 'EMA' (and its hover
# value) is the actual smoothed series and shares the y axis of the raw counts.
fig.add_trace(go.Scatter(
    x=new_ema_score_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA: %{y:.2f}<extra></extra>'
))

# Update the layout: fixed size, unified hover and spike lines on both axes
fig.update_layout(
    title="New Inferers Over Time",
    xaxis_title="Epoch",
    yaxis_title="Number of New Inferers",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        showline=True,
        showgrid=True,
        spikedash="dot",
        spikecolor="grey"
    ),
    yaxis=dict(
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        showline=True,
        showgrid=True,
        spikedash="dot",
        spikecolor="grey"
    ),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1
    )
)

# Show the plot
fig.show()

In [None]:
import plotly.graph_objects as go
import numpy as np

# Plot the inferer lifetime metric, log10(active / (new + 0.01)), per epoch
# together with its exponentially weighted moving average (EMA).

# Sort DataFrame by epoch
new_ema_score_df = new_ema_score_df.sort_values(by='epoch')

# Calculate the lifetime metric (log10 of the ratio).
# The 1e-2 offset keeps the denominator away from zero when new_addresses is 0.
# A zero numerator still yields log10(0) = -inf, so numpy's divide/invalid
# warnings are silenced here and non-finite results are masked just below.
with np.errstate(divide='ignore', invalid='ignore'):
    lifetime_metric = np.log10(
        new_ema_score_df['active_participants'] / (new_ema_score_df['new_addresses'] + 1e-2)
    )

# Replace +/-inf with NaN: a single infinite value would otherwise make the
# exponentially weighted mean non-finite for every subsequent epoch, whereas
# NaN entries are skipped by ewm() and only leave a gap in the raw trace.
lifetime_metric = lifetime_metric.replace([np.inf, -np.inf], np.nan)

# Calculate EMA with alpha = 0.1
alpha = 0.1
ema_values = lifetime_metric.ewm(alpha=alpha).mean()

# Create the plot
fig = go.Figure()

# Define color for the Lifetime Metric line
metric_color = '#440154'  # Deep purple from Viridis

# Plot Lifetime Metric line
fig.add_trace(go.Scatter(
    x=new_ema_score_df['epoch'],
    y=lifetime_metric,
    mode='lines+markers',
    name='Inferer Lifetime',
    line=dict(color=metric_color),
    marker=dict(color=metric_color),
    hovertemplate='Lifetime Metric: %{y:.2f}<extra></extra>'
))

# Add EMA line with red color
fig.add_trace(go.Scatter(
    x=new_ema_score_df['epoch'],
    y=ema_values,
    mode='lines',
    name='EMA',
    line=dict(color='red', dash='solid', width=2),
    hovertemplate='EMA: %{y:.2f}<extra></extra>'
))

# Update the layout: fixed size, unified hover and spike lines on both axes
fig.update_layout(
    title="Inferer Lifetime Metric Over Time",
    xaxis_title="Epoch",
    yaxis_title="log10(Active / (New + 0.01))",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        showline=True,
        showgrid=True,
        spikedash="dot",
        spikecolor="grey"
    ),
    yaxis=dict(
        showspikes=True,
        spikemode='across',
        spikesnap='cursor',
        spikethickness=1,
        showline=True,
        showgrid=True,
        spikedash="dot",
        spikecolor="grey"
    ),
    hoverlabel=dict(
        bgcolor="rgba(255, 255, 255, 0.7)",
        bordercolor="rgba(0, 0, 0, 0)",
        font_size=12,
        font_family="Arial",
        namelength=-1
    )
)

# Show the plot
fig.show()

In [None]:
import plotly.graph_objects as go
import numpy as np

# Re-sort the score frame by epoch so the traces are drawn in epoch order.
new_ema_score_df = new_ema_score_df.sort_values(by='epoch')

# Sortition time metric: log10 of total participants over (new addresses + 0.01).
sortition_metric = np.log10(
    new_ema_score_df['total_participants'] / (new_ema_score_df['new_addresses'] + 1e-2)
)

# Exponentially weighted mean of the metric with smoothing factor 0.1.
alpha = 0.1
ema_values = sortition_metric.ewm(alpha=alpha).mean()

# Viridis blue-purple, used for both the line and its markers.
metric_color = '#3b528b'

# Spike-line and grid settings applied identically to the x and y axes.
spike_axis_style = {
    'showspikes': True,
    'spikemode': 'across',
    'spikesnap': 'cursor',
    'spikethickness': 1,
    'showline': True,
    'showgrid': True,
    'spikedash': "dot",
    'spikecolor': "grey",
}

# Raw metric first, then the red EMA line drawn on top of it.
fig = go.Figure(
    data=[
        go.Scatter(
            x=new_ema_score_df['epoch'],
            y=sortition_metric,
            mode='lines+markers',
            name='Sortition Time',
            line={'color': metric_color},
            marker={'color': metric_color},
            hovertemplate='Sortition Time Metric: %{y:.2f}<extra></extra>',
        ),
        go.Scatter(
            x=new_ema_score_df['epoch'],
            y=ema_values,
            mode='lines',
            name='EMA',
            line={'color': 'red', 'dash': 'solid', 'width': 2},
            hovertemplate='EMA: %{y:.2f}<extra></extra>',
        ),
    ]
)

# Fixed-size figure with unified hover and spike lines on both axes.
fig.update_layout(
    title="Sortition Time Metric Over Time",
    xaxis_title="Epoch",
    yaxis_title="log10(Total / (New + 0.01))",
    height=500,
    width=800,
    template="plotly_white",
    hovermode="x unified",
    spikedistance=-1,
    xaxis=dict(spike_axis_style),
    yaxis=dict(spike_axis_style),
    hoverlabel={
        'bgcolor': "rgba(255, 255, 255, 0.7)",
        'bordercolor': "rgba(0, 0, 0, 0)",
        'font_size': 12,
        'font_family': "Arial",
        'namelength': -1,
    },
)

fig.show()