# Instructions

- Update the config values (see the Configs section below)
- Run all cells (Use topnav: `Cell`>`Run all`)
- Entity updates in the divergent blocks will be saved to a .csv in the output directory

# Configs

In [None]:
# Investigation config
# NOTE: every <...> placeholder below must be replaced with a real value
# before running; the cell is not valid Python until then.
# Label for this indexer; stored with the fetched results and used in the CSV filename
indexer_name = '<MY_INDEXER_NAME>'
# Subgraph name; used in the CSV filename
subgraph_name = '<SUBGRAPH_NAME>'
# Deployment ID; used to look up the deployment's schema in public.deployment_schemas
subgraph_id = '<DEPLOYMENT_ID>'
# Chain name; used to look up the chain's namespace in public.chains
chain_name = 'mainnet'
# Block numbers to inspect
divergent_blocks = [<BLOCK_1>, <BLOCK_2>, ...]
# Directory the CSV is written to (keep the trailing '/')
output_dir = '../outputs/'

In [2]:
# Connection settings for the local indexer's Postgres database.
# Replace the <...> placeholders; the engine itself is created in a later cell.
POSTGRES_ADDRESS = 'localhost'
POSTGRES_PORT = '5432'
POSTGRES_USERNAME = '<USERNAME>' 
POSTGRES_PASSWORD = '<PASSWORD>' 
POSTGRES_DBNAME = '<DB>' 

# Setup

In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
# Import data processing and database libs
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [5]:
# Create connection to local indexer DB
from urllib.parse import quote

# Percent-encode the credentials so that characters such as '@', ':' or '/'
# in the username/password cannot be mistaken for URL delimiters.
postgres_str = 'postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(
    username=quote(POSTGRES_USERNAME, safe=''),
    password=quote(POSTGRES_PASSWORD, safe=''),
    ipaddress=POSTGRES_ADDRESS,
    port=POSTGRES_PORT,
    dbname=POSTGRES_DBNAME,
)

indexer_local_db_cnx = create_engine(postgres_str)

# Fetch entity updates

## Setup Fetch Functions

In [6]:
def fetch_entity_updates_in_block(indexer, db_cnx, divergent_block, deployment_id):
    """Collect the entity rows whose block range starts at ``divergent_block``.

    The deployment's schema name is looked up in ``public.deployment_schemas``;
    every table of that schema except ``poi2$`` is then queried for rows where
    ``lower(block_range)`` equals the block.

    Args:
        indexer: Label stored in the ``Indexer`` column of the result.
        db_cnx: Connection/engine passed to ``pandas.read_sql``.
        divergent_block: Block number to inspect (must be convertible to int).
        deployment_id: Deployment ID matched against the ``subgraph`` column
            of ``public.deployment_schemas``.

    Returns:
        DataFrame with columns ``Entity``, ``Updates``, ``Block`` and
        ``Indexer``; one row per table that has matching rows, with
        ``Updates`` holding those rows as a list of dicts.

    Raises:
        IndexError: If no schema is registered for ``deployment_id``.
    """
    schema = pd.read_sql(
        sql="SELECT name FROM public.deployment_schemas WHERE subgraph = '%s'" % deployment_id,
        con=db_cnx,
    )
    if schema.empty:
        raise IndexError('no deployment schema found for subgraph %r' % (deployment_id,))
    schema_name = schema.iat[0, 0]

    entity_tables_query = "SELECT table_name FROM information_schema.tables WHERE table_schema = '%s'" % schema_name
    entity_tables = pd.read_sql(sql=entity_tables_query, con=db_cnx)
    # The proof-of-indexing table is excluded from the entity tables.
    entity_tables = entity_tables[entity_tables['table_name'] != 'poi2$']['table_name']

    # Coerce once so that only a plain integer is ever spliced into the SQL.
    block_number = int(divergent_block)

    rows = []
    for table in entity_tables:
        changes_in_divergent_block = pd.read_sql(
            sql="SELECT * FROM %s.%s where lower(block_range)=%s" % (schema_name, table, block_number),
            con=db_cnx,
        )
        if len(changes_in_divergent_block) > 0:
            rows.append((
                table,
                changes_in_divergent_block.to_dict(orient='records'),
                divergent_block,
                indexer,
            ))
    return pd.DataFrame(rows, columns=['Entity', 'Updates', 'Block', 'Indexer'])

def fetch_entity_updates_for_blocks(indexer, db_cnx, divergent_blocks, deployment_id):
    """Fetch the entity updates for several blocks and stack them in one frame.

    Args:
        indexer: Label stored with every row of the result.
        db_cnx: Connection/engine passed on to the per-block fetch.
        divergent_blocks: Iterable of block numbers to inspect.
        deployment_id: Deployment ID passed on to the per-block fetch.

    Returns:
        The per-block frames from ``fetch_entity_updates_in_block``
        concatenated in block order (each frame keeps its own index). When
        ``divergent_blocks`` is empty, an empty frame with the same
        ``Entity``/``Updates``/``Block``/``Indexer`` columns is returned.
    """
    frames = [
        fetch_entity_updates_in_block(indexer, db_cnx, block, deployment_id)
        for block in divergent_blocks
    ]
    if not frames:
        # pd.concat raises on an empty list; return an empty result instead.
        return pd.DataFrame(columns=['Entity', 'Updates', 'Block', 'Indexer'])
    return pd.concat(frames, sort=False)

def fetch_eth_call_cached_results_for_blocks(indexer, db_cnx, divergent_blocks, chain, deployment_id):
    """Fetch the cached call results recorded for the given blocks.

    The chain's namespace is looked up in ``public.chains`` and its
    ``call_cache`` table is filtered by ``block_number``.

    Args:
        indexer: Label written to the ``indexer`` column of the result.
        db_cnx: Connection/engine passed to ``pandas.read_sql``.
        divergent_blocks: Iterable of block numbers (each convertible to int).
        chain: Chain name matched against ``public.chains.name``.
        deployment_id: Accepted for signature compatibility; not used here.

    Returns:
        DataFrame with columns ``id``, ``return_value``, ``contract_address``,
        ``block_number`` and ``indexer``. It is empty when
        ``divergent_blocks`` is empty.

    Raises:
        IndexError: If ``chain`` has no entry in ``public.chains``.
    """
    schema = pd.read_sql(
        sql="SELECT namespace FROM public.chains WHERE name = '%s'" % chain,
        con=db_cnx,
    )
    if schema.empty:
        raise IndexError('no namespace found for chain %r' % (chain,))
    schema_name = schema.iat[0, 0]

    # Build the IN list explicitly instead of relying on the repr of a list,
    # so tuples, numpy arrays and single-element inputs all work.
    block_numbers = [int(block) for block in divergent_blocks]
    if not block_numbers:
        # "IN ()" is not valid SQL in Postgres; nothing can match anyway.
        return pd.DataFrame(
            columns=['id', 'return_value', 'contract_address', 'block_number', 'indexer']
        )

    call_cache_query = (
        "SELECT id, return_value, contract_address, block_number "
        "FROM {schema_name}.call_cache WHERE block_number in ({blocks})"
    ).format(
        schema_name=schema_name,
        blocks=', '.join(str(block) for block in block_numbers),
    )

    call_cache_for_divergent_blocks = pd.read_sql(sql=call_cache_query, con=db_cnx)
    call_cache_for_divergent_blocks['indexer'] = indexer
    return call_cache_for_divergent_blocks

## Fetch entity updates from local indexer

In [7]:
# Fetch the entity updates and the cached call results for the configured
# divergent blocks. The config cells must have been run first; otherwise
# names such as indexer_name are undefined.
local_diverge_changes = fetch_entity_updates_for_blocks(indexer_name, indexer_local_db_cnx, divergent_blocks, subgraph_id)
local_diverge_call_cache = fetch_eth_call_cached_results_for_blocks(indexer_name, indexer_local_db_cnx, divergent_blocks, chain_name, subgraph_id)

NameError: name 'indexer_name' is not defined

In [None]:
# Display the fetched entity updates
local_diverge_changes

In [None]:
# Display the fetched call cache rows
local_diverge_call_cache

In [None]:
# Save entity updates to a local csv
import os

# Create the output directory if needed, and join the path so that
# output_dir works with or without a trailing slash.
os.makedirs(output_dir, exist_ok=True)
local_diverge_changes.to_csv(
    os.path.join(output_dir, subgraph_name + '_' + indexer_name + '_entity_updates_in_divergent_blocks.csv')
)

## Compare between two indexers

In [None]:
# Example of comparing two sets of entity updates. In this case the second set
# is fetched again from the same local indexer with the same arguments, so the
# local data is compared against itself. To compare against another indexer,
# fetch the second set from that indexer's DB connection instead.
local_diverge_changes2 = fetch_entity_updates_for_blocks(indexer_name, indexer_local_db_cnx, divergent_blocks, subgraph_id)
# DataFrame.compare returns only the cells that differ between the two frames
diffs = local_diverge_changes.compare(local_diverge_changes2)
# Number of rows that contain at least one difference
len(diffs)