In [1]:
import numpy as np
import pandas as pd
import os
import time
# Working directory for the notebook. The original machine-specific location stays
# the default; set the ORACLE_DATA_DIR environment variable to point the notebook
# at another folder without editing this cell.
DATA_DIR = os.environ.get('ORACLE_DATA_DIR', 'C:\\Users\\Wetauzer\\Desktop\\Data')
os.chdir(DATA_DIR)

In [2]:
# Read the CSV and order its rows by the block_number column in one step.
tx_df = pd.read_csv('Z:03_22_03_26.csv').sort_values('block_number')

In [3]:
# This is just for selecting subset of blocks. Comment out if you want the whole dataset
num_blocks = 1000
# Keep the rows belonging to the num_blocks lowest block numbers. Slicing the sorted
# unique values (instead of indexing element num_blocks) keeps every row when the data
# holds num_blocks or fewer distinct blocks, so re-running this cell no longer raises
# IndexError.
first_blocks = np.sort(tx_df.block_number.unique())[:num_blocks]
tx_df = tx_df[tx_df.block_number.isin(first_blocks)]

In [4]:
# NOTE: tx_df is overwritten here, so running this cell a second time divides
# gas_price by 1e9 again.
tx_df = (
    tx_df
    # the old index becomes a regular column, which the selection below discards
    .reset_index()
    # keep only the columns used by the analysis
    .loc[:, lambda frame: frame.columns.intersection(['block_number','hash', 'gas_price'])]
    # convert the prices to gwei
    .assign(gas_price=lambda frame: frame['gas_price'] / 1e9)
    # getting rid of zero gas price transactions
    .loc[lambda frame: frame['gas_price'] != 0]
)

The function below acts like a gas "price oracle": for each block it reports gas-price statistics computed over a trailing window of N blocks (the block itself plus the N - 1 blocks before it). By default it returns the minimum, the maximum, and the 50th, 75th, and 90th percentiles, since those are what oracles seem to use. The `window` argument sets how many blocks the window spans; the default is N = 10 blocks.

In [5]:
def oracle_prices(tx_df, return_blk = False, low = 50, medium = 75, high = 90, window = 10):
    """Compute gas-price statistics over a trailing window of blocks.

    The distinct ``block_number`` values of ``tx_df`` are taken in ascending order.
    For every block that has at least ``window - 1`` earlier blocks, the gas prices
    of all transactions in that block and in the ``window - 1`` blocks before it are
    pooled, and the pooled minimum, three percentiles and maximum are recorded.
    Only blocks that actually appear in ``tx_df`` count towards the window.

    Parameters
    ----------
    tx_df : pandas.DataFrame
        Transactions with at least ``block_number`` and ``gas_price`` columns.
        Rows do not need to be sorted.
    return_blk : bool, default False
        If True, return one row per block; otherwise return one row per
        transaction with the block's statistics attached.
    low, medium, high : number, default 50, 75, 90
        Percentiles (on a 0-100 scale) reported in the ``low``, ``average`` and
        ``fast`` columns respectively.
    window : int, default 10
        Number of blocks in the window, including the current block.

    Returns
    -------
    pandas.DataFrame
        With ``return_blk=True``: columns ``block_number``, ``minimum``, ``low``,
        ``average``, ``fast``, ``maximum``. Otherwise: ``tx_df`` right-merged onto
        that frame on ``block_number``, so transactions in the first
        ``window - 1`` blocks are dropped. The result is empty when ``tx_df``
        holds fewer than ``window`` distinct blocks.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # Split the gas prices by block once, in ascending block order, instead of
    # re-filtering the whole frame for every block and every statistic.
    blocks = []
    prices_by_block = []
    for blk, blk_prices in tx_df.groupby("block_number")["gas_price"]:
        blocks.append(blk)
        prices_by_block.append(blk_prices)

    quantile_levels = [low / 100, medium / 100, high / 100]
    stats = {'block_number': [], 'minimum': [], 'low': [], 'average': [], 'fast': [], 'maximum': []}

    # `end` is the position of the current block; the loop body never runs when
    # there are fewer than `window` blocks, which yields an empty result.
    for end in range(window - 1, len(blocks)):
        win_prices = pd.concat(prices_by_block[end - window + 1: end + 1])
        q_low, q_medium, q_high = win_prices.quantile(quantile_levels).tolist()
        stats['block_number'].append(blocks[end])
        stats['minimum'].append(win_prices.min())
        stats['low'].append(q_low)
        stats['average'].append(q_medium)
        stats['fast'].append(q_high)
        stats['maximum'].append(win_prices.max())

    blk_oracle_df = pd.DataFrame(stats)
    # Keep the merge key's dtype aligned with tx_df even when no block qualified
    # (an empty list would otherwise produce an object-dtype column).
    blk_oracle_df['block_number'] = blk_oracle_df['block_number'].astype(tx_df['block_number'].dtype)

    if return_blk:
        return blk_oracle_df
    # The right merge keeps only transactions whose block has oracle values.
    return pd.merge(tx_df, blk_oracle_df, how="right", on=["block_number"])

In [6]:
# Transaction-level view: each transaction joined with the oracle values of its block
# (default return_blk=False), using a 10-block window.
oracle_tx_df = oracle_prices(tx_df, window = 10)
oracle_tx_df

Unnamed: 0,hash,block_number,gas_price,minimum,low,average,fast,maximum
0,0xd8da865116d5c98058edc4d99bac921779c34995b483...,12085263,150.0,1.0,176.000000,190.95,213.0,2000.000000
1,0xe678baf30ce88680c44323bc34dc4dd2292ecec01d54...,12085263,145.0,1.0,176.000000,190.95,213.0,2000.000000
2,0xad8ff40cead03a48b4f13ef7bcd5c9f018b9e4452177...,12085263,193.0,1.0,176.000000,190.95,213.0,2000.000000
3,0xbd2b8898fd3dc2ab2dc63f180d2bb307311edc53ef48...,12085263,150.0,1.0,176.000000,190.95,213.0,2000.000000
4,0x133c1c1a6a0ae8548cbf7ff5952c29a1a20f31a03f60...,12085263,213.0,1.0,176.000000,190.95,213.0,2000.000000
...,...,...,...,...,...,...,...,...
194097,0xc04f2edf2c25db13171622865ba35d58d80046d02c5a...,12086270,165.0,1.0,132.000001,143.00,167.7,1238.336253
194098,0xff76e8917b83913a7958241c1fe56c9fde62db855011...,12086270,128.0,1.0,132.000001,143.00,167.7,1238.336253
194099,0x37f9d0210e710fc9461da105d6a8cf2a411f4164cf85...,12086270,141.9,1.0,132.000001,143.00,167.7,1238.336253
194100,0xf943fa5ab1cb1b50253309c35c0fc90df09a37dcbf62...,12086270,130.9,1.0,132.000001,143.00,167.7,1238.336253


In [7]:
# Block-level view: one row of oracle values per block (return_blk=True),
# using a 10-block window.
oracle_block_df = oracle_prices(tx_df, return_blk = True, window = 10)
oracle_block_df

Unnamed: 0,block_number,minimum,low,average,fast,maximum
0,12085263,1.0,176.000000,190.950,213.0,2000.000000
1,12085264,1.0,175.800000,190.301,213.0,2000.000000
2,12085266,138.0,178.000000,192.600,213.0,2000.000000
3,12085267,138.0,185.650000,207.000,213.0,2000.000000
4,12085268,138.0,182.600000,206.000,213.0,2000.000000
...,...,...,...,...,...,...
982,12086266,1.0,123.000000,141.000,160.0,1238.336253
983,12086267,1.0,123.000000,141.000,160.1,1238.336253
984,12086268,1.0,129.000000,143.000,165.0,1238.336253
985,12086269,1.0,129.470000,143.000,165.0,1238.336253
