In [1]:
import numpy as np
import pandas as pd
import os
import time
# The current working directory is queried here, but the result is neither
# stored nor displayed (it is not the last expression of the cell).
os.getcwd()
# NOTE(review): hard-coded, machine-specific absolute path. The process working
# directory is changed for the rest of the session, so the notebook only runs
# as-is on a machine where this directory exists.
os.chdir('C:\\Users\\Wetauzer\\Desktop\\Data')

In [2]:
# Build the transaction table in one pass: load, order by block, restrict to
# block numbers min .. min + 999, trim columns, rescale prices, drop zeros.
tx_df = (
    pd.read_csv('Z:03_22_03_26.csv')
    .sort_values('block_number')
    .pipe(lambda df: df[df.block_number.isin(
        range(df.block_number.min(), df.block_number.min() + 1000))])
    .reset_index()
    # keep only the columns used below (this also discards the column that
    # reset_index() just added)
    .pipe(lambda df: df.loc[:, df.columns.intersection(['block_number', 'hash', 'gas_price'])])
    .assign(gas_price=lambda df: df.gas_price / 1e9)  # convert the prices to gwei
    .pipe(lambda df: df[df['gas_price'] != 0])  # getting rid of zero gas price transactions
)

# One row per distinct block number that still has transactions, in order of
# first appearance in tx_df.
block_df = pd.DataFrame({'block_number': tx_df['block_number'].unique()})

The function below acts like a gas "price oracle": for each block it summarizes the gas prices of the transactions in the last N blocks (the block itself included). By default it returns the minimum and maximum along with the 50th, 75th, and 90th percentiles, since those are what oracles seem to use. The `window` argument sets how many blocks to look back over; by default, N = 10 blocks.

In [3]:
def oracle_prices(tx_df, block_df, return_blk = False, low = 50, medium = 75, high = 90, window = 10):
    """Gives min/max and 3 quantile values over the last N blocks.

    For every block in ``block_df`` that has at least ``window - 1`` blocks
    before it, the ``gas_price`` values of all transactions in that block and
    in the ``window - 1`` preceding blocks are pooled and summarized.

    Parameters
    ----------
    tx_df : pandas.DataFrame
        Transactions; must contain ``block_number`` and ``gas_price`` columns.
    block_df : pandas.DataFrame
        Must contain a ``block_number`` column listing the blocks in the order
        in which windows are built (expected to be ascending). Only the values
        are used, not the index; repeated block numbers are ignored.
    return_blk : bool, default False
        If True return one row per block, otherwise one row per transaction.
    low, medium, high : float, default 50, 75, 90
        Percentiles (0-100) reported in the ``low``, ``average`` and ``fast``
        columns respectively.
    window : int, default 10
        Number of consecutive blocks (current block included) in each window.

    Returns
    -------
    pandas.DataFrame
        With ``return_blk=True``: columns ``block_number``, ``minimum``,
        ``low``, ``average``, ``fast``, ``maximum``, one row per block that has
        a full window. Otherwise: ``tx_df`` right-merged onto that table, so
        only transactions in blocks with a full window are kept. If there are
        fewer than ``window`` blocks, the result is empty.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive number of blocks")

    # Work on a plain array so that windows are purely positional and do not
    # depend on how block_df happens to be indexed.
    blocks = block_df['block_number'].drop_duplicates().to_numpy()
    # The first (window - 1) blocks have no full window behind them.
    blx_out = blocks[window - 1:]

    p1 = []
    p2 = []
    p3 = []
    p4 = []
    p5 = []
    for end in range(window, len(blocks) + 1):
        blk_win = blocks[end - window:end]
        # Select the window's prices once and reuse them for every statistic.
        win_prices = tx_df.loc[tx_df['block_number'].isin(blk_win), 'gas_price']
        p1.append(win_prices.min())
        p2.append(win_prices.quantile(low/100))
        p3.append(win_prices.quantile(medium/100))
        p4.append(win_prices.quantile(high/100))
        p5.append(win_prices.max())

    blx_oracle_dict = {'block_number':blx_out, 'minimum':p1, 'low':p2, 'average':p3, 'fast':p4, 'maximum':p5}
    blk_oracle_df = pd.DataFrame(blx_oracle_dict)
    if return_blk:
        return blk_oracle_df
    # Right merge: keep only transactions whose block received oracle values.
    return pd.merge(tx_df, blk_oracle_df, how="right", on=["block_number"])

In [4]:
# Transaction-level view: each transaction in a block that has a full
# 10-block window, joined with that block's oracle values.
oracle_tx_df = oracle_prices(tx_df, block_df, window = 10)
oracle_tx_df

Unnamed: 0,hash,block_number,gas_price,minimum,low,average,fast,maximum
0,0xd8da865116d5c98058edc4d99bac921779c34995b483...,12085263,150.00000,1.000000e+00,176.0,190.95,213.0,2000.0
1,0xe678baf30ce88680c44323bc34dc4dd2292ecec01d54...,12085263,145.00000,1.000000e+00,176.0,190.95,213.0,2000.0
2,0xad8ff40cead03a48b4f13ef7bcd5c9f018b9e4452177...,12085263,193.00000,1.000000e+00,176.0,190.95,213.0,2000.0
3,0xbd2b8898fd3dc2ab2dc63f180d2bb307311edc53ef48...,12085263,150.00000,1.000000e+00,176.0,190.95,213.0,2000.0
4,0x133c1c1a6a0ae8548cbf7ff5952c29a1a20f31a03f60...,12085263,213.00000,1.000000e+00,176.0,190.95,213.0,2000.0
...,...,...,...,...,...,...,...,...
190431,0x680f94b835e32600531720076ccbfc32a15efbd44c1b...,12086253,123.00000,7.000000e-09,126.0,145.00,165.6,500.0
190432,0x04597725f988f259b1a8755e36b4535364a25b3cf283...,12086253,135.30000,7.000000e-09,126.0,145.00,165.6,500.0
190433,0x510c2245b963e0d434fa16d47fcc5ddb74c4f8fa4a16...,12086253,151.80046,7.000000e-09,126.0,145.00,165.6,500.0
190434,0x0b91c0a1ea2aec33ea03d0092b72c3797f5b41982942...,12086253,137.50000,7.000000e-09,126.0,145.00,165.6,500.0


In [5]:
# Block-level view: one row of oracle values per block that has a full
# 10-block window (return_blk = True skips the per-transaction rows).
oracle_block_df = oracle_prices(tx_df, block_df, return_blk = True, window = 10)
oracle_block_df

Unnamed: 0,block_number,minimum,low,average,fast,maximum
0,12085263,1.000000e+00,176.00,190.950,213.0,2000.0
1,12085264,1.000000e+00,175.80,190.301,213.0,2000.0
2,12085266,1.380000e+02,178.00,192.600,213.0,2000.0
3,12085267,1.380000e+02,185.65,207.000,213.0,2000.0
4,12085268,1.380000e+02,182.60,206.000,213.0,2000.0
...,...,...,...,...,...,...
965,12086249,1.000000e-09,121.00,135.000,154.0,500.0
966,12086250,7.000000e-09,120.00,135.795,154.0,500.0
967,12086251,7.000000e-09,119.00,134.200,154.0,500.0
968,12086252,7.000000e-09,120.00,141.900,157.0,500.0
