In [4]:
import pandas as pd 
import pickle
import time
import py2neo
import urllib3
import os.path
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
def query_database(query):
    """Run a Cypher query against the Neo4j bitcoin graph and return the result.

    Connection settings may be overridden with the environment variables
    BITCOIN_DB_URI, BITCOIN_DB_USER and BITCOIN_DB_PASSWORD so that
    credentials do not have to live in the notebook. When a variable is not
    set, the value originally hard-coded here is used, so existing runs keep
    working unchanged.

    Parameters
    ----------
    query : str
        Cypher query text passed straight to ``Graph.run``.

    Returns
    -------
    Whatever ``py2neo.Graph.run`` returns; callers in this notebook call
    ``.to_data_frame()`` on it.
    """
    # REMEMBER TO BE CONNECTED TO IMPERIAL WIFI!
    # FIXME: the fallback user/password below are secrets committed to source;
    # rotate them and supply the real values via the environment instead.
    uri = os.environ.get("BITCOIN_DB_URI", "https://dsi-bitcoin.doc.ic.ac.uk:7473/db/data/")
    user = os.environ.get("BITCOIN_DB_USER", "adi")
    password = os.environ.get("BITCOIN_DB_PASSWORD", "aditi123")
    graph_db = py2neo.Graph(uri, auth=(user, password))
    return graph_db.run(query)

def get_block_data(blk):
    """Build (but do not run) the Cypher query for the non-coinbase data of one block.

    The query matches transactions mined in the block whose height equals
    ``blk``, together with each input's unlocking address, the output that
    input spends, and the transaction's outputs with their locked addresses.
    It returns the input/output addresses, the input/output values and the
    node IDs of the spent output, the input, the transaction and the output.

    Parameters
    ----------
    blk : block height, substituted into the query text with ``str.format``.

    Returns
    -------
    str : the formatted Cypher query.
    """
    cypher_template = """
                    MATCH (b:Block) <-[:MINED_IN]- (t:Tx) <-[:IN]- (txi:TxIn) <-[:UNLOCK]- (iadr:Address)
                    WHERE b.height = {}
                    MATCH (txi) <-[:SPENT]- (txo_in:TxOut) 
                    MATCH (oadr:Address) <-[:LOCK]- (txo_out:TxOut) <-[:OUT]- (t)
                    
                    RETURN iadr.address as iadr, oadr.address as oadr, txo_in.value as input_val, txo_out.value as output_val, ID(txo_in) as id_txo_in, ID(txi) as id_txi, ID(t) as id_t, ID(txo_out) as id_txo_out
                    """
    return cypher_template.format(blk)

def get_coinbase(blk):
    """Build (but do not run) the Cypher query for the coinbase outputs of one block.

    The query matches transactions mined in the block at height ``blk`` that
    have a ``CBscript`` node as input, and returns for each of their outputs
    the locked address, the output value and the node IDs of the coinbase
    script (aliased ``id_txi``), the transaction and the output.

    Parameters
    ----------
    blk : block height, substituted into the query text with ``str.format``.

    Returns
    -------
    str : the formatted Cypher query.
    """
    cypher_template = """
                    MATCH (b:Block) <-[:MINED_IN]- (t:Tx)<-[:IN]-(cb:CBscript)
                    MATCH (oadr:Address) <-[:LOCK]- (txo_out:TxOut) <-[:OUT]- (t)
                    WHERE b.height = {}
                    RETURN                       oadr.address as oadr,                            txo_out.value as output_val,                          ID(cb) as id_txi, ID(t) as id_t, ID(txo_out) as id_txo_out             
                    """
    return cypher_template.format(blk)

def check_block(first_block, last_block):
    """Build (but do not run) a Cypher query returning the Block nodes in a height range.

    Both bounds are inclusive: the query keeps blocks whose height is
    ``>= first_block`` and ``<= last_block``.

    Returns
    -------
    str : the formatted Cypher query.
    """
    cypher_template = """ 
                    MATCH (b:Block) 
                    WHERE b.height >= {} AND b.height <= {}
                    RETURN b
                    """
    return cypher_template.format(first_block, last_block)

def write_to_csv(result,string):
    """Dump a query result to ``<string>.csv``, or report that it was empty.

    Parameters
    ----------
    result : object exposing ``to_data_frame()`` (as the query results used
        in this notebook do).
    string : file path without the ``.csv`` extension.

    The file is written as UTF-8 without the index column. When the frame is
    empty nothing is written and a message is printed instead.
    """
    frame = result.to_data_frame()

    if not frame.empty:
        frame.to_csv('{}.csv'.format(string), encoding='utf-8', index=False)
        return

    print("Something went wrong, there is no data for this/these blocks")
        
def seen_before_out(block, adr):
    """Tell whether ``adr`` locks any output in a block below height ``block``.

    Runs one ``LIMIT 1`` query through ``query_database`` looking for an
    output locked to the address in a transaction mined at a height strictly
    less than ``block``.

    Returns
    -------
    bool : True when the query yields at least one row, False otherwise.
    """
    cypher = """
                    MATCH (a:Address) <-[:LOCK]- (to:TxOut) <-[:OUT]- (t1:Tx)-[:MINED_IN]->(b1:Block)
                    WHERE a.address = "{adr}" AND b1.height<{block}
                    RETURN b1.height LIMIT 1
                    """.format(block=block, adr=adr)

    # A non-empty frame means an earlier appearance exists.
    return not query_database(cypher).to_data_frame().empty

def seen_before_in(block, adr):
    """Tell whether ``adr`` unlocks any input in a block below height ``block``.

    Runs one ``LIMIT 1`` query through ``query_database`` looking for an
    input unlocked by the address in a transaction mined at a height strictly
    less than ``block``.

    Returns
    -------
    bool : True when the query yields at least one row, False otherwise.
    """
    cypher = """
                    MATCH (a:Address) -[:UNLOCK]-> (ti:TxIn) -[:IN]-> (t2:Tx)-[:MINED_IN]->(b2:Block)
                    WHERE a.address = "{adr}" AND b2.height<{block}
                    RETURN b2.height LIMIT 1
                    """.format(block=block, adr=adr)
    # A non-empty frame means an earlier appearance exists.
    return not query_database(cypher).to_data_frame().empty

#temptime = time.time()
#starttime = time.time()

def all_block_data(block):
    """Return the transaction DataFrame for one block, cached on disk as a pickle.

    If ``../pickles/df/<block>.pickle`` exists it is loaded and returned.
    Otherwise the regular input/output rows (``get_block_data``) and the
    coinbase rows (``get_coinbase``) are queried, the coinbase rows are padded
    with placeholder input columns (``iadr='0'``, ``id_txo_in=0``,
    ``input_val=0``) and appended, and the combined frame is pickled to that
    path before being returned.

    Parameters
    ----------
    block : block height, used both in the queries and in the cache file name.

    Returns
    -------
    pandas.DataFrame with a fresh 0..n-1 index; it may be empty.
    """
    cache_path = "../pickles/df/{}.pickle".format(block)
    if os.path.exists(cache_path):
        print('df already exists')
        df = pd.read_pickle(cache_path)
        print('Got all block data')
        return df

    df = query_database(get_block_data(block)).to_data_frame()

    #Append coinbase txs to dataframe
    df_cb = query_database(get_coinbase(block)).to_data_frame()
    # Positional insert raises when the frame has fewer columns than the
    # target position, so only pad and append when coinbase rows came back.
    if not df_cb.empty:
        df_cb.insert(0, 'iadr', '0')
        df_cb.insert(3, 'id_txo_in', 0)
        df_cb.insert(5, 'input_val', 0)
        # DataFrame.append was removed in pandas 2.0; concat is the supported
        # equivalent, and ignore_index=True replaces reset_index + drop.
        df = pd.concat([df, df_cb], ignore_index=True)

    df.to_pickle(cache_path)
    print('Got all block data')
    return df
    #print(block, " processed in ", time.time()-temptime)
    #temptime = time.time()  
#print("Total time to process everything: {}".format(time.time()-starttime))


# Inclusive range of block heights scanned by the loop below.
FIRST_BLOCK = 420000
LAST_BLOCK = 426565

initaltime = time.time()
for block in range(FIRST_BLOCK, LAST_BLOCK + 1):
    starttime = time.time()
    print(block)

    # Skip blocks whose result pickle already exists so the scan can be resumed.
    otc_path = '../pickles/otc/otc_{}.pickle'.format(block)
    if os.path.exists(otc_path):
        print('otc already exists')
        continue

    df = all_block_data(block)

    if df.empty:
        print("Block {} contained nothing".format(block))
        continue

    # Distinct output addresses of this block (value_counts drops NaN).
    addresses = list(df.oadr.value_counts().index)

    # Keep the addresses that never appeared, as an input or as an output,
    # in any block below this one. The input check runs first, and the output
    # check is only issued when it fails, as in the original if/elif chain.
    not_seen = []
    for adr in addresses:
        if not (seen_before_in(block, adr) or seen_before_out(block, adr)):
            not_seen.append(adr)
    print('otc done')

    with open('../pickles/otc/out.txt', 'a') as f:
        print("Total time to process block {}: {}".format(block, time.time()-starttime), file=f)

    with open(otc_path, 'wb') as f:
        pickle.dump(not_seen, f)

with open('../pickles/otc/out.txt', 'a') as f:
    print("Total time to process everything: {}".format(time.time()-initaltime), file=f)

416565
Got all block data
otc done
416566
Got all block data
otc done
416567
Got all block data
otc done
416568
Got all block data
otc done
416569
Got all block data
otc done
416570
Got all block data
otc done
416571
Got all block data
otc done
416572
Got all block data
otc done
416573
Got all block data
otc done
416574
Got all block data
otc done
416575
Got all block data
otc done
416576
Got all block data
otc done
416577
Got all block data
otc done
416578
Got all block data
otc done
416579
Got all block data
otc done
416580
Got all block data
otc done
416581
Got all block data
otc done
416582
Got all block data
otc done
416583
Got all block data
otc done
416584
Got all block data
otc done
416585
Got all block data
otc done
416586
Got all block data
otc done
416587
Got all block data
otc done
416588
Got all block data
otc done
416589
Got all block data
otc done
416590
Got all block data
otc done
416591
Got all block data
otc done
416592
Got all block data
otc done
416593
Got all block

Got all block data
otc done
416800
Got all block data
otc done
416801
Got all block data
otc done
416802
Got all block data
otc done
416803
Got all block data
otc done
416804
Got all block data
otc done
416805
Got all block data
otc done
416806
Got all block data
otc done
416807
Got all block data
otc done
416808
Got all block data
otc done
416809
Got all block data
otc done
416810
Got all block data
otc done
416811
Got all block data
otc done
416812
Got all block data
otc done
416813
Got all block data
otc done
416814
Got all block data
otc done
416815
Got all block data
otc done
416816
Got all block data
otc done
416817
Got all block data
otc done
416818
Got all block data
otc done
416819
Got all block data
otc done
416820
Got all block data
otc done
416821
Got all block data
otc done
416822
Got all block data


In [None]:
# myNotebook = "./pickle_to_df.ipynb"

# %run $myNotebook

In [None]:
# addresses1 = list(df.oadr.value_counts()[df.oadr.value_counts()>1].index)
# addresses2 = list(df.oadr.value_counts()[df.oadr.value_counts()==1].index)

In [None]:
# not_seen = [adr for adr in addresses1[:5] if not seen_before_in(block, adr) and not seen_before_out(block, adr)]