In [2]:
import pandas as pd 
import pickle
import time
import py2neo
import urllib3
import os.path
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [3]:
def query_database(query, parameters=None):
    """Run a Cypher query against the DSI bitcoin Neo4j server and return the result.

    Parameters
    ----------
    query : str
        Cypher statement to execute.
    parameters : dict, optional
        Query parameters forwarded to ``py2neo.Graph.run``. Defaults to ``None``,
        which matches the previous single-argument behaviour.

    Returns
    -------
    Whatever ``py2neo.Graph.run`` returns for the query (the callers in this
    notebook call ``.to_data_frame()`` on it).
    """
    # REMEMBER TO BE CONNECTED TO IMPERIAL WIFI!
    # Credentials can be supplied through the NEO4J_USER / NEO4J_PASSWORD
    # environment variables so they do not have to live in the notebook.
    # NOTE(review): the literal fallbacks below are kept only so existing runs
    # keep working; they should be removed (and the password rotated) once the
    # environment variables are set wherever this notebook is executed.
    user = os.environ.get("NEO4J_USER", "adi")
    password = os.environ.get("NEO4J_PASSWORD", "aditi123")
    graph_db = py2neo.Graph("https://dsi-bitcoin.doc.ic.ac.uk:7473/db/data/", auth=(user, password))
    return graph_db.run(query, parameters)

def get_block_data(blk):
    """Build the Cypher query for the non-coinbase transactions of one block.

    The query matches every transaction mined in the block whose height is
    ``blk``, follows its inputs back to the unlocking address and the spent
    output, and follows its outputs to the locked address. It returns the
    input/output addresses, the input/output values and the node ids involved.

    Parameters
    ----------
    blk : value formatted into the ``b.height = ...`` condition.

    Returns
    -------
    str
        The Cypher query text (nothing is executed here).
    """
    cypher_template = """
                    MATCH (b:Block) <-[:MINED_IN]- (t:Tx) <-[:IN]- (txi:TxIn) <-[:UNLOCK]- (iadr:Address)
                    WHERE b.height = {}
                    MATCH (txi) <-[:SPENT]- (txo_in:TxOut) 
                    MATCH (oadr:Address) <-[:LOCK]- (txo_out:TxOut) <-[:OUT]- (t)
                    
                    RETURN iadr.address as iadr, oadr.address as oadr, txo_in.value as input_val, txo_out.value as output_val, ID(txo_in) as id_txo_in, ID(txi) as id_txi, ID(t) as id_t, ID(txo_out) as id_txo_out
                    """
    return cypher_template.format(blk)

def get_coinbase(blk):
    """Build the Cypher query for the coinbase outputs of one block.

    The query matches transactions mined in the block of height ``blk`` that
    have a ``CBscript`` input, and returns each output address and value
    together with the ids of the coinbase script, transaction and output.

    Parameters
    ----------
    blk : value formatted into the ``b.height = ...`` condition.

    Returns
    -------
    str
        The Cypher query text (nothing is executed here).
    """
    cypher_template = """
                    MATCH (b:Block) <-[:MINED_IN]- (t:Tx)<-[:IN]-(cb:CBscript)
                    MATCH (oadr:Address) <-[:LOCK]- (txo_out:TxOut) <-[:OUT]- (t)
                    WHERE b.height = {}
                    RETURN                       oadr.address as oadr,                            txo_out.value as output_val,                          ID(cb) as id_txi, ID(t) as id_t, ID(txo_out) as id_txo_out             
                    """
    return cypher_template.format(blk)

def check_block(first_block, last_block):
    """Build the Cypher query returning every Block node in a height range.

    Parameters
    ----------
    first_block : lower bound formatted into ``b.height >= ...`` (inclusive).
    last_block : upper bound formatted into ``b.height <= ...`` (inclusive).

    Returns
    -------
    str
        The Cypher query text (nothing is executed here).
    """
    cypher_template = """ 
                    MATCH (b:Block) 
                    WHERE b.height >= {} AND b.height <= {}
                    RETURN b
                    """
    return cypher_template.format(first_block, last_block)

def write_to_csv(result,string):
    """Dump a query result to ``<string>.csv``, or report that it was empty.

    Parameters
    ----------
    result : object exposing ``to_data_frame()`` that returns a DataFrame.
    string : file name stem; ``.csv`` is appended to it.

    Returns
    -------
    None. When the DataFrame is empty no file is written and a message is
    printed instead.
    """
    frame = result.to_data_frame()

    if not frame.empty:
        # UTF-8 output without the DataFrame index column.
        frame.to_csv('{}.csv'.format(string), encoding='utf-8', index=False)
        return

    print("Something went wrong, there is no data for this/these blocks")
        
def seen_before_out(block, adr):
    """Tell whether ``adr`` locked a transaction output in a block below ``block``.

    Parameters
    ----------
    block : height formatted into the ``b1.height<...`` condition.
    adr : address string formatted (inside double quotes) into the query.

    Returns
    -------
    bool
        True when the query returns at least one row, False otherwise.
    """
    cypher = """
                    MATCH (a:Address) <-[:LOCK]- (to:TxOut) <-[:OUT]- (t1:Tx)-[:MINED_IN]->(b1:Block)
                    WHERE a.address = "{adr}" AND b1.height<{block}
                    RETURN b1.height LIMIT 1
                    """.format(block=block, adr=adr)

    # LIMIT 1 means a single row is enough to answer the question.
    earlier_hits = query_database(cypher).to_data_frame()
    return not earlier_hits.empty

def seen_before_in(block, adr):
    """Tell whether ``adr`` unlocked a transaction input in a block below ``block``.

    Parameters
    ----------
    block : height formatted into the ``b2.height<...`` condition.
    adr : address string formatted (inside double quotes) into the query.

    Returns
    -------
    bool
        True when the query returns at least one row, False otherwise.
    """
    cypher = """
                    MATCH (a:Address) -[:UNLOCK]-> (ti:TxIn) -[:IN]-> (t2:Tx)-[:MINED_IN]->(b2:Block)
                    WHERE a.address = "{adr}" AND b2.height<{block}
                    RETURN b2.height LIMIT 1
                    """.format(block=block, adr=adr)

    # LIMIT 1 means a single row is enough to answer the question.
    earlier_hits = query_database(cypher).to_data_frame()
    return not earlier_hits.empty

#temptime = time.time()
#starttime = time.time()

def all_block_data(block):
    """Return the transaction rows of one block, coinbase outputs included.

    The result is cached as a pickle under ``../pickles/df/``; when that file
    already exists it is loaded and returned without touching the database.

    Parameters
    ----------
    block : block height, used both in the queries and in the cache file name.

    Returns
    -------
    pandas.DataFrame
        Rows from ``get_block_data`` followed by rows from ``get_coinbase``,
        with a fresh 0..n-1 index. Coinbase rows carry the placeholder values
        ``iadr='0'``, ``id_txo_in=0`` and ``input_val=0``. An empty DataFrame
        is returned (and nothing is cached) when both queries come back empty.
    """
    pickle_path = "../pickles/df/{}.pickle".format(block)

    if os.path.exists(pickle_path):
        print('df already exists')
        df = pd.read_pickle(pickle_path)
        print('Got all block data')
        return df

    df = query_database(get_block_data(block)).to_data_frame()

    # Coinbase transactions have no spending input, so fill the input-side
    # columns with placeholders before combining them with the regular rows.
    df_cb = query_database(get_coinbase(block)).to_data_frame()
    if not df_cb.empty:
        # Guarded because the positional inserts raise IndexError when the
        # coinbase query produced no rows and the frame has too few columns.
        df_cb.insert(0, 'iadr', '0')
        df_cb.insert(3, 'id_txo_in', 0)
        df_cb.insert(5, 'input_val', 0)

    frames = [frame for frame in (df, df_cb) if not frame.empty]
    if not frames:
        # Do not cache an empty result: the block may simply not be in the
        # database yet, and a cached empty pickle would hide it on later runs.
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concat aligns columns by
    # name in the same way, and ignore_index gives the fresh 0..n-1 index.
    df = pd.concat(frames, ignore_index=True)
    df.to_pickle(pickle_path)
    print('Got all block data')
    return df
    #print(block, " processed in ", time.time()-temptime)
    #temptime = time.time()  
#print("Total time to process everything: {}".format(time.time()-starttime))


# Block heights to scan, inclusive on both ends.
FIRST_BLOCK = 420000
LAST_BLOCK = 426565
# Per-block and total timings are appended to this file.
TIMING_LOG = '../pickles/otc/out.txt'

initaltime = time.time()
for block in range(FIRST_BLOCK, LAST_BLOCK + 1):
    starttime = time.time()
    print(block)

    otc_path = '../pickles/otc/otc_{}.pickle'.format(block)
    if os.path.exists(otc_path):
        # Already processed on an earlier run; makes the loop resumable.
        print('otc already exists')
        continue

    df = all_block_data(block)

    if df.empty:
        print("Block {} contained nothing".format(block))
        continue

    # Distinct output addresses of this block.
    addresses = list(df.oadr.value_counts().index)

    # Keep the addresses that appear neither as an input nor as an output in
    # any earlier block. The input check runs first and short-circuits, so the
    # output query is only issued when the input query found nothing.
    not_seen = [
        adr for adr in addresses
        if not (seen_before_in(block, adr) or seen_before_out(block, adr))
    ]
    print('otc done')

    with open(TIMING_LOG, 'a') as f:
        print("Total time to process block {}: {}".format(block, time.time()-starttime), file=f)

    # Write to a temporary name and rename afterwards, so that an interrupted
    # run cannot leave a truncated pickle that the exists-check above would
    # then treat as a finished block.
    partial_path = otc_path + '.tmp'
    with open(partial_path, 'wb') as f:
        pickle.dump(not_seen, f)
    os.replace(partial_path, otc_path)

with open(TIMING_LOG, 'a') as f:
    print("Total time to process everything: {}".format(time.time()-initaltime), file=f)

420000
Got all block data
otc done
420001
Got all block data
otc done
420002
Got all block data
otc done
420003
Got all block data
otc done
420004
Got all block data
otc done
420005
Got all block data
otc done
420006
Got all block data
otc done
420007
Got all block data
otc done
420008
Got all block data
otc done
420009
Got all block data
otc done
420010
Got all block data
otc done
420011
Got all block data
otc done
420012
Got all block data
otc done
420013
Got all block data
otc done
420014
Got all block data
otc done
420015
Got all block data
otc done
420016
Got all block data
otc done
420017
Got all block data
otc done
420018
Got all block data
otc done
420019
Got all block data
otc done
420020
Got all block data
otc done
420021
Got all block data
otc done
420022
Got all block data
otc done
420023
Got all block data
otc done
420024
Got all block data
otc done
420025
Got all block data
otc done
420026
Got all block data
otc done
420027
Got all block data
otc done
420028
Got all block

Got all block data
otc done
420235
Got all block data
otc done
420236
Got all block data
otc done
420237
Got all block data
otc done
420238
Got all block data
otc done
420239
Got all block data
otc done
420240
Got all block data
otc done
420241
Got all block data
otc done
420242
Got all block data
otc done
420243
Got all block data
otc done
420244
Got all block data
otc done
420245
Got all block data
otc done
420246
Got all block data
otc done
420247
Got all block data
otc done
420248
Got all block data
otc done
420249
Got all block data
otc done
420250
Got all block data
otc done
420251
Got all block data
otc done
420252
Got all block data
otc done
420253
Got all block data
otc done
420254
Got all block data
otc done
420255
Got all block data
otc done
420256
Got all block data
otc done
420257
Got all block data
otc done
420258
Got all block data
otc done
420259
Got all block data
otc done
420260
Got all block data
otc done
420261
Got all block data
otc done
420262
Got all block data
o

otc done
420469
Got all block data
otc done
420470
Got all block data
otc done
420471
Got all block data
otc done
420472
Got all block data
otc done
420473
Got all block data
otc done
420474
Got all block data
otc done
420475
Got all block data
otc done
420476
Got all block data
otc done
420477
Got all block data
otc done
420478
Got all block data
otc done
420479
Got all block data
otc done
420480
Got all block data
otc done
420481
Got all block data
otc done
420482
Got all block data
otc done
420483
Got all block data
otc done
420484
Got all block data
otc done
420485
Got all block data
otc done
420486
Got all block data
otc done
420487
Got all block data
otc done
420488
Got all block data
otc done
420489
Got all block data
otc done
420490
Got all block data
otc done
420491
Got all block data
otc done
420492
Got all block data
otc done
420493
Got all block data
otc done
420494
Got all block data
otc done
420495
Got all block data
otc done
420496
Got all block data
otc done
420497
Got 

KeyboardInterrupt: 

In [None]:
# myNotebook = "./pickle_to_df.ipynb"

# %run $myNotebook

In [None]:
# addresses1 = list(df.oadr.value_counts()[df.oadr.value_counts()>1].index)
# addresses2 = list(df.oadr.value_counts()[df.oadr.value_counts()==1].index)

In [None]:
# not_seen = [adr for adr in addresses1[:5] if not seen_before_in(block, adr) and not seen_before_out(block, adr)]