In [None]:
# Uncomment the wget line below to download the file if it is not already present

#!wget http://s3.amazonaws.com/open.source.geoscience/open_data/newzealand/Taranaiki_Basin/Keri_3D/Kerry3D.segy

In [1]:
import numpy as np
from numba import vectorize, cuda
from struct import unpack
from ibm2ieee import ibm2float32 as i2f
import segyio
from segyio import _segyio
import cupy as cp
%config Completer.use_jedi = False

In [2]:
class ReadBinHdr(object):
    """Decode selected big-endian fields of a 400-byte binary file header.

    ``bh`` is the raw header buffer. When it is exactly 400 bytes long, the
    decoded integers are stored as instance attributes; for any other length
    a message is printed and the instance is left without those attributes.
    """

    def __init__(self, bh):
        if len(bh) == 400:
            # Bytes 0-30: three signed 32-bit ints followed by nine signed 16-bit ints.
            (self.jobid, self.line, self.reel,
             self.numtrcens, self.numauxtrcs,
             self.sampint, self.sampint2,
             self.samppertrc, self.samppertrc2,
             self.datasampcode, self.ensfold,
             self.sortcode) = unpack(">3i9h", bh[0:30])
            (self.dis_units,) = unpack(">h", bh[54:56])
            # Bytes 300-306: three consecutive signed 16-bit ints.
            format_raw, length_flag, ext_hdr_count = unpack(">3h", bh[300:306])
            # Floor-divide by 256 so only the high byte of the field is kept.
            self.segyformat = format_raw // 256
            self.lengthflag = length_flag
            self.numexthdrs = ext_hdr_count
        else:
            print("Binary header should be 400 bytes long")
    
class ReadTrcHdr(object):
    """Decode four big-endian signed 32-bit fields from a 240-byte trace header.

    ``bh`` is the raw header buffer. For any length other than 240 a message
    is printed and no attributes are set, so ``__dict__`` stays empty.
    """

    def __init__(self, bh):
        if len(bh) != 240:
            print("Trace Header should be 240 bytes long")
            return
        # Attribute order matters: callers read the fields back through __dict__.
        (self.inline,) = unpack(">i", bh[220:224])
        (self.xline,) = unpack(">i", bh[20:24])
        # The two source coordinates sit next to each other at bytes 72-80.
        self.sou_x, self.sou_y = unpack(">2i", bh[72:80])

In [3]:
%%time
# Scan the SEG-Y file once, collecting every trace header (as a dict) and the
# matching raw, still-undecoded sample bytes.
hdrs = []
trcs = []
with open("../Kerry3D.segy","rb") as f:
    # The first 3200 bytes are kept verbatim; the next 400 bytes are decoded by ReadBinHdr.
    EBCDIC = f.read(3200)
    BIN = ReadBinHdr(f.read(400))
    samples_per_trace = BIN.samppertrc
    sample_interval = BIN.sampint
    idx=1  # running counter; not read anywhere in this cell
    print("Starting loop....")
    while True:
        tmp = f.read(240)
        if not tmp:
            # An empty read means the end of the file was reached.
            print("End scanning .......")
            break
        hdr = ReadTrcHdr(tmp).__dict__
        # Every sample is assumed to occupy 4 bytes.
        trc = f.read(samples_per_trace*4)
        # Samples stay as raw bytes here; conversion to floats is done (and timed) in later cells.
        hdrs.append(hdr)
        trcs.append(trc)
        idx+=1
data = trcs[10000] # raw bytes of trace index 10000, the sample input for later cells
data[:100]

Starting loop....
End scanning .......
CPU times: user 486 ms, sys: 475 ms, total: 961 ms
Wall time: 965 ms


b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbf\x94\xb9_\xc0\x1b\xe2\xc2\xc0J\\\xb0\xc0f?r\xc0x\xd6\x9d\xc0\x8bm\xc9\xc0\x94\xb9_\xc0\x94\xb9_\xc0S\xa8F@\x8bm\xc9A\x1d\xa0\xeeA%.XA\x12\x97,\xc0.y\xee\xc0\x1b\xe2\xc2@\x82"3\x00\x00\x00\x00\xc1\x10DF\xc1\x18fj'

In [4]:
# CUDA kernel that converts each 32-bit IBM hexadecimal float word in x1 into
# an IEEE float32 in y, one thread per element.
# The kernel takes no length argument and does no bounds check, so the launch
# must cover exactly the number of elements (grid * block == array length), or
# both buffers must be padded up to the launch size.
ibm32cupy = cp.RawKernel(r'''
extern "C" __global__
void ibm32cupy(const unsigned int* x1, float* y) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    unsigned int x = x1[tid];
    if (x == 0) {
        y[tid] = 0.0f;
        return;
    }
    // Word layout: 1 sign bit, 7-bit excess-64 base-16 exponent, 24-bit fraction.
    int exponent = (int)((x >> 24) & 0x7F) - 64;
    int mantissa = (int)(x & 0x00FFFFFF);
    // value = mantissa / 2^24 * 16^exponent = mantissa * 2^(4*exponent - 24).
    // ldexp() applies the power of two exactly; an integer shift (1 << n)
    // overflows a 32-bit int as soon as |4*exponent| reaches 32.
    double magnitude = ldexp((double)mantissa, 4 * exponent - 24);
    y[tid] = (float)((x >> 31) ? -magnitude : magnitude);
}
''', 'ibm32cupy')
# Numba ufunc applying the same IBM -> IEEE conversion element-wise
@vectorize(['float32(uint32)'])
def ibmpy_vec(data):
    """Convert one 32-bit IBM hexadecimal float word to its numeric value.

    The word is split into a sign bit (bit 31), a 7-bit excess-64 base-16
    exponent (bits 24-30) and a 24-bit fraction (bits 0-23). An all-zero word
    returns 0.0.
    """
    if data == 0:
        return 0.0
    sign_bit = (data >> 31) & 0x01
    biased_exp = (data >> 24) & 0x7f
    # 16777216.0 == 2**24: scales the 24-bit integer fraction into [0, 1).
    fraction = (data & 0x00ffffff) / 16777216.0
    sign_factor = 1 - 2 * sign_bit  # +1 for a clear sign bit, -1 when it is set
    scale = pow(16.0, biased_exp - 64)
    return sign_factor * fraction * scale

In [5]:
# Baseline: time ibm2float32 (imported as i2f) from the PyPI package ibm2ieee,
# https://pypi.org/project/ibm2ieee/, on one trace's raw bytes viewed as big-endian uint32.
%timeit i2f(np.frombuffer(data,dtype=">u4"))

3.69 µs ± 9.51 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [46]:
import dask
import timeit

In [38]:
def timing_func(trcs, n):
    t4 = %timeit -o -q -n 100 -r 5 for k in range(10000,10001+n):i2f(np.frombuffer(trcs[k],dtype=">u4"))
    return (n, t4.average,t4.stdev)

In [39]:
# Quick check: time the conversion of traces 10000..10010 (n = 10).
timing_func(trcs,10)

(10, 5.1623046019813045e-05, 3.3368964168316334e-06)

In [16]:
def timing_func2(trcs, n):
    """Return (n, average, stdev) for converting traces 10000 through 10000 + n.

    This performs exactly the measurement of timing_func, so it delegates to
    it instead of carrying a second copy of the same %timeit line that could
    silently drift apart from the first.
    """
    return timing_func(trcs, n)

In [17]:
# Quick check of timing_func2 with the same n = 10 input.
timing_func2(trcs,10)

(10, 5.100327399122762e-05, 1.9426434629144976e-06)

In [18]:
%%time
# Serial sweep over n = 0, 10, ..., 90: one (n, average, stdev) tuple per n.
l = [timing_func2(trcs, n) for n in range(0, 100, 10)]
l

CPU times: user 1.11 s, sys: 61 µs, total: 1.11 s
Wall time: 1.11 s


[(0, 4.109667999728117e-06, 1.19279900516599e-07),
 (10, 5.604132800363004e-05, 6.507285848887355e-06),
 (20, 9.714140599680831e-05, 1.5223671533501416e-06),
 (30, 0.0001455323999907705, 5.082251690361012e-07),
 (40, 0.000194221670004481, 5.159439568808434e-07),
 (50, 0.00024249170999974013, 1.738680552089854e-07),
 (60, 0.00029178346000844615, 5.208036628264847e-07),
 (70, 0.0003409520099958172, 3.146046081269603e-07),
 (80, 0.00038915052600350464, 1.1935072699181988e-06),
 (90, 0.0004394023120112251, 9.206297848943163e-07)]

In [47]:
%%time
# Same sweep as the serial cell, but each timing_func call is wrapped in
# dask.delayed and all of them are evaluated together by dask.compute.
# NOTE(review): presumably the calls then run concurrently and compete for the
# CPU while %timeit is measuring; the recorded results are far noisier than
# the serial ones. Confirm before comparing the two.
ll = []
for n in range(0,100,10):
    t = dask.delayed(timing_func)(trcs,n)
    ll.append(t)
ll = dask.compute(*ll)  # ll is rebound from the list of delayed calls to the tuple of results

CPU times: user 19.6 s, sys: 551 ms, total: 20.2 s
Wall time: 19.5 s


In [48]:
# Display the (n, average, stdev) tuples returned by the dask run.
ll

((0, 0.001117686279991176, 0.0005116520075115981),
 (10, 0.0018797069839929462, 0.0010612570041337195),
 (20, 0.0012848549460031792, 0.0011178622141942032),
 (30, 0.0011519734460161998, 0.0005489304761720399),
 (40, 0.00020253404400136788, 0.00033088393246989326),
 (50, 0.0014421922259934944, 0.0010465412346804565),
 (60, 0.0019709020699665417, 0.0016022977690459024),
 (70, 0.0010238773320015754, 0.0008288810339876246),
 (80, 0.002405309168010717, 0.0016191677743844647),
 (90, 0.0004707289199868682, 0.0009216995544221529))

In [14]:
times_for_i2f=[]
def ff4(trcs,n):
    for i in range(10000,10001+n):
        i2f(np.frombuffer(trcs[i],dtype=">u4"))
for n in range(0,10000,10):
    tim3 = %timeit -o -q -n 100 -r 7 ff4(trcs,n)
    times_for_i2f.append((n, tim3.average,tim3.stdev))
    if n % 500 == 0:
        print(n)

0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000
8500
9000
9500


In [38]:
import json  # no visible cell imports json, so a fresh kernel would hit a NameError here

# Write the collected results to results.json.
# NOTE(review): `final` is not defined in any cell shown here; confirm where it
# is built before relying on Restart & Run All.
with open("results.json","w") as jsonfile:
    json.dump(final, jsonfile)