In [None]:
# enable this to download the file if not already present

#!wget http://s3.amazonaws.com/open.source.geoscience/open_data/newzealand/Taranaiki_Basin/Keri_3D/Kerry3D.segy

In [1]:
import numpy as np
from numba import vectorize, cuda
from struct import unpack
from ibm2ieee import ibm2float32 as i2f
import segyio
from segyio import _segyio
import cupy as cp
%config Completer.use_jedi = False

In [2]:
class ReadBinHdr(object):

    ''' Read binary trace header for a trace '''

    def __init__(self, bh):
        if len(bh) != 400:
            print("Binary header should be 400 bytes long")
        else:
            self.jobid = unpack(">i", bh[0:4])[0]
            self.line = unpack(">i", bh[4:8])[0]
            self.reel = unpack(">i", bh[8:12])[0]
            self.numtrcens = unpack(">h", bh[12:14])[0]
            self.numauxtrcs = unpack(">h", bh[14:16])[0]
            self.sampint = unpack(">h", bh[16:18])[0]
            self.sampint2 = unpack(">h", bh[18:20])[0]
            self.samppertrc = unpack(">h", bh[20:22])[0]
            self.samppertrc2 = unpack(">h", bh[22:24])[0]
            self.datasampcode = unpack(">h", bh[24:26])[0]
            self.ensfold = unpack(">h", bh[26:28])[0]
            self.sortcode = unpack(">h", bh[28:30])[0]
            self.dis_units = unpack(">h", bh[54:56])[0]
            self.segyformat = unpack(">h", bh[300:302])[0] // 256
            self.lengthflag = unpack(">h", bh[302:304])[0]
            self.numexthdrs = unpack(">h", bh[304:306])[0]
    
class ReadTrcHdr(object):
    ''' Read Trace header '''
    def __init__(self, bh):
        if len(bh) != 240:
            print("Trace Header should be 240 bytes long")
        else:
            self.inline = unpack(">i", bh[220:224])[0]
            self.xline = unpack(">i", bh[20:24])[0]
            self.sou_x = unpack(">i", bh[72:76])[0]
            self.sou_y = unpack(">i", bh[76:80])[0]

In [3]:
%%time
hdrs = []
trcs = []
with open("../Kerry3D.segy","rb") as f:
    EBCDIC = f.read(3200)
    BIN = ReadBinHdr(f.read(400))
    samples_per_trace = BIN.samppertrc
    sample_interval = BIN.sampint
    idx=1
    print("Starting loop....")
    while True:
        tmp = f.read(240)
        if not tmp:
            print("End scanning .......")
            break
        hdr = ReadTrcHdr(tmp).__dict__
        trc = f.read(samples_per_trace*4)
#         arr = ibmtoieee(np.frombuffer(trc,dtype='>u4'))
        hdrs.append(hdr)
        trcs.append(trc)
        idx+=1
data = trcs[10000] #data to perform analysis on
data[:100]

Starting loop....
End scanning .......
CPU times: user 589 ms, sys: 352 ms, total: 942 ms
Wall time: 943 ms


b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00_\xb9\x94\xbf\xc2\xe2\x1b\xc0\xb0\\J\xc0r?f\xc0\x9d\xd6x\xc0\xc9m\x8b\xc0_\xb9\x94\xc0_\xb9\x94\xc0F\xa8S\xc0\xc9m\x8b@\xee\xa0\x1dAX.%A,\x97\x12A\xeey.\xc0\xc2\xe2\x1b\xc03"\x82@\x00\x00\x00\x00FD\x10\xc1jf\x18\xc1'

In [4]:
# vectorized version of regular python function
@vectorize(['float32(uint32)'],target='cpu')
def numba_py(data):
    if data == 0:
        return 0.0
    sign = data >> 31 & 0x01
    exponent = data >> 24 & 0x7f
    mantissa = (data & 0x00ffffff) / float(pow(2, 24))
    return (1 - 2 * sign) * mantissa * pow(16.0, exponent - 64)

In [5]:
def pure_py(data):
    if data == 0:
        return 0.0
    sign = data >> 31 & 0x01
    exponent = data >> 24 & 0x7f
    mantissa = (data & 0x00ffffff) / float(pow(2, 24))
    return (1 - 2 * sign) * mantissa * pow(16.0, exponent - 64)

In [6]:
pure_py(1605997759)

1.541746956846918e+37

In [7]:
np.frombuffer(trcs[10000],dtype='>u4')[:100]

array([         0,          0,          0,          0,          0,
                0, 1605997759, 3269598144, 2958838464, 1916757696,
       2648078528, 3379399616, 1605997760, 1605997760, 1185436608,
       3379399488, 4003470657, 1479419201,  748098113, 4000919232,
       3269598144,  857899584,          0, 1178865857, 1785075905,
       3816737984, 1479419328, 2215843521, 1640636097, 1916757568,
       1337530945, 2648078400, 3379399616, 4110720704, 2757879872,
        887434561,  603658305,   14225601,  748098241, 2958838336,
       2227517248, 2215843521, 3706936512, 2533173825, 2966493121,
       4140255681, 1785075777, 2230068801, 2337318720, 4147910465,
       3697814209, 2805276353, 1337530945, 2337318848, 2043336000,
        887434561, 3505978048, 3856479425,  437338560, 2374508609,
        126578496, 2227517248, 2757879872,  603658433,  889986497,
       1481970753,  748098113,          0, 1916757568, 4110720704,
        300553409, 3379399488, 1178865729, 3085416896, 2227517

In [8]:
# uses a numba vectorized version of the python function. effectively precompiles to machine code.
numba_py(np.frombuffer(trcs[10000],dtype='>u4'))[:100]
# %timeit -n 100 -r 7 ibmpy_vec(np.frombuffer(data,dtype='>u4'))

  numba_py(np.frombuffer(trcs[10000],dtype='>u4'))[:100]


array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  1.54174696e+37, -2.26108398e+02,
       -1.95435895e-20,             inf, -6.01157041e-43, -2.94060032e+10,
        1.54174708e+37,  1.54174708e+37,  1.10314880e+07, -2.94054789e+10,
                  -inf,  1.42813477e+28,  4.88137711e-25,            -inf,
       -2.26108398e+02,  2.99316033e-17,  0.00000000e+00,  4.46073700e+06,
                   inf,            -inf,  1.42819474e+28, -0.00000000e+00,
                   inf,             inf,  8.33627795e+17, -6.01157041e-43,
       -2.94060032e+10,            -inf, -7.36572107e-35,  3.18046663e-15,
        1.18064180e-35,  0.00000000e+00,  4.88144022e-25, -1.95431759e-20,
       -0.00000000e+00, -0.00000000e+00, -4.93597395e+33, -0.00000000e+00,
       -4.42771203e-20,            -inf,             inf, -0.00000000e+00,
       -0.00000000e+00,            -inf, -2.11275791e+33, -1.63689046e-31,
        8.33627795e+17, -

In [None]:
times_for_numba=[]
def ff5(trcs,n):
    for i in range(10000,10000+n):
        ibmpy_vec(np.frombuffer(trcs[i],dtype='>u4'))
for n in range(1,1000,10):
    tim4 = %timeit -o -n 100 -r 7 ff5(trcs,n)
    times_for_numba.append((n, tim4.average,tim4.stdev))
times_for_numba

In [10]:
ibm32cupy = cp.RawKernel(r'''
extern "C" __global__
void ibm32cupy(const unsigned int* x1, float* y) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    unsigned int x = x1[tid];
    if (x != 0){
        int sign = ((x1[tid] >> 31) & 0x01) * (-2) + 1;
        int exponent = (x1[tid] >> 24) & 0x7F;
        int tmp = 4 * (exponent - 64);
        double p;
        if (tmp < 0) {
            int po2 = 1 << (abs(tmp));
            p = (double)(1.0/po2);
        }
        else{
            p = 1 << tmp;
        }
        int mantissa = x1[tid] & 0x00ffffff;
        float frac = ((float)mantissa / 0x1000000);
        y[tid] = sign * frac * p;
    }
    else{
        y[tid] = 0.0;
    }    
}
''', 'ibm32cupy')

In [11]:
# uses cupy with a raw kernel wriiten in cuda. memory output must be preallocated
x1 = cp.array(np.frombuffer(data,dtype='>u4'),dtype=cp.uint32)
arr = cp.zeros(x1.size, dtype=cp.float32)
%timeit ibm32cupy((x1.size,), (1,), (x1, arr))

2.65 µs ± 27.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [12]:
arr[:100]

array([ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
        0.0000000e+00,  0.0000000e+00,  0.0000000e+00, -2.2610840e+02,
                 -inf,  0.0000000e+00,           -inf, -0.0000000e+00,
        0.0000000e+00,  0.0000000e+00,  1.1031488e+07, -0.0000000e+00,
       -0.0000000e+00,  0.0000000e+00,            inf, -0.0000000e+00,
       -2.2610840e+02,            inf,  0.0000000e+00,  4.4607370e+06,
        0.0000000e+00, -0.0000000e+00,  0.0000000e+00,           -inf,
        0.0000000e+00,  0.0000000e+00,  0.0000000e+00,           -inf,
       -0.0000000e+00, -0.0000000e+00,           -inf,            inf,
                  inf,            inf,            inf,           -inf,
                 -inf,           -inf, -0.0000000e+00,           -inf,
                 -inf, -0.0000000e+00,  0.0000000e+00,           -inf,
                 -inf, -0.0000000e+00, -0.0000000e+00,           -inf,
        0.0000000e+00,           -inf,  0.0000000e+00,            inf,
      

In [14]:
i2f(np.frombuffer(trcs[10000],dtype=">u4"))[:100]

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  1.54174696e+37, -2.26108398e+02,
       -1.95435895e-20,             inf, -6.01157041e-43, -2.94060032e+10,
        1.54174708e+37,  1.54174708e+37,  1.10314880e+07, -2.94054789e+10,
                  -inf,  1.42813477e+28,  4.88137711e-25,            -inf,
       -2.26108398e+02,  2.99316033e-17,  0.00000000e+00,  4.46073700e+06,
                   inf,            -inf,  1.42819474e+28, -0.00000000e+00,
                   inf,             inf,  8.33627795e+17, -6.01157041e-43,
       -2.94060032e+10,            -inf, -7.36572107e-35,  3.18046663e-15,
        1.18064180e-35,  0.00000000e+00,  4.88144022e-25, -1.95431759e-20,
       -0.00000000e+00, -0.00000000e+00, -4.93597395e+33, -0.00000000e+00,
       -4.42771203e-20,            -inf,             inf, -0.00000000e+00,
       -0.00000000e+00,            -inf, -2.11275791e+33, -1.63689046e-31,
        8.33627795e+17, -

In [15]:
def ibmpython(data):
    if data == 0:
        return 0.0
    sign = data >> 31 & 0x01
    exponent = data >> 24 & 0x7f
    mantissa = (data & 0x00ffffff) / float(pow(2, 24))
    return (1 - 2 * sign) * mantissa * pow(16.0, exponent - 64)

In [17]:
np.array([ibmpython(x) for x in np.frombuffer(data,dtype=">u4")],dtype=np.float32)[:100] == i2f(np.frombuffer(trcs[10000],dtype=">u4"))[:100]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [20]:
i2f(np.frombuffer(trcs[10000],dtype=">u4"))[:100] == arr[:100].get()

array([ True,  True,  True,  True,  True,  True, False,  True, False,
       False, False, False, False, False,  True, False, False, False,
       False, False,  True, False,  True,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True,  True, False, False, False, False, False, False, False,
       False])

In [21]:
numba_py(np.frombuffer(trcs[10000],dtype='>u4'))[:100] == i2f(np.frombuffer(trcs[10000],dtype=">u4"))[:100]

  numba_py(np.frombuffer(trcs[10000],dtype='>u4'))[:100] == i2f(np.frombuffer(trcs[10000],dtype=">u4"))[:100]


array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [None]:
final = []
for x in list(zip(times_for_regular_python,times_for_vectorize_python,times_for_segyio,times_for_i2f,times_for_numba,times_for_cupy)):
    tmp = [x[0][0]]
    for y in x:
        tmp.append(y[1])
    final.append(tmp)

In [None]:
import pandas as pd

In [None]:
df = pd.DataFrame(final, columns=["N","python","vec_python","segyio","i2f","numba","cupy"])

In [None]:
import matplotlib.pyplot as plt

In [None]:
cols = df.columns[1:]
for col in cols:
    plt.plot(df["N"],df[col]*1000, label=col)
plt.legend()
plt.xlabel("Number of traces converted")
plt.ylabel("total time (ms)")

In [None]:
cols = df.columns[3:]
for col in cols:
    plt.plot(df["N"],df[col]*1000, label=col)
plt.legend()
plt.xlabel("Number of traces converted")
plt.ylabel("total time (ms)")

In [None]:
df2= df.copy()
for col in df2.columns[1:]:
    df2[col] = np.log10((df2[col])*100000)
for col in df2.columns[3:]:
    plt.plot(df2["N"],df2[col], label=col)
plt.legend()
plt.xlabel("Number of traces converted")
plt.ylabel("Log10 total time (us) ")