In [1]:
!wget http://s3.amazonaws.com/open.source.geoscience/open_data/newzealand/Taranaiki_Basin/Keri_3D/Kerry3D.segy

--2020-12-30 00:33:03--  http://s3.amazonaws.com/open.source.geoscience/open_data/newzealand/Taranaiki_Basin/Keri_3D/Kerry3D.segy
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.245.70
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.245.70|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1107042960 (1.0G) [application/octet-stream]
Saving to: ‘Kerry3D.segy’


2020-12-30 00:33:59 (19.0 MB/s) - ‘Kerry3D.segy’ saved [1107042960/1107042960]



In [2]:
import numpy as np
from numba import vectorize, cuda
from struct import unpack
from ibm2ieee import ibm2float32 as i2f
import segyio
from segyio import _segyio
import cupy as cp
%config Completer.use_jedi = False

In [3]:
class ReadBinHdr(object):

    ''' Read binary trace header for a trace '''

    def __init__(self, bh):
        if len(bh) != 400:
            print("Binary header should be 400 bytes long")
        else:
            self.jobid = unpack(">i", bh[0:4])[0]
            self.line = unpack(">i", bh[4:8])[0]
            self.reel = unpack(">i", bh[8:12])[0]
            self.numtrcens = unpack(">h", bh[12:14])[0]
            self.numauxtrcs = unpack(">h", bh[14:16])[0]
            self.sampint = unpack(">h", bh[16:18])[0]
            self.sampint2 = unpack(">h", bh[18:20])[0]
            self.samppertrc = unpack(">h", bh[20:22])[0]
            self.samppertrc2 = unpack(">h", bh[22:24])[0]
            self.datasampcode = unpack(">h", bh[24:26])[0]
            self.ensfold = unpack(">h", bh[26:28])[0]
            self.sortcode = unpack(">h", bh[28:30])[0]
            self.dis_units = unpack(">h", bh[54:56])[0]
            self.segyformat = unpack(">h", bh[300:302])[0] // 256
            self.lengthflag = unpack(">h", bh[302:304])[0]
            self.numexthdrs = unpack(">h", bh[304:306])[0]
    
class ReadTrcHdr(object):
    ''' Read Trace header '''
    def __init__(self, bh):
        if len(bh) != 240:
            print("Trace Header should be 240 bytes long")
        else:
            self.inline = unpack(">i", bh[220:224])[0]
            self.xline = unpack(">i", bh[20:24])[0]
            self.sou_x = unpack(">i", bh[72:76])[0]
            self.sou_y = unpack(">i", bh[76:80])[0]

In [4]:
%%time
hdrs = []
trcs = []
with open("Kerry3D.segy","rb") as f:
    EBCDIC = f.read(3200)
    BIN = ReadBinHdr(f.read(400))
    samples_per_trace = BIN.samppertrc
    sample_interval = BIN.sampint
    idx=1
    print("Starting loop....")
    while True:
        tmp = f.read(240)
        if not tmp:
            print("End scanning .......")
            break
        hdr = ReadTrcHdr(tmp).__dict__
        trc = f.read(samples_per_trace*4)
#         arr = ibmtoieee(np.frombuffer(trc,dtype='>u4'))
        hdrs.append(hdr)
        trcs.append(trc)
        idx+=1
data = trcs[10000] #data to perform analysis on
data[:100]

Starting loop....
End scanning .......
CPU times: user 542 ms, sys: 413 ms, total: 956 ms
Wall time: 961 ms


b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbf\x94\xb9_\xc0\x1b\xe2\xc2\xc0J\\\xb0\xc0f?r\xc0x\xd6\x9d\xc0\x8bm\xc9\xc0\x94\xb9_\xc0\x94\xb9_\xc0S\xa8F@\x8bm\xc9A\x1d\xa0\xeeA%.XA\x12\x97,\xc0.y\xee\xc0\x1b\xe2\xc2@\x82"3\x00\x00\x00\x00\xc1\x10DF\xc1\x18fj'

In [5]:
# the Cuda Kernel code that runs on every unsigned int in array
ibm32cupy = cp.RawKernel(r'''
extern "C" __global__
void ibm32cupy(const unsigned int* x1, float* y) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    unsigned int x = x1[tid];
    if (x != 0){
        int sign = ((x1[tid] >> 31) & 0x01) * (-2) + 1;
        int exponent = (x1[tid] >> 24) & 0x7F;
        int tmp = 4 * (exponent - 64);
        double p;
        if (tmp < 0) {
            int po2 = 1 << (abs(tmp));
            p = (double)(1.0/po2);
        }
        else{
            p = 1 << tmp;
        }
        int mantissa = x1[tid] & 0x00ffffff;
        float frac = ((float)mantissa / 0x1000000);
        y[tid] = sign * frac * p;
    }
    else{
        y[tid] = 0.0;
    }    
}
''', 'ibm32cupy')
# vectorized version of regular python function
@vectorize(['float32(uint32)'])
def ibmpy_vec(data):
    if data == 0:
        return 0.0
    sign = data >> 31 & 0x01
    exponent = data >> 24 & 0x7f
    mantissa = (data & 0x00ffffff) / float(pow(2, 24))
    return (1 - 2 * sign) * mantissa * pow(16.0, exponent - 64)

#reguar python
def ibmpython(data):
    if data == 0:
        return 0.0
    sign = data >> 31 & 0x01
    exponent = data >> 24 & 0x7f
    mantissa = (data & 0x00ffffff) / float(pow(2, 24))
    return (1 - 2 * sign) * mantissa * pow(16.0, exponent - 64)

In [6]:
# intuitive first attempt of converting. apply a function to every element of an array in loop
%timeit np.array([ibmpython(x) for x in np.frombuffer(data,dtype=">u4")],dtype=np.float32)

5.12 ms ± 16.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
#vectorized version of previous attempt. 
%timeit np.vectorize(ibmpython)(np.frombuffer(data,dtype='>u4'))

304 µs ± 1.15 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
# uses the current industry method of segyio module
offset = 3600 + 10001 * (240 + (samples_per_trace * 4))
d = np.memmap('Kerry3D.segy', offset = offset, dtype = np.uint32)
%timeit segyio.tools.native(d[240:240+samples_per_trace])

8.47 µs ± 54.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
#uses pypi module ibm2ieee which is cython under the hood. https://pypi.org/project/ibm2ieee/
%timeit i2f(np.frombuffer(data,dtype=">u4"))

3.78 µs ± 6.36 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [10]:
# uses a numba vectorized version of the python function. effectively precompiles to machine code.
%timeit ibmpy_vec(np.frombuffer(data,dtype='>u4'))

3.12 µs ± 4.86 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
# uses cupy with a raw kernel wriiten in cuda. memory output must be preallocated
x1 = cp.array(np.frombuffer(data,dtype='>u4'),dtype=cp.uint32)
arr = cp.zeros(x1.size, dtype=cp.float32)
%timeit ibm32cupy((x1.size,), (1,), (x1, arr))

2.73 µs ± 3.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [12]:
ans = np.array([ibmpython(x) for x in np.frombuffer(data,dtype=">u4")],dtype=np.float32)

In [32]:
np.sum((ans - np.vectorize(ibmpython)(np.frombuffer(data,dtype='>u4')))**2)

0.0

In [37]:
np.sum((ans - segyio.tools.native(d[240:240+samples_per_trace],format=1))**2)

1089.5319

In [34]:
np.sum((ans - i2f(np.frombuffer(data,dtype=">u4")))**2)

0.0

In [35]:
np.sum((ans - ibmpy_vec(np.frombuffer(data,dtype='>u4')))**2)

0.0

In [36]:
cp.sum((cp.array(ans,dtype=cp.float32) - arr)**2)

array(0., dtype=float32)

In [46]:
d[239:240+samples_per_trace][:120]

memmap([2915528000, 1553341121, 1847443520, 1716396609, 3260371648,
           7411137, 1038031041, 1502600000, 1038030913, 3787002817,
        3432793152, 1502599999, 2068651201, 1553340993, 1888818241,
        3950058433, 3605214784, 2756382785, 2019865280, 1847443520,
        2411539265, 1140979648, 4120524993, 1502600127, 2915528000,
        1502600128, 2726329152, 3260371648, 3787002817, 3605214912,
        2404128321, 2926849345, 2756382785, 2756382913, 1502600128,
        2919438529, 2381485760, 1201086529, 1390285377, 3787002817,
        2919438529, 3260371648, 1330178368,  858198337, 3432793280,
        1502600128, 3777636672, 3787002689, 3432793280, 3434748353,
        3254915649, 1390285377, 1879452353, 1675021760, 4113113921,
        1502600127, 1888818369, 3787002689, 3434748225,  505944001,
        2567184065, 1502600000,  170466625, 3777636672, 1888818369,
        1502600127, 3260371648,  170466753, 1888818241, 3950058305,
        2567184065, 3260371648, 1038030913, 2915

In [42]:
np.frombuffer(data,dtype=">u4")[:100]

array([         0,          0,          0,          0,          0,
                0, 3214195039, 3223052994, 3226098864, 3227926386,
       3229144733, 3230363081, 3230972255, 3230972255, 3226708038,
       1082879433, 1092460782, 1092955736, 1091737388, 3224271342,
       3223052994, 1082270259,          0, 3239068742, 3239601770,
       3234627299, 3223662168, 3239449476, 3240020577, 1080442738,
       1092270415, 1081661085, 3230363081, 3231581429, 1088971172,
       1093526836, 1092156195, 3239106816, 3239221036, 1078615216,
       1077396868, 3239449476, 3227317212, 1093336470, 3239563696,
       3242838006, 1092118122, 1093069956, 1084706955, 1092041975,
       3240782044, 3239982503, 1092270415, 3232190603, 1087752825,
       1093526836, 3237673168, 3241467365, 3225489690, 1092651149,
       1081051911, 1077396868, 1088971172, 3239639843, 3239906357,
       1091851608, 1091737388,          0, 1080442738, 3231581429,
       3239373329, 1082879433, 1091585094, 3233408951, 3224880