In [2]:
import numpy as np
from numba import vectorize
from struct import unpack
import cupy as cp
import pandas as pd

In [24]:
class ReadBinHdr(object):
    """Decode selected fields of a 400-byte SEG-Y binary file header.

    Every field is read as a big-endian signed integer from a fixed byte
    range of ``bh`` and stored as an instance attribute of the same name
    as listed in ``_FIELDS``.

    Parameters
    ----------
    bh : bytes
        Raw binary header; must be exactly 400 bytes long.

    Raises
    ------
    ValueError
        If ``bh`` is not 400 bytes long. Failing here avoids handing back
        an object without any attributes, which would only break later on
        the first attribute access or ``str()`` call.
    """

    # (attribute name, struct format, start offset, end offset)
    _FIELDS = (
        ("jobid", ">i", 0, 4),
        ("line", ">i", 4, 8),
        ("reel", ">i", 8, 12),
        ("numtrcens", ">h", 12, 14),
        ("numauxtrcs", ">h", 14, 16),
        ("sampint", ">h", 16, 18),
        ("sampint2", ">h", 18, 20),
        ("samppertrc", ">h", 20, 22),
        ("samppertrc2", ">h", 22, 24),
        ("datasampcode", ">h", 24, 26),
        ("ensfold", ">h", 26, 28),
        ("sortcode", ">h", 28, 30),
        ("dis_units", ">h", 54, 56),
        ("segyformat", ">h", 300, 302),
        ("lengthflag", ">h", 302, 304),
        ("numexthdrs", ">h", 304, 306),
    )

    # (display label, attribute name) in the order used by __str__
    _LABELS = (
        ("Job ID", "jobid"),
        ("Line Number", "line"),
        ("Reel Number", "reel"),
        ("Number of traces per ensemble", "numtrcens"),
        ("Number of Aux Traces", "numauxtrcs"),
        ("Sample interval", "sampint"),
        ("Field Sample Interval", "sampint2"),
        ("Sample per trace", "samppertrc"),
        ("Field Samples per Trace", "samppertrc2"),
        ("Sample Format", "datasampcode"),
        ("Ensemble Fold", "ensfold"),
        ("Trace Sorting Code", "sortcode"),
        ("Measurement units(1-Meters 2-Feet)", "dis_units"),
        ("SEGY Format", "segyformat"),
        ("Length Flag(0-Variable 1-Fixed)", "lengthflag"),
        ("Number of Extended Text", "numexthdrs"),
    )

    def __init__(self, bh):
        if len(bh) != 400:
            raise ValueError(
                "Binary header should be 400 bytes long, got {}".format(len(bh))
            )
        for name, fmt, start, stop in self._FIELDS:
            setattr(self, name, unpack(fmt, bh[start:stop])[0])
        # Keep only the high-order byte of the 16-bit format word
        # (presumably the major revision number -- confirm against the spec).
        self.segyformat //= 256

    def __str__(self):
        """Return one ``label : value`` line per decoded field."""
        # Built with join() so that source indentation cannot leak into the
        # text, as it did with backslash continuations inside the literal.
        return "\n".join(
            "{} : {}".format(label, getattr(self, name))
            for label, name in self._LABELS
        )
    
class ReadTrcHdr(object):
    """Decode inline/crossline numbers and source coordinates from a 240-byte trace header.

    Each field is read as a big-endian signed 32-bit integer.

    Parameters
    ----------
    bh : bytes
        Raw trace header; must be exactly 240 bytes long.

    Raises
    ------
    ValueError
        If ``bh`` is not 240 bytes long.
    """

    def __init__(self, bh):
        if len(bh) != 240:
            raise ValueError(
                "Trace Header should be 240 bytes long, got {}".format(len(bh))
            )
        # NOTE(review): these byte ranges look specific to the file being read
        # (inline at 220:224, crossline at 20:24) -- confirm before reusing
        # the class on other SEG-Y files.
        self.inline = unpack(">i", bh[220:224])[0]
        self.xline = unpack(">i", bh[20:24])[0]
        self.sou_x = unpack(">i", bh[72:76])[0]
        self.sou_y = unpack(">i", bh[76:80])[0]

    def __str__(self):
        """Return a banner-delimited listing of the four decoded fields."""
        # The template lists exactly the attributes set in __init__; the
        # earlier ten-placeholder template could never be filled from them.
        return (
            "##########################START################################"
            "\nInline : {}"
            "\nCrossline : {}"
            "\nSource X : {}"
            "\nSource Y : {}"
            "\n##############################END###########################"
        ).format(self.inline, self.xline, self.sou_x, self.sou_y)

# CUDA kernel that decodes 32-bit IBM hexadecimal floating-point words into
# float32 values, one thread per element. The words are read as plain
# `unsigned int`, so the caller must pass them in the device's native byte order.
ibm2ieee = cp.RawKernel(r'''
extern "C" __global__
void ibm2ieee(const unsigned int* x1, float* y) {
    // There is no bounds check: the launch configuration must supply exactly
    // as many threads as there are elements in x1 / y.
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    unsigned int x = x1[tid];
    if (x == 0) {
        y[tid] = 0.0f;
        return;
    }
    // Word layout: 1 sign bit | 7-bit base-16 exponent (excess 64) | 24-bit fraction.
    double sign = (x >> 31) ? -1.0 : 1.0;
    int exponent = (int)((x >> 24) & 0x7F);
    int mantissa = (int)(x & 0x00ffffff);
    // value = sign * (mantissa / 2^24) * 16^(exponent - 64)
    //       = sign * mantissa * 2^(4 * (exponent - 64) - 24)
    // ldexp applies the power of two directly. Building it with `1 << n`
    // on a 32-bit int overflows once |n| reaches 31, i.e. for most of the
    // representable exponent range.
    y[tid] = (float)(sign * ldexp((double)mantissa, 4 * (exponent - 64) - 24));
}
''', 'ibm2ieee')

@vectorize(['float32(uint32)'])
def ibmtoieee(data):
    """Decode one 32-bit IBM hexadecimal float word (compiled element-wise by numba)."""
    if data != 0:
        # Bit 31 is the sign, bits 24-30 the excess-64 base-16 exponent,
        # bits 0-23 the fraction scaled by 2**24.
        negative = (data >> 31) & 0x01
        hex_exponent = (data >> 24) & 0x7f
        fraction = (data & 0x00ffffff) / 16777216.0
        return (1 - 2 * negative) * fraction * 16.0 ** (hex_exponent - 64)
    return 0.0

# regular (pure) Python version
def ibmpython(data):
    """Decode one 32-bit IBM hexadecimal float word into a Python float.

    Parameters
    ----------
    data : int or integer-like scalar
        The raw 32-bit word. It is coerced with ``int()`` first, so NumPy
        unsigned scalars are handled too; left as fixed-width unsigned
        values, ``1 - 2 * sign`` and ``exponent - 64`` could wrap around.

    Returns
    -------
    float
        ``(-1)**sign * (fraction / 2**24) * 16**(exponent - 64)``;
        ``0.0`` for an all-zero word.
    """
    word = int(data)
    if word == 0:
        return 0.0
    sign = (word >> 31) & 0x01
    exponent = (word >> 24) & 0x7f
    mantissa = (word & 0x00ffffff) / float(1 << 24)
    return (1 - 2 * sign) * mantissa * pow(16.0, exponent - 64)
# numpy version
def ibm32numpy(data):
    """Decode 32-bit IBM hexadecimal float words with NumPy array operations.

    Parameters
    ----------
    data : int or array-like of integers
        Raw 32-bit words in any integer dtype or byte order.

    Returns
    -------
    numpy.float64 or numpy.ndarray of float64
        Decoded values, with the same shape as ``data``.
    """
    # Widen to signed 64-bit before any arithmetic. On uint32 input,
    # `exponent - 64` wraps around for exponents below 64, and multiplying
    # by a negative Python int is rejected by newer NumPy releases.
    words = np.asarray(data).astype(np.int64)
    sign = 1 - 2 * ((words >> 31) & 0x01)
    exponent = (words >> 24) & 0x7F
    frac = (words & 0x00FFFFFF) / float(1 << 24)
    return sign * frac * np.power(16.0, exponent - 64)

In [15]:
hdrs = []  # one dict of parsed header fields per trace
trcs = []  # raw, undecoded sample bytes per trace
with open("Kerry3D.segy","rb") as f:
    EBCDIC = f.read(3200)  # leading 3200-byte block, kept raw (not decoded here)
    BIN = ReadBinHdr(f.read(400))
    samples_per_trace = BIN.samppertrc
    sample_interval = BIN.sampint
    if samples_per_trace <= 0:
        # A negative size would make f.read() consume the rest of the file,
        # and zero would yield only empty traces.
        raise ValueError(
            "Invalid samples per trace in binary header: {}".format(samples_per_trace)
        )
    # Assumes a 4-byte sample format; BIN.datasampcode is not checked here.
    trace_nbytes = samples_per_trace * 4
    idx = 1  # 1-based number of the trace currently being read
    print("Starting loop....")
    while True:
        tmp = f.read(240)
        if not tmp:
            print("End scanning .......")
            break
        trc = f.read(trace_nbytes)
        if len(tmp) != 240 or len(trc) != trace_nbytes:
            # read() returns fewer bytes than requested only at end of file,
            # so a short header or short trace means the file is truncated.
            print("Trace {} is truncated; stopping scan".format(idx))
            break
        hdr = ReadTrcHdr(tmp).__dict__
        hdrs.append(hdr)
        trcs.append(trc)
        idx += 1

Starting loop....
End scanning .......


In [17]:
# Spot-check: show the parsed header fields of the trace at index 10000
hdrs[10000]

{'inline': 523, 'xline': 503, 'sou_x': 1703290, 'sou_y': 5593917}

In [21]:
# Raw, still-undecoded sample bytes of trace 10000; input for the timing cells below
data = trcs[10000]

In [25]:
# Pure-Python baseline: np.vectorize calls ibmpython once per big-endian 32-bit word
%timeit np.vectorize(ibmpython)(np.frombuffer(data,dtype='>u4'))

414 µs ± 1.63 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [26]:
#numpy version
%timeit np.vectorize(ibm32numpy)(np.frombuffer(data,dtype='>u4'))

16.3 ms ± 52.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [30]:
# numba @vectorize version, applied directly to the array of big-endian 32-bit words
%timeit ibmtoieee(np.frombuffer(data,dtype='>u4'))

4.24 µs ± 2.31 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [29]:
x1 = cp.array(np.frombuffer(data,dtype='>u4'),dtype=cp.uint32)
arr = cp.zeros(x1.size, dtype=cp.float32)
%timeit ibm2ieee((x1.size,), (1,), (x1, arr))

4.1 µs ± 12.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
