# Poland 2D Analysis
Step through the SEG-Y file and inspect its textual header, binary file header, and trace headers ahead of further analysis.

In [1]:
# standard
import os
from pathlib import Path
import textwrap

# third party
# import numpy

# local
from poland2d_context import segytools


In [2]:
# SEG-Y layout constants shared by the cells below (all sizes in bytes).
TEXTHEADERLENGTH = 3200
FILEHEADERLENGTH = 400
TRCHEADERLENGTH = 240
ENDIANESS = 'big'
TEXTENCODE = 'ebcdic'

# The textual header is displayed as fixed-width rows of this many characters.
TEXTCARDWIDTH = 80
# Python codec used for each supported TEXTENCODE value.
TEXTCODECS = {'ebcdic': 'cp500', 'ascii': 'utf-8'}

segyfile = "data/Line_001.sgy"
pathsegyfile = Path(segyfile)
assert pathsegyfile.is_file(), f'SEG-Y file not found: {pathsegyfile}'

segyfilesize = os.path.getsize(segyfile)

# Not used by the cells visible here; presumably filled in further down -- TODO confirm.
bsgyout = bytes()
hdr4df = {}

# Fail loudly on an unsupported encoding instead of printing an empty header.
if TEXTENCODE not in TEXTCODECS:
    raise ValueError(
        f'unsupported TEXTENCODE {TEXTENCODE!r}; expected one of {sorted(TEXTCODECS)}'
    )

# 'rb' is "read bytes"
with open(segyfile, 'rb') as fobj:
    # read the first 3200 bytes.
    # This will always be 3200 byte textual file header
    b_text_header = fobj.read(TEXTHEADERLENGTH)
    if len(b_text_header) != TEXTHEADERLENGTH:
        raise ValueError(
            f'{segyfile}: expected {TEXTHEADERLENGTH} bytes of textual header, '
            f'got {len(b_text_header)}'
        )

    # Offset of the next unread byte; the following cells continue from here.
    # The `with` block closes the file, so no explicit close() is needed.
    current_location = fobj.tell()

textheader = b_text_header.decode(encoding=TEXTCODECS[TEXTENCODE])

# Slice into fixed-width rows rather than word-wrapping: textwrap.wrap()
# re-flows on whitespace, which drops blank rows and can shift row boundaries
# when a row's last column is not blank.
for card_start in range(0, len(textheader), TEXTCARDWIDTH):
    print(textheader[card_start:card_start + TEXTCARDWIDTH].rstrip())

C 1 CLIENT                        COMPANY                       CREW NO
C 2 LINE:  LINE_001           AREA                        MAP ID
C 3 REEL NO           DAY-START OF REEL     YEAR      OBSERVER
C 4 INSTRUMENT: MFG            MODEL            SERIAL NO
C 5 DATA TRACES/RECORD: 282  AUXILIARY TRACES/RECORD:  2    CDP FOLD
C 6 SAMPLE INTERNAL:  4MS     SAMPLES/TRACE: 750  BITS/IN      BYTES/SAMPLE 4
C 7 RECORDING FORMAT        FORMAT THIS REEL: SEGY   MEASUREMENT SYSTEM
C 8 SAMPLE CODE: FLOATING PT     FIXED PT     FIXED PT-GAIN     CORRELATED
C 9 GAIN  TYPE: FIXED     BINARY     FLOATING POINT     OTHER
C10 FILTERS: ALIAS     HZ  NOTCH     HZ  BAND    -     HZ  SLOPE    -    DB/OCT
C11 SOURCE: TYPE            NUMBER/POINT        POINT INTERVAL
C12     PATTERN:                           LENGTH        WIDTH
C13 SWEEP: START     HZ  END     HZ  LENGTH      MS  CHANNEL NO     TYPE
C14 TAPER: START LENGTH       MS  END LENGTH       MS  TYPE
C15 SPREAD: OFFSET        MAX DISTANCE        G

In [3]:
from segytools.segy_file_header import SegyFileHeaderRev2

# Read and decode the binary file header that follows the textual header.
with open(segyfile, 'rb') as fobj:
    # Seek to a fixed offset instead of the shared `current_location`, which
    # later cells overwrite; this keeps the cell correct when it is re-run.
    fobj.seek(TEXTHEADERLENGTH)
    fileheader = SegyFileHeaderRev2()
    b_file_header = fobj.read(FILEHEADERLENGTH)
    if len(b_file_header) != FILEHEADERLENGTH:
        raise ValueError(
            f'{segyfile}: expected {FILEHEADERLENGTH} bytes of file header, '
            f'got {len(b_file_header)}'
        )
    fileheader.set_file_header_values(bsgy=b_file_header, endianess=ENDIANESS)

    # Offset of the next unread byte; the `with` block closes the file itself.
    current_location = fobj.tell()

print(fileheader)

segy file headers
name: jobid, description: job identification number, start byte: 1, byte length: 4, value: 0
name: lineno, description: line number, start byte: 5, byte length: 4, value: 0
name: reelno, description: reel number, start byte: 9, byte length: 4, value: 0
name: ntrcens, description: number of data traces per ensemble, start byte: 13, byte length: 2, value: -31073
name: ntrcaux, description: number of auxiliary traces per ensemble, start byte: 15, byte length: 2, value: 0
name: smpint, description: sample interval in microseconds, start byte: 17, byte length: 2, value: 2000
name: smpinto, description: sample interval in microseconds or original recording, start byte: 19, byte length: 2, value: 2000
name: numsmp, description: number of samples per data trace, start byte: 21, byte length: 2, value: 1501
name: numsmpo, description: number of samples per data trace or original recording, start byte: 23, byte length: 2, value: 8193
name: dsfmt, description: data sample format 

In [4]:
from segytools.segy_trace_header import SegyTraceHeaderRev2

# Header object for the first trace; later cells read `hdr.numsmp` from it.
hdr = SegyTraceHeaderRev2()

with open(segyfile, 'rb') as fobj:
    # Fixed offset of the first trace header, so re-running this cell does not
    # depend on whatever `current_location` was last set to.
    # NOTE(review): assumes no extended textual headers sit between the file
    # header and the first trace -- the original offset chain assumed the same.
    fobj.seek(TEXTHEADERLENGTH + FILEHEADERLENGTH)
    bsgy = fobj.read(TRCHEADERLENGTH)
    if len(bsgy) != TRCHEADERLENGTH:
        raise ValueError(
            f'{segyfile}: expected {TRCHEADERLENGTH} bytes of trace header, '
            f'got {len(bsgy)}'
        )
    hdr.set_trace_header_values(bsgy=bsgy, endianess=ENDIANESS)

    # Offset of the next unread byte; the `with` block closes the file itself.
    current_location = fobj.tell()

print(hdr)


segy trace header values
name: trseql, description: trace_sequence_number_within_line, start byte: 1, byte length: 4, value: 1
name: trseqf, description: trace_sequence_number_within_segy_file, start byte: 5, byte length: 4, value: 1
name: ffid, description: original_field_record_number, start byte: 9, byte length: 4, value: 231
name: trffid, description: trace_number_within_the_original_field_record, start byte: 13, byte length: 4, value: -1
name: shot, description: energy_source_point_number, start byte: 17, byte length: 4, value: 32
name: ens, description: ensemble_number, start byte: 21, byte length: 4, value: 0
name: trcens, description: trace_number_within_the_ensemble, start byte: 25, byte length: 4, value: 1
name: trid, description: trace_identification_code, start byte: 29, byte length: 2, value: Unknown
name: vsum, description: number_of_vertically_summed_traces_yielding_this_trace, start byte: 31, byte length: 2, value: 0
name: hsum, description: number_of_horizontally_stac

Some SEG-Y files contain auxiliary traces whose number of samples differs from the rest of the data. The trace data length in bytes can therefore vary from trace to trace, so each trace must be read sequentially.

In [5]:
from segytools.toolkit import read_binary_values

# Size in bytes of the first trace's sample block, taken from its own header.
# NOTE(review): the trace loop further down reuses `hdr`, so re-run the trace
# header cell before re-running this one.
TRCDATALENGTH = hdr.numsmp.value * fileheader.bytes_per_sample()

with open(segyfile, 'rb') as fobj:
    # Fixed offset of the first trace's samples, so the cell can be re-run
    # without depending on the shared `current_location`.
    fobj.seek(TEXTHEADERLENGTH + FILEHEADERLENGTH + TRCHEADERLENGTH)
    bsgy = fobj.read(TRCDATALENGTH)
    if len(bsgy) != TRCDATALENGTH:
        raise ValueError(
            f'{segyfile}: expected {TRCDATALENGTH} bytes of trace data, '
            f'got {len(bsgy)}'
        )

    # Decode as many samples as were read: the buffer is sized from the trace
    # header, so the count comes from the trace header too, not the file header.
    # NOTE(review): assumes the second argument is the number of values -- confirm.
    trace_values = read_binary_values(bsgy, hdr.numsmp.value, fileheader.ctype(), ENDIANESS)
    # tracenp = numpy.asarray(trace_values, dtype=numpy.float32, order='C')

    # Offset of the next unread byte; the `with` block closes the file itself.
    current_location = fobj.tell()


In [6]:
# Walk the remaining traces (the first one was read above), decoding each
# trace header and skipping over that trace's samples.
trace_counter = 2

with open(segyfile, 'rb') as fobj:
    # Start just past the first trace. An explicit offset lets the cell be
    # re-run; `current_location` points at end-of-file after a previous pass.
    # NOTE(review): relies on TRCDATALENGTH still describing the first trace.
    fobj.seek(TEXTHEADERLENGTH + FILEHEADERLENGTH + TRCHEADERLENGTH + TRCDATALENGTH)

    while fobj.tell() < segyfilesize:
        print(f'reading trace {trace_counter}')
        bsgy = fobj.read(TRCHEADERLENGTH)
        if len(bsgy) < TRCHEADERLENGTH:
            # Trailing bytes too short to be a trace header: stop cleanly.
            print(f'truncated trace header at trace {trace_counter}; stopping.')
            break
        hdr.set_trace_header_values(bsgy=bsgy, endianess=ENDIANESS)
        if hdr.numsmp.value != fileheader.numsmp.value:
            print('inconsistent number of samples in file.')
        # Skip this trace's own sample block. A fixed length would leave every
        # later header read at the wrong offset once a trace differs in size.
        trace_data_length = hdr.numsmp.value * fileheader.bytes_per_sample()
        bsgy = fobj.read(trace_data_length)
        trace_counter += 1

    # Offset of the next unread byte; the `with` block closes the file itself.
    current_location = fobj.tell()

reading trace 2
reading trace 3
reading trace 4
reading trace 5
reading trace 6
reading trace 7
reading trace 8
reading trace 9
reading trace 10
reading trace 11
reading trace 12
reading trace 13
reading trace 14
reading trace 15
reading trace 16
reading trace 17
reading trace 18
reading trace 19
reading trace 20
reading trace 21
reading trace 22
reading trace 23
reading trace 24
reading trace 25
reading trace 26
reading trace 27
reading trace 28
reading trace 29
reading trace 30
reading trace 31
reading trace 32
reading trace 33
reading trace 34
reading trace 35
reading trace 36
reading trace 37
reading trace 38
reading trace 39
reading trace 40
reading trace 41
reading trace 42
reading trace 43
reading trace 44
reading trace 45
reading trace 46
reading trace 47
reading trace 48
reading trace 49
reading trace 50
reading trace 51
reading trace 52
reading trace 53
reading trace 54
reading trace 55
reading trace 56
reading trace 57
reading trace 58
reading trace 59
reading trace 60
readi