In [25]:
import os
from datetime import datetime, time, date
from filemanager import file_ext_search as fes
from dataclasses import dataclass, field
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal

In [26]:
@dataclass
class SegyPosFile:
    """Container for the navigation records parsed from one position text file.

    The per-trace lists (``datetime``, ``traceno``, ``cdp_x``, ``cdp_y``,
    ``year``, ``day``, ``hour``, ``minute``, ``second``) are appended to in
    lock-step by ``read_segypos``, one entry per accepted line, so the same
    index refers to the same trace in all of them.
    """
    # File name without directory and extension (set by read_segypos).
    name: str
    # Path of the position file exactly as it was passed to read_segypos.
    path: str
    # Not assigned or read by any function visible in this notebook.
    fin_traceno: int = 0
    # Timestamp strings formatted as '<year>-<day>T<hour>:<minute>:<second>',
    # built from the raw (unparsed) text columns 3-7.
    datetime: list = field(default_factory=list)
    # Trace numbers, parsed as int from column 0.
    traceno: list = field(default_factory=list)
    # X coordinate as read from column 1 (float).
    cdp_x: list = field(default_factory=list)
    # Not read by any function visible in this notebook; presumably meant to
    # hold the filtered x / y coordinates -- confirm against process_track.
    cdp_x_smoothed: list = field(default_factory=list)
    cdp_y_smoothed: list = field(default_factory=list)
    # Final x / y coordinates written by process_track and exported by
    # save_track and save_track_to_radex.
    cdp_x_cartesian_smoothed: list = field(default_factory=list)
    cdp_y_cartesian_smoothed: list = field(default_factory=list)
    # Y coordinate as read from column 2 (float).
    cdp_y: list = field(default_factory=list)
    # Date / time components parsed as int from columns 3-7.
    year: list = field(default_factory=list)
    day: list = field(default_factory=list)
    hour: list = field(default_factory=list)
    minute: list = field(default_factory=list)
    second: list = field(default_factory=list)

In [27]:
def read_segypos(pos_files, finedict, baddict, posobj_list, year, utm_coords=False,
                 x_range=None, y_range=None):
    """Parse position text files into ``SegyPosFile`` objects.

    Each file is read as whitespace-separated columns; the first line is
    skipped. Columns used: 0 trace number, 1 x, 2 y, 3 year, 4 day,
    5 hour, 6 minute, 7 second.

    A line is rejected (counted as bad and not stored) when it has too few
    columns, a column cannot be parsed, the year differs from ``year``, the
    day is outside 0..370, a coordinate is outside its plausibility window,
    or hour/minute/second do not form a valid ``datetime.time``.

    Parameters
    ----------
    pos_files : iterable of str
        Paths of the position files to read.
    finedict : dict
        Updated in place: ``file name -> last accepted trace number``.
    baddict : dict
        Updated in place for files with at least one bad line:
        ``file name -> last trace number accepted before the first bad line``
        (``None`` when the very first data line is bad).
    posobj_list : list
        Updated in place: one ``SegyPosFile`` is appended per file that
        contains at least one accepted line.
    year : int
        Expected value of the year column.
    utm_coords : bool, optional
        Selects the default plausibility windows: projected metres when
        True, degrees when False.
    x_range, y_range : (low, high) tuple, optional
        Inclusive plausibility windows that override the defaults.

    Notes
    -----
    The original range checks were joined with ``and`` (for example
    ``day > 370 and day < 0``) and therefore never rejected anything. They
    are enforced now, so the default limits are active for the first time;
    pass ``x_range`` / ``y_range`` if they do not fit the survey area.
    """
    # Same numeric limits as the original checks, selected once per call.
    if x_range is None:
        x_range = (50000, 500000) if utm_coords else (15.0, 50.0)
    if y_range is None:
        y_range = (200000, 8000000) if utm_coords else (30.0, 70.0)

    for pos_file in pos_files:
        segy_name = os.path.splitext(os.path.basename(pos_file))[0]
        pos_obj = SegyPosFile(name=segy_name, path=pos_file)
        bad_lines = 0

        with open(pos_file, 'r') as file1:
            file_content = file1.read().splitlines()

        for line in file_content[1:]:
            line_content = line.split()

            try:
                # Parse every column before storing anything so that a bad
                # value can never leave the per-trace lists out of step.
                traceno = int(line_content[0])
                cdp_x = float(line_content[1])
                cdp_y = float(line_content[2])
                rec_year = int(line_content[3])
                day = int(line_content[4])
                hour = int(line_content[5])
                minute = int(line_content[6])
                second = int(line_content[7])

                if rec_year != year:
                    raise RuntimeError('BadYear')
                if day > 370 or day < 0:
                    raise RuntimeError('BadDay')
                # "not low <= v <= high" also rejects NaN.
                if not x_range[0] <= cdp_x <= x_range[1]:
                    raise RuntimeError('BadCDP_X')
                if not y_range[0] <= cdp_y <= y_range[1]:
                    raise RuntimeError('BadCDP_Y')

                # Constructed only for its range validation of h/m/s.
                time(hour=hour, minute=minute, second=second)
            except (RuntimeError, ValueError, IndexError):
                bad_lines += 1
                if bad_lines == 1:
                    # Remember where the good data stopped; there may be no
                    # accepted trace yet if the first data line is bad.
                    baddict[segy_name] = pos_obj.traceno[-1] if pos_obj.traceno else None
                continue

            pos_obj.datetime.append(
                f'{line_content[3]}-{line_content[4]}T{line_content[5]}:{line_content[6]}:{line_content[7]}')
            pos_obj.traceno.append(traceno)
            pos_obj.cdp_x.append(cdp_x)
            pos_obj.cdp_y.append(cdp_y)
            pos_obj.year.append(rec_year)
            pos_obj.day.append(day)
            pos_obj.hour.append(hour)
            pos_obj.minute.append(minute)
            pos_obj.second.append(second)

        if bad_lines:
            print(f'Number of bad lines in {segy_name}: {bad_lines}')

        if not pos_obj.traceno:
            # Nothing usable (empty file, header only, or every line bad):
            # skip the file instead of failing on traceno[-1].
            print(f'No valid position lines in {segy_name}, file skipped')
            continue

        finedict[segy_name] = pos_obj.traceno[-1]
        posobj_list.append(pos_obj)

In [33]:
def process_track(pos_objs, transformer, window_length=201, smooth=True, utm_coords=False):
    """Smooth each track and store its final cartesian coordinates.

    For every object in ``pos_objs`` the function fills
    ``cdp_x_cartesian_smoothed`` / ``cdp_y_cartesian_smoothed`` (plain lists).
    When smoothing is applied it also fills ``cdp_x_smoothed`` /
    ``cdp_y_smoothed``.

    Parameters
    ----------
    pos_objs : iterable
        Objects with ``name``, ``cdp_x`` and ``cdp_y`` attributes.
    transformer : object
        Must provide ``transform(xs, ys) -> (xs, ys)``; only used when
        ``utm_coords`` is False.
    window_length : int, optional
        Window of the Savitzky-Golay filter (polynomial order 3).
    smooth : bool, optional
        When falsy the raw coordinates are used without any message.
    utm_coords : bool, optional
        When True the coordinates are taken as already cartesian and the
        transformer is not called.

    Notes
    -----
    A track cannot be smoothed when ``window_length`` is below 10 or larger
    than the number of points in the track (``savgol_filter`` rejects a
    window longer than the data in its default mode). In that case a
    message is printed, ``'_not_smoothed'`` is appended to the object's name
    once, and the raw coordinates are used.
    """
    def _to_list(values):
        # numpy arrays -> list of Python floats; anything else -> shallow copy.
        return values.tolist() if hasattr(values, 'tolist') else list(values)

    for segy_pos_obj in pos_objs:
        n_points = len(segy_pos_obj.cdp_x)
        # Evaluated per file so one short track never affects the others.
        can_smooth = 10 <= window_length <= n_points

        if smooth and can_smooth:
            smoothed_x = signal.savgol_filter(segy_pos_obj.cdp_x, window_length, 3)
            smoothed_y = signal.savgol_filter(segy_pos_obj.cdp_y, window_length, 3)
            # Store under the declared attribute names (the original wrote
            # to the misspelled "cpd_*_smoothed").
            segy_pos_obj.cdp_x_smoothed = smoothed_x.tolist()
            segy_pos_obj.cdp_y_smoothed = smoothed_y.tolist()
            source_x, source_y = smoothed_x, smoothed_y
        else:
            if smooth:
                print(f'Can not smooth file {segy_pos_obj.name}')
                if '_not_smoothed' not in segy_pos_obj.name:
                    segy_pos_obj.name = segy_pos_obj.name + '_not_smoothed'
            source_x, source_y = segy_pos_obj.cdp_x, segy_pos_obj.cdp_y

        if utm_coords:
            cartesian_x, cartesian_y = source_x, source_y
        else:
            cartesian_x, cartesian_y = transformer.transform(source_x, source_y)

        segy_pos_obj.cdp_x_cartesian_smoothed = _to_list(cartesian_x)
        segy_pos_obj.cdp_y_cartesian_smoothed = _to_list(cartesian_y)

def save_track(pos_objs, save_to):
    """Write all tracks into one comma-separated text file.

    Parameters
    ----------
    pos_objs : sequence
        Objects with ``name`` and the per-trace lists ``datetime``,
        ``traceno``, ``cdp_x_cartesian_smoothed``,
        ``cdp_y_cartesian_smoothed``, ``year``, ``day``, ``hour``,
        ``minute`` and ``second``.
    save_to : str
        Path of the output file; it is overwritten.

    The file starts with a header line. Each row carries a running index
    over all files (``num_o``), the index inside the file (``num_i``), the
    file name with ``'_rawpos'`` removed, and the per-trace values. One
    progress line is printed per file.
    """
    total = len(pos_objs)

    with open(save_to, 'w') as file2:
        file2.write('num_o,num_i,name,datetime,traceno,cdp_x,cdp_y,year,day,hour,minute,second\n')
        num_o = 0

        for num_f, segy_pos_obj in enumerate(pos_objs, start=1):
            # Remove the marker itself rather than blindly cutting the last
            # seven characters: the name may carry a further suffix such as
            # '_not_smoothed' after '_rawpos'.
            name = segy_pos_obj.name.replace('_rawpos', '')

            for num_i in range(len(segy_pos_obj.traceno)):
                file2.write(
                    f'{num_o},{num_i},{name},{segy_pos_obj.datetime[num_i]},'
                    f'{segy_pos_obj.traceno[num_i]},'
                    f'{segy_pos_obj.cdp_x_cartesian_smoothed[num_i]},'
                    f'{segy_pos_obj.cdp_y_cartesian_smoothed[num_i]},'
                    f'{segy_pos_obj.year[num_i]},{segy_pos_obj.day[num_i]},'
                    f'{segy_pos_obj.hour[num_i]},{segy_pos_obj.minute[num_i]},'
                    f'{segy_pos_obj.second[num_i]}\n'
                )
                num_o += 1

            print(f'File {segy_pos_obj.name} is done {num_f} of {total}')
            
def save_track_to_radex(pos_objs):
    """Write one tab-separated ``<name>_proc.txt`` file per track.

    Each file is created in the directory of the object's ``path``. The
    base name is the object's ``name`` with ``'_rawpos'`` removed. The file
    has a ``TraceNo / CDPX / CDPY`` header followed by one row per entry of
    ``cdp_x_cartesian_smoothed``.

    Parameters
    ----------
    pos_objs : iterable
        Objects with ``name``, ``path``, ``traceno``,
        ``cdp_x_cartesian_smoothed`` and ``cdp_y_cartesian_smoothed``.
    """
    for pos_obj in pos_objs:
        save_to = os.path.dirname(pos_obj.path)

        # Remove the marker itself rather than cutting the last seven
        # characters: the name may carry a further suffix such as
        # '_not_smoothed' after '_rawpos'.
        name = pos_obj.name.replace('_rawpos', '')

        with open(os.path.join(save_to, name + '_proc.txt'), 'w') as file3:
            file3.write('TraceNo\tCDPX\tCDPY\n')
            for num in range(len(pos_obj.cdp_x_cartesian_smoothed)):
                file3.write(
                    f'{pos_obj.traceno[num]}\t'
                    f'{pos_obj.cdp_x_cartesian_smoothed[num]}\t'
                    f'{pos_obj.cdp_y_cartesian_smoothed[num]}\n'
                )

In [45]:
from pyproj import Proj, CRS, Transformer

crs_wgs84 = CRS.from_epsg(4326)
crs_utm35n = CRS.from_epsg(32635)
crs_utm34n = CRS.from_epsg(32634)

# always_xy=True makes transform() take and return coordinates in (x, y)
# order, matching the (cdp_x, cdp_y) argument order used by process_track.
transformer = Transformer.from_crs(crs_wgs84, crs_utm34n, always_xy=True)

data_path = r'F:\SBP_DATA\ANS47'
# track
save_to = r'F:\SBP_DATA\ANS47_SBP_WGS84UTM34N.txt'

# NOTE(review): this flag used to be set to True while a literal False was
# passed to both calls below, so the flag had no effect. The value that was
# actually executed (False: input is not yet cartesian and gets transformed)
# is kept and now drives both calls -- confirm that this is the intent.
utm_coords = False
year = 2020

pos_files = fes.file_ext_search('.txt', data_path, recursive=True)
print(len(pos_files))

segy_pos_objs = []
bad_data_dict = {}
fine_data_dict = {}

read_segypos(pos_files, fine_data_dict, bad_data_dict, segy_pos_objs, year, utm_coords=utm_coords)
process_track(segy_pos_objs, transformer, window_length=41, utm_coords=utm_coords)
save_track(segy_pos_objs, save_to)
save_track_to_radex(segy_pos_objs)

Searching *.txt files in directory:F:\SBP_DATA\ANS47
504
Number of bad lines in 00000000.400_rawpos: 2
File 00000000.000_rawpos is done 1 of 504
File 00000000.001_rawpos is done 2 of 504
File 00000000.002_rawpos is done 3 of 504
File 00000000.003_rawpos is done 4 of 504
File 00000000.004_rawpos is done 5 of 504
File 00000000.005_rawpos is done 6 of 504
File 00000000.006_rawpos is done 7 of 504
File 00000000.007_rawpos is done 8 of 504
File 00000000.008_rawpos is done 9 of 504
File 00000000.009_rawpos is done 10 of 504
File 00000000.010_rawpos is done 11 of 504
File 00000000.011_rawpos is done 12 of 504
File 00000000.012_rawpos is done 13 of 504
File 00000000.013_rawpos is done 14 of 504
File 00000000.014_rawpos is done 15 of 504
File 00000000.015_rawpos is done 16 of 504
File 00000000.016_rawpos is done 17 of 504
File 00000000.017_rawpos is done 18 of 504
File 00000000.018_rawpos is done 19 of 504
File 00000000.019_rawpos is done 20 of 504
File 00000000.020_rawpos is done 21 of 504
Fil

In [35]:
# Replica
# Appends one tab-separated row per SEG-Y file to the replica table.
# replica_path, sgy_files and data_path are not defined in this cell; they
# must already exist in the kernel when it runs.

# The header is written only when the table does not exist yet.
has_header = os.path.exists(replica_path)
if has_header:
    print('file exists')

with open(replica_path, 'a') as file4:
    if has_header is False:
        file4.write('prof_folder\tfile_name\tfile_name_proc\tprof_name\n')

    for sgy_file in sgy_files:
        base_name = os.path.basename(sgy_file)
        filename = os.path.splitext(base_name)[0]
        filename_proc = filename + '_proc'
        prof_folder = data_path
        # Last path component of the data directory.
        prof_name = os.path.basename(data_path)

        row = f'{prof_folder}\t{filename}\t{filename_proc}\t{prof_name}\n'
        file4.write(row)


Searching *.sgy files in directory:D:\SBP and Seismic\abp49_SBP\abp49_medu_taran_11
file exists


In [None]:
# Sorting files
# Split the tracks by the tag found in their name. 'SLF' is checked first,
# so a name containing both tags lands only in SLF_objs; names with neither
# tag are dropped.
SLF_objs, PHF_objs = [], []

for segy_pos_obj in segy_pos_objs:
    if 'SLF' in segy_pos_obj.name:
        target = SLF_objs
    elif 'PHF' in segy_pos_obj.name:
        target = PHF_objs
    else:
        continue
    target.append(segy_pos_obj)

In [24]:
# Scratch check: dropping the last seven characters removes a trailing
# '_rawpos' marker from a name.
a = 'aaa_rawpos'

if a.find('_rawpos') != -1:
    print(a[:len(a) - 7])

aaa
