# Convert AGS4 files to LAS

### Load libs

In [None]:
# Widen the notebook container so wide tables use the full browser width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import os
from python_ags4 import AGS4
import lasio
from datetime import datetime

### Function to scan a folder (optionally recursively) for files with a specific extension

In [None]:
def find_files(path, recursive=False, extension='', case_sensitive=False):
    '''Scan a directory (optionally recursively) for files ending with a given extension.
    Mandatory argument:
    :path: path/directory to look in for files
    Optional arguments:
    :recursive: descends into sub-directories when truthy. Default = False
    :extension: file extension (suffix) to match, e.g. '.ags'. Default = '' (matches every file)
    :case_sensitive: treats the extension as case sensitive when truthy. Default = False
    Returns a list with all files matching the extension. Each element is the directory
    entry joined onto :path: (or onto the sub-directory it was found in).'''
    # Normalise the suffix once instead of lower-casing it again for every entry.
    wanted_suffix = extension if case_sensitive else extension.lower()
    file_list = []

    for entry in os.listdir(path):
        fpath = os.path.join(path, entry)
        if os.path.isdir(fpath):
            # Directories are never reported themselves, only (optionally) descended into.
            if recursive:
                file_list.extend(find_files(fpath, recursive=recursive, extension=extension,
                                            case_sensitive=case_sensitive))
            continue
        candidate = fpath if case_sensitive else fpath.lower()
        if candidate.endswith(wanted_suffix):
            file_list.append(fpath)
    return file_list

### Only the following tables (GROUPS) and columns (HEADERS) will be extracted from the AGS file for each location (if present). (this can be changed later, but for starters only the curves that are present in typical plots were extracted)

#### The following codes (depth/variable pairs) need to be collected for the LAS file:
<div class="alert alert-block alert-success">
<span style="font-family:Courier New">
- SCPT -- SCPT_RES:  qc<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_FRES: fs<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_QT:   qt<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_NQT:  Qt<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_BQ:   Bq<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_NFR:  Fr<br>
- SCDT -- SCDT_PWP2: u2<br>
- SCPP -- SCPP_CIC:  Ic(n)<br>
</span>
</div>

Create a list with these codes (note that the GROUP/table names appear as prefix in column headers. Use the column headers as code in the list - e.g. "SCPT_RES")

In [None]:
# Column headers (GROUP_HEADING) of the depth-related curves that are exported.
codes = [
    'SCPT_RES',   # qc
    'SCPT_FRES',  # fs
    'SCPT_PWP2',
    'SCDT_PWP2',  # u2
    'SCPT_QT',    # qt
    'SCPT_NQT',   # Qt
    'SCPT_BQ',    # Bq
    'SCPT_NFR',   # Fr
    'SCPP_CIC',   # Ic(n)
]

Create a list with the codes that will be used to fill the "PARAMETER" section in the LAS file

In [None]:
# Column headers whose values go into the "PARAMETER" section of the LAS file.
parameters = [
    'LOCA_NATN',
    'LOCA_NATE',
    'LOCA_GL',
    'LOCA_FDEP',
    'LOCA_WDEP',
    'LOCA_DATM',
    'LOCA_LAT',
    'LOCA_LON',
]

#### Some other user settings: all tables with depth-related data in AGS have a column with suffix "_DPTH". However, we only need one depth column in the final LAS file: depth_col=...

In [None]:
# Name given to the single depth column that ends up in the LAS data.
depth_col = 'DEPTH'
# Suffix of the depth column in every depth-related AGS table (combined with the GROUP name).
depth_suffix = '_DPTH'

In [None]:
# Pipe-separated lookup table; its first column becomes the index. The 'Heading'
# and 'Description' columns are read when the group/headings dictionary is built.
group_headings_file = '../background/group_headings_dict_file.csv'
group_headings = pd.read_csv(group_headings_file, sep='|', index_col=0)

In [None]:
def create_group_headings_dictionary(group_headings=None):
    '''Creates a nested dictionary describing the column headers of every GROUP
    (= table in the AGS file).
    Optional argument:
    :group_headings: pandas DataFrame indexed by GROUP name, with a 'Heading' column
      (column-header codes) and a 'Description' column. Default: the module-level
      "group_headings" table, looked up when the function is called.
    Returns a (nested) dictionary. First level: GROUP = key, value is (second level)
      dictionary of all column-headers in GROUP (the keys in the nested dictionary),
      with the description of the column-headers as values. If a header occurs more
      than once within a GROUP, its first description is used. Rows without a header
      are skipped.
    Raises KeyError when the 'Heading' or 'Description' column is missing.'''
    if group_headings is None:
        # Resolve the default at call time, so a re-loaded table is picked up.
        group_headings = globals()['group_headings']

    group_headings_dict = {}
    for group in group_headings.index.unique():
        # Select the rows of this GROUP once instead of re-scanning the frame per header.
        rows = group_headings.loc[group_headings.index == group, ['Heading', 'Description']]
        rows = rows.dropna(subset=['Heading']).drop_duplicates(subset='Heading', keep='first')
        group_headings_dict[group] = dict(zip(rows['Heading'], rows['Description']))
    return group_headings_dict

In [None]:
# Nested lookup {GROUP: {column header: description}} built from the headings table.
group_headings_dict = create_group_headings_dictionary(group_headings=group_headings)

Convert to dictionary ({code: description})

In [None]:
# Replace the list of curve codes by {code: description}. The first four characters
# of a code are used as its GROUP key; a code without a description raises KeyError.
codes = {code: group_headings_dict[code[:4]][code] for code in codes}

In [None]:
# Replace the list of parameter codes by {code: description}. A code without a
# description in its GROUP falls back to the code itself.
parameters = {param: group_headings_dict[param[:4]].get(param, param) for param in parameters}

#### Functions

In [None]:
def convert_data_for_single_location(loca, file, tables):
    '''Creates a LAS file for the data picked up for one location.
    Mandatory input:
    :loca: unique location name, used in the AGS file
    :file: name of the AGS4 file (used to add as meta-data to the LAS file and in the logging)
    :tables: tables-section from AGS file (passed on to the functions that fill the LAS file)
    Writes one LAS file per location to "export_folder", named
    "{loca}__{name of the AGS4 file}__.LAS".
    Returns a tuple (code, loca, name of the AGS4 file), where code is 0, 1, or 2 for OK,
    WARNING (no depth-related data found), resp. ERROR. This info is written to a log in
    the LAS export location for later reference.'''
    ags_name = os.path.basename(file)

    # NOTE: "pick_up_depth_related_data" reads the module-level "tables", not the argument.
    try:
        _dfs_list = pick_up_depth_related_data(loca, codes)
    except Exception as err:
        print(f'...[ERROR] with {loca} in {ags_name}: {err!r}...\n')
        return (2, loca, ags_name)

    # No depth-related data at all is a warning, not an error; decide this up front
    # instead of waiting for a later step to fail on the empty list.
    if not _dfs_list:
        print(f'...[WARNING] no logs found for {loca} in {ags_name}...\n')
        return (1, loca, ags_name)

    try:
        df = check_whether_different_dataframes_depth_related_data_are_on_same_depths(_dfs_list)
        las = create_and_fill_LAS_file(df, tables, loca, codes, parameters, file)
        filename = loca + '__' + ags_name + '__.LAS'
        las.write(os.path.join(export_folder, filename), version=2)
    except Exception as err:
        # Report what went wrong; the batch continues with the next location.
        print(f'...[ERROR] with {loca} in {ags_name}: {err!r}...\n')
        return (2, loca, ags_name)

    print(f'...[OK!] LAS file created for {loca} in {ags_name}...\n')
    return (0, loca, ags_name)

In [None]:
def pick_up_depth_related_data(loca, codes):
    '''Picks up all depth related data for one location from the module-level "tables".
    Mandatory input:
    :loca: unique location name, used in the AGS file
    :codes: dictionary with all (depth-related) codes that should be looked for. The keys should
       be the GROUP_HEADER codes (e.g. "SCPT_QT") that will be looked up; the values are
       the descriptions
    Returns a list of DataFrames, one per GROUP that has rows for the location. Each DataFrame
    holds the depth column of the GROUP followed by the requested columns present in that GROUP.
    GROUPs that are missing, or that lack the "LOCA_ID" or depth column, are skipped.'''
    # dict.fromkeys removes duplicates but keeps first-appearance order. A set of strings
    # can iterate in a different order in every Python session, which would change which
    # GROUP ends up first in the returned list.
    groups = list(dict.fromkeys(code.split('_')[0] for code in codes))

    _dfs_list = []
    for group in groups:
        try:
            group_table = tables[group]
            cols = [c for c in group_table.columns if c in codes]
            _df = group_table.loc[group_table['LOCA_ID'] == loca, [group + depth_suffix] + cols]
        except KeyError:
            # GROUP not in this AGS file, or no LOCA_ID/depth column in the GROUP.
            continue
        if len(_df) > 0:
            # Hand out an independent copy, so later edits do not act on a slice of "tables".
            _dfs_list.append(_df.copy())
    return _dfs_list

In [None]:
def _numeric_copy_with_common_depth_name(frame):
    '''Returns a numeric copy of "frame" whose depth column (first column ending with
    "depth_suffix") is renamed to "depth_col", or None if there is no such column.'''
    numeric = frame.copy(deep=True)
    # make numeric first (all numbers are strings in AGS4 files?)
    for c in numeric.columns:
        numeric[c] = pd.to_numeric(numeric[c], errors='coerce')
    depth_columns = [c for c in numeric.columns if c.endswith(depth_suffix)]
    if not depth_columns:
        return None
    return numeric.rename(columns={depth_columns[0]: depth_col})


def check_whether_different_dataframes_depth_related_data_are_on_same_depths(_dfs_list):
    '''Combines the dataframes that are on the same depth grid as the first one.
    Mandatory input:
    :_dfs_list: list with each element being a dataframe (obtained from function "pick_up_depth_related_data")
    Returns a pandas DataFrame: the first dataframe (made numeric, depth column renamed to
    "depth_col"), merged on depth with every other dataframe that has exactly the same depths.
    Dataframes on a different depth grid are NOT included (this is reported, not handled).
    The dataframes in "_dfs_list" are not modified.
    Raises ValueError if "_dfs_list" is empty or its first dataframe has no depth column.'''
    if not _dfs_list:
        raise ValueError('no depth-related dataframes to combine')

    # use first dataframe in "collector" as standard to compare depths against
    _df0 = _numeric_copy_with_common_depth_name(_dfs_list[0])
    if _df0 is None:
        raise ValueError(f'first dataframe has no column ending with "{depth_suffix}"')

    # run through all other dataframes in "collector":
    for other in _dfs_list[1:]:
        _df1 = _numeric_copy_with_common_depth_name(other)
        if _df1 is None:
            continue
        # Compare depth VALUES only. Series.equals also compares the row labels, and
        # rows taken from different tables (or from an earlier merge) carry different labels.
        reference_depths = _df0[depth_col].reset_index(drop=True)
        candidate_depths = _df1[depth_col].reset_index(drop=True)
        if reference_depths.equals(candidate_depths):
            # same depths: merge
            _df0 = pd.merge(_df0, _df1, how='left', on=depth_col)
        else:
            # IF DATA IS NOT ON THE SAME "DEPTH GRID", THIS IS NOT HANDLED CURRENTLY
            skipped = [c for c in _df1.columns if c != depth_col]
            print(f'...[WARNING] {skipped} not exported: depths differ from the first depth-related table...')
    return _df0

In [None]:
def create_and_fill_LAS_file(df, tables, loca, codes, parameters, file):
    '''Builds a new LAS object and lets the section functions fill it.
    Mandatory input:
    :df: pandas DataFrame containing the depth-related data (the DataFrame is generated through
       functions "pick_up_depth_related_data" and
       "check_whether_different_dataframes_depth_related_data_are_on_same_depths")
    :tables: tables-section from AGS file
    :loca: unique location name, used in the AGS file
    :codes: dictionary with all (depth-related) codes; keys are the GROUP_HEADER codes
       (e.g. "SCPT_QT"), values are the descriptions
    :parameters: dictionary with codes with meta-data that go into the parameters
       section of the LAS file
    :file: name of the AGS4 file (added as meta-data to the LAS file)
    Returns the las-object'''
    las_file = lasio.LASFile()
    # Sections are filled in this order: well (~W), curves (~C), parameters (~P), other (~O).
    section_fillers = (
        (fill_LAS_well_section, (df, tables, loca)),
        (fill_LAS_curve_section, (df, codes)),
        (fill_LAS_params_section, (tables, loca, parameters)),
        (fill_LAS_other_section, (loca, file)),
    )
    for fill_section, arguments in section_fillers:
        fill_section(las_file, *arguments)
    return las_file

In [None]:
def get_sampling_rate(df, depth_col):
    '''Function to get the sampling rate of the passed DataFrame.
    Mandatory arguments:
    :df: pandas DataFrame with concatenated depth-related data (the DataFrame is generated through
       functions "pick_up_depth_related_data" and
       "check_whether_different_dataframes_depth_related_data_are_on_same_depths")
    :depth_col: name of the (renamed) depth column
    Returns the sampling rate (used for the LAS header): the median difference between
    consecutive depths, rounded to 4 decimals. Returns NaN if it cannot be determined
    (e.g. the depth column is missing).'''
    try:
        median_step = df[depth_col].diff().median()
        return round(median_step, 4)
    except Exception:
        # Best effort: the LAS header gets NaN. Unlike a bare "except", this does not
        # trap KeyboardInterrupt/SystemExit.
        return np.nan

In [None]:
def get_depth_range(df, depth_col):
    '''Function to get the depth range of the passed DataFrame.
    Mandatory arguments:
    :df: pandas DataFrame with concatenated depth-related data (the DataFrame is generated through
       functions "pick_up_depth_related_data" and
       "check_whether_different_dataframes_depth_related_data_are_on_same_depths")
    :depth_col: name of the (renamed) depth column
    Returns the minimum and maximum depth as a tuple (used for the LAS header), both rounded
    to 4 decimals. Missing depths (NaN) are ignored. Returns (NaN, NaN) if the range cannot
    be determined (e.g. the depth column is missing).'''
    try:
        depths = df[depth_col]
        # Series.min/max skip NaN. The builtin min()/max() compare element by element,
        # so a NaN depth would make the result depend on where the NaN sits.
        return (round(depths.min(), 4), round(depths.max(), 4))
    except Exception:
        return (np.nan, np.nan)

In [None]:
def fill_LAS_well_section(las, df, tables, loca):
    '''Fills the fields of the "well section" ("~W") of the LAS file.
    Mandatory arguments:
    :las: the las object (generated through function "create_and_fill_LAS_file")
    :df: pandas DataFrame with concatenated depth-related data (the DataFrame is generated through
       functions "pick_up_depth_related_data" and
       "check_whether_different_dataframes_depth_related_data_are_on_same_depths")
    :tables: tables-section from AGS file
    :loca: unique location name, used in the AGS file
    Does not return anything, but changes the ~W section of the current LAS-object'''
    well = las.well

    # Depth range and sampling rate come from the (module-level) depth column name.
    start_depth, stop_depth = get_depth_range(df, depth_col)
    well.STRT = start_depth
    well.STOP = stop_depth
    well.STEP = get_sampling_rate(df, depth_col)
    well.NULL = -999.2500000

    # Project info is read from row label 2 of the PROJ table (presumably the first
    # data row, after the rows with labels 0 and 1 - verify against the AGS file).
    project = tables['PROJ']
    well.COMP = project.loc[2, 'PROJ_CLNT']
    well.WELL = loca
    well.FLD = project.loc[2, 'PROJ_NAME']
    well.LOC = project.loc[2, 'PROJ_LOC']
    for blank_item in ('PROV', 'CNTY', 'STAT', 'CTRY'):
        setattr(well, blank_item, '')
    well.SRVC = project.loc[2, 'PROJ_CONT']
    # DATE holds the moment of conversion, not a date from the AGS file.
    well.DATE = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    well.API = 'NULL'

In [None]:
def fill_LAS_curve_section(las, df, codes):
    '''Function to fill the "curve section" ("~C") of the LAS file.
    Units are looked up in the module-level "tables".
    Mandatory arguments:
    :las: the las object (generated through function "create_and_fill_LAS_file")
    :df: pandas DataFrame with concatenated depth-related data (the DataFrame is generated through
       functions "pick_up_depth_related_data" and
       "check_whether_different_dataframes_depth_related_data_are_on_same_depths").
       The first column is treated as the depth column.
    :codes: dictionary with all (depth-related) codes that should be looked for. The keys should
       be the GROUP_HEADER codes (e.g. "SCPT_QT") that will be looked up; the values are
       the descriptions
    Does not return anything, but changes the ~C section of the current LAS-object'''
    # Depth unit reported by the source table of every column; '' means "no unit found"
    # (the renamed depth column itself has no GROUP prefix to look up).
    reported_units = [find_depth_unit(tables, c) for c in df.columns]
    known_units = [u for u in reported_units if u != '']
    # Use the most frequent real unit (first one wins a tie). Picking an element from a
    # set would choose arbitrarily between '' and the real unit.
    depth_unit = max(known_units, key=known_units.count) if known_units else ''

    for i, c in enumerate(df.columns):
        if i == 0:
            las.add_curve(c, df[c], unit=depth_unit)
        else:
            unit = find_unit(tables, c)
            descr = codes.get(c, '')
            las.add_curve(c, df[c], descr=descr, unit=unit)

In [None]:
def fill_LAS_params_section(las, tables, loca, parameters):
    '''Function to fill fields in the "parameter section" ("~P") of the LAS file.
    Mandatory arguments:
    :las: the las object (generated through function "create_and_fill_LAS_file")
    :tables: tables-section from AGS file
    :loca: unique location name, used in the AGS file
    :parameters: dictionary with codes with meta-data to go into the
       parameters section of the LAS-object (keys: codes, values: descriptions)
    Does not return anything, but changes the ~P section of the current las-object.
    Parameters that are not available for the location are left out.'''
    for mnemonic, description in parameters.items():
        try:
            loca_table = tables['LOCA']
            value = loca_table.loc[loca_table['LOCA_ID'] == loca, mnemonic].values[0]
        except (KeyError, IndexError):
            # No LOCA table, no such column, or no row for this location: skip the parameter.
            continue
        las.params[mnemonic] = lasio.HeaderItem(mnemonic=mnemonic,
                                                unit=find_unit(tables, mnemonic),
                                                value=value,
                                                descr=description)

In [None]:
def fill_LAS_other_section(las, loca, file):
    '''Function to fill field in the "other section" ("~O") of the LAS file.
    Mandatory arguments:
    :las: the las object (generated through function "create_and_fill_LAS_file")
    :loca: unique location name, used in the AGS file
    :file: name of the AGS4 file
    Does not return anything, but changes the ~O section of the current las-object.
    A failure to set the text is ignored (the section is informational only).'''
    try:
        las.other = f'LAS file generated for data for location "{loca}". Original file: "{file}".'
    except Exception:
        # Best effort, but without trapping KeyboardInterrupt/SystemExit as a bare "except" does.
        pass

In [None]:
def find_depth_unit(tables, header):
    '''Small function to get the units of depth for the table a column belongs to.
    Mandatory arguments:
    :tables: tables-section from AGS file
    :header: name of column; its first four characters are used as table (GROUP) name
    Returns the entry in the row with label 0 of the depth column of that table, or ''
    if it cannot be found.'''
    group = header[:4]
    try:
        unit = tables[group].loc[0, group + depth_suffix]
    except Exception:
        # Unknown table/column: no unit. Does not trap KeyboardInterrupt/SystemExit.
        unit = ''
    return unit

In [None]:
def find_unit(tables, header):
    '''Small function to get the units of a parameter/variable.
    Mandatory arguments:
    :tables: tables-section from AGS file
    :header: name of column; its first four characters are used as table (GROUP) name
    Returns the entry in the row with label 0 of that column, or '' if it cannot be found.'''
    try:
        unit = tables[header[:4]].loc[0, header]
    except Exception:
        # Unknown table/column: no unit. Does not trap KeyboardInterrupt/SystemExit.
        unit = ''
    return unit

In [None]:
def write_conversion_log(export_folder, summary_processing):
    '''Function writing the info regarding the conversion to a file "conversion.log" in the same LAS
    export location. This log can be used to check locations/files that caused warnings or errors.
    Mandatory input:
    :export_folder: location of the exported LAS files (the log file will be written to the same location)
    :summary_processing: list with tuples (code, location, file) that will be written to the file
    "conversion.log"; code is 0 (OK), 1 (no depth-data) or 2 (error)
    Returns nothing, but will put a file "conversion.log" in the same directory the LAS files were exported
    to with info about the conversion job. Errors are listed first, then warnings, then the
    locations that were processed OK.'''
    report_order = (2, 1, 0)
    reported = [s for s in summary_processing if s[0] in report_order]
    # Column widths: longest location and file name over all reported entries (0 if none).
    max_length_location_name = max((len(str(s[1])) for s in reported), default=0)
    max_length_file_name = max((len(str(s[2])) for s in reported), default=0)

    headers = ['LOCATIONS IN FILE PROCESSED OK:\n',
               'LOCATIONS IN FILE WITHOUT ANY DEPTH-DATA (i.e no logs):\n',
               'LOCATIONS IN FILE THAT CAUSED AN ERROR:\n']
    # 11 = length of the ' in file   ' text between the two columns.
    separator = '-'*(max_length_location_name+11+max_length_file_name)+'\n'

    # The with-statement closes the log even if writing fails half-way.
    with open(os.path.join(export_folder, 'conversion.log'), 'w') as f:
        for code in report_order:
            f.write(headers[code])
            f.write(separator)
            for c in [s for s in summary_processing if s[0] == code]:
                f.write(f'{c[1]:<{max_length_location_name}} in file   {c[2]:<{max_length_file_name}}\n')
            f.write('\n\n')

### Find all AGS files in a location (use find_files function defined earlier)

In [None]:
# Folder that is scanned for AGS4 files (machine-specific path: adjust before running).
folder = os.path.join(r'C:\Users\harbr\OneDrive - Equinor\petrophysical mindboggles\AGS_files\AGS4_files')
extension = '.AGS'

# Sub-folders are included; the extension is matched case-insensitively.
ags4_files = find_files(folder, recursive=True, extension=extension, case_sensitive=False)

In [None]:
ags4_files

## Convert all locations in all AGS files

In [None]:
# Folder that receives the LAS files and "conversion.log" (machine-specific path: adjust before running).
export_folder = os.path.join(r'C:\Users\harbr\OneDrive - Equinor\petrophysical mindboggles\AGS_files\output')

In [None]:
# Convert every location of every AGS4 file and write a summary log at the end.
summary_processing = list()
# Make sure the export location exists before any LAS file or the log is written.
os.makedirs(export_folder, exist_ok=True)
for file in ags4_files:
    print(f'...working on file {file}...')
    try:
        # "tables" must stay a module-level name: "pick_up_depth_related_data" and
        # "fill_LAS_curve_section" read it as a global.
        tables, headers = AGS4.AGS4_to_dataframe(file) # extract tables and headers for current AGS file
        # rows with labels 0 and 1 are dropped before collecting the location names
        locations = tables['LOCA'].drop([0,1]).reset_index(drop=True)['LOCA_ID'].unique() # find all locations in current AGS file
    except Exception as err:
        # One unreadable file must not abort the batch (and lose the log): record it as an error.
        print(f'...[ERROR] could not read locations from {file}: {err!r}...\n')
        summary_processing.append((2, '<file could not be read>', os.path.basename(file)))
        continue

    for loca in locations:
        print(f'   ...converting location {loca}...')
        process_code, location, ags_filename = convert_data_for_single_location(loca, file, tables)
        summary_processing.append((process_code, location, ags_filename))
write_conversion_log(export_folder, summary_processing)