# Open-source conversion of .dlis to .las v2.0 using the dlisio and lasio Python packages

## Using DLISIO v0.1.12

## Still a work in progress.  To do:
 - add functionality to convert the units of the index track, because it is often in 0.1 in.
 - package into a function: done, on same github
 
Work released under MIT License (MIT)

## This code handles multiple embedded files within a dlis and multiple frames in a single dlis.  It also handles scenarios where values are actually lists of values.

In [None]:
import os
import pandas as pd
import numpy as np
import lasio
import dlisio
from datetime import datetime
import re

In [None]:
# Value written to the NULL item of the well section of every generated .las file.
null = -999.25

## Fill in the file path for the dlis you want to read in the below cell:

In [None]:
# Path of the input DLIS file (raw string, so the backslashes are kept literally).
filepath = r"...\Volve_Well_logs_pr_WELL\15_9-F-4\02.LWD_EWL\WL_RAW_BHPR-GR-MECH_MWD_1.DLIS"

## Fill in the file path for where you want the output las file to go in the below cell:

In [None]:
# Directory the generated .las files are written to.
output_file_location = ""

In [None]:
# Base name of the input file without its extension; used as the prefix of
# every output .las file name.
filename, _extension = os.path.splitext(os.path.basename(filepath))

In [None]:
# Module-level state shared with the conversion cell below.
embedded_files = []   # every logical file yielded by dlisio.load
origins = []          # every origin object found in those logical files
object_columns = []   # not used by the code visible in this notebook
frame_count = 0       # frame counter; becomes part of each output file name
object_warning = ''   # message about a channel that was expanded into several columns

In [None]:
def df_column_uniquify(df):
    """Rename duplicated column labels of *df* so that every label is unique.

    The first occurrence of a label is kept as is; later occurrences get a
    numeric suffix (``name_1``, ``name_2``, ...), skipping any suffix that
    would collide with a label already present.  The frame is modified in
    place and also returned.

    Membership is tested against a set and the last suffix used for each
    label is remembered, so frames with many same-named columns (for example
    an expanded array channel) are handled in roughly linear time rather than
    rescanning a list from suffix 0 for every column.
    """
    new_columns = []
    taken = set()       # labels already assigned
    last_suffix = {}    # original label -> highest suffix tried so far
    for item in df.columns:
        # Every suffix up to the remembered one is already taken, so resume
        # there instead of restarting at 0.
        counter = last_suffix.get(item, 0)
        newitem = item if counter == 0 else "{}_{}".format(item, counter)
        while newitem in taken:
            counter += 1
            newitem = "{}_{}".format(item, counter)
        last_suffix[item] = counter
        taken.add(newitem)
        new_columns.append(newitem)
    df.columns = new_columns
    return df

## Next we read the dlis file for the curve values (from 2D arrays to pandas), expand any values that are actually arrays, assign the index track, get the metadata, and write the .las files.  One file is written per embedded file per frame.

## From each frame, we will also get the channel metadata, including the long description and the units if they are filled out.  These will be written to the Curves section of the resultant .las files.

In [None]:
# Convert every frame of every logical (embedded) file in the DLIS into its
# own LAS 2.0 file.  For each frame: read each channel's samples, expand
# channels that carry several values per index sample into several columns,
# use the first column as the index, copy units / long names into the curve
# headers, copy origin metadata into the well / parameter sections, and write
# the file.
with dlisio.load(filepath) as file:
    print(file.describe())
    # Running count of the .las files written.  It is deliberately NOT reset
    # for each logical file: the count is the only varying part of the output
    # file name, so a per-file counter would let frame 1 of a later logical
    # file overwrite frame 1 of an earlier one.
    frame_count = 0
    for d in file:
        embedded_files.append(d)
        file_origins = list(d.origin)
        origins.extend(file_origins)
        # Header metadata is taken from the first origin of this logical
        # file (None when the logical file has no origin at all).
        origin = file_origins[0] if file_origins else None
        for fram in d.frames:
            channel_frames = []
            las_units = []
            las_longs = []
            for channel in fram.channels:
                name = channel.name
                print("Processing " + name)
                c = np.asarray(channel.curves())
                # Keep one row per index sample and flatten whatever each
                # sample holds into columns: a plain 1-D channel becomes a
                # single column, an array channel one column per element.
                num_col = int(np.prod(c.shape[1:]))
                c = c.reshape(c.shape[0], num_col)
                channel_frames.append(pd.DataFrame(data=c, columns=[name] * num_col))
                # One unit / description entry per generated column, so the
                # metadata lists always stay aligned with the columns.
                las_units.extend([channel.units] * num_col)
                las_longs.extend([channel.long_name] * num_col)
                if num_col > 1:
                    object_warning = str(name) + ' had to be expanded in the final .las file, as it has multiple samples per index'
                print("Completed " + name)

            if not channel_frames:
                # Nothing to index or write for a frame without channels.
                print("Skipping a frame that has no channels")
                continue
            frame_count = frame_count + 1

            # Build the table once instead of growing it column by column.
            curve_df = pd.concat(channel_frames, axis=1)

            #Check that the lists are ready for the curve metadata
            print("If these are different lengths, something is wrong:")
            print(len(las_units))
            print(len(las_longs))
            curve_df = df_column_uniquify(curve_df)
            curves_name = list(curve_df.columns.values)
            print(len(curves_name))

            #we will take the first curve in the frame as the index.
            curve_df = curve_df.set_index(curves_name[0])
            #write the pandas data to the las file
            print("Writing to las... ")
            las = lasio.LASFile()
            las.set_data(curve_df)
            #write the curve metadata from our three lists.
            print("Writing las header...")
            for x, curve_unit, curve_long in zip(curves_name, las_units, las_longs):
                las.curves[x].unit = curve_unit
                las.curves[x].descr = curve_long
            las.well.NULL = null
            if origin is not None:
                las.well.COMP = origin.company
                las.well.WELL = origin.well_name
                las.well.FLD = origin.field_name
                las.well.SRVC = origin.producer_name
                las.well.DATE = origin.creation_time
                las.well.UWI = origin.well_id
                las.well.API = origin.well_id
                las.params['PROD'] = lasio.HeaderItem('PROD', value=origin.product)
                las.params['PROG'] = lasio.HeaderItem('PROG', value=origin.programs)
                las.params['RUN'] = lasio.HeaderItem('RUN', value=origin.run_nr)
                las.params['DESCENT'] = lasio.HeaderItem('DESCENT', value=origin.descent_nr)
                las.params['VERSION'] = lasio.HeaderItem('VERSION', value=origin.version)
            las.params['LINEAGE'] = lasio.HeaderItem('LINEAGE', value="Python-converted from DLIS")
            las.params['ORFILE'] = lasio.HeaderItem('ORFILE', value=filepath)
            # os.path.join uses the platform's separator; an empty
            # output_file_location therefore means the current directory.
            output_path = os.path.join(
                output_file_location,
                filename + "_" + 'converted_with_python_' + str(frame_count) + '.las',
            )
            las.write(output_path, version=2)
            print("number of logical files: " + str(frame_count) + ": this is the number of .las files created")

## Warnings :  You will get more than one las file as output if there are multiple files in a single .dlis or multiple frames in a single .dlis

In [None]:
# Report how many origin objects were collected from the DLIS by the conversion cell.
print("This file has " + str(len(origins)) + " metadata headers.  This code has used the first.")