# DLISIO in a Nutshell

### This is a guide to walk you through the basic functionalities of dlisio

## Importing

In [None]:
%matplotlib inline

import os
import pandas as pd
import dlisio
import matplotlib.pyplot as plt
import numpy as np
import numpy.lib.recfunctions as rfn

import hvplot.pandas
import holoviews as hv
from holoviews import opts, streams
from holoviews.plotting.links import DataLink
hv.extension('bokeh', logo=None)

### You can work with a single file using the cell below - or by adding an additional for loop to the code below, you can work through a list of files.  Another option is to use os.walk to get all .dlis files in a parent folder.  Example:

    for (root, dirs, files) in os.walk(folderpath):
        for f in files:
            filepath = os.path.join(root, f)
            if filepath.endswith('.' + 'dlis'):
                print(filepath)
                
### But for this example, we will work with a single .dlis file specified in the cell below.  Note that some .dlis file formats are not yet supported by DLISIO - it is good to catch them in a try/except block if you are reading files en masse.

### We will load a dlis file for well 15/9-F-4 from the open source Volve dataset available here: https://data.equinor.com/dataset/Volve

In [None]:
# Path of the .dlis file that every following cell loads. The raw string
# keeps the leading backslash literal; point this at the location where the
# file was saved.
filepath = r"\WL_RAW_CAL-DEN-GR-NEU-REMP_MWD_1.DLIS"

## Query for specific curve

### Very quickly you can use regex to find certain curves in a file (helpful if you are scanning a lot of files for certain curves)

In [None]:
# Search every logical file for CHANNEL objects matching 'TDEP' and print
# each match's name followed by its curve data.
with dlisio.dlis.load(filepath) as physical_file:
    for logical_file in physical_file:
        for tdep_channel in logical_file.find('CHANNEL', 'TDEP'):
            print(tdep_channel.name)
            print(tdep_channel.curves())

## Examining internal files and frames

### Keep in mind that dlis files can contain multiple files and multiple frames.  You can quickly get a numpy array of the curves in each frame below.

In [None]:
# Print the summary that dlisio produces for the loaded file.
with dlisio.dlis.load(filepath) as physical_file:
    file_summary = physical_file.describe()
    print(file_summary)

In [None]:
# For every frame of every logical file, print the channels that belong to
# that frame followed by the frame's curves as a numpy array.
with dlisio.dlis.load(filepath) as file:
    for d in file:
        for fram in d.frames:
            # Use the frame's own channel list: d.channels holds every channel
            # of the logical file and would be printed again for each frame.
            print(fram.channels)
            print(fram.curves())

## Metadata including Origin information per frame (well name and header)

In [None]:
# Print the description of each logical file, then of each of its frames,
# then of each channel recorded in that frame.
with dlisio.dlis.load(filepath) as file:
    for d in file:
        print(d.describe())
        for fram in d.frames:
            print(fram.describe())
            # Iterate the frame's own channels so each channel is described
            # under the frame it belongs to instead of once per frame.
            for channel in fram.channels:
                print(channel.describe())

In [None]:
# Print the description of every origin object in every logical file.
with dlisio.dlis.load(filepath) as physical_file:
    for logical_file in physical_file:
        for origin_record in logical_file.origins:
            origin_summary = origin_record.describe()
            print(origin_summary)

## Reading a full dlis file

### But most likely we want a single data frame of every curve, no matter which frame it came from.  So we write a bit more code to look through each frame, then look at each channel and get the curve name and unit information along with it.  We will also save the information about which internal file and which frame each curve resides in.  

In [None]:
# Parallel lists: position i in each of the per-curve lists describes the
# same curve.
curves_L = []      # curve data returned by channel.curves()
curves_name = []   # channel name
longs = []         # channel long name
unit = []          # channel units
files_L = []       # logical files visited so far
files_num = []     # 1-based number of the logical file holding the curve
frames = []        # frames visited so far, across all logical files
frames_num = []    # 1-based running number of the frame holding the curve
with dlisio.dlis.load(filepath) as file:
    for d in file:
        files_L.append(d)
        for fram in d.frames:
            frames.append(fram)
            # Walk the frame's own channels so that each curve is recorded
            # once and is attributed to the frame it actually belongs to,
            # including curves that live in the second and later frames.
            for channel in fram.channels:
                curves_name.append(channel.name)
                longs.append(channel.long_name)
                unit.append(channel.units)
                files_num.append(len(files_L))
                frames_num.append(len(frames))
                curves_L.append(channel.curves())

In [None]:
# One row per collected curve: its name, long name, unit, and the internal
# file / frame numbers recorded for it.
index_columns = {
    'Curve': curves_name,
    'Long': longs,
    'Unit': unit,
    'Internal_File': files_num,
    'Frame_Number': frames_num,
}
curve_index = pd.DataFrame(index_columns)

In [None]:
# Bare expression: in a notebook this renders the curve index table.
curve_index

## Creating a Pandas dataframe for the entire .dlis file

### We have to be careful creating a dataframe for the whole .dlis file, as often there are some curves that represent multiple values (numpy array of list values).  So, you can use something like:

df = pd.DataFrame(data=curves_L, index=curves_name).T

### to view the full dlis file with lists as some of the curve values.

### Or we will use the code below to process each curve's 2D numpy array, stacking it if the curve contains multiple values per sample.  Then we convert each curve into its own dataframe (uniquifying the column names by adding a _1, _2, _3...etc).  Then, to preserve the order with the curve index above, append each data frame together in order to build the final dlis full dataframe.

In [None]:
def df_column_uniquify(df):
    """Make the column labels of ``df`` unique by suffixing repeated labels.

    The first occurrence of a label is kept unchanged. Each later occurrence
    is renamed to ``"<label>_<n>"`` using the smallest ``n >= 1`` whose result
    has not already been assigned to an earlier column.

    Args:
        df: DataFrame whose column labels may contain duplicates. Its
            ``columns`` attribute is replaced in place.

    Returns:
        The same ``df`` object, now carrying unique column labels.
    """
    new_columns = []
    taken = set()  # labels assigned so far; constant-time membership tests
    for item in df.columns:
        counter = 0
        newitem = item
        while newitem in taken:
            counter += 1
            newitem = "{}_{}".format(item, counter)
        taken.add(newitem)
        new_columns.append(newitem)
    df.columns = new_columns
    return df

In [None]:
# Build one wide DataFrame from all collected curves, keeping the order of
# curves_name / curves_L. A curve with several values per sample is spread
# over several columns named <curve>, <curve>_1, <curve>_2, ...
per_curve_dfs = []
for name, c in zip(curves_name, curves_L):
    c = np.asarray(c)
    if c.ndim == 1:
        # One value per sample: a single column.
        values = c[:, np.newaxis]
    else:
        # Several values per sample: keep one row per sample and flatten any
        # trailing axes into columns.
        values = c.reshape(c.shape[0], int(np.prod(c.shape[1:])))
    df = pd.DataFrame(data=values, columns=[name] * values.shape[1])
    per_curve_dfs.append(df_column_uniquify(df))

# Concatenate once at the end; concatenating inside the loop copies the
# growing frame on every iteration. pd.concat rejects an empty list, so fall
# back to an empty DataFrame when no curves were collected.
curve_df = pd.concat(per_curve_dfs, axis=1) if per_curve_dfs else pd.DataFrame()

In [None]:
# Bare expression: in a notebook this renders the first rows of curve_df.
curve_df.head()

In [None]:
## If we have a simpler dlis file with a single logical file and single frame and with single data values in each channel.
# Load only the first frame of the first logical file into a DataFrame that
# is indexed by the frame's index channel. curve_df is left untouched when
# the file contains no such frame.
with dlisio.dlis.load(filepath) as file:
    for logical_count, d in enumerate(file):
        for frame_count, fram in enumerate(d.frames):
            # `and`, not `&`: `&` binds tighter than `==`, which would drop
            # logical_count from the comparison altogether.
            if frame_count == 0 and logical_count == 0:
                curves = fram.curves()
                curve_df = pd.DataFrame(curves, index=curves[fram.index])

In [None]:
# Bare expression: in a notebook this renders the first rows of curve_df.
curve_df.head()

### Then we can set the index and start making some plots.

In [None]:
# Prepare curve_df for plotting: unique column names, a depth index in feet,
# null values as NaN, and the list of curves to plot.
curve_df = df_column_uniquify(curve_df)
# TDEP is taken to be in units of 0.1 inch (0.1 inch / 12 inches per foot).
# The factor is computed rather than typed as a truncated decimal so the
# conversion error does not grow with depth.
curve_df['DEPTH_Calc_ft'] = curve_df.loc[:, 'TDEP'] * (0.1 / 12)
# Keep a copy of the depth as a regular column; set_index below consumes
# DEPTH_Calc_ft.
curve_df['DEPTH_ft'] = curve_df['DEPTH_Calc_ft']
curve_df = curve_df.set_index("DEPTH_Calc_ft")
curve_df.index.names = [None]
# -999.25 is replaced by NaN throughout the frame.
curve_df = curve_df.replace(-999.25, np.nan)
min_val = curve_df['DEPTH_ft'].min()
max_val = curve_df['DEPTH_ft'].max()
# Every column except the depth column is a curve to plot.
curve_list = list(curve_df.columns)
curve_list.remove('DEPTH_ft')

In [None]:
# Bare expression: in a notebook this renders the first rows of curve_df.
curve_df.head()

In [None]:
def curve_plot(log, df, depthname):
    """Build a 300x600 hvplot of one curve against depth.

    The plot is requested with inverted axes, a flipped y-axis and shared
    axes, and label/tick font sizes are applied afterwards.

    Args:
        log: name of the column in ``df`` passed as ``y``.
        df: DataFrame exposing the ``hvplot`` accessor.
        depthname: name of the column in ``df`` passed as ``x``.

    Returns:
        The plot object returned by ``.opts(...)``.
    """
    font_sizes = {'labels': 16, 'xticks': 14, 'yticks': 14}
    track = df.hvplot(
        x=depthname,
        y=log,
        invert=True,
        flip_yaxis=True,
        shared_axes=True,
        height=600,
        width=300,
    )
    return track.opts(fontsize=font_sizes)

In [None]:
# One plot per curve, laid out side by side in a single row of columns.
plotlist = []
for curve_name in curve_list:
    plotlist.append(curve_plot(curve_name, df=curve_df, depthname='DEPTH_ft'))
track_count = len(curve_list)
well_section = hv.Layout(plotlist).cols(track_count)
well_section

# Hopefully that is enough code to get you started working with DLISIO.  There is much more functionality, which can be explored with help(dlisio) or in the documentation on Read the Docs.