### Load libs

In [1]:
import pandas as pd

### Load files

In [2]:
# Locations of the two pipe-delimited lookup tables, relative to this notebook.
group_file, group_headings_file = (
    '../background/group_dict_file.csv',
    '../background/group_headings_dict_file.csv',
)

In [3]:
# Both tables use '|' as the field separator and carry their lookup key
# in the first column, so they share the same parser options.
_lookup_csv_options = {'sep': '|', 'index_col': 0}
groups = pd.read_csv(group_file, **_lookup_csv_options)
group_headings = pd.read_csv(group_headings_file, **_lookup_csv_options)

### Preview "groups"

In [4]:
groups.head()

Unnamed: 0_level_0,Contents,Notes,Parent Group
Group Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PROJ,Project Information,Required in all files (Rule 13),-
ABBR,Abbreviation Definitions,Required in all files (Rule 16),-
DICT,User Defined Groups and Headings,Required in all files which include user defin...,-
FILE,Associated Files,Required in file if FILE_FSET data provided in...,-
TRAN,Data File Transmission Information / Data Status,Required in all files (Rule 14),-


#### Create a dictionary out of this for easy lookup.
- key   = "Group Name"
- value = "Contents"

In [5]:
# Map each index label of `groups` (the group name) to its "Contents" text.
group_dict = groups['Contents'].to_dict()

In [6]:
group_dict

{'PROJ': 'Project Information',
 'ABBR': 'Abbreviation Definitions',
 'DICT': 'User Defined Groups and Headings',
 'FILE': 'Associated Files',
 'TRAN': 'Data File Transmission Information / Data Status',
 'TYPE': 'Definition of Data Types',
 'UNIT': 'Definition of Units',
 'AAVT': 'Aggregate Abrasion Tests',
 'ACVT': 'Aggregate Crushing Value Tests',
 'AELO': 'Aggregate Elongation Index Tests',
 'AFLK': 'Aggregate Flakiness Tests',
 'AIVT': 'Aggregate Impact Value Tests',
 'ALOS': 'Los Angeles Abrasion Tests',
 'APSV': 'Aggregate Polished Stone Tests',
 'ARTW': 'Aggregate Determination of the Resistance to Wear (micro-Deval)',
 'ASDI': 'Slake Durability Index Tests',
 'ASNS': 'Aggregate Soundness Tests',
 'AWAD': 'Aggregate Water Absorption Tests',
 'BKFL': 'Exploratory Hole Backfill Details',
 'CBRG': 'California Bearing Ratio Tests – General',
 'CBRT': 'California Bearing Ratio Tests - Data',
 'CDIA': 'Casing Diameter by Depth',
 'CHIS': 'Chiselling Details',
 'CHOC': 'Chain of Custo

### Preview "group_headings"

In [7]:
group_headings.head()

Unnamed: 0_level_0,Status,Heading,Suggested Unit,Suggested Type,Description,Example
Parent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PROJ,*R,PROJ_ID,,ID,Project identifier,121415
PROJ,,PROJ_NAME,,X,Project title,ACME Gas Works
PROJ,,,,,,Redevelopment
PROJ,,PROJ_LOC,,X,Location of site,"High Street, Anytown"
PROJ,,PROJ_CLNT,,X,Client name,ACME Enterprises


#### Create a dictionary out of this for easy lookup.
- key   = "Parent" (the group name)
- value = dictionary with:
          - key   = "Heading"
          - value = "Description"

In [8]:
# Build a nested lookup: {group name: {heading: description}}.
#
# - Rows without a heading (the preview shows at least one such row that only
#   carries wrapped "Example" text) are dropped explicitly instead of relying
#   on a bare `except` to swallow the resulting lookup failure.
# - If a heading occurs more than once within a group, the first description
#   is kept.
# - Group order follows first appearance in the table (sort=False).
#   NOTE: rows whose index label is missing are skipped by groupby's default
#   dropna behaviour.
group_headings_dict = dict()
for g, g_rows in group_headings.groupby(level=0, sort=False):
    g_rows = g_rows.dropna(subset=['Heading']).drop_duplicates(subset='Heading', keep='first')
    group_headings_dict[g] = dict(zip(g_rows['Heading'], g_rows['Description']))

In [9]:
group_headings_dict

{'PROJ': {'PROJ_ID': 'Project identifier',
  'PROJ_NAME': 'Project title',
  'PROJ_LOC': 'Location of site',
  'PROJ_CLNT': 'Client name',
  'PROJ_CONT': 'Contractors name',
  'PROJ_ENG': 'Project Engineer',
  'PROJ_MEMO': 'General project comments',
  'FILE_FSET': 'Associated file reference (e.g. project specification, site location drawings)'},
 'ABBR': {'ABBR_HDNG': 'Field heading in group',
  'ABBR_CODE': 'Abbreviation used',
  'ABBR_DESC': 'Description of abbreviation',
  'ABBR_LIST': 'Source of abbreviation',
  'ABBR_REM': 'Remarks',
  'FILE_FSET': 'Associated file reference (e.g. contract data specification)'},
 'DICT': {'DICT_TYPE': 'Flag to indicate definition is a GROUP or HEADING (i.e. can be either of GROUP or HEADINGHEADING)',
  'DICT_GRP': 'Group name',
  'DICT_HDNG': 'Heading name (Note: This data is REQUIRED where DICT_TYPE="HEADING")',
  'DICT_STAT': 'Heading status KEY, REQUIRED or OTHER (Note: This data is REQUIRED where DICT_TYPE="HEADING")',
  'DICT_DTYP': 'Type of

### Find out which codes belong to the data in the (converted) LAS files 

The example (converted) LAS files contain only the following curves:
- qc.MPa :Cone resistance
- fs.MPa :Sleeve friction
- u2.MPa :Pore 2 pressure
- qt.MPa :Corrected cone resistance
- Qt.-   :Normalized tip resistance
- Bq.-   :Pore pressure response
- Fr.-   :Friction ratio
- Icn.-  :Soil type behaviour

Create a small function that searches the descriptions for a partial string and prints, for every match, the key (= GROUP), the column (heading) and the full description:

In [10]:
def find_group_code_from_description(partial_string, case_sensitive=False):
    """Print every group/heading whose description contains ``partial_string``.

    Walks the module-level ``group_headings_dict``
    (``{group: {heading: description}}``) and prints one line of the form
    ``GROUP > HEADING > description`` for each matching description.
    Nothing is returned.

    Parameters
    ----------
    partial_string : str
        Text to look for inside the descriptions.
    case_sensitive : bool, default False
        If true, match the text exactly as given; otherwise both the search
        text and the descriptions are lower-cased before comparing.

    Raises
    ------
    TypeError
        If ``partial_string`` is not a string.
    """
    if not isinstance(partial_string, str):
        raise TypeError(
            'partial_string must be a str, got ' + type(partial_string).__name__
        )

    # Normalise the search text once instead of once per description.
    needle = partial_string if case_sensitive else partial_string.lower()

    for group, headings in group_headings_dict.items():
        for heading, description in headings.items():
            # Descriptions that are not text (presumably NaN for cells that
            # were empty in the CSV) cannot match; skip them explicitly
            # rather than hiding every possible error behind a bare except.
            if not isinstance(description, str):
                continue
            haystack = description if case_sensitive else description.lower()
            if needle in haystack:
                print(group, '>', heading, '>', description)

In [11]:
find_group_code_from_description('qc', case_sensitive=True)

SCPT > SCPT_RES > Cone resistance (qc)


In [12]:
find_group_code_from_description('(fs)', case_sensitive=True)

SCPT > SCPT_FRES > Local unit side friction resistance (fs)


In [13]:
find_group_code_from_description('u2', case_sensitive=True)

SCDT > SCDT_PWP2 > Shoulder porewater pressure (u2)
SCPT > SCPT_PWP2 > Shoulder porewater pressure (u2) (u3)


In [14]:
find_group_code_from_description('(qt)', case_sensitive=True)

SCPT > SCPT_QT > Corrected  cone  resistance piezocone only (qt)


In [15]:
find_group_code_from_description('Qt', case_sensitive=True)

SCPT > SCPT_NQT > Normalised cone resistance (Qt)


In [16]:
find_group_code_from_description('Bq', case_sensitive=True)

SCPT > SCPT_BQ > Pore pressure ratio (Bq) piezocone only


In [17]:
find_group_code_from_description('(Fr)', case_sensitive=True)

SCPT > SCPT_NFR > Normalised friction ratio (Fr)


In [18]:
find_group_code_from_description('Soil beh', case_sensitive=False)

SCPP > SCPP_CIC > Soil Behaviour Type Index (Ic)


All of the above are depth-related data (depth/value pairs). There are many more data types with depth data, though:<br>(perhaps these should also be included in the LAS file in the future?)

In [19]:
find_group_code_from_description('DEPTH')

AAVT > SAMP_TOP > Depth to top of sample
AAVT > SPEC_DPTH > Depth to top of test specimen
AAVT > SPEC_BASE > Depth to base of specimen
ACVT > SAMP_TOP > Depth to top of sample
ACVT > SPEC_DPTH > Depth to top of test specimen
ACVT > SPEC_BASE > Depth to base of specimen
AELO > SAMP_TOP > Depth to top of sample
AELO > SPEC_DPTH > Depth to top of test specimen
AELO > SPEC_BASE > Depth to base of specimen
AFLK > SAMP_TOP > Depth to top of sample
AFLK > SPEC_DPTH > Depth to top of test specimen
AFLK > SPEC_BASE > Depth to base of specimen
AIVT > SAMP_TOP > Depth to top of sample
AIVT > SPEC_DPTH > Depth to top of test specimen
AIVT > SPEC_BASE > Depth to base of specimen
ALOS > SAMP_TOP > Depth to top of sample
ALOS > SPEC_DPTH > Depth to top of test specimen
ALOS > SPEC_BASE > Depth to base of specimen
APSV > SAMP_TOP > Depth to top of sample
APSV > SPEC_DPTH > Depth to top of test specimen
APSV > SPEC_BASE > Depth to base of specimen
ARTW > SAMP_TOP > Depth to top of sample
ARTW > SPEC_DP

#### So the following codes (depth/variable pairs) need to be collected for the LAS file:
<div class="alert alert-block alert-success">
<span style="font-family:Courier New">
- SCPT -- SCPT_RES:  qc<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_FRES: fs<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_QT:   qt<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_NQT:  Qt<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_BQ:   Bq<br>
- &nbsp;&nbsp;&nbsp;&nbsp; -- SCPT_NFR:  Fr<br>
- SCDT -- SCDT_PWP2: u2<br>
- SCPP -- SCPP_CIC:  Ic(n)<br>
</span>
</div>