In [1]:
import csv_helpers as csv
from IPython.display import display

## Save Dummy Data for all elements into tsv folder structure


In [3]:

import copy

import dummydata

# Work on a private deep copy: `dummydata.defaults` is a module-level object, and the
# nested assignments below would otherwise modify it for everything else in this kernel
# (including a re-run of this cell).
run_settings = copy.deepcopy(dummydata.defaults)

run_settings['instrument']['sensor'] = 'DUM01'

# Build a chemistry map from the current settings and store it with the instrument details.
chem_map = dummydata.dummyChemistry(run_settings)
run_settings['instrument']['chemistry_map'] = chem_map

# Show the settings only once they are final, so the printout matches what is generated.
print("Generating Dummy tsv files with the following settings")
display(run_settings)

# NOTE(review): append=False presumably replaces any existing output rather than adding
# to it — confirm in dummydata.generate_tsv.
dummydata.generate_tsv(run_settings, append=False)

# Optional: add a second data set under a different sensor name.
# print("appending another data set with new sensor name")
# run_settings['instrument']['sensor'] = 'DUM02'
# dummydata.generate_tsv(run_settings, append=True)


Generating Dummy tsv files with the following settings


{'filename': 'test.hdf5',
 'output_dir': './dummydata',
 'instrument': {'name': 'Lab_setup_02',
  'sensor': 'DUM01',
  'element_rows': 4,
  'element_cols': 4,
  'chemistry_map': {'A01': 'Tin',
   'A02': 'Vanadium',
   'A03': 'Silver',
   'A04': 'Cadmium',
   'B01': 'Boron',
   'B02': 'Molybdenum',
   'B03': 'Nickel',
   'B04': 'Scandium',
   'C01': 'Palladium',
   'C02': 'Neodymium',
   'C03': 'Barium',
   'C04': 'Magnesium',
   'D01': 'Zinc',
   'D02': 'Antimony',
   'D03': 'Silicon',
   'D04': 'Palladium'},
  'light Source': 'Stellarnet LED White',
  'spectrometer': 'Stellarnet BlueWave VIS-25'},
 'fluid_list': ['water', 'beer1', 'beer2'],
 'wavelength_range': [400, 420, 0.5],
 'elements': 'all',
 'repeats': 3}

## Preview of csv spectrometer data

In [5]:
import os
import pandas as pd

# Named `data_dir` rather than `dir` so the built-in dir() is not shadowed for the rest
# of the session.
# TODO: machine-specific absolute path; point this at your own copy of the sample data.
data_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
testfile = "HAN24_Sensor10_BeerBitter1_Rotation1.txt"

# NOTE(review): read_csv treats the first line of the file as a header row. If the raw
# files have no header line, the first measurement is consumed as column names and
# header=None would be needed — confirm against the data.
df = pd.read_csv(os.path.join(data_dir, testfile), sep='\t')

# Label columns: the first is the wavelength axis, every remaining column is one repeat.
reps = len(df.columns) - 1
col_names = ['wavelength'] + [f"rep{r + 1}" for r in range(reps)]
df.columns = col_names
df

Unnamed: 0,wavelength,rep1,rep2,rep3
0,457.131,98.77,99.22,98.77
1,457.204,99.00,99.16,98.96
2,457.277,95.12,95.17,94.66
3,457.349,93.82,93.88,93.54
4,457.422,93.98,94.03,93.78
...,...,...,...,...
4090,730.534,66.20,65.54,66.02
4091,730.598,66.68,66.14,66.71
4092,730.661,67.71,67.92,67.83
4093,730.724,66.41,67.32,66.72


## Import from .txt or .csv files

### Input File

Requires a text file with at least 2 columns.
(They don't need to be named in the file)

[wavelength]	[transmission data 1]	...[transmission data n (optional)]

Columns are tab-separated by default; a different separator can be specified with:

separator = '\t'

### Regex
A regex must be provided to identify these metadata fields from the filename:
* sensor
* element
* fluid

Optional, will be preserved in the metadata:
* rotation


In [3]:
# Folder containing text/csv files to import
# input_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
input_dir = "/Users/calum/git/Glasgow/sampleData/combined"
output_dir = 'sampledata'

# Construct a regex to extract metadata from the filename
# example 'HAN24_Sensor9_BeerBitter3_Rotation2.txt'
# Raw string with the extension dot escaped, so only a literal ".txt" suffix matches
# (an unescaped "." would accept any character in that position).
# NOTE(review): the rotation group is unnamed; if import_dir_to_csv looks groups up by
# name it would need (?P<rotation>.+) — confirm in csv_helpers.
regex = r'(?P<sensor>.+)_Sensor(?P<element>.+)_(?P<fluid>.+)_Rotation(.+)\.txt'

csv.import_dir_to_csv(input_dir, regex, output_dir, append=False)





imported HAN24_Sensor10_BeerBitter1_Rotation1.txt to sampledata/HAN24/2021-11-03-HAN24-BeerBitter1-10.tsv
imported HAN24_Sensor10_BeerBitter1_Rotation2.txt to sampledata/HAN24/2021-11-03-HAN24-BeerBitter1-10.tsv
imported HAN24_Sensor10_BeerBitter2_Rotation1.txt to sampledata/HAN24/2021-11-03-HAN24-BeerBitter2-10.tsv
imported HAN24_Sensor10_BeerBitter2_Rotation2.txt to sampledata/HAN24/2021-11-03-HAN24-BeerBitter2-10.tsv
imported HAN24_Sensor10_BeerBitter3_Rotation1.txt to sampledata/HAN24/2021-11-03-HAN24-BeerBitter3-10.tsv
imported HAN24_Sensor10_BeerBitter3_Rotation2.txt to sampledata/HAN24/2021-11-03-HAN24-BeerBitter3-10.tsv
imported HAN24_Sensor10_BeerCorny1_Rotation1.txt to sampledata/HAN24/2021-11-03-HAN24-BeerCorny1-10.tsv
imported HAN24_Sensor10_BeerCorny1_Rotation2.txt to sampledata/HAN24/2021-11-03-HAN24-BeerCorny1-10.tsv
imported HAN24_Sensor10_BeerCorny2_Rotation1.txt to sampledata/HAN24/2021-11-03-HAN24-BeerCorny2-10.tsv
imported HAN24_Sensor10_BeerCorny2_Rotation2.txt to 

## Chemistry Mapping

An example of applying a chemistry map to a metadata file


In [5]:
# Map zero-padded element numbers ('01', '02', ...) to chemistry names.
# The names are listed in element order and the keys are generated from their position.
# NOTE(review): keys are zero-padded, while the example filenames use unpadded element
# numbers (e.g. 'Sensor9') — confirm how apply_chem_map matches them.
chem_map = {
    f"{position:02d}": chemistry
    for position, chemistry in enumerate(
        [
            'Tin', 'Vanadium', 'Silver', 'Cadmium', 'Boron',
            'Molybdenum', 'Nickel', 'Scandium', 'Palladium', 'Neodymium',
            'Barium', 'Magnesium', 'Zinc', 'Antimony', 'Silicon',
        ],
        start=1,
    )
}

csv.apply_chem_map(chem_map, path='sampledata')

## Example of searching by metadata

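filter_by_metadata() returns a DataFrame of the measurements whose metadata
match the criteria.

It can search a whole data folder (`path`), or, if a previous result is passed
as `input_df`, it will narrow that result down instead.

Search criteria must include a metadata key, and can optionally include a
metadata value (treated as a regular expression when `regex=True`).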

In [8]:
import csv_helpers as csv

# Search 'sampledata', the folder the import and chemistry-map cells in this notebook
# use, so the cell works after Restart & Run All.
df = csv.filter_by_metadata('sensor', 'HAN24', path='sampledata')
# Each further call narrows the previous result via input_df.
df = csv.filter_by_metadata('element', '10', input_df=df)
# regex=True: the value is passed as a pattern rather than a literal string.
df = csv.filter_by_metadata('fluid', '.*Corny.*', input_df=df, regex=True)
df


Unnamed: 0_level_0,sensor,element,fluid,repeats,import_date,date,rotation
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-03-HAN24-BeerCorny1-10,HAN24,10,BeerCorny1,6,NaT,2021-11-03,1
2021-11-03-HAN24-BeerCorny2-10,HAN24,10,BeerCorny2,6,NaT,2021-11-03,1
2021-11-03-HAN24-BeerCorny3-10,HAN24,10,BeerCorny3,6,NaT,2021-11-03,1


## Export Table

export_dataframes() exports the measurements found under `path`, optionally restricted to a selection of measurements

e.g. the result of filtering for measurements with 'HAN24' in the 'sensor' metadata



In [4]:
# Option A (active): export everything found under the dummy-data folder.
export = csv.export_dataframes(path='dummydata')

# Option B: export only a filtered selection of the imported measurements.
# The selection is kept next to the call that consumes it, so the active export
# above does not depend on the 'imported' folder.
# selection = csv.filter_by_metadata('sensor', 'HAN24', path='imported')
# export = csv.export_dataframes(selection, path='imported')

display(export)

Unnamed: 0_level_0,Element A01,Element A01,Element A01,Element A01,Element A01,Element A01,Element A01,Element A01,Element A01,Element A01,...,Element D04,Element D04,Element D04,Element D04,Element D04,Element D04,Element D04,Element D04,Element D04,Element D04
wavelength,400.0,400.5,401.0,401.5,402.0,402.5,403.0,403.5,404.0,404.5,...,414.5,415.0,415.5,416.0,416.5,417.0,417.5,418.0,418.5,419.0
water,0.272873,0.004352,0.547732,0.859814,0.587844,0.695763,0.594448,0.444134,0.917752,0.112672,...,0.208928,0.656191,0.174285,,,,,,,
water,0.78257,0.790309,0.468786,0.993721,0.229588,0.036067,0.20717,0.729775,0.002463,0.253845,...,0.837502,0.502604,0.711348,0.287637,0.086511,,,,,
water,0.374733,0.950513,0.277958,0.71803,0.837796,0.394999,0.954672,0.010346,0.172504,0.694304,...,0.790508,0.475819,0.024441,0.222279,0.889552,0.881618,0.864247,0.568674,0.228034,
beer1,0.303772,0.316495,0.516365,0.287974,0.424268,0.975934,0.644433,0.841007,0.527024,0.328699,...,0.061378,0.492408,0.778818,,,,,,,
beer1,0.790811,0.977553,0.101774,0.620481,0.162429,0.494133,0.54437,0.886359,0.366412,0.917214,...,0.694795,0.998286,0.49152,0.695556,0.356364,0.337084,0.216545,0.832724,0.286077,0.41964
beer1,0.205673,0.488927,0.985283,0.55897,0.610014,0.042688,0.121026,0.135113,0.293245,0.260526,...,0.41651,0.278805,0.853946,0.945523,0.026128,0.093799,0.589228,0.219128,0.522375,
beer2,0.168555,0.460979,0.642488,0.108826,0.178112,0.465532,0.463392,0.993544,0.271798,0.581721,...,0.198651,0.768184,0.116079,,,,,,,
beer2,0.720378,0.538853,0.602519,0.018862,0.977563,0.064551,0.446477,0.241716,0.708645,0.213009,...,0.631929,0.186578,0.303336,0.476589,0.636623,0.083604,,,,
beer2,0.898904,0.215737,0.369073,0.96958,0.275314,0.508062,0.051152,0.167952,0.264485,0.568388,...,0.272676,0.129582,0.290253,0.007338,0.750908,0.353223,0.256098,0.223419,,


In [None]:
import os
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

# Named `data_dir` rather than `dir` so the built-in dir() is not shadowed for the rest
# of the session.
# TODO: machine-specific absolute path; point this at your own copy of the sample data.
data_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
testfile = "HAN24_Sensor10_BeerBitter1_Rotation1.txt"
df = pd.read_csv(os.path.join(data_dir, testfile), sep='\t')

# Label columns: the first is the wavelength axis, every remaining column is one repeat.
# (The bare `df` expression that used to follow was not the last statement of the cell,
# so it displayed nothing and has been dropped.)
reps = len(df.columns) - 1
col_names = ['wavelength'] + [f"rep{r + 1}" for r in range(reps)]
df.columns = col_names

def filter_df(df, wl_min, wl_max, resample):
    """Crop a spectrum table to a wavelength window, normalise it and resample it.

    Rows whose 'wavelength' lies strictly between ``wl_min - 1`` and ``wl_max + 1``
    are kept (the extra margin of 1 on each side gives the interpolation data
    points beyond the edges of the new grid). Every other column is divided by its
    own maximum within that window and linearly interpolated onto
    ``np.arange(wl_min, wl_max, resample)``.

    Args:
        df: DataFrame with a 'wavelength' column plus one or more value columns.
        wl_min: First wavelength of the output grid.
        wl_max: End of the output grid (exclusive, as in ``np.arange``).
        resample: Spacing of the output grid.

    Returns:
        A new DataFrame with the same column names, holding the resampled grid in
        'wavelength' and the normalised, interpolated values elsewhere. The input
        frame is left untouched.

    Raises:
        ValueError: Raised by ``interp1d`` when a point of the output grid lies
            outside the wavelengths available in the cropped data.
    """
    # One boolean mask instead of two successive .loc filters; same rows are selected.
    in_window = (df['wavelength'] > wl_min - 1) & (df['wavelength'] < wl_max + 1)
    window = df.loc[in_window]

    wavel_new = np.arange(wl_min, wl_max, resample)
    result = {}
    for col in window:
        if col == 'wavelength':
            result[col] = wavel_new
        else:
            # Normalise into a local Series rather than assigning back onto the
            # filtered slice, which triggers SettingWithCopyWarning.
            normalised = window[col] / window[col].max()
            f = interp1d(window['wavelength'], normalised, 'linear')
            result[col] = f(wavel_new)
    return pd.DataFrame(result)



# Crop the loaded spectrum to the 540-730 wavelength window and resample it onto a
# grid with 0.3 spacing (same units as the file's wavelength column), then display it.
df = filter_df(df, wl_min=540, wl_max=730, resample=0.3)
df





