In [8]:
import csv_helpers as csv
from IPython.display import display

## Save Dummy Data for all elements into tsv folder structure


In [2]:

import copy

import dummydata

print("Generating Dummy tsv files with the following settings")
# Work on a private deep copy of the defaults. Assigning dummydata.defaults
# directly only creates an alias, so setting the sensor name below would
# rewrite the module-level defaults (including the nested 'instrument' dict)
# for every later reader of dummydata.defaults.
run_settings = copy.deepcopy(dummydata.defaults)
display(run_settings)

# First data set: written fresh (append=False)
run_settings['instrument']['sensor'] = 'DUM01'
dummydata.generate_tsv(run_settings, append=False)

# Second data set: same settings apart from the sensor name, appended
print("appending another data set with new sensor name")
run_settings['instrument']['sensor'] = 'DUM02'
dummydata.generate_tsv(run_settings, append=True)


Generating Dummy tsv files with the following settings


{'filename': 'test.hdf5',
 'output_dir': './dummydata',
 'instrument': {'name': 'Lab_setup_02',
  'sensor': 'DUM01',
  'element_rows': 4,
  'element_cols': 4,
  'light Source': 'Stellarnet LED White',
  'spectrometer': 'Stellarnet BlueWave VIS-25'},
 'fluid_list': ['water', 'beer1', 'beer2'],
 'wavelength_range': [400, 420, 0.5],
 'elements': 'all',
 'repeats': 3,
 'measuredOn': '2021_11_02'}

appending another data set with new sensor name


## Preview of csv spectrometer data

In [5]:
import os
import pandas as pd
# NOTE(review): absolute, machine-specific path; point this at your own copy
# of the sample data before running.
# Named `data_dir` rather than `dir` so the builtin dir() is not shadowed.
data_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
testfile = "HAN24_Sensor10_BeerBitter1_Rotation1.txt"
df = pd.read_csv(os.path.join(data_dir, testfile), sep='\t')

# Label columns: the first becomes 'wavelength', the remaining ones are
# numbered rep1, rep2, ...
reps = len(df.columns) - 1
col_names = ['wavelength'] + [f"rep{r + 1}" for r in range(reps)]
df.columns = col_names
df

Unnamed: 0,wavelength,rep1,rep2,rep3
0,457.131,98.77,99.22,98.77
1,457.204,99.00,99.16,98.96
2,457.277,95.12,95.17,94.66
3,457.349,93.82,93.88,93.54
4,457.422,93.98,94.03,93.78
...,...,...,...,...
4090,730.534,66.20,65.54,66.02
4091,730.598,66.68,66.14,66.71
4092,730.661,67.71,67.92,67.83
4093,730.724,66.41,67.32,66.72


In [1]:
import csv_helpers as csv

# Placeholder metadata passed to csv.store() together with the DataFrame
dummy_metadata = dict(
    sensor='HAN24',
    element='A02',
    fluid='beer',
)

# `df` is not created in this cell; it must already exist in the kernel
csv.store(df, dummy_metadata)


NameError: name 'df' is not defined

In [6]:
# Relies on `csv` (csv_helpers) and `df` from earlier cells being present in the kernel
dummy_metadata = {'sensor': 'HAN24', 'element': 'A02', 'fluid': 'beer'}
csv.store(df, dummy_metadata)

NameError: name 'csv' is not defined

## Import from .txt or .csv files

### Input File

Requires a text file with at least 2 columns.
(They don't need to be named in the file)

[wavelength]	[transmission data 1]	...[transmission data n (optional)]

The default is tab separation; the separator can be set explicitly with:

separator = '\t'

### Regex
A regex must be provided to identify these metadata fields from the filename:
* sensor
* element
* fluid

Optional, will be preserved in the metadata:
* rotation


In [2]:
import shutil
import re
import csv_helpers as csv
import pandas as pd



# Folder containing text/csv files to import
# import_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
input_dir = "/Users/calum/git/Glasgow/sampleData/combined"
output_dir = './imported'

# Construct a regex to extract metadata from the filename
# example 'HAN24_Sensor9_BeerBitter3_Rotation2.txt'
# Raw string with the extension dot escaped: an unescaped '.' matches any
# character, so names such as '..._Rotation2_txt' would also be accepted.
regex = r'(?P<sensor>.+)_Sensor(?P<element>.+)_(?P<fluid>.+)_Rotation(?P<rotation>.+)\.txt'

csv.import_dir_to_csv(input_dir, regex, output_dir, separator='\t', append=False)




In [None]:
# Folder containing text/csv files to import
# import_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
import_dir = "/Users/calum/git/Glasgow/sampleData/combined"

# Construct a regex to extract metadata from the filename
# example 'HAN24_Sensor9_BeerBitter3_Rotation2.txt'
# Raw string with the extension dot escaped: an unescaped '.' matches any
# character, so names such as '..._Rotation2_txt' would also be accepted.
regex = r'(?P<sensor>.+)_Sensor(?P<element>.+)_(?P<fluid>.+)_Rotation(?P<rotation>.+)\.txt'

# NOTE(review): neither `h5` nor `filename` is defined in the cells visible
# here; both must be provided before this cell can run.
h5.import_dir_to_hdf(import_dir, regex, filename, append=False)

## Inspect an HDF5 file by metadata

In [None]:
# Inspect the file 'test.hdf5'.
# NOTE(review): `h5` is not imported in the cells visible here; confirm where it comes from.
h5.inspect('test.hdf5')

## Example of searching by metadata

filter_by_metadata() returns a list of h5 nodes that match the
criteria.

It can search a full h5 file or, if a nodelist is provided, only the nodes in
that list.

Search criteria must include a metadata key and can optionally include a
metadata value.

In [None]:
# Step 1: nodes selected by the 'element' metadata key with value "01"
e01 = h5.filter_by_metadata(filename, 'element', "01")

# Step 2: apply a second filter on 'fluid', starting from the step-1 node list
e01_BeerBitter1 = h5.filter_by_metadata(
    filename,
    'fluid',
    "BeerBitter1",
    nodelist=e01,
)

print(e01_BeerBitter1)


## Export Table

export_dataframes() requires a list of measurements to export

e.g. by filtering for nodes with 'HAN24' in the 'sensor' metadata



In [None]:
# Measurements selected by the 'sensor' metadata key with value 'HAN24'
measurements = h5.filter_by_metadata(filename, 'sensor', 'HAN24')

# Export the selected measurements and show the result inline
export = h5.export_dataframes(
    filename,
    measurements,
    outfile=None,
)
display(export)

In [None]:
import os
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
# NOTE(review): absolute, machine-specific path; point this at your own copy
# of the sample data before running.
# Named `data_dir` rather than `dir` so the builtin dir() is not shadowed.
data_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
testfile = "HAN24_Sensor10_BeerBitter1_Rotation1.txt"
df = pd.read_csv(os.path.join(data_dir, testfile), sep='\t')

# Label columns: the first becomes 'wavelength', the remaining ones are
# numbered rep1, rep2, ...
reps = len(df.columns) - 1
col_names = ['wavelength'] + [f"rep{r + 1}" for r in range(reps)]
df.columns = col_names

def filter_df(df, wl_min, wl_max, resample):
    """Crop, normalise and resample the data columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'wavelength' column; every other column is treated as
        a data column sampled at those wavelengths.
    wl_min, wl_max : number
        Bounds of the output grid. Input rows are kept when their wavelength
        lies strictly between ``wl_min - 1`` and ``wl_max + 1``.
    resample : number
        Step of the output grid.

    Returns
    -------
    pandas.DataFrame
        'wavelength' holds ``np.arange(wl_min, wl_max, resample)`` (so
        ``wl_max`` itself is excluded). Every other column holds the input
        column divided by its maximum over the kept rows, then linearly
        interpolated onto that grid.

    Notes
    -----
    ``interp1d`` is used with its default bounds handling, so the kept rows
    must span the whole output grid or the interpolation call raises.
    The input frame is not modified.
    """
    # Keep a one-unit margin on each side of the requested range
    # (presumably so the grid ends have neighbouring samples to interpolate from).
    in_window = (df['wavelength'] > wl_min - 1) & (df['wavelength'] < wl_max + 1)
    window = df.loc[in_window]

    wavel_new = np.arange(wl_min, wl_max, resample)
    result = {}
    for col in window:
        if col == 'wavelength':
            result[col] = wavel_new
        else:
            # Normalise into a local Series instead of assigning back into the
            # filtered frame, which was a chained assignment on a slice.
            normalised = window[col] / window[col].max()
            f = interp1d(window['wavelength'], normalised, 'linear')
            result[col] = f(wavel_new)
    return pd.DataFrame(result)



# Replace `df` with the filter_df result for the range 540 to 730 at a 0.3 step
df = filter_df(df, wl_min=540, wl_max=730, resample=0.3)
df





