In [8]:
import csv_helpers as csv
from IPython.display import display

## Save Dummy Data for all elements into tsv folder structure


In [3]:

import copy

import dummydata

# Work on a private deep copy: assigning dummydata.defaults directly would only
# alias the module-level dict, so setting the sensor name below would silently
# rewrite the shared defaults (including the nested 'instrument' dict).
run_settings = copy.deepcopy(dummydata.defaults)
run_settings['instrument']['sensor'] = 'DUM01'

# Show the settings that are actually used for this run (sensor already set).
print("Generating Dummy tsv files with the following settings")
display(run_settings)

dummydata.generate_tsv(run_settings, append=False)

# Optional follow-up: append a second data set under a different sensor name.
# print("appending another data set with new sensor name")
# run_settings['instrument']['sensor'] = 'DUM02'
# dummydata.generate_tsv(run_settings, append=True)


Generating Dummy tsv files with the following settings


{'filename': 'test.hdf5',
 'output_dir': './dummydata',
 'instrument': {'name': 'Lab_setup_02',
  'sensor': 'DUM01',
  'element_rows': 4,
  'element_cols': 4,
  'light Source': 'Stellarnet LED White',
  'spectrometer': 'Stellarnet BlueWave VIS-25'},
 'fluid_list': ['water', 'beer1', 'beer2'],
 'wavelength_range': [400, 420, 0.5],
 'elements': 'all',
 'repeats': 3}

## Preview of csv spectrometer data

In [5]:
import os
import pandas as pd
# Folder and file name of the single spectrometer text file to preview.
# Named `data_dir` rather than `dir` so the built-in dir() is not shadowed for
# the rest of the kernel session.
# NOTE(review): this is a machine-specific absolute path - adjust before running elsewhere.
data_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
testfile = "HAN24_Sensor10_BeerBitter1_Rotation1.txt"
df = pd.read_csv(os.path.join(data_dir, testfile), sep='\t')

# Label columns: the first one becomes 'wavelength', every remaining column is
# numbered as a repeat (rep1, rep2, ...).
reps = len(df.columns) - 1
col_names = ['wavelength'] + [f"rep{r + 1}" for r in range(reps)]
df.columns = col_names
df

Unnamed: 0,wavelength,rep1,rep2,rep3
0,457.131,98.77,99.22,98.77
1,457.204,99.00,99.16,98.96
2,457.277,95.12,95.17,94.66
3,457.349,93.82,93.88,93.54
4,457.422,93.98,94.03,93.78
...,...,...,...,...
4090,730.534,66.20,65.54,66.02
4091,730.598,66.68,66.14,66.71
4092,730.661,67.71,67.92,67.83
4093,730.724,66.41,67.32,66.72


## Import from .txt or .csv files

### Input File

Requires a text file with at least 2 columns.
(They don't need to be named in the file)

[wavelength]	[transmission data 1]	...[transmission data n (optional)]

Columns are tab-separated by default; a different separator can be specified with:

separator = '\t'

### Regex
A regex must be provided to identify these metadata fields from the filename:
* sensor
* element
* fluid

Optional, will be preserved in the metadata:
* rotation


In [11]:
# Folder containing the text/csv files to import
# input_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
input_dir = "/Users/calum/git/Glasgow/sampleData/combined"
output_dir = 'imported'

# Regex that extracts the metadata fields from each filename,
# e.g. 'HAN24_Sensor9_BeerBitter3_Rotation2.txt'.
# Written as a raw string with an escaped dot so that only a literal '.txt'
# extension matches (an unescaped '.' matches any character).
# NOTE(review): the rotation group is unnamed here, unlike sensor/element/fluid;
# confirm how import_dir_to_csv picks up the rotation value.
regex = r'(?P<sensor>.+)_Sensor(?P<element>.+)_(?P<fluid>.+)_Rotation(.+)\.txt'

csv.import_dir_to_csv(input_dir, regex, output_dir, append=False)




## Example of searching by metadata

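filter_by_metadata() returns a table (DataFrame) of the measurements whose
metadata match the criteria.

It can search everything stored under an import folder (`path=...`), or, if the
result of an earlier search is provided (`input_df=...`), it will work from
that.

Search criteria must include a metadata key, and can optionally include a
metadata value. In the example below the last search passes a pattern as the
value together with `regex=True`.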

In [8]:
import csv_helpers as csv
# Narrow the imported measurements in three steps: sensor, then element, then
# fluid. Each step after the first is handed the previous result as input_df.
han24_rows = csv.filter_by_metadata('sensor', 'HAN24', path = './imported')
element10_rows = csv.filter_by_metadata('element', '10', input_df=han24_rows)
df = csv.filter_by_metadata(
    'fluid',
    '.*Corny.*',
    input_df=element10_rows,
    regex=True,
)
df


Unnamed: 0_level_0,sensor,element,fluid,repeats,import_date,date,rotation
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-03-HAN24-BeerCorny1-10,HAN24,10,BeerCorny1,6,NaT,2021-11-03,1
2021-11-03-HAN24-BeerCorny2-10,HAN24,10,BeerCorny2,6,NaT,2021-11-03,1
2021-11-03-HAN24-BeerCorny3-10,HAN24,10,BeerCorny3,6,NaT,2021-11-03,1


## Export Table

export_dataframes() requires a selection of measurements to export,

e.g. the result of filtering for measurements with 'HAN24' in the 'sensor' metadata.



In [7]:
# Select every measurement whose 'sensor' metadata is 'HAN24' from the
# 'imported' folder, then build and show the export table for that selection.
selection = csv.filter_by_metadata(
    'sensor',
    'HAN24',
    path='imported',
)
export = csv.export_dataframes(
    selection,
    path='imported',
)
display(export)

Unnamed: 0_level_0,Element 1,Element 1,Element 1,Element 1,Element 1,Element 1,Element 1,Element 1,Element 1,Element 1,...,Element 9,Element 9,Element 9,Element 9,Element 9,Element 9,Element 9,Element 9,Element 9,Element 9
wavelength,457.131,457.204,457.277,457.349,457.422,457.495,457.567,457.640,457.712,457.785,...,730.219,730.282,730.345,730.408,730.471,730.534,730.598,730.661,730.724,730.787
BeerBitter1,97.54,97.24,86.29,84.85,84.62,84.40,84.34,84.45,84.53,84.21,...,53.45,53.40,53.67,54.98,54.38,53.29,53.17,54.93,55.25,54.53
BeerBitter1,97.46,97.28,86.65,85.18,85.00,84.75,84.59,84.66,84.56,84.07,...,53.06,54.30,55.30,55.11,54.05,54.16,54.28,54.90,54.64,55.38
BeerBitter1,97.39,97.21,86.43,84.90,84.76,84.59,84.50,84.72,84.80,84.52,...,53.03,53.06,53.37,54.62,53.93,52.81,53.26,55.05,54.61,54.44
BeerBitter1,97.27,97.22,86.82,85.30,84.82,84.68,84.94,85.11,85.14,84.96,...,56.64,56.93,58.13,59.43,58.07,58.09,58.72,59.44,58.64,59.40
BeerBitter1,97.31,97.17,86.51,84.90,84.31,84.00,84.03,84.39,84.48,84.22,...,56.64,57.35,58.61,59.49,58.68,58.27,58.09,59.26,58.91,58.49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Beer,82.30,80.79,80.63,80.56,80.38,80.81,80.90,80.92,81.54,81.85,...,57.23,54.01,55.87,57.14,58.30,55.23,55.51,56.60,57.20,54.90
Beer,81.91,80.21,80.29,80.39,80.42,80.38,80.62,80.77,81.36,81.54,...,56.03,55.08,56.96,57.14,57.29,55.79,56.99,56.09,56.44,55.78
Beer,81.35,79.39,79.18,79.77,80.53,80.52,81.27,82.23,83.28,84.03,...,56.46,57.12,56.69,56.80,58.25,57.49,56.81,56.57,56.98,57.82
Beer,81.35,79.52,78.98,79.76,80.45,80.69,81.30,81.84,83.34,84.29,...,56.28,57.69,57.81,56.89,57.69,57.49,57.28,56.75,57.82,58.59


In [None]:
import os
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
# Folder and file name of the spectrometer text file to resample.
# Named `data_dir` rather than `dir` so the built-in dir() is not shadowed for
# the rest of the kernel session.
# NOTE(review): this is a machine-specific absolute path - adjust before running elsewhere.
data_dir = "/Users/calum/git/Glasgow/sampleData/Beer x Bitter"
testfile = "HAN24_Sensor10_BeerBitter1_Rotation1.txt"
df = pd.read_csv(os.path.join(data_dir, testfile), sep='\t')

# Label columns: the first one becomes 'wavelength', every remaining column is
# numbered as a repeat (rep1, rep2, ...).
reps = len(df.columns) - 1
col_names = ['wavelength'] + [f"rep{r + 1}" for r in range(reps)]
df.columns = col_names

def filter_df(df, wl_min, wl_max, resample):
    """Crop, normalise and resample spectra onto an evenly spaced wavelength grid.

    Rows whose wavelength lies strictly between ``wl_min - 1`` and
    ``wl_max + 1`` are kept (presumably the 1-unit margin is there so the
    interpolation can reach both ends of the new grid). Every column other
    than 'wavelength' is divided by its maximum inside that window and then
    linearly interpolated onto ``np.arange(wl_min, wl_max, resample)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'wavelength' column; all other columns are treated as
        data columns.
    wl_min, wl_max : number
        Start (included) and stop (excluded, as in ``np.arange``) of the new
        wavelength grid.
    resample : number
        Step of the new wavelength grid.

    Returns
    -------
    pandas.DataFrame
        A new frame, columns in the original order: 'wavelength' holds the new
        grid, the other columns hold the normalised, resampled values. The
        input frame is not modified.

    Raises
    ------
    ValueError
        If no rows fall inside the wavelength window, or (raised by the
        interpolator) if the new grid reaches beyond the wavelengths kept.
    """
    wavelength = df['wavelength']
    in_window = (wavelength > wl_min - 1) & (wavelength < wl_max + 1)
    window = df.loc[in_window]
    if window.empty:
        raise ValueError(
            f"no rows with wavelength between {wl_min - 1} and {wl_max + 1}"
        )

    wavel_new = np.arange(wl_min, wl_max, resample)
    result = {}
    for col in window:
        if col == 'wavelength':
            result[col] = wavel_new
            continue
        # Normalise into a local Series instead of assigning back into the
        # filtered slice, which would be a chained assignment
        # (SettingWithCopyWarning).
        normalised = window[col] / window[col].max()
        f = interp1d(window['wavelength'], normalised, 'linear')
        result[col] = f(wavel_new)
    return pd.DataFrame(result)



# Settings for the resampling call: new grid from 540 up to (not including) 730
# in steps of 0.3.
resample_kwargs = {'wl_min': 540, 'wl_max': 730, 'resample': 0.3}
df = filter_df(df, **resample_kwargs)
df





