# Proof-of-concept validation notebook for LEPR upload

In [None]:
import pandas as pd
import numpy as np
import xlrd
import logging

In [None]:
# Validation messages are written to this file. It is opened in 'w' mode, so
# it is truncated when the handler is created, and each record is rendered as
# "LEVEL(function name): message".
log_filename = 'validation.log'
logging.basicConfig(
    format="%(levelname)s(%(funcName)s): %(message)s",
    filemode='w',
    filename=log_filename,
)


def print_log_file(log_filename=log_filename):
    """Print the entire contents of a log file to standard output.

    Parameters
    ----------
    log_filename : str, optional
        Path of the file to print. Defaults to the value the module-level
        ``log_filename`` had when this function was defined.
    """
    with open(log_filename) as log_file:
        contents = log_file.read()
    print(contents)

In [None]:
# Read the whole upload workbook. With sheet_name=None pandas returns a dict
# that maps each sheet name to its own DataFrame.
upload_data = pd.read_excel(
    io='../data/upload_validation.xlsx',
    sheet_name=None,
)

In [None]:
def extract_chem_dat(upload_data, sheet_name='6 Run Products',
                     header_row_num=4, chem_dat_col_index=13):
    """Split the run-products sheet into chemistry data and its metadata.

    The sheet is read purely by position. From column
    ``chem_dat_col_index`` rightwards, row 0 supplies the column names,
    rows 1 and 2 supply the method id and the unit of each column, and every
    row after ``header_row_num`` is one run, labelled with the value found in
    the sheet's first column.

    Parameters
    ----------
    upload_data : mapping of str to pandas.DataFrame
        Workbook sheets keyed by sheet name.
    sheet_name : str, optional
        Key of the sheet to read.
    header_row_num : int, optional
        Position of the last header row; run data starts on the next row.
    chem_dat_col_index : int, optional
        Position of the first chemistry column.

    Returns
    -------
    chem_dat : pandas.DataFrame
        One row per run (indexed by run name), one column per chemistry
        column.
    chem_dat_info : pandas.DataFrame
        Two rows, ``method_id`` and ``unit``, with the same columns as
        ``chem_dat``.

    Raises
    ------
    KeyError
        If ``sheet_name`` is not a key of ``upload_data``.
    """
    run_products = upload_data[sheet_name]

    run_names = run_products.iloc[header_row_num + 1:, 0]

    # Work on copies so that relabelling columns and rows below can never
    # write through to the frames held in ``upload_data``.
    dat = run_products.iloc[:, chem_dat_col_index:].copy()
    dat.columns = dat.iloc[0]
    # Drop the name row now that it has become the column labels.
    dat = dat.iloc[1:]

    chem_dat_info = dat.iloc[:2].copy()
    chem_dat_info.index = ['method_id', 'unit']

    # One row was dropped above, so position ``header_row_num`` here is the
    # sheet row right after the header, i.e. the first row of ``run_names``.
    chem_dat = dat.iloc[header_row_num:].copy()
    chem_dat.index = run_names

    return chem_dat, chem_dat_info

In [None]:
# Extract the chemistry values and their per-column metadata from the workbook.
chem_dat, chem_dat_info = extract_chem_dat(upload_data)
# Bare trailing expression: shown as this notebook cell's output.
chem_dat_info

In [None]:
# Preview the first rows of the extracted chemistry data.
chem_dat.head()

In [None]:
def validate_chem_error_columns(chem_dat_info):
    """Log an error for every measurement column without an ``_err`` partner.

    A column whose label ends in ``_err`` is treated as an error column;
    every other column is a measurement and is expected to have a sibling
    column named ``<label>_err``.

    Parameters
    ----------
    chem_dat_info : pandas.DataFrame
        Frame whose column labels are the chemistry column names.

    Returns
    -------
    None
        Problems are reported through ``logging.error`` only.
    """
    columns = chem_dat_info.columns
    for col in columns:
        # Compare labels by their string form so that a non-string label
        # (e.g. a number or NaN) is checked instead of raising
        # AttributeError on ``.endswith``.
        if str(col).endswith('_err'):
            continue
        if f'{col}_err' not in columns:
            logging.error('%s_err missing from chemistry data columns', col)
            

def validate_chem_units(chem_dat_info):
    """Log a critical message for every chemistry column that has no unit.

    Parameters
    ----------
    chem_dat_info : pandas.DataFrame
        Frame with a row labelled ``unit`` and one column per chemistry
        column.

    Returns
    -------
    None
        Problems are reported through ``logging.critical`` only.
    """
    for col, unit in chem_dat_info.loc['unit'].items():
        # pd.isna catches None and every NaN object; an identity test
        # against np.nan only matches that one specific object.
        if pd.isna(unit):
            logging.critical('%s does not provide any units', col)
            
def validate_chem_method(chem_dat_info):
    """Log a critical message for every chemistry column with no method id.

    Parameters
    ----------
    chem_dat_info : pandas.DataFrame
        Frame with a row labelled ``method_id`` and one column per chemistry
        column.

    Returns
    -------
    None
        Problems are reported through ``logging.critical`` only.
    """
    for col, method_id in chem_dat_info.loc['method_id'].items():
        # pd.isna catches None and every NaN object; an identity test
        # against np.nan only matches that one specific object.
        if pd.isna(method_id):
            logging.critical('%s does not provide any method id', col)

In [None]:
# Run each metadata check in turn; the checks report problems through the
# logging module rather than raising or returning a value.
validate_chem_error_columns(chem_dat_info)
validate_chem_units(chem_dat_info)
validate_chem_method(chem_dat_info)

In [None]:
# Print the log file to show whatever the validation checks reported.
print_log_file()