In [34]:
import pickle
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata, interpn
from mixture_composition_regression.examples.cellulose_example.helper_functions import *
from pathlib import Path

### Load models

In [35]:
# Pickled predictors live under ./trained_models/, listed in the order
# cellulose, hemicellulose, lignin, rot.
predictor_files = [
    './trained_models/' + name
    for name in ('cellulose_predictor.pkl', 'hemi_predictor.pkl',
                 'lignin_predictor.pkl', 'rot_predictor.pkl')
]

# Every predictor has two sidecar text files that share its path minus the '.pkl' suffix.
predictor_stems = [pkl_path.split('.pkl')[0] for pkl_path in predictor_files]
predictor_metadata_files = [stem + '_meta.txt' for stem in predictor_stems]
predictor_uncertainty_files = [stem + '_uncertainty.txt' for stem in predictor_stems]

ranges = read_range_files(predictor_metadata_files)
predictors = read_predictor_files(predictor_files)
# Only the first entry of each record returned by read_uncertainty_files is kept.
uncertainties = [record[0] for record in read_uncertainty_files(predictor_uncertainty_files)]

# Pair each predictor with its range entry as [predictor, range], then expose the
# four pairs under short names (c = cellulose, h = hemi, l = lignin, r = rot).
containers = [[predictor, value_range] for value_range, predictor in zip(ranges, predictors)]
c_container, h_container, l_container, r_container = (containers[k] for k in range(4))

#### Get an appropriate x grid

In [36]:
# Load the common x grid onto which every spectrum is later interpolated.
# The file is located relative to the working directory instead of through a
# machine-specific absolute path, so the notebook runs on any checkout.
# NOTE(review): assumes the notebook is run from the cellulose_example directory,
# the same assumption the './trained_models/' and 'expanded_training_set'
# relative paths in this notebook already make -- confirm.
fn = str(Path().resolve() / 'expanded_training_set'
         / 'lignocellulose_expanded_training_set' / 'xgrid.csv')
xgrid = np.loadtxt(fn, delimiter=',')

### Read in data files

In [37]:
# Root directory of the spectra and the (case-sensitive) suffixes to pick up.
p = Path().resolve() / 'expanded_training_set'
extensions = ['.CSV']
files = [candidate for candidate in p.rglob('*') if candidate.suffix in extensions]

# Load each file as a headerless two-column (x, y) table.
data = [pd.read_csv(csv_path, names=['x', 'y']) for csv_path in files]

# Keep only rows whose x lies strictly inside the span of xgrid: rows failing
# either bound are blanked by where() and then removed by dropna().
data = [
    spectrum.where(spectrum['x'] > xgrid.min())
            .where(spectrum['x'] < xgrid.max())
            .dropna()
    for spectrum in data
]

# Interpolate every trimmed spectrum onto xgrid. The resampled frames are
# collected in a fresh list (data2) rather than written back into `data`.
data2 = []
for spectrum in data:
    resampled = griddata(spectrum['x'].values, spectrum['y'].values, xgrid).T
    frame = pd.DataFrame([xgrid, resampled]).T.dropna()
    data2.append(frame.rename(columns=dict(zip(frame.columns[:2], ('x', 'y')))))

# From here on `data` refers to the resampled spectra; data2 stays defined as well.
data = data2

In [53]:
def save_predictions_to_file(sample_names, l_list, c_list, h_list, r_list, uncertainties,
                             output_path='results.csv'):
    """Tabulate per-sample predictions, attach uncertainties, and write them to CSV.

    Parameters
    ----------
    sample_names : list
        One label per sample.
    l_list, c_list, h_list, r_list : list
        Values stored in the 'lignin', 'cellulose', 'hemicellulose' and
        'white rot' columns respectively, in the same order as ``sample_names``.
    uncertainties : iterable
        One value per regressand, each convertible with ``float``, ordered as
        lignin, cellulose, hemicellulose, white rot. Extra entries are ignored.
    output_path : str or path-like, optional
        Destination CSV file. Defaults to ``'results.csv'`` in the working
        directory, which is what this function always wrote before.

    Returns
    -------
    pandas.DataFrame
        One row per complete sample with the five value columns plus one
        ``'uncertainty<regressand>'`` column per regressand.
    """
    print('We are in save')

    # Each input list becomes one row; transposing yields one row per sample.
    # Lists shorter than the longest one are padded with missing values, and
    # dropna() then discards those incomplete samples.
    df = pd.DataFrame([sample_names, l_list, c_list, h_list, r_list]).T
    df.columns = ['sample', 'lignin', 'cellulose', 'hemicellulose', 'white rot']
    df = df.dropna().copy()

    regressands = ['lignin', 'cellulose', 'hemicellulose', 'white rot']
    for name, dy in zip(regressands, uncertainties):
        # Assign a scalar so it broadcasts over whatever rows survived dropna();
        # an array sized from c_list raises a length mismatch as soon as any
        # incomplete row has been dropped.
        # NOTE(review): the factor 2 is presumably a two-sigma band -- confirm.
        df['uncertainty' + name] = 2 * float(dy)

    df.to_csv(output_path)
    return df

In [54]:
# Score every spectrum whose file name contains a dash and ends in '.CSV'.
# (The previous pattern, '*.CSV' and '*-*', evaluated to just '*-*', so the
# extension filter was never applied.)
files = list(p.rglob('*-*.CSV'))
fpaths = [str(file) for file in files]
# Path.stem drops only the final extension and does not depend on the path
# separator or on dots appearing in directory names.
sample_names = [Path(fpath).stem for fpath in fpaths]

c_list, h_list, l_list, r_list = [], [], [], []
predicted_names = []  # samples for which all four predictions succeeded
regressand = 'da'
to_print = False
for f, n in zip(fpaths, sample_names):
    print(n)
    try:
        lignin = predict_on_test_csvs(f, l_container, regressand, 'lignin', sample_name=n,
                                      printres=to_print, xgrid=xgrid, print_sample=True)
        cellulose = predict_on_test_csvs(f, c_container, regressand, 'cellulose', sample_name=n,
                                         xgrid=xgrid, printres=to_print)
        hemicellulose = predict_on_test_csvs(f, h_container, regressand, 'hemicellulose',
                                             sample_name=n, xgrid=xgrid, printres=to_print)
        rot = predict_on_test_csvs(f, r_container, regressand, 'rot', sample_name=n,
                                   xgrid=xgrid, printres=to_print)
    except ValueError as ve:
        # Skip the whole sample: recording a partial set of predictions would
        # leave the four lists with different lengths and shift later rows.
        print(ve)
        continue

    # Only record a sample once all four predictions exist, so that names and
    # values stay aligned row for row.
    predicted_names.append(n)
    l_list.append(lignin)
    c_list.append(cellulose)
    h_list.append(hemicellulose)
    r_list.append(rot)  # previously appended the hemicellulose value by mistake

# save_predictions_to_file pairs uncertainties with (lignin, cellulose,
# hemicellulose, white rot), whereas `uncertainties` follows the predictor file
# order (cellulose, hemi, lignin, rot).
# NOTE(review): assumes read_uncertainty_files preserves the input file order -- confirm.
ordered_uncertainties = [uncertainties[2], uncertainties[0], uncertainties[1], uncertainties[3]]
df = save_predictions_to_file(predicted_names, l_list, c_list, h_list, r_list,
                              ordered_uncertainties)


Day 21-1
Sample: Day 21-1
Day 23-3
Sample: Day 23-3
Day 23-2
Sample: Day 23-2
Day 21-2
Sample: Day 21-2
Day 23-1
Sample: Day 23-1
Day 21-3
Sample: Day 21-3
Day 18-3
Sample: Day 18-3
Day 18-2
Sample: Day 18-2
Day 18-1
Sample: Day 18-1
Day 20-2
Sample: Day 20-2
Day 20-3
Sample: Day 20-3
Day 22-1
Sample: Day 22-1
Day 22-3
Sample: Day 22-3
Day 20-1
Sample: Day 20-1
Day 22-2
Sample: Day 22-2
Day 19-1
Sample: Day 19-1
Day 24-1
Sample: Day 24-1
Day 24-3
Sample: Day 24-3
Day 19-3
Sample: Day 19-3
Day 22-5
Sample: Day 22-5
Day 22-4
Sample: Day 22-4
Day 19-2
Sample: Day 19-2
Day 24-2
Sample: Day 24-2
Day 28-2
Sample: Day 28-2
Day 15-2
Sample: Day 15-2
Day 15-3
Sample: Day 15-3
Day 17-1
Sample: Day 17-1
Day 28-3
Sample: Day 28-3
Day 28-1
Sample: Day 28-1
Day 17-3
Sample: Day 17-3
Day 15-1
Sample: Day 15-1
Day 17-2
Sample: Day 17-2
Day 13-2
Sample: Day 13-2
CF11-3
Sample: CF11-3
Day 15-4
Sample: Day 15-4
CF11-2
Sample: CF11-2
Day 11-1
Sample: Day 11-1
Day 13-3
Sample: Day 13-3
Day 13-1
Sample: Day