In [1]:
import pickle
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
from mixture_composition_regression.examples.cellulose_example.helper_functions import *
from pathlib import Path

### Load models

In [2]:
predictor_files = ['cellulose_predictor.pkl', 'hemi_predictor.pkl', 'lignin_predictor.pkl']
predictor_files = ['./trained_models/' + p for p in predictor_files]
predictor_metadata_files = [p.split('.pkl')[0] + '_meta.txt' for p in predictor_files] 

ranges = read_range_files(predictor_metadata_files)
predictors = read_predictor_files(predictor_files)
containers = [[j,i] for i, j in zip(ranges, predictors)]
c_container = containers[0]
h_container = containers[1]
l_container = containers[2]

### Read in data files

In [3]:
p = Path('.').resolve() / 'data' # define the path to the current data
files = p.rglob('*.CSV' and '*-*') # find all the files with *.CSV in their name and a dash (i.e. to exclude composition.csv)
file_list = [str(file) for file in list(files)] # create a list with the string version of the filepath

# creat a list of dataframes


# Get the x coordinate that we want to re-grid our data onto. 
# This x coordinate *must* be the same as what the model was trained on.
# In this case, the training data is in the same folder, so we can use the x coordinate of one of the
# files as our xgrid.
data = [pd.read_csv(file) for file in file_list]
data = [df.rename(columns={df.columns[0]:'x', df.columns[1]:'y'}) for df in data]
xgrid = data[0]['x'] 


In [4]:
# files = p.rglob('*.CSV' and '*-*')
# fpaths = [str(file) for file in list(files)]
c_list, h_list, l_list = [], [], []
sample_names = [i.split('.')[0].split('/')[-1] for i in file_list]
regressand = 'da'
for f, n in zip(file_list, sample_names):
    
    l = predict_on_test_csvs(f, l_container, regressand, 'lignin', sample_name=n, 
                             printres=True, xgrid=xgrid, print_sample=True)
    l_list.append(l)
    
    c = predict_on_test_csvs(f, c_container, regressand, 'cellulose',sample_name=n, 
                             xgrid = xgrid,
                             printres=True)
    c_list.append(c)
    
    h = predict_on_test_csvs(f, h_container, regressand, 'hemicellulose',sample_name=n, 
                             xgrid = xgrid, printres=True)
    h_list.append(h)


Sample: T2-1
predicted composition lignin 0.542
predicted composition cellulose 0.320
predicted composition hemicellulose 0.111
Sample: T2-3
predicted composition lignin 0.723
predicted composition cellulose 0.212
predicted composition hemicellulose -0.042
Sample: T2-2
predicted composition lignin 0.723
predicted composition cellulose 0.306
predicted composition hemicellulose 0.085
Sample: T6-2
predicted composition lignin -0.174
predicted composition cellulose 0.434
predicted composition hemicellulose 0.648
Sample: T6-3
predicted composition lignin -0.650
predicted composition cellulose 0.760
predicted composition hemicellulose 1.474
Sample: T4-1
predicted composition lignin 0.320
predicted composition cellulose 0.298
predicted composition hemicellulose 0.295
Sample: T4-3
predicted composition lignin 0.653
predicted composition cellulose 0.179
predicted composition hemicellulose 0.146
Sample: T6-1
predicted composition lignin -0.207
predicted composition cellulose 0.541
predicted comp

In [5]:
h_list

[0.11148708376677083,
 -0.041539163256267664,
 0.08546475232822473,
 0.6478641250537006,
 1.47412161625826,
 0.29464558152375936,
 0.1456686723073324,
 0.6217946121625197,
 0.24207842646643238,
 0.3427904920292675,
 0.491973355774596,
 0.38691271919039477,
 0.4486038848687533,
 0.5131299732690165,
 0.34325488672969606,
 0.6706717281066712,
 0.14639589469835368,
 0.19602295736657027,
 0.7043913851568123,
 0.025602825046464384,
 0.5638744342172191,
 0.40239462855739083,
 0.3612796135195795]