In [1]:
import pickle
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
from mixture_composition_regression.examples.cellulose_example.helper_functions import *
from pathlib import Path

### Load models

In [2]:
predictor_files = ['cellulose_predictor.pkl', 'hemi_predictor.pkl', 'lignin_predictor.pkl']
predictor_files = ['./trained_models/' + p for p in predictor_files]
predictor_metadata_files = [p.split('.pkl')[0] + '_meta.txt' for p in predictor_files] 

ranges = read_range_files(predictor_metadata_files)
predictors = read_predictor_files(predictor_files)
containers = [[j,i] for i, j in zip(ranges, predictors)]
c_container = containers[0]
h_container = containers[1]
l_container = containers[2]

### Read in data files

In [5]:
p = Path('.').resolve() / 'data' # define the path to the current data
files = p.rglob('*.CSV' and '*-*') # find all the files with *.CSV in their name and a dash (i.e. to exclude composition.csv)
file_list = [str(file) for file in list(files)] # create a list with the string version of the filepath

# creat a list of dataframes


# Get the x coordinate that we want to re-grid our data onto. 
# This x coordinate *must* be the same as what the model was trained on.
# In this case, the training data is in the same folder, so we can use the x coordinate of one of the
# files as our xgrid.
data = [pd.read_csv(file) for file in file_list]
data = [df.rename(columns={df.columns[0]:'x', df.columns[1]:'y'}) for df in data]
xgrid = data[0]['x'] 


In [6]:
# files = p.rglob('*.CSV' and '*-*')
# fpaths = [str(file) for file in list(files)]
c_list, h_list, l_list = [], [], []
sample_names = [i.split('.')[0].split('/')[-1] for i in file_list]
regressand = 'da'
for f, n in zip(file_list, sample_names):
    
    l = predict_on_test_csvs(f, l_container, regressand, 'lignin', sample_name=n, 
                             printres=True, xgrid=xgrid, print_sample=True)
    l_list.append(l)
    
    c = predict_on_test_csvs(f, c_container, regressand, 'cellulose',sample_name=n, 
                             xgrid = xgrid,
                             printres=True)
    c_list.append(c)
    
    h = predict_on_test_csvs(f, h_container, regressand, 'hemicellulose',sample_name=n, 
                             xgrid = xgrid, printres=True)
    h_list.append(h)


Sample: T2-1
predicted composition lignin 0.466
predicted composition cellulose 0.339
predicted composition hemicellulose 0.119
Sample: T2-3
predicted composition lignin 0.700
predicted composition cellulose 0.173
predicted composition hemicellulose 0.099
Sample: T2-2
predicted composition lignin 0.633
predicted composition cellulose 0.257
predicted composition hemicellulose 0.184
Sample: T6-2
predicted composition lignin -0.223
predicted composition cellulose 0.449
predicted composition hemicellulose 0.014
Sample: T6-3
predicted composition lignin -0.915
predicted composition cellulose 0.076
predicted composition hemicellulose -0.060
Sample: T4-1
predicted composition lignin 0.284
predicted composition cellulose 0.203
predicted composition hemicellulose 0.357
Sample: T4-3
predicted composition lignin 0.496
predicted composition cellulose 0.061
predicted composition hemicellulose 0.472
Sample: T6-1
predicted composition lignin -0.375
predicted composition cellulose 0.410
predicted comp

In [7]:
h_list

[0.11883140343335175,
 0.09931611050524802,
 0.18382854433949802,
 0.014497000810912652,
 -0.059724562029515704,
 0.35737488936021733,
 0.4722829371776869,
 -0.07689059619481942,
 0.4047921992139709,
 0.523516496588706,
 0.6811049475584094,
 0.6342952455821715,
 0.47497540775788294,
 0.6911096635972733,
 0.4676437163990741,
 0.7112970402139769,
 0.16716471684538514,
 0.1392320707640875,
 0.4718438618195939,
 0.163402615849072,
 0.21079345463045818,
 -0.08880360557277989,
 -0.08271016069858383]