In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import IPython
import os
import time
import re
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from pearlsim.ml_utilities import *
import pickle

# Load Data
The data used in this notebook is created in the generate_kernal_burnup_data notebook. Generating it requires running a lot of Serpent calculations, which is why it lives in a separate notebook. First we load and examine the data.

The input features to this model include:
- Depletion Time: the time in days that the pebble has been exposed to flux
- Surface current: Labeled by energy bin, the number of incoming neutrons in a given energy group impinging on the surface of the pebble per second
- Power: The power produced by the pebble in Watts
- Starting isotope concentrations: The amount of each nuclide present in the pebble, expressed as an atom density.

The output features to this model include:
- Ending isotope concentrations: The amount of each nuclide present in the pebble AFTER a burnup step.

Note that the nuclide concentrations and surface current values can be quite large and span orders of magnitude.

In [2]:
# Read the model inputs and outputs; the first CSV column becomes the row index.
features, targets = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        "training_data/burnup_features.csv",
        "training_data/burnup_target.csv",
    )
)

In [3]:
# Show the first ten rows of the input features.
features.head(10)

Unnamed: 0,depletion_time,2.9005e-08,9.9e-08,2.1e-07,4.525e-07,7.985000000000001e-07,1.061e-06,1.5025e-06,2.9275e-06,6.9385e-06,...,62147<lib>,62148<lib>,62149<lib>,62150<lib>,62151<lib>,621520,631510,92235<lib>,92238<lib>,power
0,4.6,212615000000000.0,519518000000000.0,574816000000000.0,397387000000000.0,126460000000000.0,23760000000000.0,89569500000000.0,141658000000000.0,162429000000000.0,...,2.135803e-08,3.553372e-08,8.929475e-08,1e-06,2.015638e-07,2.152646e-07,7.349988e-11,0.004016,0.018843,1896
1,3.0,230194000000000.0,565397000000000.0,615775000000000.0,417881000000000.0,113175000000000.0,28290200000000.0,112128000000000.0,164923000000000.0,191586000000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00467,0.018973,1760
2,4.6,136129000000000.0,336587000000000.0,360700000000000.0,182265000000000.0,26720600000000.0,1511560000000.0,29705300000000.0,28703600000000.0,51003800000000.0,...,2.226552e-07,5.75714e-07,8.451241e-08,4e-06,2.399743e-07,9.411487e-07,2.997949e-10,0.003077,0.018654,2002
3,6.525,443310000000000.0,981535000000000.0,947602000000000.0,565041000000000.0,133709000000000.0,59457000000000.0,108609000000000.0,172727000000000.0,224572000000000.0,...,4.947674e-08,1.259036e-07,7.752272e-08,3e-06,2.264037e-07,5.364294e-07,9.774726e-11,0.003546,0.018794,1052
4,6.525,209367000000000.0,452159000000000.0,434186000000000.0,247601000000000.0,50476900000000.0,23741900000000.0,50771400000000.0,96928800000000.0,87056200000000.0,...,7.428893e-07,2.32859e-06,9.011014e-08,8e-06,2.453769e-07,1.935729e-06,6.223075e-10,0.002208,0.018457,1362
5,0.2,297101000000000.0,655505000000000.0,667310000000000.0,340663000000000.0,42865100000000.0,8646920000000.0,46686200000000.0,71351800000000.0,71755400000000.0,...,9.186741e-07,3.03658e-06,9.194794e-08,9e-06,2.463628e-07,2.384741e-06,6.883e-10,0.001904,0.018371,1553
6,6.525,293695000000000.0,781394000000000.0,891775000000000.0,515080000000000.0,166490000000000.0,53423900000000.0,154078000000000.0,207320000000000.0,227259000000000.0,...,7.769062e-07,2.086122e-06,8.009705e-08,7e-06,2.43098e-07,1.784846e-06,6.126424e-10,0.002343,0.018508,2989
7,6.525,216425000000000.0,478366000000000.0,628317000000000.0,412866000000000.0,119512000000000.0,26987900000000.0,85269100000000.0,139531000000000.0,176797000000000.0,...,7.91255e-07,2.363068e-06,9.769413e-08,8e-06,2.466771e-07,1.971257e-06,6.331971e-10,0.002185,0.018455,2137
8,6.525,427532000000000.0,1207050000000000.0,1235520000000000.0,722365000000000.0,165815000000000.0,60697400000000.0,170488000000000.0,223405000000000.0,246294000000000.0,...,9.082656e-07,2.97432e-06,8.184917e-08,9e-06,2.48232e-07,2.467873e-06,7.002601e-10,0.001854,0.018356,1231
9,6.525,237863000000000.0,653853000000000.0,641459000000000.0,424504000000000.0,112248000000000.0,36079200000000.0,134768000000000.0,172272000000000.0,189337000000000.0,...,5.987565e-07,2.172235e-06,8.440447e-08,8e-06,2.528084e-07,1.95494e-06,4.990086e-10,0.002222,0.018485,2211


In [4]:
# Show the first ten rows of the targets.
targets.head(10)

Unnamed: 0,10010,10020,10030,20030,20040,40090,50110,50120,6012<lib>,60130,...,621460,62147<lib>,62148<lib>,62149<lib>,62150<lib>,62151<lib>,621520,631510,92235<lib>,92238<lib>
0,6.769255e-11,3.56374e-13,2.012836e-15,7.172889e-19,6.572792e-06,1.104684e-06,0.0,0.0,0.01169,0.000131,...,9.818149e-12,3.272728e-08,5.299093e-08,3.045631e-07,1.151985e-06,2.522159e-07,2.157895e-07,9.579983e-11,0.003985,0.018783
1,6.265255e-13,5.075438e-15,5.1063750000000007e-17,7.772492e-21,4.492448e-06,7.602572e-07,0.0,0.0,0.01169,0.000129,...,1.066661e-15,1.498533e-11,4.693565e-12,2.435013e-08,5.899767e-11,1.229243e-08,6.458702e-11,3.052743e-13,0.004651,0.018936
2,3.201253e-10,2.142781e-12,4.02788e-15,1.995947e-18,8.79422e-06,1.482491e-06,0.0,0.0,0.01169,0.000133,...,1.156139e-10,2.579135e-07,6.506087e-07,3.573398e-07,3.861202e-06,2.933327e-07,9.4155e-07,3.259746e-10,0.003052,0.018581
3,1.098132e-10,5.730922e-13,2.965919e-15,1.533621e-18,6.058036e-06,1.017553e-06,0.0,0.0,0.01169,0.00013,...,2.201921e-11,8.205858e-08,1.770346e-07,4.442153e-07,2.539061e-06,2.942143e-07,5.367994e-07,1.34972e-10,0.003525,0.018743
4,6.473544e-10,5.617058e-11,5.426978e-15,3.6288810000000005e-17,9.110679e-06,1.52663e-06,0.0,1.0289019999999999e-240,0.01169,0.000133,...,3.855325e-10,8.198505e-07,2.496571e-06,3.790535e-07,7.64124e-06,2.922954e-07,1.936297e-06,6.596372e-10,0.002188,0.01838
5,2.050532e-10,7.132536e-11,8.495759e-15,1.182674e-16,4.444962e-07,6.089275e-08,0.0,7.296525e-199,0.011691,0.000126,...,1.729434e-11,9.210681e-07,3.049576e-06,1.202929e-07,9.176598e-06,2.49617e-07,2.384757e-06,6.893464e-10,0.001903,0.018368
6,1.634065e-09,1.208342e-11,7.38626e-15,1.4080290000000002e-17,2.017378e-05,3.395273e-06,0.0,0.0,0.011688,0.000142,...,8.979034e-10,8.551341e-07,2.183547e-06,3.474216e-07,7.201413e-06,3.178252e-07,1.786114e-06,6.509571e-10,0.002297,0.018341
7,1.251478e-09,4.898536e-11,8.102288e-15,3.8508850000000006e-17,1.522162e-05,2.551214e-06,0.0,0.0,0.011689,0.000138,...,6.849625e-10,8.678753e-07,2.591637e-06,5.308206e-07,7.720657e-06,3.169369e-07,1.972139e-06,6.723985e-10,0.002154,0.018333
8,8.057072e-10,1.063038e-11,4.186106e-15,1.9210460000000002e-17,9.005983e-06,1.508669e-06,0.0,0.0,0.01169,0.000133,...,4.625929e-10,9.924497e-07,3.169487e-06,4.024991e-07,9.157709e-06,2.940064e-07,2.468391e-06,7.379829e-10,0.001838,0.018283
9,7.822523e-10,6.315778e-12,7.560371e-15,4.899945e-18,1.521735e-05,2.554512e-06,0.0,0.0,0.011689,0.000138,...,5.286155e-10,6.770015e-07,2.3392e-06,4.478601e-07,7.965891e-06,3.224564e-07,1.955839e-06,5.388983e-10,0.00219,0.01836


# Data Standardization
Simple standardization is performed here along each column. Applying a log transform to the nuclide concentrations and current values before standardizing gave a massive improvement to the model accuracy for me. The log transform may not be necessary for the current values.

In [5]:
# Seed NumPy's global random generator so random draws are repeatable.
np.random.seed(42)
# Fraction of the data rows assigned to the training set.
train_split = 0.8

def standardize(raw_data, mean=None, std=None, axis=0):
    """Shift and scale ``raw_data`` by a mean and a standard deviation.

    Parameters
    ----------
    raw_data : NumPy array, pandas Series or pandas DataFrame
        Values to standardize.
    mean : optional
        Mean to subtract. Computed from ``raw_data`` along ``axis`` when None.
    std : optional
        Standard deviation to divide by. Computed from ``raw_data`` along
        ``axis`` when None; in that case any zero entry is replaced by 0.1.
        A caller-supplied ``std`` is used as given.
    axis : int, default 0
        Axis along which the statistics are computed.

    Returns
    -------
    result, mean, std
        The standardized data and the statistics that were applied, so they
        can be reused on other data or passed to ``unstandardize``.
    """
    if mean is None:
        mean = np.mean(raw_data, axis=axis)
    if std is None:
        std = np.std(raw_data, axis=axis)
        # A constant column has zero spread; use 0.1 instead so the division
        # below stays finite (the centered values are all zero anyway).
        if np.ndim(std) == 0:
            # 1-D input reduces to a scalar, which has no item assignment.
            if std == 0:
                std = 0.1
        else:
            std[std == 0] = 0.1
    result = (raw_data - mean) / std
    return result, mean, std

def unstandardize(standardized_data, mean, std):
    """Undo ``standardize``: scale by ``std`` and then add ``mean``.

    Parameters
    ----------
    standardized_data : scalar or array-like
        Values previously produced by subtracting ``mean`` and dividing by ``std``.
    mean, std : scalar or array-like
        The statistics that were used for the forward transform.

    Returns
    -------
    The data mapped back to its original scale.
    """
    return (standardized_data * std) + mean

# Compress the dynamic range of the inputs and outputs with log10(x + 1).
# Power and depletion time are restored to their original linear values below.
# NOTE(review): in float64, x + 1 rounds to exactly 1.0 for x below ~1e-16, so
# concentrations that small all map to 0 here. Confirm this is acceptable, and
# keep whatever transform is chosen in sync with the code that consumes the model.
log_features = features.apply(lambda x: np.log10(x + 1))
log_targets = targets.apply(lambda x: np.log10(x + 1))
log_features['power'] = features['power']
log_features['depletion_time'] = features['depletion_time']

# Draw a random subset of rows (without replacement) for training; every row
# not drawn goes to the test set.
num_data = len(features)
training_size = int(num_data*train_split)
testing_size = num_data - training_size
data_indices = np.arange(num_data)
training_indices = np.random.choice(num_data, training_size, replace=False)
# np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
testing_indices = data_indices[np.isin(data_indices, training_indices, invert=True)]

# The mean/std are computed from the training rows only and then reused for
# the test rows, so both sets go through exactly the same scaling.
training_data, data_mean, data_std = standardize(log_features.iloc[training_indices])
training_target, target_mean, target_std = standardize(log_targets.iloc[training_indices])
testing_data, _, _  = standardize(log_features.iloc[testing_indices], mean=data_mean, std=data_std)
testing_target, _, _  = standardize(log_targets.iloc[testing_indices], mean=target_mean, std=target_std)

print(np.shape(training_data))
print(np.shape(training_target))
print(np.shape(testing_data))
print(np.shape(testing_target))

(400, 424)
(400, 404)
(100, 424)
(100, 404)


# Model Training
I threw together a quick random forest regressor (RFR) model and got some results. You're free to change to any other type of model, as long as it's something I can save and load into other modules. Things to try:
- Properly using cross validation
- Tuning the hyper parameters
- Trying a different model, probably a neural net

In [6]:
# Hyper-parameters for the random forest, passed straight to the constructor.
best_params = {
    'max_depth': 10,
    'n_estimators': 1000,
    'n_jobs': 20,
}
rfr_model = RandomForestRegressor(random_state=0, **best_params)
rfr_model.fit(training_data, training_target)

# Evaluate on the held-out rows using the estimator's built-in score.
rfr_model_test_score = rfr_model.score(testing_data, testing_target)
print(f"RFR score: {rfr_model_test_score}")

RFR score: 0.9097377320041374




Next we save the model and standardization parameters so the model can be used in the simulation.

In [7]:
# Create the output directory if needed so the writes below cannot fail on a
# fresh checkout.
os.makedirs("ml_models", exist_ok=True)

# Stream the pickle straight to disk instead of first building the whole
# serialized model in memory with pickle.dumps.
with open("ml_models/burnup_rfr.pkl", 'wb') as f:
    pickle.dump(rfr_model, f)

# Store the standardization statistics alongside the model; whoever loads the
# model needs them to scale its inputs and un-scale its predictions.
data_mean.to_csv("ml_models/burnup_rfr_data_mean.csv", header=True)
data_std.to_csv("ml_models/burnup_rfr_data_std.csv", header=True)
target_mean.to_csv("ml_models/burnup_rfr_target_mean.csv", header=True)
target_std.to_csv("ml_models/burnup_rfr_target_std.csv", header=True)

KeyboardInterrupt: 