In [3]:
import numpy as np
import pandas as pd
import sys
import os
sys.path.append(os.path.abspath("..")) 
from pybmc.bmc import BayesianModelCombination
from pybmc.inference_utils import USVt_hat_extraction
from pybmc.data import Dataset

# Columns requested from the data file: AME2020 (used further down as the
# truth column) followed by the candidate models.
models = [
    "AME2020",
    "ME2",
    "MEdelta",
    "PC1",
    "NL3S",
    "SKMS",
    "SKP",
    "SLY4",
    "SV",
    "UNEDF0",
    "UNEDF1",
    "UNEDF2",
    "FRDM12",
    "HFB24",
    "BCPM",
    "D1M",
]
properties = ["BE"]
domain_keys = ["N", "Z"]

# Open the data file and load the requested properties; the result is
# iterated below as a mapping of property name -> DataFrame.
dataset = Dataset("selected_data.h5")
property_data = dataset.load_data(
    models=models,
    keys=properties,
    domain_keys=domain_keys,
)

# Quick look at what came back: the keys, then shape and first rows per table.
print('property data:', property_data.keys())
for prop in property_data:
    df = property_data[prop]
    print(f"{prop} DataFrame shape: {df.shape}")
    print(df.head())

# Demonstrate Dataset.get_subset(): filter the BE table on Z and keep only
# a few of the models.
z_filter = (26, 28)
subset_models = ["ME2", "NL3S", "SKP"]  # optional argument
filtered_df = dataset.get_subset(
    property_name="BE",
    filters={"Z": z_filter},
    models_to_include=subset_models,
)

print("\nFiltered BE data from get_subset:")
print(filtered_df.head())

print("\n========== Testing `view_data` Method ==========")

# (1) No arguments: list the available models and keys.
info = dataset.view_data()
print("Available models and keys:", info, sep="\n")

# (2) model_name only: the full table stored for one model.
print("\nFull DataFrame for model 'ME2':")
df_me2 = dataset.view_data(model_name="ME2")
print(df_me2)

# (3) property_name only: the 'BE' key across all models.
print("\n'BE' values across all models:")
be_values = dataset.view_data(property_name="BE")
print(be_values.head())

# (4) Both arguments: the 'BE' values of a single model.
print("\n'BE' values for model 'SKP':")
be_skp = dataset.view_data(model_name="SKP", property_name="BE")
print(be_skp.head())

# Split the BE table into train / validation / test frames with the
# "random" splitting algorithm, using 0.7 / 0.15 / 0.15 as the sizes.
split_sizes = {"train_size": 0.7, "val_size": 0.15, "test_size": 0.15}
train_data_be, val_data_be, test_data_be = dataset.split_data(
    data_dict=property_data,
    property_name="BE",
    splitting_algorithm="random",
    **split_sizes,
)

# Show the first rows of each split under its own heading.
for heading, frame in (
    ("\nTrain data:", train_data_be),
    ("\nValidation data:", val_data_be),
    ("\nTest data:", test_data_be),
):
    print(heading)
    print(frame.head())

# Build the list of columns BMC should combine: every column of the training
# frame that is neither a domain key nor the truth column. The loaded BE table
# carries N, Z and AME2020 alongside the model columns, so taking
# `train_data_be.columns` wholesale would pass those three to BMC as if they
# were models.
truth_column = "AME2020"
non_model_columns = {*domain_keys, truth_column}
models_list = [
    column for column in train_data_be.columns if column not in non_model_columns
]

# Initialize BMC, orthogonalize on the training split, then train.
bmc = BayesianModelCombination(
    models_list=models_list,
    data_dict=property_data,
    truth_column_name=truth_column,
)
bmc.orthogonalize(property="BE", train_df=train_data_be, components_kept=3)
# NOTE(review): confirm the exact sampler name `train` accepts (including
# letter case); a later cell recorded a None-related error when sampling.
bmc.train(training_options={"iterations": 10000, "sampler": 'Gibbs_sampling'})
print(f"\nNumber of models used: {bmc.Vt_hat.shape[1]}")


# Run the prediction for BE; predict2 returns four objects, unpacked in order.
be_prediction = bmc.predict2(property="BE")
rndm_m, lower_df, median_df, upper_df = be_prediction

print("\nBayesianModelCombination results:")
# NOTE(review): the name `rndm_m` suggests random draws rather than a mean;
# confirm that the "Predicted mean" label matches what predict2 returns.
print("Predicted mean:", rndm_m)
for label, frame in (
    ("Predicted upper CI:", upper_df),
    ("Predicted median:", median_df),
    ("Predicted lower CI:", lower_df),
):
    print(label, frame.head())

# Evaluate the trained combination and print whatever evaluate() returns.
eval_results = bmc.evaluate()  # type: ignore
print("\nEvaluation results:", eval_results, sep="\n")




property data: dict_keys(['BE'])
BE DataFrame shape: (629, 18)
    N  Z     AME2020      ME2  MEdelta      PC1     NL3S        SKMS  \
0   8  8  127.619315  126.738  129.026  127.455  128.114  128.856436   
1  10  8  139.807766  140.156  141.992  141.423  141.715  144.746257   
2  12  8  151.371414  151.224  152.793  153.215  153.432  158.460613   
3  14  8  162.027188  160.513  161.884  163.303  163.311  170.486446   
4  16  8  168.952452  167.472  168.465  170.768  170.970  178.908577   

          SKP        SLY4          SV      UNEDF0      UNEDF1      UNEDF2  \
0  128.904993  129.839429  127.368236  126.708495  121.705460  121.950687   
1  144.484846  144.734392  141.963546  142.040342  136.123181  136.259511   
2  156.849135  156.488630  153.746333  154.121072  147.620242  148.128389   
3  167.278899  166.483309  163.855872  164.192707  157.219361  158.409799   
4  174.420231  173.303716  170.930478  171.196394  163.998623  165.167568   

   FRDM12       HFB24        BCPM        

### Workflow: 

1. Initialize Dataset class
2. Use load_data method to load the data
3. Use split_data to split the data into training, validation, and test sets
4. Build the list of model columns to combine, excluding the truth column (this list is needed for BMC initialization)
5. Initialize BMC class
6. Orthogonalize
7. Train
8. Predict
9. Evaluate

In [2]:
from sampling_utils import rndm_m_random_calculator

preds = test_data_be[models_list].to_numpy()  

samples = bmc.samples  

VT_hat = bmc.Vt_hat  

%time rndm_m_random_calculator(preds, samples, VT_hat)


ValueError: a must be a sequence or an integer, not <class 'NoneType'>

In [3]:
from sampling_utils import coverage
rndm_m, (lower, median, upper) = rndm_m_random_calculator(preds, samples, VT_hat)
df=bmc.data_dict["BE"]
truth_column_name = bmc.truth_column_name

%time coverage(np.arange(0, 101, 5), rndm_m, df, truth_column=truth_column_name)

ValueError: a must be a sequence or an integer, not <class 'NoneType'>