In [42]:
import mlflow
from lib.reproduction import training_info
from pathlib import Path
import pickle

# Point MLflow at the tracking server on localhost port 5000; this must be set
# before any MlflowClient is created so the client talks to that server.
mlflow.set_tracking_uri("http://localhost:5000")

In [43]:
client = mlflow.MlflowClient()

# Nested lookup of the loaded models: kv[oxide][comp_range_name] -> model object.
kv = {}

# Alternative source (currently disabled): instead of reading pickles from disk,
# the models can be fetched from the MLflow registry via `client`, e.g.
#     model_name = f"{oxide}_{comp_range_name}_outlier_removal"
#     models = client.search_model_versions(f"name = '{model_name}'")
#     model = mlflow.pyfunc.load_model(models[0].source)

models_path = Path("../models/PLS_Models_12-11-23_174732/")
for oxide, v in training_info.items():
    sub = {}
    for comp_range_name, vv in v.items():
        model_name = f"{oxide}_{comp_range_name}/model.pkl"
        # NOTE(security): unpickling executes arbitrary code embedded in the file;
        # only load model files that come from a trusted source.
        # The context manager closes the file handle deterministically instead of
        # leaving it to the garbage collector.
        with open(models_path / model_name, "rb") as model_file:
            model = pickle.load(model_file)
        sub[comp_range_name] = model

    kv[oxide] = sub

# NOTE: after the loop, `model` still refers to the last model that was loaded;
# a later cell calls `model.predict(...)`, so the name is intentionally kept.

kv

{'SiO2': {'Full': PLSRegression(n_components=6),
  'Low': PLSRegression(n_components=9),
  'Mid': PLSRegression(n_components=6),
  'High': PLSRegression(n_components=5)},
 'TiO2': {'Full': PLSRegression(n_components=5),
  'Low': PLSRegression(n_components=7),
  'Mid': PLSRegression(n_components=5),
  'High': PLSRegression(n_components=3)},
 'Al2O3': {'Full': PLSRegression(n_components=6),
  'Low': PLSRegression(n_components=6),
  'Mid': PLSRegression(n_components=8),
  'High': PLSRegression(n_components=6)},
 'FeOT': {'Full': PLSRegression(n_components=8),
  'Low': PLSRegression(n_components=3),
  'Mid': PLSRegression(n_components=8),
  'High': PLSRegression(n_components=3)},
 'MgO': {'Full': PLSRegression(n_components=7),
  'Low': PLSRegression(n_components=6),
  'Mid': PLSRegression(n_components=9),
  'High': PLSRegression(n_components=8)},
 'CaO': {'Full': PLSRegression(n_components=8),
  'Low': PLSRegression(n_components=9),
  'Mid': PLSRegression(n_components=9),
  'High': PLSRegr

In [44]:
import dotenv
from lib.data_handling import load_data
# Read the key/value pairs from the .env file located by dotenv.find_dotenv().
env = dotenv.dotenv_values(dotenv.find_dotenv())

# DATA_PATH from the .env file, or "" when it is missing/empty. The value is only
# printed in this cell.
# NOTE(review): load_data below receives a hard-coded relative path rather than
# dataset_loc — confirm whether the env value should be used instead.
dataset_loc = env.get('DATA_PATH') or ""
print(dataset_loc)

# presumably the second argument limits how many samples are loaded (the recorded
# progress bar shows 10/10) — TODO confirm against lib.data_handling.load_data
smol_data = load_data(
    "../data/data/calib/calib_2015/1600mm/pls/",
    10
)

data/data/calib/calib_2015/1600mm/pls


Loading data: 100%|██████████| 10/10 [00:02<00:00,  4.24it/s]


In [45]:
from lib.data_handling import CustomSpectralPipeline
from lib.reproduction import masks, major_oxides

# COMPOSITION_DATA_PATH from the .env file, or "" when it is missing/empty. The
# value is only printed in this cell.
# NOTE(review): the pipeline below receives a hard-coded relative CSV path rather
# than this variable — confirm whether the env value should be used instead.
compositon_data_path = env.get('COMPOSITION_DATA_PATH') or ""
print(compositon_data_path)

# Build the spectral pipeline from the wavelength masks, the composition CSV and
# the list of major oxides (argument semantics are defined in lib.data_handling;
# presumably the CSV supplies the oxide target columns — TODO confirm).
pipeline = CustomSpectralPipeline(
    masks,
    "../data/data/calib/ccam_calibration_compositions.csv",
    major_oxides
)

# Run the pipeline on the small sample loaded earlier and preview the first rows.
processed_data = pipeline.fit_transform(smol_data)
processed_data.head()

data/data/calib/ccam_calibration_compositions.csv


Transforming samples: 100%|██████████| 10/10 [00:00<00:00, 14.08it/s]


Unnamed: 0,246.688,246.741,246.79401,246.847,246.89999,246.953,247.007,247.06,247.11301,247.166,...,848.89642,SiO2,TiO2,Al2O3,FeOT,MgO,CaO,Na2O,K2O,Sample Name
0,399858900000.0,362310200000.0,312583400000.0,254814300000.0,238410900000.0,253572700000.0,286705500000.0,318151900000.0,322830800000.0,310822000000.0,...,22935410000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
1,310111500000.0,276084000000.0,229415300000.0,178971400000.0,160254600000.0,173736500000.0,203343300000.0,230790300000.0,238090000000.0,228648400000.0,...,22246580000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
2,478439500000.0,436485900000.0,378726100000.0,300735100000.0,286992900000.0,306050200000.0,343736600000.0,386255000000.0,389610300000.0,375093400000.0,...,22395920000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
3,540226600000.0,497416500000.0,426643200000.0,335467000000.0,323240200000.0,343319800000.0,384717500000.0,429803100000.0,431385900000.0,417308900000.0,...,1634240000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
4,492592700000.0,445791700000.0,386244100000.0,306331800000.0,298727300000.0,319164800000.0,356717200000.0,390994000000.0,397830100000.0,383662200000.0,...,22341640000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399


In [46]:
import importlib

import lib.norms

# Reload BEFORE binding the class names: importlib.reload() re-executes the module
# but does not rebind names that were already pulled in with `from ... import ...`,
# so importing first would leave Norm1Scaler/Norm3Scaler pointing at the stale,
# pre-reload classes.
importlib.reload(lib.norms)
from lib.norms import Norm1Scaler, Norm3Scaler  # noqa: E402

# Columns that are targets/metadata rather than spectral features.
drop_cols = major_oxides + ['Sample Name']

# Normalise the feature columns only.
scaler = Norm1Scaler(reshaped=True)
X = scaler.fit_transform(processed_data.drop(drop_cols, axis=1))

# Target values for the SiO2 oxide.
y = processed_data['SiO2']
X[0:5]

Unnamed: 0,246.688,246.741,246.79401,246.847,246.89999,246.953,247.007,247.06,247.11301,247.166,...,847.10272,847.30212,847.50153,847.70081,847.90009,848.09943,848.29871,848.49799,848.6972,848.89642
0,0.000583,0.000528,0.000456,0.000371,0.000348,0.00037,0.000418,0.000464,0.000471,0.000453,...,3.6e-05,3.4e-05,3.2e-05,3.1e-05,3e-05,3e-05,3.00305e-05,3.051429e-05,3.1e-05,3.3e-05
1,0.000515,0.000458,0.000381,0.000297,0.000266,0.000288,0.000338,0.000383,0.000395,0.00038,...,3.9e-05,3.7e-05,3.5e-05,3.4e-05,3.4e-05,3.4e-05,3.333443e-05,3.391175e-05,3.5e-05,3.7e-05
2,0.000778,0.00071,0.000616,0.000489,0.000467,0.000498,0.000559,0.000628,0.000634,0.00061,...,3.8e-05,3.6e-05,3.4e-05,3.4e-05,3.3e-05,3.4e-05,3.290123e-05,3.329541e-05,3.4e-05,3.6e-05
3,0.000794,0.000731,0.000627,0.000493,0.000475,0.000505,0.000565,0.000632,0.000634,0.000613,...,9e-06,6e-06,4e-06,3e-06,2e-06,1e-06,8.131556e-07,6.993754e-07,1e-06,2e-06
4,0.000755,0.000683,0.000592,0.000469,0.000458,0.000489,0.000546,0.000599,0.000609,0.000588,...,3.6e-05,3.4e-05,3.2e-05,3.2e-05,3.1e-05,3.1e-05,3.073763e-05,3.11551e-05,3.2e-05,3.4e-05


In [47]:
from sklearn.metrics import mean_squared_error
# NOTE(review): `model` is not defined in this cell. It is the loop variable left
# over from the model-loading cell, i.e. whichever model was unpickled last —
# confirm which oxide/range model is intended and look it up in `kv` explicitly.
predictions = model.predict(X[:1])
predictions
# Disabled error-metric check of the prediction against the first target value:
# out = mean_squared_error(predictions, y[:1], squared=False)
# out/100




array([1.43265354])

In [50]:
def get_weights(y_full, blend_range_min, blend_range_max):
    """
    Compute the linear blending weights for a full-model prediction.

    The upper weight is the fractional position of ``y_full`` between
    ``blend_range_min`` and ``blend_range_max``; the lower weight is its
    complement (``1 - w_upper``).

    Returns:
        tuple: ``(w_lower, w_upper)``.
    """
    span = blend_range_max - blend_range_min
    distance_from_min = y_full - blend_range_min
    w_upper = distance_from_min / span
    return 1 - w_upper, w_upper


def predict_composition_with_blending(oxide, y_full, X, models, optimized_blending_ranges):
    """
    Predict an oxide's composition by picking or blending range-specific models.

    The full-model prediction ``y_full`` selects which sub-model(s) to use:

    * If ``y_full`` falls inside a non-blending range (any range name other than
      "Low-Mid" / "Mid-High"), that range's model predicts directly from ``X``.
    * Otherwise, if it falls inside a blending range, the two models named on
      either side of the "-" both predict from ``X`` and their outputs are
      combined linearly with the weights from ``get_weights``.

    Args:
        oxide: Key selecting the oxide in ``models`` and ``optimized_blending_ranges``.
        y_full: Prediction of the full-range model, compared against the range bounds.
        X: Feature input passed to the selected models' ``predict``.
        models: Mapping ``oxide -> range name -> model`` where each model exposes
            ``predict(X)``.
        optimized_blending_ranges: Mapping ``oxide -> range name -> (min, max)``.

    Returns:
        The selected model's prediction, or the weighted blend of two predictions.

    Raises:
        ValueError: If ``y_full`` lies outside every range defined for ``oxide``.
    """
    blend_ranges = ["Low-Mid", "Mid-High"]
    oxide_ranges = optimized_blending_ranges[oxide]
    oxide_models = models[oxide]

    # Non-blending ranges are checked first (bounds inclusive).
    for range_name, (range_min, range_max) in oxide_ranges.items():
        if range_min <= y_full <= range_max and range_name not in blend_ranges:
            # Fix: the model must predict from the features X; the original code
            # called the model object itself with y_full, which fails for
            # estimators that are not callable.
            return oxide_models[range_name].predict(X)

    for blend_range in blend_ranges:
        # Not every oxide has to define both blending ranges.
        if blend_range not in oxide_ranges:
            continue

        blend_range_min, blend_range_max = oxide_ranges[blend_range]
        if blend_range_min <= y_full <= blend_range_max:
            w_lower, w_upper = get_weights(y_full, blend_range_min, blend_range_max)

            # "Low-Mid" -> ("Low", "Mid"); "Mid-High" -> ("Mid", "High").
            lower_name, upper_name = blend_range.split("-")
            y_lower = oxide_models[lower_name].predict(X)
            y_upper = oxide_models[upper_name].predict(X)

            y_final = w_lower * y_lower + w_upper * y_upper
            print(f"y_lower: {y_lower}, y_upper: {y_upper}, y_final: {y_final}")
            print(f"w_lower: {w_lower}, w_upper: {w_upper}")

            return y_final

    # Error if y_full is outside any defined range
    raise ValueError(f"y_full value {y_full} for oxide {oxide} is outside the defined blending ranges.")

In [52]:
from lib.reproduction import optimized_blending_ranges  # noqa: E402

# Prediction of the SiO2 "Full" model for the first row of X.
y_full = kv['SiO2']['Full'].predict(X[:1])
print(y_full)
# Prediction of the SiO2 "Low" model for the same row, shown for comparison.
# NOTE(review): the recorded output (~3792) is far from the Full model's ~54;
# possibly the input preprocessing does not match what the "Low" model was
# trained on — investigate before relying on the blended prediction.
kv['SiO2']['Low'].predict(X[:1])
# Disabled call to the blending helper using the values above:
# predict_composition_with_blending('SiO2', y_full, X[:1], kv, optimized_blending_ranges)

[54.13805783]




array([3792.26896365])