In [None]:
%load_ext autoreload
%autoreload 2
import os
# Run from the project root so that the `modules` imports and the relative
# `datasets/...` / `notebooks/...` paths used in later cells resolve.
# Guarded so that re-running this cell does not climb two more directories each time.
# NOTE(review): assumes a `modules/` directory exists at the project root and not
# next to this notebook — confirm against the repository layout.
if not os.path.isdir("modules"):
    os.chdir("../..")
print(os.getcwd())
import torch
import numpy as np
import random

# --- Reproducibility / backend configuration ---
# NOTE(review): this cuBLAS workspace setting presumably only matters when deterministic
# algorithms are enabled, which they are not below — confirm whether it is still needed.
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

seed = 21
# Seed every RNG used by the notebook (NumPy, stdlib random, torch CPU and CUDA).
for _seed_fn in (
    np.random.seed,
    random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(seed)

# Determinism is explicitly left off and cuDNN autotuning is turned on.
torch.use_deterministic_algorithms(False)
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

# Select device: use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

In [None]:
# Load the trained hard-constraint checkpoint and move it to the selected device.
from modules.model_loader import load_model
model = load_model('notebooks/inference_phase/model_weights/hard_constrained_final.pt', constraint_type='hard')
model = model.to(device)
# This notebook only runs inference, so switch the model to evaluation mode explicitly
# (dropout off, normalisation layers use their stored statistics). This is harmless if
# load_model or the evaluator already does it.
# NOTE(review): assumes `model` is a torch.nn.Module — the `.to(device)` call suggests so.
# Assigned back (rather than a bare call) so the cell does not echo the model repr.
model = model.eval()

# TEST SUBSET EVALUATION

In [None]:
from modules.data_pipeline import DataPipeline
from torch_geometric.loader import DataLoader
import modules.datasplit_module as dsm
# Build the graph objects for the full mixed dataset; only the second value returned
# by the pipeline (the graph list) is used here.
pipeline = DataPipeline(components_csv='datasets/components.csv')
_, graph_list = pipeline.run_pipeline(raw_csv='datasets/mixed/all_data.csv')

# --- Split graphs ---
# In-place shuffle driven by the global `random` state, so the resulting order depends on
# everything that has consumed that RNG since it was seeded.
random.shuffle(graph_list)
sampled_graph_list = graph_list  # no sub-sampling: alias of the full shuffled list
train, val, test = dsm.system_disjoint_split(
    sampled_graph_list,
    random_state=seed,
    stratify_by_components=True,
)

In [None]:
# --- Split the test set by mixture type and build the data loaders ---

# Batch size shared by every loader in this cell.
BATCH_SIZE = 1024


def _make_loader(graphs, shuffle=False):
    """Wrap a list of graphs in a DataLoader using the settings shared by every split.

    Args:
        graphs: list of graph objects to serve.
        shuffle: whether the loader reshuffles the data when iterated.

    Returns:
        A DataLoader with `BATCH_SIZE` batches that tracks `component_batch`.
    """
    return DataLoader(
        dataset=graphs,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        follow_batch=['component_batch'],
    )


def _test_subset(n_components):
    """Return the test graphs with exactly `n_components` mole-fraction rows, moved to CPU."""
    return [
        graph.to('cpu')
        for graph in test
        if graph.component_mole_frac.shape[0] == n_components
    ]


# Evaluate based on mixture type: 2 components = binary, 3 components = ternary.
test_binary = _test_subset(2)
test_ternary = _test_subset(3)

print(f"Original Test Size: {len(test)}")
print(f"Binary Test Size:   {len(test_binary)}")
print(f"Ternary Test Size:  {len(test_ternary)}")

# Graphs with any other component count fall into neither subset; say so instead of
# dropping them silently from the reported metrics.
n_other = len(test) - len(test_binary) - len(test_ternary)
if n_other:
    print(f"Warning: {n_other} test graphs are neither binary nor ternary and are not evaluated below.")

# Create loaders (only the training loader shuffles).
train_loader = _make_loader(train, shuffle=True)
val_loader = _make_loader(val)
test_loader_binary = _make_loader(test_binary)
test_loader_ternary = _make_loader(test_ternary)

In [None]:
from modules.utils import ComputeMetric

def _print_banner(title):
    """Print `title` between two 40-character '=' rules, preceded by a blank line."""
    rule = "=" * 40
    print("\n" + rule)
    print(title)
    print(rule)


# Both evaluators are constructed up front, then run one after the other.
evaluator_bin = ComputeMetric(model, test_loader_binary, device=device)
evaluator_ter = ComputeMetric(model, test_loader_ternary, device=device)

# --- 1. Binary mixtures ---
_print_banner("       BINARY MIXTURE RESULTS")
(
    df_str_bin,
    df_raw_bin,
    rmse_bin,
    mae_bin,
    r2_bin,
) = evaluator_bin.run_evaluation()

# --- 2. Ternary mixtures ---
_print_banner("       TERNARY MIXTURE RESULTS")
(
    df_str_ter,
    df_raw_ter,
    rmse_ter,
    mae_ter,
    r2_ter,
) = evaluator_ter.run_evaluation()

# Release cached GPU memory once both evaluations are done.
torch.cuda.empty_cache()

In [None]:
print("\n--- Raw Format (For Plotting) ---")
print(df_raw_bin.head())

import seaborn as sns
import matplotlib.pyplot as plt

# Parity plot for the binary test subset: experimental vs. predicted ln(gamma).
# The axis range is taken from BOTH columns so that experimental values lying outside
# the predicted range are not clipped, and both axes share the same limits.
parity_values = df_raw_bin[['ln_gamma_exp', 'ln_gamma_pred']]
axis_lo = float(parity_values.min().min()) - 10.0
axis_hi = float(parity_values.max().max()) + 10.0

sns.scatterplot(data=df_raw_bin, x='ln_gamma_exp', y='ln_gamma_pred', alpha=0.4)
# Identity line (perfect prediction) drawn across the visible range.
plt.plot([axis_lo, axis_hi], [axis_lo, axis_hi], 'r--')
plt.xlim(axis_lo, axis_hi)
plt.ylim(axis_lo, axis_hi)
plt.title(f"Binary system -> RMSE: {rmse_bin:.3f} and r2: {r2_bin:.3f}")
plt.show()

In [None]:
print("\n--- Raw Format (For Plotting) ---")
print(df_raw_ter.head())

import seaborn as sns
import matplotlib.pyplot as plt

# Parity plot for the ternary test subset: experimental vs. predicted ln(gamma).
# The axis range is taken from BOTH columns so that experimental values lying outside
# the predicted range are not clipped, and both axes share the same limits.
parity_values = df_raw_ter[['ln_gamma_exp', 'ln_gamma_pred']]
axis_lo = float(parity_values.min().min()) - 10.0
axis_hi = float(parity_values.max().max()) + 10.0

sns.scatterplot(data=df_raw_ter, x='ln_gamma_exp', y='ln_gamma_pred', alpha=0.4)
# Identity line (perfect prediction) drawn across the visible range.
plt.plot([axis_lo, axis_hi], [axis_lo, axis_hi], 'r--')
plt.xlim(axis_lo, axis_hi)
plt.ylim(axis_lo, axis_hi)
plt.title(f"Ternary system -> RMSE: {rmse_ter:.3f} and r2: {r2_ter:.3f}")
plt.show()

# Infinite Dilution Activity Coefficient

In [None]:
from modules.data_pipeline import DataPipeline
# Build graphs for the infinite-dilution benchmark set and serve them as one single batch.
pipeline = DataPipeline(components_csv='datasets/components.csv')
_ , benchmark_list = pipeline.run_pipeline(raw_csv='datasets/binary_only/aci_binary_compare.csv')

# batch_size=len(...) would be 0 for an empty list, which DataLoader rejects with a generic
# "batch_size" error; fail here with a message that names the actual problem instead.
if len(benchmark_list) == 0:
    raise ValueError(
        "No benchmark graphs were produced from 'datasets/binary_only/aci_binary_compare.csv'"
    )

benchmark_loader = DataLoader(
    dataset=benchmark_list,
    batch_size=len(benchmark_list),  # whole benchmark set in a single batch
    shuffle=False,
    follow_batch=['component_batch']
)

In [None]:
from modules.utils import ComputeMetric
# Evaluator bound to the infinite-dilution benchmark loader.
evaluator_aci = ComputeMetric(model, benchmark_loader, device=device)

# --- Run infinite-dilution evaluation ---
_rule = "=" * 40
print("\n" + _rule)
print("INFINITE DILUTION RESULTS")
print(_rule)
(
    df_str_aci,
    df_raw_aci,
    rmse_aci,
    mae_aci,
    r2_aci,
) = evaluator_aci.run_evaluation()

# Release cached GPU memory once the evaluation is done.
torch.cuda.empty_cache()

In [None]:
print("\n--- Raw Format (For Plotting) ---")
print(df_raw_aci.head())

import seaborn as sns
import matplotlib.pyplot as plt

# Parity plot for the infinite-dilution benchmark: experimental vs. predicted ln(gamma).
# The axis range is taken from BOTH columns so that experimental values lying outside
# the predicted range are not clipped, and both axes share the same limits.
parity_values = df_raw_aci[['ln_gamma_exp', 'ln_gamma_pred']]
axis_lo = float(parity_values.min().min()) - 10.0
axis_hi = float(parity_values.max().max()) + 10.0

sns.scatterplot(data=df_raw_aci, x='ln_gamma_exp', y='ln_gamma_pred', alpha=0.4)
# Identity line (perfect prediction) drawn across the visible range.
plt.plot([axis_lo, axis_hi], [axis_lo, axis_hi], 'r--')
plt.xlim(axis_lo, axis_hi)
plt.ylim(axis_lo, axis_hi)
plt.title(f"Infinite dilution -> RMSE: {rmse_aci:.3f} and r2: {r2_aci:.3f}")
plt.show()