In [1]:

# Put the parent directory on the import path *before* the project imports below;
# presumably this is what lets `config` and `scripts` resolve — confirm.
import sys
sys.path.append("..")
# NOTE(review): wildcard imports. The cells below use `os`, `pd`, the DATASET_* and
# *_PATH constants, the `base_mixed_*` folders and the compare/merge helpers without
# importing them explicitly, so they presumably all arrive through these two lines —
# confirm which module provides which name.
from config import *
from scripts.compare_func_8points_mixed import *
import warnings
# Suppress every RuntimeWarning for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning)


# Uncertainty Sampling

In [2]:
# Dataset constants (the DATASET_* names are not defined in this notebook; they are
# expected to come from the wildcard imports in the first cell).
# NOTE(review): `datasets` is not referenced by any other cell visible in this notebook.
datasets = [
    DATASET_10272_Ag_Au_Pd_RT,
    DATASET_10275_Ag_Au_Pd_Pt_Rh_RT,
    DATASET_10304_Au_Pd_Pt_Rh_RT,
    DATASET_10311_Au_Pd_Pt_Rh_Ru_RT,
    DATASET_10403_Ag_Au_Cu_Pd_Pt_RT,
    DATASET_10402_Ag_Au_Pd_Pt_RT,
    DATASET_10399_Au_Cu_Pd_Pt_RT,
    DATASET_10374_Ir_Pd_Pt_Rh_Ru,
]

In [3]:
# Material-library IDs handed to every comparison call in this notebook.
material_ids = [
    "10374", "10399", "10402", "10403",
    "10311", "10304", "10275", "10272",
]

# Input locations for the uncertainty-sampling runs, passed on as `base_root` and
# `mixed_folder` to the comparison helpers.
base_root = UNCERTAINTY_PATH_8
mixed_folder = UNCERTAINTY_PATH

# Strategy names iterated over (and passed as `base_strategies`) in the next cell.
base_strategies = [
    "Centroids_saturation_high",
    "Random",
    "LHS",
    "K-Means",
    "Farthest",
    "K-Center",
    "ODAL",
    "Centroids_saturation_medium",
    "Centroids_saturation_low",
    "Top5Similarity",
]

# Create the results folder up front so later CSV writes cannot fail on a missing directory.
os.makedirs(base_mixed_Uncertainty, exist_ok=True)

# Top5Similarity mixes compared against the top-8 results. Both return values are
# reused by the next cell: the frame is saved and the rate overrides the summary row.
df_comparison, overall_improvement_Similarity = compare_all_top5similarity_mix_with_top8(
    material_ids=material_ids, base_root=base_root, mixed_folder=mixed_folder
)
df_comparison.head()

Overall Improvement Rate: 58.82% (40/68)


Unnamed: 0,MaterialLibrary,MixedStrategy,MixedStoppingIteration,Top8StoppingIteration,ComparisonResult,ImprovementPercentage
0,10374,Top5Similarity+Max Comp,100,81,Worse,-19.0
1,10374,Top5Similarity+Min Comp,52,81,Improved,29.0
2,10374,Top5Similarity+Centroids_saturation_high,77,81,Improved,4.0
3,10374,Top5Similarity+Centroids_saturation_medium,47,81,Improved,34.0
4,10374,Top5Similarity+Centroids_saturation_low,49,81,Improved,32.0


In [4]:

# Build one summary row (strategy name + overall improvement) per mixing strategy.
summary = []
for strategy in base_strategies:
    # Per-strategy CSV path passed to the helper as `output_file`.
    output_file_path = os.path.join(base_mixed_Uncertainty, f"{strategy}_mixed_vs_others.csv")

    # The per-strategy frame is intentionally NOT bound to `df_comparison`.
    # Rebinding it here used to clobber the Top5Similarity frame produced by the
    # previous cell, so the final save below wrote whichever strategy happened to
    # be last in `base_strategies` instead of that frame.
    _, overall_improvement, _ = compare_fixed_base_mixed_strategies(
        material_ids=material_ids,
        base_root=base_root,
        mixed_folder=mixed_folder,
        base_strategies=base_strategies,
        mixing_strategy=strategy,
        output_file=output_file_path,
    )
    summary.append({
        "MixingStrategy": strategy,
        "OverallImprovementPercent": overall_improvement,
    })

summary_df = pd.DataFrame(summary)

# For Top5Similarity, report the rate computed by
# compare_all_top5similarity_mix_with_top8 (previous cell) instead of the loop's value.
summary_df.loc[summary_df["MixingStrategy"] == "Top5Similarity", "OverallImprovementPercent"] = overall_improvement_Similarity

summary_df.to_csv(os.path.join(base_mixed_Uncertainty, "overall_summary.csv"), index=False)

# Persist the Top5Similarity comparison frame from the previous cell, matching the
# override applied to the summary above.
# NOTE(review): this file name ends in "_vs_other.csv", unlike the per-strategy
# "_vs_others.csv" files written in the loop — confirm whether that is intentional.
save_path = os.path.join(base_mixed_Uncertainty, "Top5Similarity_mixed_vs_other.csv")
df_comparison.to_csv(save_path, index=False)


Overall Improvement Rate for Centroids_saturation_high: 37.78% (17/45)

Overall Improvement Rate for Random: 46.67% (21/45)

Overall Improvement Rate for LHS: 43.75% (21/48)

Overall Improvement Rate for K-Means: 66.0% (33/50)

Overall Improvement Rate for Farthest: 61.54% (32/52)

Overall Improvement Rate for K-Center: 60.78% (31/51)

Overall Improvement Rate for ODAL: 67.31% (35/52)

Overall Improvement Rate for Centroids_saturation_medium: 35.71% (15/42)

Overall Improvement Rate for Centroids_saturation_low: 38.1% (16/42)

Overall Improvement Rate for Top5Similarity: 56.6% (30/53)


In [5]:
# Merge the comparison results found in the uncertainty-sampling results folder.
# The displayed table has the columns CanonicalMixedStrategy and MaxImprovedCount.
# NOTE(review): which files in the folder the helper reads is not visible here — confirm.
merge_all_summaries(input_folder=base_mixed_Uncertainty)

Unnamed: 0,CanonicalMixedStrategy,MaxImprovedCount
2,Centroids_saturation_high+Farthest,6
25,Farthest+K-Means,6
4,Centroids_saturation_high+K-Means,6
17,Centroids_saturation_medium+Farthest,6
39,LHS+ODAL,6
38,K-Means+Top5Similarity,5
11,Centroids_saturation_low+K-Center,5
35,K-Means+LHS,5
21,Centroids_saturation_medium+ODAL,5
18,Centroids_saturation_medium+K-Center,5


# SAWEI

In [6]:
# Re-point both input locations at the SAWEI runs. `material_ids` and
# `base_strategies` are reused unchanged from the uncertainty-sampling section.
base_root, mixed_folder = SAWEI_PATH_8, SAWEI_PATH

# Create the SAWEI results folder before anything is written into it.
os.makedirs(base_mixed_SAWEI, exist_ok=True)

# Top5Similarity mixes compared against the top-8 results for SAWEI. Both return
# values are reused by the next cell.
df_comparison, overall_improvement_Similarity = compare_all_top5similarity_mix_with_top8(
    material_ids=material_ids, base_root=base_root, mixed_folder=mixed_folder
)
df_comparison.head()

Overall Improvement Rate: 61.19% (41/67)


Unnamed: 0,MaterialLibrary,MixedStrategy,MixedStoppingIteration,Top8StoppingIteration,ComparisonResult,ImprovementPercentage
0,10374,Top5Similarity+Max Comp,100,81,Worse,-19.0
1,10374,Top5Similarity+Min Comp,52,81,Improved,29.0
2,10374,Top5Similarity+Centroids_saturation_high,77,81,Improved,4.0
3,10374,Top5Similarity+Centroids_saturation_medium,47,81,Improved,34.0
4,10374,Top5Similarity+Centroids_saturation_low,49,81,Improved,32.0


In [7]:
# Build one summary row (strategy name + overall improvement) per mixing strategy.
summary = []
for strategy in base_strategies:
    # Per-strategy CSV path passed to the helper as `output_file`.
    output_file_path = os.path.join(base_mixed_SAWEI, f"{strategy}_mixed_vs_others.csv")

    # The per-strategy frame is intentionally NOT bound to `df_comparison`.
    # Rebinding it here used to clobber the Top5Similarity frame produced by the
    # previous cell, so the final save below wrote whichever strategy happened to
    # be last in `base_strategies` instead of that frame.
    _, overall_improvement, _ = compare_fixed_base_mixed_strategies(
        material_ids=material_ids,
        base_root=base_root,
        mixed_folder=mixed_folder,
        base_strategies=base_strategies,
        mixing_strategy=strategy,
        output_file=output_file_path,
    )
    summary.append({
        "MixingStrategy": strategy,
        "OverallImprovementPercent": overall_improvement,
    })

summary_df = pd.DataFrame(summary)

# For Top5Similarity, report the rate computed by
# compare_all_top5similarity_mix_with_top8 (previous cell) instead of the loop's value.
summary_df.loc[summary_df["MixingStrategy"] == "Top5Similarity", "OverallImprovementPercent"] = overall_improvement_Similarity

summary_df.to_csv(os.path.join(base_mixed_SAWEI, "overall_summary.csv"), index=False)

# Persist the Top5Similarity comparison frame from the previous cell, matching the
# override applied to the summary above.
# NOTE(review): this file name ends in "_vs_other.csv", unlike the per-strategy
# "_vs_others.csv" files written in the loop — confirm whether that is intentional.
save_path = os.path.join(base_mixed_SAWEI, "Top5Similarity_mixed_vs_other.csv")
df_comparison.to_csv(save_path, index=False)


Overall Improvement Rate for Centroids_saturation_high: 37.21% (16/43)

Overall Improvement Rate for Random: 61.36% (27/44)

Overall Improvement Rate for LHS: 41.46% (17/41)

Overall Improvement Rate for K-Means: 56.25% (27/48)

Overall Improvement Rate for Farthest: 64.15% (34/53)

Overall Improvement Rate for K-Center: 64.0% (32/50)

Overall Improvement Rate for ODAL: 67.35% (33/49)

Overall Improvement Rate for Centroids_saturation_medium: 40.48% (17/42)

Overall Improvement Rate for Centroids_saturation_low: 38.46% (15/39)

Overall Improvement Rate for Top5Similarity: 58.18% (32/55)


In [8]:
# Merge the comparison results found in the SAWEI results folder.
# The displayed table has the columns CanonicalMixedStrategy and MaxImprovedCount.
# NOTE(review): which files in the folder the helper reads is not visible here — confirm.
merge_all_summaries(input_folder=base_mixed_SAWEI)

Unnamed: 0,CanonicalMixedStrategy,MaxImprovedCount
10,Centroids_saturation_low+Farthest,6
17,Centroids_saturation_medium+Farthest,6
4,Centroids_saturation_high+K-Means,6
26,Farthest+LHS,6
21,Centroids_saturation_medium+ODAL,5
30,K-Center+K-Means,5
14,Centroids_saturation_low+ODAL,5
38,K-Means+Top5Similarity,5
11,Centroids_saturation_low+K-Center,5
22,Centroids_saturation_medium+Random,5
