# Automated IQM Evaluation of Stream Files


In [None]:
from automate_evaluation import automate_evaluation

# Folder with the stream file results from indexamajig.
# Note that ALL stream files in this folder will be processed.
stream_file_folder = "/home/bubl3932/files/UOX1/UOX1_original_IQM_v3"

# Weights for the combined IQM value, one tuple per weight combination.
#
# Each weight corresponds to one of the six metrics used in calculating the
# combined IQM value. The combined IQM is computed by first normalizing each
# metric across all stream files, then multiplying each normalized metric by
# its assigned weight, and finally summing the results.
#
# The order (or keys) of the weights must match the following metrics:
#   1. 'weighted_rmsd'
#   2. 'fraction_outliers'
#   3. 'length_deviation'
#   4. 'angle_deviation'
#   5. 'peak_ratio'
#   6. 'percentage_indexed'
#
# Multiple weight combinations can be specified if needed; add one tuple per
# combination to the list.
weights_list = [
    (1, 1, 1, 1, 1, 1),
]

# Run the evaluation for the folder and weight combinations configured above.
automate_evaluation(stream_file_folder, weights_list)


# Plot Histogram of IQM Values with a Fitted Normal Distribution and a Cut-off Threshold

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm  # optional, if you want to plot the fitted PDF

# Load the per-stream-file metric table for the settings defined in the previous cell.
# Only the FIRST weight combination in `weights_list` is analysed here.
# NOTE(review): the CSV is presumably written by automate_evaluation — confirm the file name pattern.
weights_string = "_".join(map(str, weights_list[0]))
csv_path = f"{stream_file_folder}/metric_values_IQM_{weights_string}.csv"
df = pd.read_csv(csv_path)

# Group by event_number and keep the minimum combined_metric for each event.
grouped_series = df.groupby("event_number")["combined_metric"].min()

# Compute statistics of the per-event combined_metric.
mean_metric = grouped_series.mean()
std_metric = grouped_series.std()
std_multiplier = 1.0  # change this multiplier to adjust the cutoff threshold

# Number of histogram bins. Used both for drawing the histogram and for scaling
# the fitted PDF, so the two can never drift apart.
n_bins = 100

# Define cutoff as mean + (std_multiplier * standard deviation).
cutoff = mean_metric + std_multiplier * std_metric
print(f"Mean combined_metric: {mean_metric:.4f}")
print(f"Standard deviation: {std_metric:.4f}")
print(f"Cutoff threshold (mean + {std_multiplier} std): {cutoff:.4f}")

# Identify events with a combined_metric above the cutoff threshold.
cutoff_series = grouped_series[grouped_series > cutoff]
cutoff_events = cutoff_series.index.tolist()
cutoff_metrics = cutoff_series.values.tolist()

cutoff_number = len(cutoff_events)

print(f"Event numbers ({cutoff_number}) and combined metric values above the cutoff threshold:")
for event, metric in zip(cutoff_events, cutoff_metrics):
    print(f"Event {event}: Combined Metric = {metric:.4f}")

# Write the events that exceed the cutoff threshold to a CSV file next to the input.
output_df = pd.DataFrame({
    "event_number": cutoff_events,
    "combined_metric": cutoff_metrics
})
output_csv_path = f"{stream_file_folder}/cutoff_events_with_metric_values_IQM_{weights_string}.csv"
output_df.to_csv(output_csv_path, index=False)
print(f"\nCut-off Event and their metrics have been written to: {output_csv_path}")

# Plot the histogram of the data with the cutoff threshold indicated.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(grouped_series.values, bins=n_bins, edgecolor='black', alpha=0.6, label='Data')
ax.axvline(cutoff, color='red', linestyle='dashed', linewidth=2, label=f'Cutoff = {cutoff:.2f}')

# Width of one histogram bin (the histogram spans min..max in n_bins equal bins).
bin_width = (grouped_series.max() - grouped_series.min()) / n_bins

# Overlay the fitted normal distribution curve. This is only meaningful when the
# standard deviation is a finite, positive number: with a single event std is NaN,
# and with identical values it is 0 — norm.pdf would then produce NaN only.
if np.isfinite(std_metric) and std_metric > 0:
    x_values = np.linspace(grouped_series.min(), grouped_series.max(), 1000)
    pdf_values = norm.pdf(x_values, loc=mean_metric, scale=std_metric)
    # Scale the PDF (unit area) to histogram counts: N * bin_width.
    pdf_values_scaled = pdf_values * len(grouped_series) * bin_width
    ax.plot(x_values, pdf_values_scaled, color='green', linewidth=2, label='Normal Distribution Fit')
else:
    print("Skipping normal distribution overlay: standard deviation is undefined or zero.")

ax.set_title("Histogram with Normal Distribution Cutoff")
ax.set_xlabel("Combined Metric")
ax.set_ylabel("Frequency")
ax.legend()
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()


# Running Partialator for Stream File Merging (Merge the Merged Stream File :D)

In [7]:
from run_partialator_and_convert import run_partialator_and_convert

# Stream file to be merged.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
stream_file = "/Users/xiaodong/Desktop/simulations/LTA/simulation-29/LTAsim_from_file_-512.5_-512.5.stream"

# Merge settings; forwarded unchanged as keyword arguments to the call below.
pointgroup = "m-3m"  # point group of the crystal
num_threads = 8
iterations = 5

partialator_options = {
    "pointgroup": pointgroup,
    "num_threads": num_threads,
    "iterations": iterations,
}
output_dir = run_partialator_and_convert(stream_file, **partialator_options)

# Report the result location only when the call returned something other than None.
has_output_dir = output_dir is not None
if has_output_dir:
    print("All done. Results are in:", output_dir)


Running partialator for stream file: /Users/xiaodong/Desktop/simulations/LTA/simulation-29/LTAsim_from_file_-512.5_-512.5.stream


Partialator Progress: 100%|██████████| 7/7 [00:25<00:00,  3.59s/Residuals]

Partialator completed for stream file: /Users/xiaodong/Desktop/simulations/LTA/simulation-29/LTAsim_from_file_-512.5_-512.5.stream
All done. Results are in: /Users/xiaodong/Desktop/simulations/LTA/simulation-29/LTAsim_from_file_-512.5_-512.5_merge_5_iter



