In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
from tqdm import tqdm
from functions import shift_atoms, transform_list_hkl_p63, get_structure_factor


# Get the number of logical CPU cores available
num_threads = 8  # Automatically use all available CPU cores

# Configure TensorFlow to use multiple threads
tf.config.threading.set_intra_op_parallelism_threads(num_threads)
tf.config.threading.set_inter_op_parallelism_threads(num_threads)

print(f"Using {num_threads} threads for intra-op and inter-op parallelism.")




Using 8 threads for intra-op and inter-op parallelism.


In [3]:
def fun_tf(x, pars):
    """
    Function depending on parameters to be fitted.

    Parameters
    ----------
    x : Tensor
        3D Tensor of shape [N, 3] representing the hkl vectors
    pars : List or Tensor
        1D Tensor with six elements representing the parameters
    """
    modified_struct  = shift_atoms(*pars)
    struct_self = tf.map_fn(
    lambda hkl: get_structure_factor(hkl, modified_struct), 
    x, 
    fn_output_signature=tf.complex64  
    )
    
    # Compute intensity
    intensity = tf.abs(struct_self) ** 2
    # Normalize the intensity to the maximum value
    return intensity / tf.reduce_max(intensity)

In [4]:
# Define your data pipeline with tf.data
def preprocess_data(features, labels):
    # Perform any necessary preprocessing steps here
    return features, labels

In [5]:
experimental_data = pd.read_csv('/home/mariolb/repos/CrystalClearFit/DistortionFit/new_data.csv')

hkl_list = experimental_data[["h", "k", "l"]].values.tolist()
hkl_list_trans = transform_list_hkl_p63(hkl_list)

features = hkl_list_trans
labels = tf.convert_to_tensor(experimental_data["intensity_exp"].tolist(), dtype=tf.float32)

n_features = experimental_data.shape[0]
n_dim = 3

max_par_value = 0.1

In [6]:
class FunAsLayer(tf.keras.layers.Layer):
    def __init__(self, max_par_value=0.1, **kwargs):
        super().__init__(**kwargs)
        self.max_par_value = max_par_value

    def build(self, input_shape):
        # Define six trainable parameters
        self.a = self.add_weight(name='a', shape=(), initializer=tf.keras.initializers.he_uniform(), trainable=True)
        self.b = self.add_weight(name='b', shape=(), initializer=tf.keras.initializers.he_uniform(), trainable=True)
        self.c = self.add_weight(name='c', shape=(), initializer=tf.keras.initializers.he_uniform(), trainable=True)
        self.d = self.add_weight(name='d', shape=(), initializer=tf.keras.initializers.he_uniform(), trainable=True)
        self.e = self.add_weight(name='e', shape=(), initializer=tf.keras.initializers.he_uniform(), trainable=True)
        self.f = self.add_weight(name='f', shape=(), initializer=tf.keras.initializers.he_uniform(), trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        # Apply tanh to ensure parameters stay within the [-0.1, 0.1] range
        a = self.max_par_value * tf.tanh(self.a)
        b = self.max_par_value * tf.tanh(self.b)
        c = self.max_par_value * tf.tanh(self.c)
        d = self.max_par_value * tf.tanh(self.d)
        e = self.max_par_value * tf.tanh(self.e)
        f = self.max_par_value * tf.tanh(self.f)
        
        # Call your fun_tf function to get the output
        return fun_tf(inputs, [a, b, c, d, e, f])

    def compute_output_shape(self, input_shape):
        # If the input has shape (None, 3), return the same shape
        return (input_shape[0], 1)  # This assumes your output is of shape (None, 1)


In [7]:
# Define the custom loss function
class RFactorLoss(tf.keras.losses.Loss):
    def call(self, y_true, y_pred):
        return tf.reduce_sum(tf.abs(y_true - y_pred)) / tf.reduce_sum(tf.abs(y_true))

# Define the custom metric function
def r_factor_metric(y_true, y_pred):
    return tf.reduce_sum(tf.abs(y_true - y_pred)) / tf.reduce_sum(tf.abs(y_true))

# Instantiate the Adam optimizer
optim = tf.keras.optimizers.Adam(learning_rate=1e-3)


In [9]:
# Create a TensorFlow Dataset with parallel loading
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
min_loss = 1e10
# Optimize data pipeline by parallelizing data loading
dataset = dataset.map(preprocess_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(64)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # Prefetch to optimize I/O
# Create the model
inputs = tf.keras.Input(shape=(n_dim,))
outputs = FunAsLayer(max_par_value)(inputs)
model = tf.keras.Model(inputs, outputs)

# Compile the model with the custom loss function and metric
model.compile(
    optimizer=optim,
    loss=RFactorLoss(),
    metrics=[r_factor_metric]
)

# Store loss values for this iteration
iteration_losses = []

# Start training loop
n_iter = 5  # Example: running for 5 iterations
for i in range(n_iter):
    histories = []
    # Use tqdm to create a custom progress bar for each epoch
    with tqdm(total=500, desc=f"Iteration {i+1}") as pbar:
        for epoch in range(500):
            # Train the model for one step (epoch)
            history = model.fit(
                x=features,  
                y=labels,    
                batch_size=64,
                epochs=1,  # Train for one epoch at a time
                verbose=0  # No output during training
            )
            epoch_loss = history.history['loss'][-1]
            histories.append(epoch_loss)
            pbar.update(1)  # Update progress bar
            pbar.set_postfix(loss=epoch_loss)  # Optionally display loss
    
    # Store the loss values for this iteration
    iteration_losses.append(histories)
    # Check final loss
    final_loss = iteration_losses[-1]

    if final_loss < min_loss:
        # Update best model parameters
        best_model_pars = [max_par_value * tf.sigmoid(model.layers[-1].get_weights()[i]) for i in range(6)]
        min_loss = final_loss
        rf = r_factor_metric(labels, fun_tf(features, best_model_pars))
        print(f"Iteration {i+1} - New best loss: {min_loss:.2e} (R-factor: {rf:.2e})")
    

ValueError: Optimizer (<keras.optimizers.optimizer_v2.adam.Adam object at 0x000001CB5731E3B0>) passed to `model.compile` was created inside a different distribution strategy scope than the model. All optimizers must be created in the same distribution strategy scope as the model (in this case <tensorflow.python.distribute.distribute_lib._DefaultDistributionStrategy object at 0x000001CB5731F250>). If you pass in a string identifier for an optimizer to compile, the optimizer will automatically be created in the correct distribution strategy scope.

In [None]:
# Plotting the loss values for all iterations
plt.figure(figsize=(10, 6))

# Plot loss values for each iteration
for i, loss_values in enumerate(all_losses):
    plt.plot(loss_values, label=f'Iteration {i+1}')

plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

# Print the best parameters
print("Best parameters found:")
print(best_pars)