# Desarrollo del script de ejecución por el usuario

## Procesamiento de datos

In [None]:
from LIACEI_workflow.data.DinamicaMolecular import DinamicaMolecular
from LIACEI_workflow.data.DinamicaMolecular import ElementoQuimico
from LIACEI_workflow.utils.utils import crear_carpeta
from LIACEI_workflow.codes.torchmd_net.utils import guardar_a_TorchMDNet
from LIACEI_workflow.codes.torchmd_net.utils import obtener_ruta_modelo_entrenado
from LIACEI_workflow.codes.torchmd_net.utils import cargar_desde_TorchMDNet
from LIACEI_workflow.codes.torchmd_net.utils import guardar_metricas_en_data
from LIACEI_workflow.codes.torchmd_net.utils import obtener_indices_test
from LIACEI_workflow.data.DinamicaMolecular import DinamicaMolecular
from LIACEI_workflow.codes.torchmd_net.utils import cargar_desde_TorchMDNet
from LIACEI_workflow.codes.torchmd_net.utils import obtener_indices_test
from LIACEI_workflow.data.DinamicaMolecular import DinamicaMolecular
from LIACEI_workflow.codes.torchmd_net.utils import generar_inferencias
from LIACEI_workflow.data.DinamicaMolecular import ElementoQuimico
from LIACEI_workflow.data.DinamicaMolecular import Vector
from LIACEI_workflow.data.DinamicaMolecular import ElementoQuimico
from LIACEI_workflow.data.DinamicaMolecular import Vector
from LIACEI_workflow.data.DinamicaMolecular import Fuerza
from LIACEI_workflow.data.DinamicaMolecular import ElementoQuimico
from LIACEI_workflow.data.DinamicaMolecular import Vector
from LIACEI_workflow.data.DinamicaMolecular import ElementoQuimico
from LIACEI_workflow.data.DinamicaMolecular import Vector
import numpy as np

In [None]:
# Load and convert the .data input file.
base_de_datos=DinamicaMolecular.cargar_desde_data("input/input.data")

In [None]:
# Subtract from every frame's energy the sum of the atomic energies of the
# elements it contains. The frame objects are updated in place.
for frame in base_de_datos.frames.values():
    energia_total = 0
    for e in frame.elementos:
        energia_total += ElementoQuimico(e).energia_atomica()

    frame.energia -= energia_total

In [None]:
# Folder that receives the TorchMD-Net input file written in the next cell
# (crear_carpeta presumably creates it -- confirm how it treats an existing folder).
crear_carpeta("transform")

In [None]:
# Export the processed dataset to the HDF5 file that is reloaded later for inference.
guardar_a_TorchMDNet(base_de_datos, "transform/input_torchmd-net.h5")

```bash
torchmd-train --conf input/config.yaml
```

## Generar inferencias

In [None]:
# Path of the trained model inside train/ (the selection rule lives in
# obtener_ruta_modelo_entrenado -- not visible here).
ruta_modelo = obtener_ruta_modelo_entrenado("train")

In [None]:
# Reload the dataset from the file exported for TorchMD-Net.
base_de_datos = cargar_desde_TorchMDNet("transform/input_torchmd-net.h5")

In [None]:
# Indices of the test split (presumably read from the splits file written during training -- confirm).
test_idx = obtener_indices_test("train/splits.npz")

In [None]:
# Restrict the dataset to the frames selected by test_idx.
subconjunto_test = DinamicaMolecular.subconjunto(base_de_datos, test_idx)

In [None]:

# Run the trained model on the test subset. gpu='0' is forwarded as-is
# (presumably a device identifier -- confirm against generar_inferencias).
inferencias = generar_inferencias(subconjunto_test,ruta_modelo, gpu='0' )

In [None]:
# Save the inferences so the analysis section can reload them.
DinamicaMolecular.guardar_a_hdf5(inferencias,"train/output_LIACEI.h5")

## Generación de datos para gráficas

In [None]:
# Load the references and keep only the frames of the test split, so they
# line up with the inferences that were generated on that same subset.
referencias = cargar_desde_TorchMDNet("transform/input_torchmd-net.h5")
test_idx = obtener_indices_test("train/splits.npz")
referencias = DinamicaMolecular.subconjunto(referencias, test_idx)

In [None]:
# Load the inferences saved in the previous section.
inferencias = DinamicaMolecular.cargar_desde_hdf5("train/output_LIACEI.h5")

In [None]:
# Produce analysis/epoch_vs_loss.data (the default input of grafica_epoch_vs_loss)
# from the training metrics CSV; the exact column mapping lives in
# guardar_metricas_en_data -- not visible here.
guardar_metricas_en_data("train/metrics.csv","analysis/epoch_vs_loss.data")

In [None]:
# Sanity check: references and inferences must describe the same frames, with
# the same elements in the same order, before they are compared atom by atom.
if len(referencias.frames) != len(inferencias.frames):
    raise ValueError("❌ Error: Los arreglos tienen diferente número de frames.")

for index_frame, frame_referencia in referencias.frames.items():
    # Equal frame counts do not guarantee equal keys; report a missing frame
    # explicitly instead of letting a bare KeyError escape.
    if index_frame not in inferencias.frames:
        raise ValueError(f"❌ Error: El frame {index_frame} no existe en las inferencias.")

    elementos_referencias = frame_referencia.elementos
    elementos_inferencias = inferencias.frames[index_frame].elementos

    # Order-sensitive comparison. list() keeps this valid for plain sequences
    # and for NumPy arrays alike (a bare `!=` on arrays yields an array whose
    # truth value is ambiguous inside `if`).
    if list(elementos_referencias) != list(elementos_inferencias):
        raise ValueError(f"❌ Error: Diferencia detectada en el frame {index_frame}.")

In [None]:
# One set of distinct elements per dataset, filled frame by frame.
elementos_referencias, elementos_inferencias = set(), set()

for dataset, acumulado in (
    (referencias, elementos_referencias),
    (inferencias, elementos_inferencias),
):
    for frame in dataset.frames.values():
        acumulado.update(frame.elementos)

# Combined list of the distinct elements found in either dataset.
elementos_unicos = list(elementos_referencias.union(elementos_inferencias))

In [None]:
# Export, per chemical element, the angle between the inferred and the
# reference force of every atom to analysis/forces_angles_<symbol>.data.

# Angle lines grouped by element symbol. Every known element is seeded so that
# each one gets a file with a header even if no atom of it is found below.
datos_por_elemento = {
    ElementoQuimico(elemento).simbolo_str: [] for elemento in elementos_unicos
}

for key, frame_inferencia in inferencias.frames.items():
    frame_referencia = referencias.frames[key]  # matching reference frame

    for index, atomo_inferencia in enumerate(frame_inferencia.atomos):
        elemento = ElementoQuimico(atomo_inferencia['numero_atomico']).simbolo_str
        f_inferencia = atomo_inferencia['fuerza']
        f_referencia = frame_referencia.atomos[index]['fuerza']
        angulo = Vector.angulo(f_inferencia, f_referencia)

        datos_por_elemento.setdefault(elemento, []).append(f"{angulo}\n")

# "w" (not "a"): re-running the cell must regenerate the files, otherwise the
# header and all rows pile up again and the plotting step (which skips only
# the first line) would read a header as data. 1 MiB buffer, one writelines
# call per file.
for elemento, datos in datos_por_elemento.items():
    with open(f"analysis/forces_angles_{elemento}.data", "w", buffering=1024*1024) as archivo:
        archivo.write("theta(grades)\n")
        archivo.writelines(datos)

In [None]:
# Export, per chemical element, the magnitude of the reference and of the
# inferred force of every atom to analysis/forces_modules_<symbol>.data.

# Rows grouped by element symbol. Every known element is seeded so that each
# one gets a file with a header even if no atom of it is found below.
datos_por_elemento = {
    ElementoQuimico(elemento).simbolo_str: [] for elemento in elementos_unicos
}

for key, frame_inferencia in inferencias.frames.items():
    frame_referencia = referencias.frames[key]  # matching reference frame

    for index, atomo in enumerate(frame_inferencia.atomos):
        elemento = ElementoQuimico(atomo['numero_atomico']).simbolo_str

        # Magnitude of the inferred force.
        fx, fy, fz = atomo['fuerza']
        f_inferencia_magnitud = Fuerza.magnitud(Fuerza(fx, fy, fz))

        # Magnitude of the reference force of the same atom.
        fx, fy, fz = frame_referencia.atomos[index]['fuerza']
        f_referencia_magnitud = Fuerza.magnitud(Fuerza(fx, fy, fz))

        datos_por_elemento.setdefault(elemento, []).append(
            f"{f_referencia_magnitud}\t{f_inferencia_magnitud}\n"
        )

# "w" (not "a"): re-running the cell must regenerate the files instead of
# appending a second header and a second copy of the rows.
for elemento, datos in datos_por_elemento.items():
    with open(f"analysis/forces_modules_{elemento}.data", "w", buffering=1024*1024) as archivo:
        archivo.write("f_reference(eV/A)"+"\t"+"f_inference(eV/A)"+"\n")
        archivo.writelines(datos)


In [None]:
# Export reference vs. inferred energy, one row per frame.
# The header line comes first: the plotting functions read this file with
# header=0, so without it the first frame would be consumed as column names.
datos = ["reference_energy(eV)\tinference_energy(eV)\n"]

for key, frame_inferencia in inferencias.frames.items():
    frame_referencia = referencias.frames[key]  # matching reference frame

    energia_referencia = frame_referencia.energia
    energia_inferencia = frame_inferencia.energia

    datos.append(f"{energia_referencia}\t{energia_inferencia}\n")

# "w" (not "a") so that re-running the cell regenerates the file instead of
# stacking a second copy of the rows under the first.
with open("analysis/refence_energy_vs_inference_energy.data", "w", buffering=1024*1024) as archivo:
    archivo.writelines(datos)

In [None]:
# Export reference energy, inferred energy and their absolute difference, one
# row per frame.
# The header belongs to THIS file and has three columns. It used to be
# appended to refence_energy_vs_inference_energy.data, which put a text line
# after that file's numeric rows and left this file without a header.
datos = ["reference_energy(eV)\tinference_energy(eV)\tabs_difference(eV)\n"]

for key, frame_inferencia in inferencias.frames.items():
    frame_referencia = referencias.frames[key]  # matching reference frame

    energia_referencia = frame_referencia.energia
    energia_inferencia = frame_inferencia.energia
    diferencia = np.abs(energia_referencia - energia_inferencia)

    datos.append(f"{energia_referencia}\t{energia_inferencia}\t{diferencia}\n")

# "w" (not "a") so that re-running the cell regenerates the file.
with open("analysis/diference_refence_energy_and_inference_energy.data", "w", buffering=1024*1024) as archivo:
    archivo.writelines(datos)

## Gráficas

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def grafica_epoch_vs_loss(
    path_data_file="analysis/epoch_vs_loss.data",
    output_path_tif="analysis/epoch_vs_loss.tiff",
    xlim=(0, 500),
    ylim=(0, 3),
    scale_factor=222
):
    """Plot the training and validation error curves against the epoch.

    Args:
        path_data_file: Whitespace-separated text file with three columns
            (epoch, training error, validation error). Text after ``#`` is
            treated as a comment.
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        xlim: Limits of the x axis.
        ylim: Limits of the y axis.
        scale_factor: Divisor applied to both error columns together with the
            x1000 conversion. Defaults to 222, the value that used to be
            hard-coded (presumably the number of atoms per frame -- confirm).

    Returns:
        None. The figure is written to ``output_path_tif``.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    datos = pd.read_csv(
        path_data_file, sep=r'\s+', comment='#', header=None,
        names=['Epoch', 'MAE_Train', 'MAE_Val'], engine='python'
    )

    if datos.shape[1] != 3:
        print("Error: El archivo no tiene 3 columnas (Epoch, MAE_Train, MAE_Val).")
        return

    # x1000 and division by scale_factor (the y label reports meV, so the
    # input is presumably in eV -- confirm).
    datos[['MAE_Train', 'MAE_Val']] *= 1000.0 / scale_factor

    # NOTE(review): the series are named MAE but the axis label and title say
    # "Mean Squared Error"; confirm which metric the data file really holds.
    plt.figure(figsize=(6, 4))
    plt.plot(datos['Epoch'], datos['MAE_Train'], label='MAE_Train')
    plt.plot(datos['Epoch'], datos['MAE_Val'], label='MAE_Val')
    plt.xlabel('Epoch')
    plt.ylabel('Mean Squared Error (meV)')
    plt.title('Epoch vs Mean Squared Error')
    plt.legend()
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.grid(True)
    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_epoch_vs_loss()

In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_energy(
    path_data_file="analysis/refence_energy_vs_inference_energy.data",
    output_path_tif="analysis/ref_vs_inf_energy.tiff",
    xlim=None,
    ylim=None
):
    """Scatter-plot the inferred energy against the reference energy.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference energy, inferred energy).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        xlim: Limits of the x axis; when falsy, the common min/max of both
            columns is used.
        ylim: Limits of the y axis; same fallback as ``xlim``.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    col_ref = 'Reference Energy (eV)'
    col_inf = 'Inference Energy (eV)'
    datos.columns = [col_ref, col_inf]

    if xlim is None or ylim is None:
        # Same range on both axes so the diagonal means perfect agreement.
        xy_min = min(datos[col_ref].min(), datos[col_inf].min())
        xy_max = max(datos[col_ref].max(), datos[col_inf].max())
        xlim = xlim or (xy_min, xy_max)
        ylim = ylim or (xy_min, xy_max)

    plt.figure(figsize=(6, 6))
    plt.scatter(datos[col_ref], datos[col_inf])
    plt.xlabel('Reference Energy (eV)')
    plt.ylabel('Inference Energy (eV)')
    plt.title('Reference Energy vs Inference Energy')
    plt.grid(True)
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_energy()



In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_energy_hist_2d(
    path_data_file="analysis/refence_energy_vs_inference_energy.data",
    output_path_tif="analysis/ref_vs_inf_energy_hist_2d.tiff",
    scale_factor=222,
    bins=50,
    xlim=None,
    ylim=None
):
    """Draw a 2D density histogram of inferred vs. reference energy.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference energy, inferred energy).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        scale_factor: Divisor applied to both columns before binning.
        bins: Number of bins per axis, forwarded to ``np.histogram2d``.
        xlim: Histogram range on x; when falsy, the common min/max of both
            scaled columns is used.
        ylim: Histogram range on y; same fallback as ``xlim``.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    col_ref = 'Reference Energy (eV)'
    col_inf = 'Inference Energy (eV)'
    datos.columns = [col_ref, col_inf]

    # NOTE(review): values are divided by scale_factor but the axis labels
    # still read "(eV)"; confirm the intended unit.
    datos[col_ref] /= scale_factor
    datos[col_inf] /= scale_factor

    if xlim is None or ylim is None:
        xy_min = min(datos[col_ref].min(), datos[col_inf].min())
        xy_max = max(datos[col_ref].max(), datos[col_inf].max())
        xlim = xlim or (xy_min, xy_max)
        ylim = ylim or (xy_min, xy_max)

    hist, xedges, yedges = np.histogram2d(
        datos[col_ref], datos[col_inf],
        bins=bins, range=[xlim, ylim], density=True
    )

    # Empty bins become NaN so that imshow leaves them blank.
    hist_filtered = np.where(hist != 0, hist, np.nan)

    plt.figure(figsize=(6, 6))
    # histogram2d indexes [x, y]; imshow expects [row=y, col=x], hence the .T
    plt.imshow(hist_filtered.T, extent=[xlim[0], xlim[1], ylim[0], ylim[1]],
               cmap='Blues', origin='lower', aspect='auto')
    plt.colorbar(label='Density')
    plt.xlabel('Reference Energy (eV)')
    plt.ylabel('Inference Energy (eV)')
    plt.title('Reference Energy vs Inferred Energy (Density)')
    plt.grid(True)
    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_energy_hist_2d()

In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from mpl_toolkits.mplot3d import Axes3D

def grafica_ref_vs_inf_energy_hist_3d(
    path_data_file="analysis/refence_energy_vs_inference_energy.data",
    output_path_tif="analysis/ref_vs_inf_energy_hist_3d.tiff",
    scale_factor=222,
    bins=50
):
    """Draw the inferred-vs-reference energy density as a 3D surface.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference energy, inferred energy).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        scale_factor: Divisor applied to both columns before binning.
        bins: Number of bins per axis, forwarded to ``np.histogram2d``.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    col_ref = 'Reference Energy (eV)'
    col_inf = 'Inference Energy (eV)'
    datos.columns = [col_ref, col_inf]

    # NOTE(review): values are divided by scale_factor but the axis labels
    # still read "(eV)"; confirm the intended unit.
    datos[col_ref] /= scale_factor
    datos[col_inf] /= scale_factor

    # Same range on both axes.
    xy_min = min(datos[col_ref].min(), datos[col_inf].min())
    xy_max = max(datos[col_ref].max(), datos[col_inf].max())

    hist, xedges, yedges = np.histogram2d(
        datos[col_ref], datos[col_inf],
        bins=bins, range=[[xy_min, xy_max], [xy_min, xy_max]], density=True
    )

    # Only non-empty bins take part in the surface; each bin is placed at its
    # lower edge. indexing="ij" matches histogram2d's [x, y] layout.
    non_zero_indices = hist != 0
    xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
    xpos = xpos[non_zero_indices]
    ypos = ypos[non_zero_indices]
    dz = hist[non_zero_indices]

    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')

    ax.plot_trisurf(xpos.ravel(), ypos.ravel(), dz.ravel(), cmap='Blues')

    ax.set_xlim(xy_min, xy_max)
    ax.set_ylim(xy_min, xy_max)

    ax.set_xlabel('DFT Energy (eV)')
    ax.set_ylabel('TorchMD-Net Energy (eV)')
    ax.set_zlabel('Density')
    ax.set_title('Reference Energy vs Inferred Energy (Density Surface)')

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_energy_hist_3d()

In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_energy_hist(
    path_data_file="analysis/refence_energy_vs_inference_energy.data",
    output_path_tif="analysis/ref_vs_inf_energy_hist.tiff",
    scale_factor=222,
    bins=50
):
    """Overlay the 1D density histograms of reference and inferred energy.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference energy, inferred energy).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        scale_factor: Divisor applied to both columns before binning.
        bins: Number of histogram bins.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    col_ref = 'Reference Energy (eV)'
    col_inf = 'Inference Energy (eV)'
    datos.columns = [col_ref, col_inf]

    # NOTE(review): values are divided by scale_factor but the axis label
    # still reads "(eV)"; confirm the intended unit.
    datos[col_ref] /= scale_factor
    datos[col_inf] /= scale_factor

    plt.figure(figsize=(8, 6))
    # Semi-transparent bars so both distributions stay visible where they overlap.
    plt.hist(datos[col_ref], bins=bins, edgecolor='red', alpha=0.5, label='Reference', align='mid', density=True, color='red')
    plt.hist(datos[col_inf], bins=bins, edgecolor='green', alpha=0.5, label='Inference', align='mid', density=True, color='green')

    plt.xlabel('Energy (eV)')
    plt.ylabel('Density')
    plt.title('Reference and Inference Energy Distribution')
    plt.legend()

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_energy_hist()


In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_energy_abs_dif(
    path_data_file="analysis/refence_energy_vs_inference_energy.data",
    output_path_tif="analysis/ref_vs_inf_energy_abs_dif.tiff",
    scale_factor=222,
    bins=50
):
    """Plot the density histogram of |reference energy - inferred energy|.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference energy, inferred energy).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        scale_factor: Divisor applied to both columns before the difference
            is taken.
        bins: Number of histogram bins.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    col_ref = 'Reference Energy (eV)'
    col_inf = 'Inference Energy (eV)'
    datos.columns = [col_ref, col_inf]

    # NOTE(review): values are divided by scale_factor but the axis label
    # still reads "(eV)"; confirm the intended unit.
    datos[col_ref] /= scale_factor
    datos[col_inf] /= scale_factor

    abs_diff = np.abs(datos[col_ref] - datos[col_inf])

    plt.figure(figsize=(8, 6))
    plt.hist(abs_diff, bins=bins, edgecolor='black', density=True)
    plt.xlabel('Difference between the reference energy and the energy inferred (eV)')
    plt.ylabel('Density')
    plt.title('Absolute Energy Difference Distribution')
    plt.grid(True)

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_energy_abs_dif()

In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_angles_abs_dif_hydrogen(
    path_data_file="analysis/forces_angles_H.data",
    output_path_tif="analysis/ref_vs_inf_forces_angles_abs_dif_Hydrogen.tiff",
    bins=200,
    xlim=None,
    ylim=(0, 4.5)
):
    """Plot the distribution of the force angles stored for hydrogen.

    Args:
        path_data_file: Whitespace-separated file of angle values; its first
            line (the header) is skipped.
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        bins: Number of histogram bins.
        xlim: Limits of the x axis; left automatic when falsy.
        ylim: Limits of the y axis; left automatic when falsy.

    Returns:
        None. The figure is written to ``output_path_tif``.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated
    # (it emits a FutureWarning) in favour of this equivalent separator.
    datos = pd.read_csv(
        path_data_file, header=None, skiprows=1, sep=r'\s+'
    )

    hist, bin_edges = np.histogram(datos.values.flatten(), bins=bins, density=True)
    bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2

    plt.figure(figsize=(8, 6))
    # NOTE(review): with density=True the values are densities per unit of x,
    # so x100 is a true percentage only when the bins are one unit wide.
    plt.scatter(bin_centres, hist * 100)

    plt.xlabel('Angle between DFT and TorchMD-Net forces (degrees)')
    plt.ylabel('Frequency of occurrence (%)')
    plt.title('Angles Distribution (Hydrogen)')

    if xlim:
        plt.xlim(xlim)
    if ylim:
        plt.ylim(ylim)

    plt.grid(True)
    plt.tight_layout()

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_angles_abs_dif_hydrogen()


  datos = pd.read_csv(


In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_angles_abs_dif_oxygen(
    path_data_file="analysis/forces_angles_O.data",
    output_path_tif="analysis/ref_vs_inf_forces_angles_abs_dif_Oxygen.tiff",
    bins=200,
    xlim=None,
    ylim=(0, 4.5)
):
    """Plot the distribution of the force angles stored for oxygen.

    Args:
        path_data_file: Whitespace-separated file of angle values; its first
            line (the header) is skipped.
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        bins: Number of histogram bins.
        xlim: Limits of the x axis; left automatic when falsy.
        ylim: Limits of the y axis; left automatic when falsy.

    Returns:
        None. The figure is written to ``output_path_tif``.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated
    # (it emits a FutureWarning) in favour of this equivalent separator.
    datos = pd.read_csv(
        path_data_file, header=None, skiprows=1, sep=r'\s+'
    )

    hist, bin_edges = np.histogram(datos.values.flatten(), bins=bins, density=True)
    bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2

    plt.figure(figsize=(8, 6))
    # NOTE(review): with density=True the values are densities per unit of x,
    # so x100 is a true percentage only when the bins are one unit wide.
    plt.scatter(bin_centres, hist * 100)

    plt.xlabel('Angle between DFT and TorchMD-Net forces (degrees)')
    plt.ylabel('Frequency of occurrence (%)')
    plt.title('Angles Distribution (Oxygen)')

    if xlim:
        plt.xlim(xlim)
    if ylim:
        plt.ylim(ylim)

    plt.grid(True)
    plt.tight_layout()

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_angles_abs_dif_oxygen()


  datos = pd.read_csv(


In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_angles_abs_dif_aluminium(
    path_data_file="analysis/forces_angles_Al.data",
    output_path_tif=None,
    bins=200,
    xlim=None,
    ylim=(0, 4)
):
    """Plot the distribution of the force angles stored for aluminium.

    Args:
        path_data_file: Whitespace-separated file of angle values; its first
            line (the header) is skipped.
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
            ``None`` selects
            ``analysis/ref_vs_inf_forces_angles_abs_dif_Aluminium.tiff``.
        bins: Number of histogram bins.
        xlim: Limits of the x axis; left automatic when falsy.
        ylim: Limits of the y axis; left automatic when falsy.

    Returns:
        None. The figure is written to ``output_path_tif``.
    """
    if output_path_tif is None:
        output_path_tif = "analysis/ref_vs_inf_forces_angles_abs_dif_Aluminium.tiff"

    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated
    # (it emits a FutureWarning) in favour of this equivalent separator.
    datos = pd.read_csv(
        path_data_file, header=None, skiprows=1, sep=r'\s+'
    )

    hist, bin_edges = np.histogram(datos.values.flatten(), bins=bins, density=True)
    bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2

    plt.figure(figsize=(8, 6))
    # NOTE(review): with density=True the values are densities per unit of x,
    # so x100 is a true percentage only when the bins are one unit wide.
    plt.scatter(bin_centres, hist * 100)

    plt.xlabel('Angle between DFT and TorchMD-Net forces (degrees)')
    plt.ylabel('Frequency of occurrence (%)')
    plt.title('Angles Distribution (Aluminium)')

    if xlim:
        plt.xlim(xlim)
    if ylim:
        plt.ylim(ylim)

    plt.grid(True)
    plt.tight_layout()

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_angles_abs_dif_aluminium()

  datos = pd.read_csv(


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_modules_hydrogen(
    path_data_file="analysis/forces_modules_H.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_hydrogen.tiff",
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Scatter-plot inferred vs. reference force magnitudes for hydrogen.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference magnitude, inferred magnitude).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        xlim: Limits of the x axis.
        ylim: Limits of the y axis.

    Returns:
        None. The figure is written to ``output_path_tif``.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw strings: '\s' and '\A' are invalid escape sequences in normal
    # literals and trigger a warning on recent Python versions.
    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    # Raises if the file does not have exactly two columns.
    datos.columns = [r'Reference modules (eV/$\AA$)', r'Inferred modules (eV/$\AA$)']

    plt.figure(figsize=(8, 6))
    plt.scatter(datos.iloc[:, 0], datos.iloc[:, 1], s=25)

    plt.xlabel(r'DFT modules (eV/$\AA$)')
    plt.ylabel(r'TorchMD-net modules (eV/$\AA$)')
    plt.title('Reference modules vs Inferred modules (Hydrogen)')

    plt.xlim(xlim)
    plt.ylim(ylim)

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_modules_hydrogen()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_modules_hydrogen_hist_2d(
    path_data_file="analysis/forces_modules_H.data",
    output_path_tif= "analysis/ref_vs_inf_forces_modules_hydrogen_hist_2d.tiff",
    bins=50,
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Draw a 2D density histogram of inferred vs. reference force magnitudes (hydrogen).

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference magnitude, inferred magnitude).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        bins: Number of bins per axis, forwarded to ``np.histogram2d``.
        xlim: Limits of the x axis (display only; binning uses the data range).
        ylim: Limits of the y axis (display only; binning uses the data range).

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw strings: '\s' and '\A' are invalid escape sequences in normal
    # literals and trigger a warning on recent Python versions.
    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    datos.columns = [r'Reference modules (eV/$\AA$)', r'Inferred modules (eV/$\AA$)']

    hist, xedges, yedges = np.histogram2d(
        datos.iloc[:, 0], datos.iloc[:, 1], bins=bins, density=True
    )

    # Empty bins become NaN so that imshow leaves them blank.
    hist_filtered = np.where(hist != 0, hist, np.nan)

    plt.figure(figsize=(8, 6))
    # histogram2d indexes [x, y]; imshow expects [row=y, col=x], hence the .T
    plt.imshow(hist_filtered.T, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
               cmap='Blues', origin='lower', aspect='auto')
    plt.colorbar(label='Density')

    plt.xlabel(r'DFT modules (eV/$\AA$)')
    plt.ylabel(r'TorchMD-net modules (eV/$\AA$)')
    plt.title('Reference modules vs Inferred modules (Hydrogen)')

    plt.xlim(xlim)
    plt.ylim(ylim)

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_modules_hydrogen_hist_2d()


In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from mpl_toolkits.mplot3d import Axes3D

def grafica_ref_vs_inf_forces_modules_hydrogen_hist_3d(
    path_data_file="analysis/forces_modules_H.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_hydrogen_hist_3d.tiff",
    bins=50,
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Draw the force-magnitude density (hydrogen) as a 3D surface.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference magnitude, inferred magnitude).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        bins: Number of bins per axis, forwarded to ``np.histogram2d``.
        xlim: Limits of the x axis.
        ylim: Limits of the y axis.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw strings: '\s' and '\A' are invalid escape sequences in normal
    # literals and trigger a warning on recent Python versions.
    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    datos.columns = [r'Reference modules (eV/$\AA$)', r'Inferred modules (eV/$\AA$)']

    hist, xedges, yedges = np.histogram2d(
        datos.iloc[:, 0], datos.iloc[:, 1], bins=bins, density=True
    )

    # Each bin is placed at its lower edge; indexing="ij" matches
    # histogram2d's [x, y] layout so ravel() keeps positions and heights aligned.
    xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
    xpos = xpos.ravel()
    ypos = ypos.ravel()
    dz = hist.ravel()

    # Only non-empty bins take part in the surface.
    non_zero_indices = dz != 0
    xpos = xpos[non_zero_indices]
    ypos = ypos[non_zero_indices]
    dz = dz[non_zero_indices]

    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')

    ax.plot_trisurf(xpos, ypos, dz, cmap='Blues')

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    ax.set_xlabel(r'DFT modules (eV/$\AA$)')
    ax.set_ylabel(r'TorchMD-net modules (eV/$\AA$)')
    ax.set_zlabel('Density')
    ax.set_title('Reference modules vs Inferred modules (Hydrogen) (Surface)')

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_modules_hydrogen_hist_3d()


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def graficar_ref_vs_inf_forces_modules_hydrogen_hist(
    path_data_file="analysis/forces_modules_H.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_hydrogen_hist.tiff",
    bins=100,
    xlim=None,
    ylim=None
):
    """Overlay the density histograms of reference and inferred force magnitudes (hydrogen).

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference magnitude, inferred magnitude).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        bins: Number of histogram bins.
        xlim: Limits of the x axis; left automatic when falsy.
        ylim: Limits of the y axis; left automatic when falsy.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw strings: '\s' and '\A' are invalid escape sequences in normal
    # literals and trigger a warning on recent Python versions.
    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente dos columnas.")
        return

    col_ref = r'Reference modules (eV/$\AA$)'
    col_inf = r'Inferred modules (eV/$\AA$)'
    datos.columns = [col_ref, col_inf]

    plt.figure(figsize=(8, 6))
    # Semi-transparent bars so both distributions stay visible where they overlap.
    plt.hist(datos[col_ref], bins=bins, edgecolor='red', alpha=0.5, label='Reference', align='mid', color='red', density=True)
    plt.hist(datos[col_inf], bins=bins, edgecolor='green', alpha=0.5, label='Inference', align='mid', color='green', density=True)

    plt.xlabel(r'Modules (eV/$\AA$)')
    plt.ylabel('Density')
    plt.title('Distribution of the module of reference and inferred forces (Hydrogen)')

    if xlim:
        plt.xlim(xlim)
    if ylim:
        plt.ylim(ylim)

    plt.legend()

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

graficar_ref_vs_inf_forces_modules_hydrogen_hist()


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_modules_oxygen(
    path_data_file="analysis/forces_modules_O.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_oxygen.tiff",
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Scatter-plot inferred vs. reference force magnitudes for oxygen.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference magnitude, inferred magnitude).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        xlim: Limits of the x axis.
        ylim: Limits of the y axis.

    Returns:
        None. The figure is written to ``output_path_tif``.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw strings: '\s' and '\A' are invalid escape sequences in normal
    # literals and trigger a warning on recent Python versions.
    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    # Raises if the file does not have exactly two columns.
    datos.columns = [r'Reference modules (eV/$\AA$)', r'Inferred modules (eV/$\AA$)']

    plt.figure(figsize=(8, 6))
    plt.scatter(datos.iloc[:, 0], datos.iloc[:, 1], s=25)

    plt.xlabel(r'DFT modules (eV/$\AA$)')
    plt.ylabel(r'TorchMD-net modules (eV/$\AA$)')
    plt.title('Reference modules vs Inferred modules (Oxygen)')

    plt.xlim(xlim)
    plt.ylim(ylim)

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_modules_oxygen()


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_modules_oxygen_hist_2d(
    path_data_file="analysis/forces_modules_O.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_oxygen_hist_2d.tiff",
    bins=50,
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Draw a 2D density histogram of inferred vs. reference force magnitudes (oxygen).

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference magnitude, inferred magnitude).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        bins: Number of bins per axis, forwarded to ``np.histogram2d``.
        xlim: Limits of the x axis (display only; binning uses the data range).
        ylim: Limits of the y axis (display only; binning uses the data range).

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw strings: '\s' and '\A' are invalid escape sequences in normal
    # literals and trigger a warning on recent Python versions.
    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    datos.columns = [r'Reference modules (eV/$\AA$)', r'Inferred modules (eV/$\AA$)']

    hist, xedges, yedges = np.histogram2d(
        datos.iloc[:, 0], datos.iloc[:, 1], bins=bins, density=True
    )

    # Empty bins become NaN so that imshow leaves them blank.
    hist_filtered = np.where(hist != 0, hist, np.nan)

    plt.figure(figsize=(8, 6))
    # histogram2d indexes [x, y]; imshow expects [row=y, col=x], hence the .T
    plt.imshow(hist_filtered.T, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
               cmap='Blues', origin='lower', aspect='auto')
    plt.colorbar(label='Density')

    plt.xlabel(r'DFT modules (eV/$\AA$)')
    plt.ylabel(r'TorchMD-net modules (eV/$\AA$)')
    plt.title('Reference modules vs Inferred modules (Oxygen)')

    plt.xlim(xlim)
    plt.ylim(ylim)

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_modules_oxygen_hist_2d()

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from mpl_toolkits.mplot3d import Axes3D

def grafica_ref_vs_inf_forces_modules_oxygen_hist_3d(
    path_data_file="analysis/forces_modules_O.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_oxygen_hist_3d.tiff",
    bins=50,
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Draw the force-magnitude density (oxygen) as a 3D surface.

    Args:
        path_data_file: Whitespace-separated file with one header line and
            two columns (reference magnitude, inferred magnitude).
        output_path_tif: Destination of the TIFF figure (saved at 600 dpi).
        bins: Number of bins per axis, forwarded to ``np.histogram2d``.
        xlim: Limits of the x axis.
        ylim: Limits of the y axis.

    Returns:
        None. Prints a message and returns early when the file does not have
        exactly two columns.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the folder when
    # the output path actually has a directory component.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw strings: '\s' and '\A' are invalid escape sequences in normal
    # literals and trigger a warning on recent Python versions.
    datos = pd.read_csv(
        path_data_file, sep=r'\s+', header=0, engine='python'
    )

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    datos.columns = [r'Reference modules (eV/$\AA$)', r'Inferred modules (eV/$\AA$)']

    hist, xedges, yedges = np.histogram2d(
        datos.iloc[:, 0], datos.iloc[:, 1], bins=bins, density=True
    )

    # Each bin is placed at its lower edge; indexing="ij" matches
    # histogram2d's [x, y] layout so ravel() keeps positions and heights aligned.
    xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
    xpos = xpos.ravel()
    ypos = ypos.ravel()
    dz = hist.ravel()

    # Only non-empty bins take part in the surface.
    non_zero_indices = dz != 0
    xpos = xpos[non_zero_indices]
    ypos = ypos[non_zero_indices]
    dz = dz[non_zero_indices]

    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')

    ax.plot_trisurf(xpos, ypos, dz, cmap='Blues')

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    ax.set_xlabel(r'DFT modules (eV/$\AA$)')
    ax.set_ylabel(r'TorchMD-net modules (eV/$\AA$)')
    ax.set_zlabel('Density')
    ax.set_title('Reference modules vs Inferred modules (Oxygen) (Surface)')

    plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    plt.close()

grafica_ref_vs_inf_forces_modules_oxygen_hist_3d()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def graficar_ref_vs_inf_forces_modules_oxygen_hist(
    path_data_file="analysis/forces_modules_O.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_oxygen_hist.tiff",
    bins=100,
    xlim=None,
    ylim=None
):
    """Overlay the normalised histograms of reference and inferred oxygen force modules.

    Reads a whitespace-separated table whose first row is a header and draws
    the first column (red, labelled "Reference") and the second column
    (green, labelled "Inference") as semi-transparent density histograms on
    the same axes. The figure is saved as a 600 dpi TIFF and closed.

    Args:
        path_data_file: Path of the two-column input table.
        output_path_tif: Destination of the TIFF image. Its parent directory
            is created when it does not exist yet.
        bins: Bin specification forwarded to ``plt.hist``.
        xlim: Optional x-axis limits; left automatic when ``None``.
        ylim: Optional y-axis limits; left automatic when ``None``.

    Returns:
        None. When the table does not have exactly two columns an error
        message is printed and nothing is drawn.
    """
    # A bare file name has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw string: '\s' is not a valid escape sequence in a normal literal.
    datos = pd.read_csv(path_data_file, sep=r'\s+', header=0, engine='python')

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente dos columnas.")
        return

    reference_col = r'Reference modules (eV/$\AA$)'
    inferred_col = r'Inferred modules (eV/$\AA$)'
    datos.columns = [reference_col, inferred_col]

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.hist(datos[reference_col], bins=bins, edgecolor='red', alpha=0.5,
                 label='Reference', align='mid', color='red', density=True)
        plt.hist(datos[inferred_col], bins=bins, edgecolor='green', alpha=0.5,
                 label='Inference', align='mid', color='green', density=True)

        plt.xlabel(r'Modules (eV/$\AA$)')
        plt.ylabel('Density')
        plt.title('Distribution of the module of reference and inferred forces (Oxygen)')

        # Explicit None test: limits may be any sequence, including arrays
        # whose truth value is ambiguous.
        if xlim is not None:
            plt.xlim(xlim)
        if ylim is not None:
            plt.ylim(ylim)

        plt.legend()

        plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

# Render the figure using the default paths and bins; axis limits stay automatic.
graficar_ref_vs_inf_forces_modules_oxygen_hist()

In [None]:
# Legacy cell: overlaid density histograms of the oxygen force modules read
# from a fixed path.
# NOTE(review): relies on `pd` and `plt` having been imported by earlier cells.
import os

# Path of the file holding the force modules
MODULE_O_PATH = 'graficas-inferencias/input-graficas/modules-test-8.dat'
OUTPUT_FIGURE_PATH = "graficas-inferencias/output-graficas/figura-17.png"
# NOTE(review): these limits are not used in this cell; kept in case later
# cells read them.
x_limit=12
y_limit=12
# Load the data with pandas (no header row, whitespace-separated columns).
# `delim_whitespace=True` is deprecated since pandas 2.2; sep=r'\s+' is the
# documented replacement.
data = pd.read_csv(MODULE_O_PATH, header=None, sep=r'\s+')

# Split the data into its two columns
ref_data_array = data.iloc[:, 0]  # First column
inf_data_array = data.iloc[:, 1]   # Second column

# Plot the histograms (density must be a boolean, not the string "True")
plt.hist(ref_data_array, bins=100, edgecolor='red', alpha=0.5, label='Reference', align='mid', color='red', density=True)
plt.hist(inf_data_array, bins=100, edgecolor='green', alpha=0.5, label='Inference', align='mid', color='green', density=True)

# Add axis labels and title
plt.xlabel(r'Modules (eV/$\AA$)')
plt.ylabel('Density')
plt.title('Distribution of the module of reference and inferred forces (Oxygen)')

# Show the legend
plt.legend()

# Save the histogram (creating the output folder if needed) and display it
os.makedirs(os.path.dirname(OUTPUT_FIGURE_PATH), exist_ok=True)
plt.savefig(OUTPUT_FIGURE_PATH)
plt.show()

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_modules_aluminium(
    path_data_file="analysis/forces_modules_Al.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_aluminium.tiff",
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Scatter-plot reference vs inferred aluminium force modules.

    Reads a whitespace-separated table whose first row is a header and plots
    the first column on the x axis against the second column on the y axis.
    The figure is saved as a 600 dpi TIFF and closed afterwards.

    Args:
        path_data_file: Path of the two-column input table.
        output_path_tif: Destination of the TIFF image. Its parent directory
            is created when it does not exist yet.
        xlim: Limits applied to the x axis.
        ylim: Limits applied to the y axis.

    Returns:
        None. When the table does not have exactly two columns an error
        message is printed and nothing is drawn.
    """
    # A bare file name has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw string: '\s' is not a valid escape sequence in a normal literal.
    datos = pd.read_csv(path_data_file, sep=r'\s+', header=0, engine='python')

    # Same guard as the histogram variants of this plot: reject malformed
    # tables up front with a readable message.
    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(datos.iloc[:, 0], datos.iloc[:, 1], s=25)

        plt.xlabel(r'DFT modules (eV/$\AA$)')
        plt.ylabel(r'TorchMD-net modules (eV/$\AA$)')
        plt.title('Reference modules vs Inferred modules (Aluminium)')

        plt.xlim(xlim)
        plt.ylim(ylim)

        plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

# Render the figure using the default paths and axis limits from the signature.
grafica_ref_vs_inf_forces_modules_aluminium()


In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

def grafica_ref_vs_inf_forces_modules_aluminium_hist_2d(
    path_data_file="analysis/forces_modules_Al.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_aluminium_hist_2d.tiff",
    bins=50,
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Plot the joint density of reference vs inferred aluminium force modules as a heat map.

    Reads a whitespace-separated table whose first row is a header, builds a
    normalised 2D histogram of the first column against the second one and
    shows it with ``imshow``; empty bins are replaced by NaN before drawing.
    The figure is saved as a 600 dpi TIFF and closed afterwards.

    Args:
        path_data_file: Path of the two-column input table.
        output_path_tif: Destination of the TIFF image. Its parent directory
            is created when it does not exist yet.
        bins: Bin specification forwarded to ``numpy.histogram2d``.
        xlim: Limits applied to the x axis.
        ylim: Limits applied to the y axis.

    Returns:
        None. When the table does not have exactly two columns an error
        message is printed and nothing is drawn.
    """
    # A bare file name has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw string: '\s' is not a valid escape sequence in a normal literal.
    datos = pd.read_csv(path_data_file, sep=r'\s+', header=0, engine='python')

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    hist, xedges, yedges = np.histogram2d(
        datos.iloc[:, 0], datos.iloc[:, 1], bins=bins, density=True
    )

    # Replace empty bins with NaN before handing the grid to imshow.
    hist_filtered = np.where(hist != 0, hist, np.nan)

    fig = plt.figure(figsize=(8, 6))
    try:
        # histogram2d indexes as [x, y] while imshow expects [row=y, col=x],
        # hence the transpose together with origin='lower'.
        plt.imshow(
            hist_filtered.T,
            extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
            cmap='Blues',
            origin='lower',
            aspect='auto',
        )
        plt.colorbar(label='Density')

        plt.xlabel(r'DFT modules (eV/$\AA$)')
        plt.ylabel(r'TorchMD-net modules (eV/$\AA$)')
        plt.title('Reference modules vs Inferred modules (Aluminium)')

        plt.xlim(xlim)
        plt.ylim(ylim)

        plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

# Render the figure using the default paths, bins and axis limits from the signature.
grafica_ref_vs_inf_forces_modules_aluminium_hist_2d()


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from mpl_toolkits.mplot3d import Axes3D

def grafica_ref_vs_inf_forces_modules_aluminium_hist_3d(
    path_data_file="analysis/forces_modules_Al.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_aluminium_hist_3d.tiff",
    bins=50,
    xlim=(0, 12),
    ylim=(0, 12)
):
    """Plot the joint density of reference vs inferred aluminium force modules as a 3D surface.

    Reads a whitespace-separated table whose first row is a header, builds a
    normalised 2D histogram of the first column against the second one and
    renders the non-empty bins with ``plot_trisurf``. The figure is saved as
    a 600 dpi TIFF and closed afterwards.

    Args:
        path_data_file: Path of the two-column input table.
        output_path_tif: Destination of the TIFF image. Its parent directory
            is created when it does not exist yet.
        bins: Bin specification forwarded to ``numpy.histogram2d``.
        xlim: Limits applied to the x axis.
        ylim: Limits applied to the y axis.

    Returns:
        None. When the table does not have exactly two columns an error
        message is printed and nothing is drawn.
    """
    # A bare file name has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw string: '\s' is not a valid escape sequence in a normal literal.
    datos = pd.read_csv(path_data_file, sep=r'\s+', header=0, engine='python')

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente 2 columnas.")
        return

    hist, xedges, yedges = np.histogram2d(
        datos.iloc[:, 0], datos.iloc[:, 1], bins=bins, density=True
    )

    # NOTE(review): every density value is anchored at the lower edges of its
    # bin (not the bin centre); kept as-is so existing figures do not shift.
    xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")

    # Empty bins are dropped so the surface only spans populated regions.
    occupied = hist != 0

    fig = plt.figure(figsize=(10, 8))
    try:
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(xpos[occupied], ypos[occupied], hist[occupied], cmap='Blues')

        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

        ax.set_xlabel(r'DFT modules (eV/$\AA$)')
        ax.set_ylabel(r'TorchMD-net modules (eV/$\AA$)')
        ax.set_zlabel('Density')
        ax.set_title('Reference modules vs Inferred modules (Aluminium) (Surface)')

        plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    finally:
        # Release the figure even if triangulation or saving fails.
        plt.close(fig)

# Render the figure using the default paths, bins and axis limits from the signature.
grafica_ref_vs_inf_forces_modules_aluminium_hist_3d()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

def graficar_ref_vs_inf_forces_modules_aluminium_hist(
    path_data_file="analysis/forces_modules_Al.data",
    output_path_tif="analysis/ref_vs_inf_forces_modules_aluminium_hist.tiff",
    bins=100,
    xlim=None,
    ylim=None
):
    """Overlay the normalised histograms of reference and inferred aluminium force modules.

    Reads a whitespace-separated table whose first row is a header and draws
    the first column (red, labelled "Reference") and the second column
    (green, labelled "Inference") as semi-transparent density histograms on
    the same axes. The figure is saved as a 600 dpi TIFF and closed.

    Args:
        path_data_file: Path of the two-column input table.
        output_path_tif: Destination of the TIFF image. Its parent directory
            is created when it does not exist yet.
        bins: Bin specification forwarded to ``plt.hist``.
        xlim: Optional x-axis limits; left automatic when ``None``.
        ylim: Optional y-axis limits; left automatic when ``None``.

    Returns:
        None. When the table does not have exactly two columns an error
        message is printed and nothing is drawn.
    """
    # A bare file name has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path_tif)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Raw string: '\s' is not a valid escape sequence in a normal literal.
    datos = pd.read_csv(path_data_file, sep=r'\s+', header=0, engine='python')

    if datos.shape[1] != 2:
        print("Error: El archivo no tiene exactamente dos columnas.")
        return

    reference_col = r'Reference modules (eV/$\AA$)'
    inferred_col = r'Inferred modules (eV/$\AA$)'
    datos.columns = [reference_col, inferred_col]

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.hist(datos[reference_col], bins=bins, edgecolor='red', alpha=0.5,
                 label='Reference', align='mid', color='red', density=True)
        plt.hist(datos[inferred_col], bins=bins, edgecolor='green', alpha=0.5,
                 label='Inference', align='mid', color='green', density=True)

        plt.xlabel(r'Modules (eV/$\AA$)')
        plt.ylabel('Density')
        plt.title('Distribution of the module of reference and inferred forces (Aluminium)')

        # Explicit None test: limits may be any sequence, including arrays
        # whose truth value is ambiguous.
        if xlim is not None:
            plt.xlim(xlim)
        if ylim is not None:
            plt.ylim(ylim)

        plt.legend()

        plt.savefig(output_path_tif, dpi=600, bbox_inches='tight', format='tiff')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

# Render the figure using the default paths and bins; axis limits stay automatic.
graficar_ref_vs_inf_forces_modules_aluminium_hist()