# Comparison between my code and the literature: The Journal of Physical Chemistry A
## Vol 123 / Issue 38
### On-Top Ratio for Atoms and Molecules

**Article Subscribed: August 22, 2019**  
*Authors: Rebecca K. Carlson, Donald G. Truhlar, Laura Gagliardi*

In [1]:
# Imports
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
sys.path.append("src")

from file_management import FileManager
from cluster_connection import ClusterConnection
from job_manager import JobManager
from slurm_manager import SlurmManager
from flux_correlation_indicator import FluxCorrelationIndicator
from molecule import Molecule
from basis import BasisSet
from method import Method
from input_specification import InputSpecification

In [2]:
# Additional imports: concurrency, logging, regular expressions and IPython display
import concurrent.futures
import logging
import re
from IPython.display import display, HTML

# Set up logging with IPython display
class IPythonLogger(logging.Handler):
    """Logging handler that shows each record as a colored HTML line in the notebook.

    The text color is picked from the record's level; levels without an entry
    in the color table (e.g. CRITICAL) are rendered in black.
    """

    # Level number -> CSS color of the rendered message.
    _LEVEL_COLORS = {
        logging.ERROR: 'red',
        logging.WARNING: 'orange',
        logging.INFO: 'blue',
        logging.DEBUG: 'grey',
    }

    def emit(self, record):
        """Format *record* and send it to the notebook output as an HTML <div>."""
        from html import escape  # local import keeps this cell self-contained

        try:
            color = self._LEVEL_COLORS.get(record.levelno, 'black')
            # Escape the text: messages such as "<class 'KeyError'>" would
            # otherwise be interpreted as markup and vanish from the output.
            msg = escape(self.format(record))
            display(HTML(f'<div style="color: {color};">{msg}</div>'))
        except Exception:
            # Standard Handler contract: a failing emit() must not break the
            # code that issued the log call.
            self.handleError(record)

# Configure logger
logger = logging.getLogger('CalcLogger')
logger.setLevel(logging.INFO)
# logging.getLogger returns the same object every time, so re-running this
# cell would stack one more handler per run and show each message repeatedly.
# Drop any handler left over from a previous run before attaching a fresh one.
# The match is by class name because re-running the cell redefines the class.
for _stale in list(logger.handlers):
    if type(_stale).__name__ == 'IPythonLogger':
        logger.removeHandler(_stale)
logger.addHandler(IPythonLogger())

In [3]:
# Helper functions for parsing method names and building job names
def parse_method_name(method_name):
    """Return the two comma-separated values found inside the parentheses.

    For example ``"CASSCF(10,10)"`` yields ``("10", "10")``. Surrounding
    whitespace of each value is removed.

    Raises:
        ValueError: if *method_name* contains no ``(a, b)`` group.
    """
    found = re.search(r'\(([^,]+),\s*([^)]+)\)', method_name)
    if found is None:
        raise ValueError(f"Invalid method name format: {method_name}")
    first, second = (found.group(idx).strip() for idx in (1, 2))
    return first, second

def generate_job_name(molecule_name, method_parts, basis_name):
    """Build the job name from molecule, method numbers and basis set.

    The name is the first two characters of *molecule_name*, followed by the
    first two entries of *method_parts*, followed by *basis_name* with every
    ``-`` removed and every ``*`` replaced by ``p``.
    """
    prefix = molecule_name[:2]
    first_part = method_parts[0]
    second_part = method_parts[1]
    # One pass over the basis name: drop dashes, turn '*' into 'p'.
    basis_tag = basis_name.translate(str.maketrans({'-': None, '*': 'p'}))
    return f"{prefix}{first_part}{second_part}{basis_tag}"


In [4]:
def process_single_calculation(params, config_file="utils/cluster_config.json"):
    """Run one calculation through the cluster workflow and return its results.

    *params* is updated in place: ``params['status']`` becomes ``'running'``
    on entry and ``'completed'`` or ``'failed'`` at the end. It must provide
    the keys ``'molecule_name'``, ``'method_name'``, ``'basis_name'`` and
    ``'job_name'``.

    Returns whatever ``FileManager.get_results`` gives for the job, or
    ``None`` when any step raised; the error is logged rather than propagated.
    """
    try:
        params['status'] = 'running'
        mol = Molecule(name=params['molecule_name'])
        calc_method = Method(params['method_name'])
        basis_set = BasisSet(params['basis_name'])

        with ClusterConnection(config_file=config_file) as conn:
            files = FileManager(conn)
            jobs = JobManager(conn, files, SlurmManager())
            flux = FluxCorrelationIndicator(conn, files, jobs)

            job_name = params['job_name']
            logger.info(f"Starting calculation for job: {job_name}")
            flux.handle_flux(job_name, mol, calc_method, basis_set)
            outcome = files.get_results(job_name)

            params['status'] = 'completed'
            logger.info(f"Completed calculation for job: {job_name}")
            return outcome
    except Exception as exc:
        # Best effort: report the failure through the status flag and the log.
        params['status'] = 'failed'
        logger.error(f"Error processing calculation for {params['job_name']}: {str(exc)}")
        return None

In [5]:
def _render_status_table(calc_params):
    """Return the HTML progress line and per-job status table for *calc_params*."""
    row_colors = {
        'pending': '#fff',
        'running': '#fff3cd',
        'completed': '#d4edda',
        'failed': '#f8d7da',
    }
    completed = sum(1 for p in calc_params if p['status'] == 'completed')
    status_html = f"""
    <div style="margin: 10px 0;">
    <b>Progress: {completed} / {len(calc_params)} completed</b>
    </div>
    <table style="width:100%; border-collapse: collapse;">
    <tr style="background-color: #f2f2f2;">
        <th style="border: 1px solid #ddd; padding: 8px;">Job Name</th>
        <th style="border: 1px solid #ddd; padding: 8px;">Basis</th>
        <th style="border: 1px solid #ddd; padding: 8px;">Status</th>
    </tr>
    """
    for param in calc_params:
        status_color = row_colors.get(param['status'], '#fff')
        status_html += f"""
        <tr style="background-color: {status_color}">
            <td style="border: 1px solid #ddd; padding: 8px;">{param['job_name']}</td>
            <td style="border: 1px solid #ddd; padding: 8px;">{param['basis_name']}</td>
            <td style="border: 1px solid #ddd; padding: 8px;">{param['status']}</td>
        </tr>
        """
    status_html += "</table>"
    return status_html


def launch_ontop_calculation_parallel(molecule_name, method_name, basis, max_workers=3):
    """Launch one calculation per basis set in a thread pool, with a live status table.

    Parameters:
    - molecule_name (str): Molecule name; its first two characters prefix each job name.
    - method_name (str): Method string containing an ``(a, b)`` group, e.g. ``"CASSCF(10,10)"``.
    - basis (str or iterable of str): Basis-set name(s). A single string is
      treated as one basis set.
    - max_workers (int): Maximum number of calculations running at once.

    Returns:
    - list: Results of the calculations that succeeded, ordered as in *basis*.
      Failed calculations are omitted, so the list may be shorter than *basis*.

    Raises:
    - ValueError: if *method_name* has no ``(a, b)`` group (logged, then re-raised).
    """
    try:
        method_parts = parse_method_name(method_name)

        # A bare string would otherwise be iterated character by character.
        basis = [basis] if isinstance(basis, str) else list(basis)

        # Prepare calculation parameters
        calc_params = [
            {
                'molecule_name': molecule_name,
                'method_name': method_name,
                'basis_name': basis_name,
                'job_name': generate_job_name(molecule_name, method_parts, basis_name),
                'status': 'pending',
            }
            for basis_name in basis
        ]

        def update_status_display():
            display(HTML(_render_status_table(calc_params)), clear=True)

        # Show the table right away instead of only after the first job ends.
        update_status_display()

        # Launch calculations in parallel
        results = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_params = {
                executor.submit(process_single_calculation, params): params
                for params in calc_params
            }

            for future in concurrent.futures.as_completed(future_to_params):
                params = future_to_params[future]
                try:
                    result = future.result()
                except Exception as e:
                    # The worker normally records its own failure; this covers
                    # an exception that escaped it, so the job is not left
                    # displayed as "running". Refresh first: the refresh
                    # clears the output area and would wipe the error line.
                    params['status'] = 'failed'
                    update_status_display()
                    logger.error(f"Calculation failed for {params['job_name']}: {str(e)}")
                else:
                    if result is not None:
                        results.append((params['basis_name'], result))
                    update_status_display()

        # Sort results by position of their basis in the input to maintain order.
        # setdefault keeps the first position of a repeated name, like list.index.
        order = {}
        for position, basis_name in enumerate(basis):
            order.setdefault(basis_name, position)
        results.sort(key=lambda item: order[item[0]])
        return [r[1] for r in results]

    except Exception as e:
        logger.error(f"Error in parallel calculation launch: {str(e)}")
        raise

In [6]:
def launch_ontop_calculation(molecule_name, method_name, basis):
    """Run the calculation for each basis set, one after another, and collect the results.

    Parameters:
    - molecule_name (str): Molecule name; its first two characters prefix each job name.
    - method_name (str): Method string containing an ``(a, b)`` group, e.g. ``"CASSCF(18,18)"``.
    - basis (str or iterable of str): Basis-set name(s). A single string is
      treated as one basis set.

    Returns:
    - list: One entry per basis set, in input order, each being what
      ``FileManager.get_results`` returned for that job.

    Raises:
    - ValueError: if *method_name* has no ``(a, b)`` group.
    """
    # Validate the method string once, before any cluster work. Previously a
    # non-matching name surfaced later as an unbound-variable error.
    method_parts = parse_method_name(method_name)

    # A bare string would otherwise be iterated character by character.
    basis_names = [basis] if isinstance(basis, str) else list(basis)

    # Define molecule and method (shared by every basis set)
    molecule = Molecule(name=molecule_name)
    method = Method(method_name)

    df_list = []
    for basis_name in basis_names:
        # Separate name so the `basis` argument is not overwritten mid-loop.
        basis_set = BasisSet(basis_name)

        # Same naming rule as the parallel launcher, so both share job names.
        # Compared with the former inline rule this also maps '*' to 'p'.
        job_name = generate_job_name(molecule_name, method_parts, basis_name)

        with ClusterConnection(config_file="utils/cluster_config.json") as connection:
            file_manager = FileManager(connection)
            job_manager = JobManager(connection, file_manager, SlurmManager())
            flux_manager = FluxCorrelationIndicator(connection, file_manager, job_manager)

            # Execute the flux for this job, then fetch its results
            flux_manager.handle_flux(job_name, molecule, method, basis_set)
            df_list.append(file_manager.get_results(job_name))

    return df_list


In [7]:
# Function to obtain atomic indicator
def obtain_atomic_indicator(df):
    """Return x, y (scaled to Ångström) and the indicator 2 * on_top / density**2.

    Parameters:
    - df (DataFrame): Must contain the columns 'x', 'y', 'on_top' and 'density'.
      It is not modified.

    Returns:
    - DataFrame: Columns 'x', 'y', 'indicator', without the rows whose
      indicator is NaN. The original index labels are kept.
    """
    # 0.529177 is the Bohr radius in Ångström (atomic length unit -> Å).
    bohr_to_angstrom = 0.529177

    # Work on a copy: writing into `df` would rescale the caller's coordinates
    # again on every call (e.g. when a plotting cell is re-run).
    result_df = df[['x', 'y']].copy()
    result_df['x'] = result_df['x'] * bohr_to_angstrom
    result_df['y'] = result_df['y'] * bohr_to_angstrom

    # Create indicator column
    result_df['indicator'] = 2 * df['on_top'] / (df['density']) ** 2

    return result_df.dropna(subset=['indicator'])


In [8]:
def plot_comparison(df_list, limit, label='Ar'):
    """
    Displays line plots of the indicator along y ≈ 0 for several result sets.

    Parameters:
    - df_list (list): List of DataFrames, each containing 'x', 'y', 'on_top' and
      'density' (the columns obtain_atomic_indicator needs). A single DataFrame
      is accepted and treated as a one-element list.
    - limit (float): Upper limit for x coordinates to filter the data
    - label (str): Name of the system shown in the titles (default 'Ar')
    """
    if isinstance(df_list, pd.DataFrame):
        df_list = [df_list]

    # Set up the figure
    fig, ax = plt.subplots(figsize=(10, 6))
    fig.suptitle(f'Line Plot Comparison for {label}', fontsize=16)

    # Process each DataFrame and plot
    colors = plt.cm.tab10(np.linspace(0, 1, len(df_list)))  # Different color for each DataFrame

    # enumerate supplies the dataset number; list.index would compare
    # DataFrames with ==, which raises for every dataset after the first.
    for number, (df, color) in enumerate(zip(df_list, colors), start=1):
        # Calculate indicators for current DataFrame
        calculated_indicator = obtain_atomic_indicator(df)

        # Filter data for line plot (y ≈ 0); abs() so that points with
        # negative y are not mistaken for points on the axis.
        line_data = calculated_indicator[
            (calculated_indicator['y'].abs() <= 0.000001) &
            (calculated_indicator['x'] <= limit)
        ]

        # Sort data by x to ensure smooth line
        line_data = line_data.sort_values('x')

        # Plot line
        ax.plot(line_data['x'], line_data['indicator'], color=color,
                label=f'Dataset {number}')

    # Set titles and labels
    ax.set_title(f'Calculated OTR Line Plots: {label}')
    ax.set_xlabel('z(Å)')
    ax.set_ylabel('R')
    ax.legend()

    plt.tight_layout()
    plt.show()

## Atoms

### He

### Ne

In [None]:
# Example usage for Neon CASSCF calculations
molecule_name = "neon"
method_name = "CASSCF(10,10)"
basis_sets = [
   # '6-311+G*',
   # 'cc-pVTZ', 
   # 'aug-cc-pVDZ',
   # 'dec-cc-pV6Z',
    'aug-pc-3',
    'dec-pc-4'
]

try:
    results = launch_ontop_calculation_parallel(
        molecule_name=molecule_name,
        method_name=method_name,
        basis=basis_sets,
        max_workers=1  # You can adjust this based on your cluster's capacity
    )
    
    # Display summary of results
    print(f"\nCompleted {len(results)} calculations successfully")
    
    # The launcher returns only the successful results, without their basis
    # names. Pairing them with basis_sets by position is only valid when no
    # calculation failed; otherwise use neutral labels instead of wrong ones.
    if len(results) == len(basis_sets):
        labels = list(basis_sets)
    else:
        print("Warning: some calculations failed; results cannot be matched "
              "to basis names and are labelled by position instead.")
        labels = [f"result-{i + 1}" for i in range(len(results))]
    
    # Display results for each calculation
    for label, df in zip(labels, results):
        print(f"\nResults for {label} basis:")
        display(df.head())
        
    # Optional: Save results to files
    for label, df in zip(labels, results):
        filename = f"neon_CASSCF10-10_{label.replace('-', '_')}_results.csv"
        df.to_csv(filename)
        print(f"Saved results to {filename}")
        
except Exception as e:
    print(f"Error running calculations: {str(e)}")

Geometry for neon successfully loaded.
Connected to atlas.


Basis coefficients for Ne from ./utils/basis_sets/aug-pc-3.gbs written successfully.
Gaussian input file './test/ne1010augpc3.com' generated successfully.
Command output: 
Uploaded test/ne1010augpc3.com to /dipc/javidom/proyect-3-indicator/ne1010augpc3/ne1010augpc3.com on the cluster.
Uploaded slurm_scripts/ne1010augpc3.slurm to /dipc/javidom/proyect-3-indicator/ne1010augpc3/ne1010augpc3.slurm on the cluster.
Command output: 
Command output: 
Command output: 
Command output: 
Command output: 
Command output: Submitted batch job 2219095

Command output:                JOBID   PARTITION         QOS                  NAME          USER    ST         TIME  NODES  NODELIST(REASON)
             2219095     general     regular          ne1010augpc3       javidom    PD         0:00      1  (Priority)

Command output:                JOBID   PARTITION         QOS                  NAME          USER    ST         TIME  NODES  NODELIST(REASON)
             2219095     general     regular          n

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Uploaded /tmp/ne1010augpc3.log to /dipc/javidom/proyect-3-indicator/ne1010augpc3/ne1010augpc3.log on the cluster.
Command output: 
Command output: 
Command output: 
Command output: 
Command output: Submitted batch job 2219097

Command output:                JOBID   PARTITION         QOS                  NAME          USER    ST         TIME  NODES  NODELIST(REASON)
             2219097     general     regular      ne1010augpc3_dmn       javidom    PD         0:00      1  (Priority)

Command output:                JOBID   PARTITION         QOS                  NAME          USER    ST         TIME  NODES  NODELIST(REASON)
             2219097     general     regular      ne1010augpc3_dmn       javidom    PD         0:00      1  (Priority)

Command output:                JOBID   PARTITION         QOS                  NAME          USER    ST         TIME  NODES  NODELIST(REASON)
             2219097     general     regular      ne1010augpc3_dmn       javidom     R         0:01      1  at

In [None]:
# plot_comparison takes the list of raw result DataFrames and computes the
# indicator itself, so pass `results` from the Neon cell above directly.
plot_comparison(results, limit=1.09)

### Ar

In [None]:
molecule = 'argon'
# The basis argument is iterated, so pass a list of basis-set names.
# Note: `df` holds the returned list of result DataFrames (one per basis).
df = launch_ontop_calculation(molecule, 'CASSCF(18,18)', ['dec-pc-4'])

In [None]:
# plot_comparison expects the list of raw result DataFrames as its first
# argument and computes the indicator itself.
plot_comparison(df, limit=1.59)

In [None]:
## Molecules