In [19]:
from IPython.display import display, HTML
# Stretch the classic-notebook container to the full browser width.
_FULL_WIDTH_CSS = "<style>.container { width:100% !important; }</style>"
display(HTML(_FULL_WIDTH_CSS))

In [4]:
#!/usr/bin/env python3
"""
Site-specific SQS Generator for High-Entropy LDH
Fixes O and H atoms while distributing metals according to specified fractions
Modified to select 5 best structures based on lowest objective function
"""

import os
import multiprocessing
from pathlib import Path
from typing import Dict, Union, Optional, List, Tuple
from sqsgenerator import sqs_optimize, read_settings_file, export_structures
from sqsgenerator.public import Structure, from_ase_atoms
import numpy as np

In [15]:
class LDHSQSGenerator:
    """
    SQS generator for high-entropy LDH structures.

    Builds an ``sqsgenerator`` settings dictionary in which only the sites
    named by ``metal_site_symbol`` are redistributed (via the ``which`` key),
    runs the optimisation, prints the objective of every returned structure
    and optionally exports the structures to disk.
    """
    
    def __init__(self, structure_file: str, num_cores: Optional[int] = None):
        """
        Initialize LDH SQS generator
        
        Args:
            structure_file: Path to the primitive LDH structure file (.cif, .vasp, etc.)
            num_cores: Number of cores to use (defaults to all available)
        """
        self.structure_file = structure_file
        # A falsy value (None or 0) falls back to every core the OS reports.
        self.num_cores = num_cores or multiprocessing.cpu_count()
        
        # Set OpenMP environment variable for parallel processing.
        # NOTE(review): OpenMP runtimes usually read this variable when they are
        # first loaded; if sqsgenerator was imported earlier in the session this
        # assignment may have no effect - confirm against the thread count in use.
        os.environ['OMP_NUM_THREADS'] = str(self.num_cores)
        
    def calculate_metal_composition(self, 
                                  metal_fractions: Dict[str, float],
                                  total_metal_sites: int) -> Dict[str, int]:
        """
        Calculate integer atom counts from fractional compositions
        
        Uses largest-remainder apportionment: every metal first receives the
        floor of ``fraction * total_metal_sites`` and the sites still unassigned
        go, one each, to the metals with the largest fractional remainders.
        The counts are therefore never negative and always add up to
        ``total_metal_sites``.
        
        Args:
            metal_fractions: Dictionary of metal fractions {element: fraction}
            total_metal_sites: Total number of metal sites in supercell
            
        Returns:
            Dictionary of metal atom counts {element: count}, ordered by
            decreasing fraction (ties keep their input order)
            
        Raises:
            ValueError: If the fractions do not sum to 1.0, if any fraction is
                negative, or if ``total_metal_sites`` is negative
        """
        # Validate fractions sum to 1.0
        total_fraction = sum(metal_fractions.values())
        if not np.isclose(total_fraction, 1.0, atol=1e-6):
            raise ValueError(f"Metal fractions must sum to 1.0, got {total_fraction}")
        if any(fraction < 0 for fraction in metal_fractions.values()):
            raise ValueError(f"Metal fractions must be non-negative, got {metal_fractions}")
        if total_metal_sites < 0:
            raise ValueError(f"Number of metal sites must be non-negative, got {total_metal_sites}")
        
        # Sort by fraction so the output order (and tie-breaking) is deterministic
        sorted_metals = sorted(metal_fractions.items(), key=lambda x: x[1], reverse=True)
        
        metal_counts = {}
        remainders = []
        for metal, fraction in sorted_metals:
            quota = fraction * total_metal_sites
            # The small epsilon keeps a quota such as 28.999999999999996
            # (floating-point noise for 29) from being floored to 28.
            base = int(np.floor(quota + 1e-9))
            metal_counts[metal] = base
            remainders.append((quota - base, metal))
        
        leftover = total_metal_sites - sum(metal_counts.values())
        if leftover < 0:
            # Only reachable when the fractions overshoot 1.0 within tolerance
            # on a very large cell; refuse rather than return a wrong total.
            raise ValueError(
                f"Metal fractions {metal_fractions} cannot be mapped onto "
                f"{total_metal_sites} sites"
            )
        
        # Stable sort: metals with equal remainders keep their fraction order.
        by_remainder = sorted(remainders, key=lambda x: x[0], reverse=True)
        for k in range(leftover):
            metal_counts[by_remainder[k % len(by_remainder)][1]] += 1
        
        return metal_counts
    
    def create_ldh_sqs_config(self,
                             metal_fractions: Dict[str, float],
                             supercell: tuple = (2, 2, 1),
                             iterations: float = 1e6,
                             shell_weights: Optional[Dict[int, float]] = None,
                             metal_site_symbol: str = 'M',
                             num_best_structures: int = 5,
                             metal_sites_per_unit: int = 1) -> Dict:
        """
        Create the sqsgenerator settings dictionary for an LDH supercell
        
        Args:
            metal_fractions: Metal fractions {element: fraction}
            supercell: Supercell repetitions along a, b and c
            iterations: Number of iterations (converted to int)
            shell_weights: Shell weights {shell index: weight}; defaults to
                ``{3: 1.0, 4: 0.6}``
            metal_site_symbol: Species in the input structure whose sites are
                redistributed (passed as ``which``)
            num_best_structures: Value of ``max_output_configurations``
            metal_sites_per_unit: Number of metal sites in the input cell
                (default 1)
            
        Returns:
            Settings dictionary accepted by ``sqs_optimize``
            
        Raises:
            ValueError: If the fractions are invalid (see
                ``calculate_metal_composition``) or ``supercell`` does not have
                exactly three entries
        """
        if shell_weights is None:
            # NOTE(review): shell indices are assumed to follow sqsgenerator's
            # distance-sorted shells of the input structure; which of them are
            # metal-metal shells depends on the cell - check the output of
            # analyze_coordination_shells before relying on these defaults.
            shell_weights = {
                3: 1.0,
                4: 0.6,
            }
        
        reps_a, reps_b, reps_c = supercell
        total_metal_sites = metal_sites_per_unit * reps_a * reps_b * reps_c
        metal_composition = self.calculate_metal_composition(metal_fractions, total_metal_sites)
        
        config = {
            'structure': {
                'file': self.structure_file,
                'supercell': list(supercell)
            },
            'which': metal_site_symbol,
            'composition': metal_composition,
            'iterations': int(iterations),
            'shell_weights': shell_weights,
            'mode': 'random',
            'max_output_configurations': num_best_structures,
            'similar': False
        }
        
        return config

    def analyze_coordination_shells(self, structure_file: str):
        """
        Analyze coordination shells to determine metal-metal distances
        
        Writes a temporary YAML settings file, runs
        ``sqsgen compute shell-distances`` on it and prints every distance.
        
        Args:
            structure_file: Structure file to analyse (``self.structure_file``
                is not used here)
            
        Returns:
            The parsed shell distances, or None if the command could not be
            run, failed, or printed something that is not a Python literal
        """
        import ast
        import subprocess
        import tempfile
        import yaml
        
        # Create temporary config file
        temp_config = {
            'structure': {'file': structure_file},
            'composition': {'M': 1}  # Dummy composition
        }
        
        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
            yaml.dump(temp_config, f)
            temp_file = f.name
        
        # Run sqsgenerator to compute shell distances
        try:
            result = subprocess.run(
                ['sqsgen', 'compute', 'shell-distances', temp_file],
                capture_output=True, text=True
            )
            if result.returncode != 0:
                raise RuntimeError(
                    f"sqsgen exited with code {result.returncode}: {result.stderr.strip()}"
                )
            
            # literal_eval only accepts Python literals, so text printed by the
            # external tool can never be executed as code (eval() would run it).
            distances = ast.literal_eval(result.stdout.strip())
            print("Coordination shell distances (Å):")
            for i, dist in enumerate(distances):
                print(f"Shell {i}: {dist:.3f} Å")
                
            return distances
        except Exception as e:
            # Best effort: this is a diagnostic helper, so report and carry on.
            print(f"Could not analyze coordination shells: {e}")
            return None
        finally:
            # delete=False above means the file must be removed by hand.
            if os.path.exists(temp_file):
                os.remove(temp_file)


    
    def run_ldh_sqs_simulation(self,
                              metal_fractions: Dict[str, float],
                              supercell: tuple = (2, 2, 1),
                              iterations: float = 1e6,
                              shell_weights: Optional[Dict[int, float]] = None,
                              metal_site_symbol: str = 'M',
                              num_best_structures: int = 5,
                              export_structures: bool = True,
                              output_format: str = 'cif',
                              metal_sites_per_unit: int = 1) -> Tuple[Dict, Dict]:
        """
        Run SQS simulation for high-entropy LDH and select best structures
        
        Args:
            metal_fractions: Metal fractions {element: fraction}
            supercell: Supercell dimensions
            iterations: Number of iterations
            shell_weights: Shell weights for SRO calculation
            metal_site_symbol: Symbol for metal sites in primitive cell
            num_best_structures: Number of best structures to select (default: 5)
            export_structures: Whether to export generated structures. Inside
                this method the name shadows the imported ``export_structures``
                function; the export itself happens in ``_export_results``
            output_format: Output file format
            metal_sites_per_unit: Number of metal sites in the input cell
                (default 1)
            
        Returns:
            Tuple of (results, timings) from sqs_optimize
            
        Raises:
            ValueError: If the metal fractions do not sum to 1.0
        """
        print(f"Running LDH SQS simulation with {self.num_cores} cores...")
        print(f"Structure file: {self.structure_file}")
        print(f"Metal fractions: {metal_fractions}")
        print(f"Supercell: {supercell}")
        print(f"Iterations: {int(iterations):,}")
        print(f"Metal site symbol: {metal_site_symbol}")
        print(f"Number of best structures to select: {num_best_structures}")
        
        # Validate metal fractions
        total_fraction = sum(metal_fractions.values())
        if not np.isclose(total_fraction, 1.0, atol=1e-6):
            raise ValueError(f"Metal fractions must sum to 1.0, got {total_fraction}")
        
        # Create configuration
        config = self.create_ldh_sqs_config(
            metal_fractions=metal_fractions,
            supercell=supercell,
            iterations=iterations,
            shell_weights=shell_weights,
            metal_site_symbol=metal_site_symbol,
            num_best_structures=num_best_structures,
            metal_sites_per_unit=metal_sites_per_unit
        )
        
        print(f"Metal composition: {config['composition']}")
        
        # Run SQS optimization; structure objects are only built when they
        # are going to be exported.
        results, timings = sqs_optimize(
            config,
            make_structures=export_structures,
            structure_format='default'
        )
        
        print("SQS optimization completed!")
        print(f"Generated {len(results)} optimized structures")
        
        # Display objective function values for selected structures
        self._display_objective_values(results)
        
        # Export structures if requested
        if export_structures and results:
            self._export_results(results, output_format, num_best_structures)
            
        return results, timings
    
    @staticmethod
    def _rank_by_objective(results: Dict) -> List[Tuple]:
        """Return the (config_id, result) pairs ordered by ascending objective.

        The sort is stable, so equal objectives keep their original order;
        entries without an 'objective' key are placed last.
        """
        def sort_key(item):
            objective = item[1].get('objective')
            return (objective is None, 0.0 if objective is None else objective)
        
        return sorted(results.items(), key=sort_key)
    
    def _display_objective_values(self, results: Dict):
        """Display objective function values, lowest objective first"""
        print("\n=== Selected Structures (Ranked by Objective Function) ===")
        for i, (config_id, result) in enumerate(self._rank_by_objective(results), 1):
            objective_value = result.get('objective', 'N/A')
            print(f"Structure {i}: ID={config_id}, Objective Function={objective_value}")
    
    def _export_results(self, results: Dict, output_format: str = 'cif', num_structures: int = 5):
        """
        Export generated structures with multiple fallback methods
        
        Tries ``export_structures`` with a ``format`` keyword, then with a
        ``file_format`` keyword, and finally writes the structures one by one.
        Failures of the last method are reported, not raised.
        
        Args:
            results: Mapping of configuration id to result dictionary; every
                result must hold a 'structure' entry
            output_format: File format / extension to write
            num_structures: Upper bound on the number of files written by the
                manual fallback
        """
        from operator import itemgetter
        
        # Method 1: Try with 'format' parameter
        try:
            export_structures(
                results,
                functor=itemgetter('structure'),
                format=output_format,
                compress=False
            )
            # Report what was actually handed to the exporter, which may be
            # fewer structures than were requested.
            print(f"All {len(results)} structures exported in {output_format} format")
            return
            
        except TypeError:
            print("Trying alternative parameter names...")
        
        # Method 2: Try with 'file_format' parameter (older versions)
        try:
            export_structures(
                results,
                functor=itemgetter('structure'),
                file_format=output_format,
                compress=False
            )
            print(f"All {len(results)} structures exported in {output_format} format")
            return
            
        except TypeError:
            print("Trying manual export...")
        
        # Method 3: Manual export, best structures first
        try:
            ranked = self._rank_by_objective(results)[:max(num_structures, 0)]
            for i, (config_id, result) in enumerate(ranked, 1):
                structure = result['structure']
                objective = result.get('objective', 0.0)
                filename = f"ldh_sqs_structure_{i:02d}_obj_{objective:.6f}.{output_format}"
                
                # Try different export methods
                if hasattr(structure, 'to_file'):
                    structure.to_file(filename, format=output_format)
                elif hasattr(structure, 'write'):
                    structure.write(filename)
                elif hasattr(structure, 'to_ase_atoms'):
                    from ase.io import write
                    atoms = structure.to_ase_atoms()
                    write(filename, atoms)
                else:
                    print(f"No export method found for structure {i}")
                    continue
                    
                print(f"Exported structure {i} with objective {objective:.6f}")
                
        except Exception as e:
            # Deliberate best effort: the optimisation result is still returned
            # to the caller even when nothing could be written.
            print(f"All export methods failed: {e}")
            print("Structures were generated successfully but could not be exported to files")
            print("You can access the structures from the returned results dictionary")


In [16]:
def main():
    """
    Example usage for high-entropy LDH with 5 best structures selection

    Builds a generator for ``Zn1H2O2.cif``, distributes Fe/Cr/Zn/Ni/Co over the
    Zn sites of a 4x5x1 supercell, keeps the 5 best structures, exports them as
    CIF and prints the timing data returned by the optimiser.
    """
    print("=== High-Entropy LDH SQS Generation - 5 Best Structures ===")
    
    # Initialize generator
    structure_file = "Zn1H2O2.cif"  # Your LDH primitive cell
    # Ask for at most 64 threads, but never more than this machine has;
    # oversubscribing the cores only slows the optimisation down.
    num_cores = min(64, multiprocessing.cpu_count())
    
    ldh_generator = LDHSQSGenerator(structure_file, num_cores=num_cores)
    
    # Define metal fractions as specified
    metal_fractions = {
        'Fe': 0.05,
        'Cr': 0.20,
        'Zn': 0.25,
        'Ni': 0.25,
        'Co': 0.25
    }
    
    # Show the total for the reader; the real validation happens inside
    # run_ldh_sqs_simulation, which raises if the sum is not 1.0.
    total = sum(metal_fractions.values())
    print(f"Total metal fractions: {total}")
    
    # Run SQS simulation to get 5 best structures
    results, timings = ldh_generator.run_ldh_sqs_simulation(
        metal_fractions=metal_fractions,
        supercell=(4, 5, 1),  # Common for LDH: expand in a,b but not c
        iterations=1e8,
        # NOTE(review): shells 3 and 4 are assumed to be the metal-metal
        # shells of this cell - confirm with analyze_coordination_shells.
        shell_weights={3: 1.0, 4: 0.6},
        metal_site_symbol='Zn',  # Adjust based on your primitive cell
        num_best_structures=5,  # Select 5 best structures
        export_structures=True,
        output_format='cif'
    )
    
    # Display timing information
    # NOTE(review): the unit printed below is taken on trust from the original
    # notebook - confirm it against the sqsgenerator documentation.
    print("\nTiming information:")
    for rank, times in timings.items():
        if times:
            avg_time = sum(times) / len(times)
            print(f"Rank {rank}: Average time per iteration: {avg_time:.2f} μs")

In [18]:
if __name__ == "__main__":
    # Keep only the import inside the try block: an ImportError raised while
    # main() is running (for example from an optional export dependency) must
    # not be reported as "sqsgenerator not found".
    try:
        import sqsgenerator  # noqa: F401 - availability check only
    except ImportError:
        print("Error: sqsgenerator not found. Please install it using:")
        print("conda install -c conda-forge sqsgenerator")
    else:
        print("sqsgenerator package available")
        main()

sqsgenerator package available
=== High-Entropy LDH SQS Generation - 5 Best Structures ===
Total metal fractions: 1.0
Running LDH SQS simulation with 64 cores...
Structure file: Zn1H2O2.cif
Metal fractions: {'Fe': 0.05, 'Cr': 0.2, 'Zn': 0.25, 'Ni': 0.25, 'Co': 0.25}
Supercell: (4, 5, 1)
Iterations: 100,000,000
Metal site symbol: Zn
Number of best structures to select: 5
Metal composition: {'Zn': 5, 'Ni': 5, 'Co': 5, 'Cr': 4, 'Fe': 1}
SQS optimization completed!
Generated 5 optimized structures

=== Selected Structures (Ranked by Objective Function) ===
Structure 1: ID=881347480, Objective Function=3.9599999999999995
Structure 2: ID=38488367236, Objective Function=3.9599999999999995
Structure 3: ID=47233383500, Objective Function=3.9599999999999995
Structure 4: ID=29314929789, Objective Function=3.9599999999999995
Structure 5: ID=15708547973, Objective Function=3.9599999999999995
All 5 structures exported in cif format

Timing information:
Rank 0: Average time per iteration: 0.59 μs
