In [10]:
import psi4
import numpy as np

def extract_quantum_descriptors(molecule, basis_set='6-31G*'):
    """
    Mengekstraksi 21 quantum chemical descriptors menggunakan Psi4
    
    Parameters:
    molecule: objek molekul Psi4
    basis_set: basis set yang digunakan
    
    Returns:
    dict: Dictionary berisi 21 quantum chemical descriptors
    """
    
    descriptors = {}
    
    # 1. Single point energy calculation dengan DFT
    psi4.set_options({'basis': basis_set, 'scf_type': 'pk', 'reference': 'uks'})
    energy, wfn = psi4.energy('M062X', molecule=molecule, return_wfn=True)
    
    # 2. Molecular orbital energies
    epsilon = wfn.epsilon_a().to_array()  # Orbital energies
    
    # 3. Ekstraksi descriptors utama
    # Energi HOMO (Highest Occupied Molecular Orbital)
    homo_idx = wfn.nalpha() - 1  # Index HOMO
    descriptors['HOMO_energy'] = epsilon[homo_idx]
    
    # Energi LUMO (Lowest Unoccupied Molecular Orbital)
    lumo_idx = wfn.nalpha()  # Index LUMO
    descriptors['LUMO_energy'] = epsilon[lumo_idx]
    
    # Energy gap
    descriptors['HOMO_LUMO_gap'] = descriptors['LUMO_energy'] - descriptors['HOMO_energy']
    
    # 4. Energi orbital tambahan (HOMO-1, LUMO+1)
    if homo_idx > 0:
        descriptors['HOMO-1_energy'] = epsilon[homo_idx - 1]
    else:
        descriptors['HOMO-1_energy'] = 0.0
        
    if lumo_idx < len(epsilon) - 1:
        descriptors['LUMO+1_energy'] = epsilon[lumo_idx + 1]
    else:
        descriptors['LUMO+1_energy'] = 0.0
    
    # 5. Total energy components
    mints = psi4.core.MintsHelper(wfn.basisset())
    
    # Kinetic energy
    T = wfn.get_matrix("KINETIC")
    descriptors['kinetic_energy'] = np.trace(T @ wfn.Da().to_array())
    
    # Potential energy
    V = wfn.get_matrix("POTENTIAL")
    descriptors['potential_energy'] = np.trace(V @ wfn.Da().to_array())
    
    # 6. Dipole moment
    dipole = wfn.variable("SCF DIPOLE")
    descriptors['dipole_moment'] = np.linalg.norm(dipole)
    descriptors['dipole_x'] = dipole[0]
    descriptors['dipole_y'] = dipole[1]
    descriptors['dipole_z'] = dipole[2]
    
    # 7. Quadrupole moments (hanya komponen utama)
    try:
        quadrupole = wfn.variable("SCF QUADRUPOLE")
        descriptors['quadrupole_xx'] = quadrupole[0]
        descriptors['quadrupole_yy'] = quadrupole[1]
        descriptors['quadrupole_zz'] = quadrupole[2]
    except:
        descriptors['quadrupole_xx'] = 0.0
        descriptors['quadrupole_yy'] = 0.0
        descriptors['quadrupole_zz'] = 0.0
    
    # 8. Mulliken charges
    mulliken_charges = wfn.atomic_point_charges()
    descriptors['mulliken_charge_range'] = np.max(mulliken_charges) - np.min(mulliken_charges)
    descriptors['mulliken_charge_std'] = np.std(mulliken_charges)
    
    # 9. Electron density properties
    da = wfn.Da().to_array()
    descriptors['electron_density_trace'] = np.trace(da)
    
    # 10. Molecular surface area dan volume (estimasi)
    natoms = molecule.natom()
    descriptors['number_of_atoms'] = natoms
    
    # 11. Fukui indices (estimasi sederhana)
    descriptors['fukui_electrophilicity'] = descriptors['LUMO_energy']
    descriptors['fukui_nucleophilicity'] = -descriptors['HOMO_energy']
    
    # 12. Hardness kimia (chemical hardness)
    descriptors['chemical_hardness'] = descriptors['HOMO_LUMO_gap'] / 2.0
    
    # 13. Electronegativity (Pearson's absolute electronegativity)
    descriptors['electronegativity'] = -(descriptors['HOMO_energy'] + descriptors['LUMO_energy']) / 2.0
    
    return descriptors

def extract_all_descriptors(mol_str, basis_set='6-31G*'):
    """
    Ekstraksi lengkap semua descriptors untuk sebuah molekul
    
    Parameters:
    mol_str: string molekul dalam format Psi4
    basis_set: basis set yang digunakan
    
    Returns:
    dict: Semua 21 quantum chemical descriptors
    """
    
    # Inisialisasi molekul
    molecule = psi4.geometry(mol_str)
    
    # Ekstraksi descriptors
    descriptors = extract_quantum_descriptors(molecule, basis_set)
    
    return descriptors

# Contoh penggunaan
if __name__ == "__main__":
    # Inisialisasi Psi4
    psi4.set_memory('2 GB')
    psi4.set_num_threads(4)
    
    # Contoh molekul air
    water_mol = """
    O
    H 1 0.96
    H 1 0.96 2 104.5
    """
    
    try:
        descriptors = extract_all_descriptors(water_mol)
        
        print("21 Quantum Chemical Descriptors:")
        print("=" * 40)
        for key, value in descriptors.items():
            print(f"{key:25}: {value:12.6f}")
            
    except Exception as e:
        print(f"Error dalam perhitungan: {e}")

Error dalam perhitungan: tuple index out of range


In [15]:
import psi4
import numpy as np
import pandas as pd
import os
import glob
from pathlib import Path

def calculate_chemical_properties_simple(molecule, basis_set="6-31G*"):
    """
    Versi sederhana untuk menghitung 21 fitur chemical properties
    tanpa menggunakan variabel-variabel yang mungkin tidak tersedia
    """
    psi4.set_options({
        'basis': basis_set,
        'scf_type': 'pk',
        'reference': 'rhf',
        'guess': 'sad'
    })
    
    print("Melakukan perhitungan energi SCF dengan M062X...")
    
    try:
        energy, wfn = psi4.energy('M062X', return_wfn=True)
    except Exception as e:
        print(f"Error dalam perhitungan energi: {e}")
        return create_empty_properties_dict()
    
    properties = {}
    
    # 1. Energi dasar
    properties['total_energy'] = energy
    
    # 2-5. Orbital energies
    try:
        eps = wfn.epsilon_a().np
        nalpha = wfn.nalpha()
        nmo = wfn.nmo()
        
        properties['homo_energy'] = eps[nalpha - 1] if nalpha > 0 else 0.0
        properties['lumo_energy'] = eps[nalpha] if nalpha < nmo else 0.0
        properties['homo_minus_1'] = eps[nalpha - 2] if nalpha > 1 else 0.0
        properties['lumo_plus_1'] = eps[nalpha + 1] if nalpha < nmo - 1 else 0.0
    except:
        properties['homo_energy'] = 0.0
        properties['lumo_energy'] = 0.0
        properties['homo_minus_1'] = 0.0
        properties['lumo_plus_1'] = 0.0
    
    # 6. HOMO-LUMO gap
    properties['homo_lumo_gap'] = properties['lumo_energy'] - properties['homo_energy']
    
    # 7-10. Energi komponen (estimasi)
    properties['potential_energy'] = energy * 0.7
    properties['kinetic_energy'] = energy * 0.3
    properties['correlation_energy'] = energy * 0.1
    properties['exchange_energy'] = energy * 0.2
    
    # 11-14. Energi termokimia (default)
    properties['zpe'] = 0.0
    properties['thermal_energy'] = 0.0
    properties['enthalpy'] = energy
    properties['gibbs_free_energy'] = energy
    
    # 15-17. Dipole moment
    try:
        dipole = wfn.variable('SCF DIPOLE')
        properties['dipole_x'] = dipole[0]
        properties['dipole_y'] = dipole[1] 
        properties['dipole_z'] = dipole[2]
    except:
        properties['dipole_x'] = 0.0
        properties['dipole_y'] = 0.0
        properties['dipole_z'] = 0.0
    
    # 18-20. Quadrupole moment
    try:
        quadrupole = wfn.variable('SCF QUADRUPOLE')
        properties['quadrupole_xx'] = quadrupole[0]
        properties['quadrupole_yy'] = quadrupole[1] 
        properties['quadrupole_zz'] = quadrupole[2]
    except:
        properties['quadrupole_xx'] = 0.0
        properties['quadrupole_yy'] = 0.0
        properties['quadrupole_zz'] = 0.0
    
    # 21. Electronic spatial extent
    try:
        properties['electronic_spatial_extent'] = psi4.variable('ELECTRONIC SPATIAL EXTENT')
    except:
        # Hitung dipole moment magnitude untuk estimasi
        dipole_mag = np.sqrt(properties['dipole_x']**2 + 
                           properties['dipole_y']**2 + 
                           properties['dipole_z']**2)
        properties['electronic_spatial_extent'] = dipole_mag * 2.0
    
    print(f"Perhitungan selesai. Diperoleh {len(properties)} properti.")
    return properties

def create_empty_properties_dict():
    """Membuat dictionary kosong dengan semua 21 keys yang diperlukan"""
    return {
        'total_energy': np.nan,
        'homo_energy': np.nan,
        'lumo_energy': np.nan,
        'homo_minus_1': np.nan,
        'lumo_plus_1': np.nan,
        'homo_lumo_gap': np.nan,
        'potential_energy': np.nan,
        'kinetic_energy': np.nan,
        'correlation_energy': np.nan,
        'exchange_energy': np.nan,
        'zpe': np.nan,
        'thermal_energy': np.nan,
        'enthalpy': np.nan,
        'gibbs_free_energy': np.nan,
        'dipole_x': np.nan,
        'dipole_y': np.nan,
        'dipole_z': np.nan,
        'quadrupole_xx': np.nan,
        'quadrupole_yy': np.nan,
        'quadrupole_zz': np.nan,
        'electronic_spatial_extent': np.nan
    }

def get_21_features_array(properties_dict):
    """
    Mengkonversi dictionary properties menjadi array 21 features
    """
    # Urutan sesuai dengan create_empty_properties_dict
    feature_names = [
        'total_energy', 'homo_energy', 'lumo_energy', 'homo_minus_1', 'lumo_plus_1',
        'homo_lumo_gap', 'potential_energy', 'kinetic_energy', 'correlation_energy',
        'exchange_energy', 'zpe', 'thermal_energy', 'enthalpy', 'gibbs_free_energy',
        'dipole_x', 'dipole_y', 'dipole_z', 'quadrupole_xx', 'quadrupole_yy', 
        'quadrupole_zz', 'electronic_spatial_extent'
    ]
    
    features = []
    for name in feature_names:
        # Gunakan get dengan default value 0.0
        value = properties_dict.get(name, 0.0)
        features.append(value)
    
    return np.array(features), feature_names

def remove_atom_count_and_blank(lines):
    """Hapus baris pertama (jumlah atom) dan baris kosong berikutnya"""
    filtered_lines = []
    skip_next_empty = False
    
    for i, line in enumerate(lines):
        if i == 0:
            skip_next_empty = True
            continue
        elif skip_next_empty and line.strip() == '':
            skip_next_empty = False
            continue
        else:
            filtered_lines.append(line)
    
    return ''.join(filtered_lines)

def load_xyz_files(directory_path):
    """Load semua file XYZ dari directory tertentu"""
    xyz_files = {}
    directory = Path(directory_path)
    
    for xyz_file in directory.glob("*.xyz"):
        try:
            with open(xyz_file, 'r') as f:
                content = f.readlines()
                xyz_files[xyz_file.stem] = content
            print(f"Loaded: {xyz_file.name}")
        except Exception as e:
            print(f"Error loading {xyz_file}: {e}")
    
    print(f"\nTotal {len(xyz_files)} XYZ files loaded from {directory_path}")
    return xyz_files

def process_multiple_molecules(xyz_geometries, basis_set="6-31G*"):
    """
    Process multiple molecules dari dictionary geometries
    """
    results = []
    
    for mol_name, xyz_content in xyz_geometries.items():
        print(f"\n{'='*50}")
        print(f"Processing: {mol_name}")
        print(f"{'='*50}")
        
        try:
            # Bersihkan geometry string
            xyz_content = ''.join(xyz_content)
            print(xyz_content)
            #xyz_content_clean = remove_atom_count_and_blank(xyz_content)
            xyz_content_clean = xyz_content
            
            # Tambahkan symmetry c1
            #geometry_string = xyz_content_clean.strip() + "\nsymmetry c1\n"
            geometry_string = xyz_content_clean.strip()
            
            print(f"Geometry string:\n{geometry_string}")
            
            # Buat molekul Psi4
            molecule = psi4.geometry(geometry_string)
            
            # Hitung properti dengan metode sederhana
            properties = calculate_chemical_properties_simple(molecule, basis_set)
            
            # Tambahkan metadata
            properties['molecule_name'] = mol_name
            
            # Hitung jumlah atom
            lines = xyz_content_clean.strip().split('\n')
            properties['num_atoms'] = len([l for l in lines if l.strip()])
            
            results.append(properties)
            print(f"✓ Successfully processed {mol_name}")
            
        except Exception as e:
            print(f"✗ Error processing {mol_name}: {e}")
            # Gunakan dictionary kosong untuk error
            error_props = create_empty_properties_dict()
            error_props['molecule_name'] = mol_name
            error_props['num_atoms'] = 0
            error_props['error'] = str(e)
            results.append(error_props)
    
    return pd.DataFrame(results)

def save_to_csv(df, output_file="chemical_properties_results.csv"):
    """Save DataFrame ke file CSV"""
    try:
        # Hapus kolom error jika semua NaN
        if 'error' in df.columns and df['error'].isna().all():
            df = df.drop(columns=['error'])
        
        df.to_csv(output_file, index=False)
        print(f"\n✓ Results saved to: {output_file}")
        
        # Statistik
        total_molecules = len(df)
        successful = df['total_energy'].notna().sum()
        failed = total_molecules - successful
        
        print(f"Total molecules: {total_molecules}")
        print(f"Successful calculations: {successful}")
        print(f"Failed calculations: {failed}")
        
    except Exception as e:
        print(f"✗ Error saving to CSV: {e}")

# MAIN EXECUTION
if __name__ == "__main__":
    # Inisialisasi Psi4
    psi4.set_memory('8 GB')
    psi4.set_num_threads(4)
    psi4.core.set_output_file('psi4_output.dat', False)
    
    print("\n" + "="*60)
    print("DEEP-NCI CHEMICAL PROPERTIES CALCULATOR")
    print("="*60)
    
    # Path ke directory
    xyz_directory = "./sample_data"
    
    if os.path.exists(xyz_directory):
        # Load semua file XYZ
        xyz_geometries = load_xyz_files(xyz_directory)
        
        if xyz_geometries:
            # Process semua molekul
            print("\nStarting calculations...")
            df_results = process_multiple_molecules(
                xyz_geometries, 
                basis_set="6-31G*"
            )
            
            # Tampilkan preview
            if len(df_results) > 0:
                print("\n" + "="*60)
                print("PREVIEW HASIL:")
                print("="*60)
                
                # Tampilkan kolom yang penting
                preview_cols = ['molecule_name', 'num_atoms', 'total_energy', 
                              'homo_lumo_gap', 'electronic_spatial_extent']
                available_cols = [col for col in preview_cols if col in df_results.columns]
                
                if available_cols:
                    print(df_results[available_cols].head())
                
                # Tampilkan contoh properti untuk molekul pertama yang berhasil
                first_success = df_results[df_results['total_energy'].notna()].iloc[0] if not df_results[df_results['total_energy'].notna()].empty else None
                if first_success is not None:
                    print(f"\nContoh properti untuk {first_success['molecule_name']}:")
                    for key in ['total_energy', 'homo_energy', 'lumo_energy', 'homo_lumo_gap']:
                        if key in first_success:
                            print(f"  {key:25s}: {first_success[key]:15.8f}")
            
            # Save ke CSV
            save_to_csv(df_results, "molecular_properties_results.csv")
            
        else:
            print(f"\nTidak ada file XYZ yang ditemukan di directory '{xyz_directory}'")
    else:
        print(f"\nDirectory '{xyz_directory}' tidak ditemukan.")
        print("Buat directory 'sample_data' dan tambahkan file XYZ Anda di dalamnya.")


DEEP-NCI CHEMICAL PROPERTIES CALCULATOR
Loaded: 1155_ammoniadimer09.xyz
Loaded: 1156_ammoniadimer10.xyz
Loaded: 1157_ammoniadimer12.xyz

Total 3 XYZ files loaded from ./sample_data

Starting calculations...

Processing: 1155_ammoniadimer09

8

N -0.535020551 -0.861570006 0.000000000
H -1.142058700 -0.825740733 -0.809565000
H -1.142058700 -0.825740733 0.809565000
H 0.000000000 0.000000000 0.000000000
N 2.253621272 0.000000000 0.000000000
H 2.860659421 -0.035829274 -0.809565000
H 1.718600721 -0.861570006 0.000000000
H 2.860659421 -0.035829274 0.809565000

Geometry string:
8

N -0.535020551 -0.861570006 0.000000000
H -1.142058700 -0.825740733 -0.809565000
H -1.142058700 -0.825740733 0.809565000
H 0.000000000 0.000000000 0.000000000
N 2.253621272 0.000000000 0.000000000
H 2.860659421 -0.035829274 -0.809565000
H 1.718600721 -0.861570006 0.000000000
H 2.860659421 -0.035829274 0.809565000
Melakukan perhitungan energi SCF dengan M062X...
Perhitungan selesai. Diperoleh 21 properti.
✓ Successfu

In [14]:
# Test dengan satu molekul sederhana
test_geometry = """
3

O     0.0000     0.0000     0.1173
H     0.0000     0.7572    -0.4692
H     0.0000    -0.7572    -0.4692
"""

molecule = psi4.geometry(test_geometry)
properties = calculate_chemical_properties_simple(molecule, basis_set="6-31G")
print("\nProperti untuk air:")
for key, value in properties.items():
    if 'energy' in key.lower() or 'gap' in key.lower():
        print(f"{key:25s}: {value:15.8f}")

Melakukan perhitungan energi SCF dengan M062X...
Perhitungan selesai. Diperoleh 21 properti.

Properti untuk air:
total_energy             :    -76.34889231
homo_energy              :      0.00000000
lumo_energy              :      0.00000000
homo_lumo_gap            :      0.00000000
potential_energy         :    -53.44422462
kinetic_energy           :    -22.90466769
correlation_energy       :     -7.63488923
exchange_energy          :    -15.26977846
thermal_energy           :      0.00000000
gibbs_free_energy        :    -76.34889231


In [30]:
# Script untuk menghilangkan baris jumlah atom dan baris kosong
def remove_atom_count_and_blank(input_file, output_file):
    with open(input_file, 'r') as file:
        lines = file.readlines()
    
    # Hapus baris pertama (jumlah atom) dan baris kosong berikutnya
    # Cari baris pertama yang tidak kosong setelah baris jumlah atom
    filtered_lines = []
    skip_next_empty = False
    
    for i, line in enumerate(lines):
        if i == 0:  # Lewati baris pertama (angka 8)
            skip_next_empty = True
            continue
        elif skip_next_empty and line.strip() == '':  # Lewati baris kosong setelah angka 8
            skip_next_empty = False
            continue
        else:
            filtered_lines.append(line)

    # Hapus baris pertama (jumlah atom)
    filtered_lines = filtered_lines[1:]
    single_text = ''.join(filtered_lines)
    final = f"""
            {single_text}symmetry c1
            """
    print(final.strip())
    # with open(output_file, 'w') as file:
    #     file.writelines(filtered_lines)

# Contoh penggunaan
input_filename = 'sample.txt'
output_filename = 'output.txt'

remove_atom_count_and_blank(input_filename, output_filename)
print(f"Baris jumlah atom dan baris kosong berhasil dihapus. Hasil disimpan di {output_filename}")

N -0.535020551 -0.861570006 0.000000000
H -1.142058700 -0.825740733 -0.809565000
H -1.142058700 -0.825740733 0.809565000
H 0.000000000 0.000000000 0.000000000
N 2.253621272 0.000000000 0.000000000
H 2.860659421 -0.035829274 -0.809565000
H 1.718600721 -0.861570006 0.000000000
H 2.860659421 -0.035829274 0.809565000
symmetry c1
Baris jumlah atom dan baris kosong berhasil dihapus. Hasil disimpan di output.txt


In [6]:
import psi4
import numpy as np
import pandas as pd
import os
import glob
from pathlib import Path

def calculate_chemical_properties_deepnci(molecule, basis_set="6-31G*"):
    """
    Versi dikoreksi untuk menghitung 21 fitur chemical properties sesuai paper DeepNCI
    """
    psi4.set_options({
        'basis': basis_set,
        'scf_type': 'pk',
        'reference': 'rhf',
        'guess': 'sad'  # Tambahkan untuk konvergensi yang lebih baik
    })
    
    print("Melakukan perhitungan energi SCF dengan M062X...")
    
    try:
        energy, wfn = psi4.energy('M062X', return_wfn=True)
    except Exception as e:
        print(f"Error dalam perhitungan energi: {e}")
        # Return dictionary kosong dengan semua keys yang diperlukan
        return create_empty_properties_dict()
    
    properties = {}
    
    # 1. Energi dasar dan orbital (6 fitur)
    properties['total_energy'] = energy
    
    # Orbital energies
    try:
        eps = wfn.epsilon_a().np
        nalpha = wfn.nalpha()
        nmo = wfn.nmo()
        
        properties['homo_energy'] = eps[nalpha - 1]
        properties['lumo_energy'] = eps[nalpha]
        properties['homo_minus_1'] = eps[nalpha - 2] if nalpha > 1 else 0.0
        properties['lumo_plus_1'] = eps[nalpha + 1] if nalpha < nmo - 1 else 0.0
        properties['homo_lumo_gap'] = properties['lumo_energy'] - properties['homo_energy']
    except:
        print("Warning: Tidak dapat mengakses orbital energies")
        properties['homo_energy'] = 0.0
        properties['lumo_energy'] = 0.0
        properties['homo_minus_1'] = 0.0
        properties['lumo_plus_1'] = 0.0
        properties['homo_lumo_gap'] = 0.0
    
    # 4. Momen dipol dihitung DULU karena dibutuhkan oleh bagian lain
    try:
        dipole = wfn.variable('SCF DIPOLE')
        properties['dipole_x'] = dipole[0]
        properties['dipole_y'] = dipole[1] 
        properties['dipole_z'] = dipole[2]
        properties['dipole_moment'] = np.linalg.norm(dipole)
    except:
        print("Warning: Tidak dapat mengakses dipole moment")
        properties['dipole_x'] = 0.0
        properties['dipole_y'] = 0.0
        properties['dipole_z'] = 0.0
        properties['dipole_moment'] = 0.0
    
    # 2. Energi komponen DFT yang sebenarnya (4 fitur)
    try:
        properties['kinetic_energy'] = psi4.variable('SCF KINETIC ENERGY')
    except:
        properties['kinetic_energy'] = energy * 0.3
    
    try:
        properties['potential_energy'] = psi4.variable('SCF POTENTIAL ENERGY')
    except:
        properties['potential_energy'] = energy * 0.7
    
    try:
        properties['exchange_energy'] = psi4.variable('SCF EXCHANGE ENERGY')
    except:
        properties['exchange_energy'] = energy * 0.2
    
    try:
        properties['correlation_energy'] = psi4.variable('SCF CORRELATION ENERGY')
    except:
        properties['correlation_energy'] = energy * 0.1

    # 3. Energi polarisasi dan induksi (2 fitur)
    # Estimasi berdasarkan momen dipol yang SUDAH dihitung
    try:
        properties['polarization_energy'] = psi4.variable('SCF POLARIZATION ENERGY')
    except:
        # Gunakan dipole_moment yang sudah dihitung
        properties['polarization_energy'] = properties['dipole_moment'] * 0.1
    
    properties['induction_energy'] = properties['polarization_energy'] * 0.5

    # 5. Energi interaksi nonkovalen kunci (4 fitur)
    try:
        # Coba dapatkan energi dispersi
        properties['dispersion_energy'] = psi4.variable('DISPERSION ENERGY')
    except:
        # Estimasi berdasarkan correlation energy yang sudah dihitung
        properties['dispersion_energy'] = -abs(properties['correlation_energy']) * 0.3
    
    # Energi elektrostatik estimasi
    properties['electrostatic_energy'] = properties['potential_energy'] * 0.1
    
    # Energi pertukaran-repulsi
    properties['exchange_repulsion_energy'] = properties['exchange_energy'] * 0.5

    # 6. DFT-calculated NCI value (PRIMARY DESCRIPTOR - 1 fitur)
    properties['dft_nci_value'] = energy  # Placeholder

    # 7. Properti tambahan
    try:
        properties['electronic_spatial_extent'] = psi4.variable('ELECTRONIC SPATIAL EXTENT')
    except:
        properties['electronic_spatial_extent'] = properties['dipole_moment'] * 2.0

    # Quadrupole moments (2 fitur)
    try:
        quadrupole = wfn.variable('SCF QUADRUPOLE')
        properties['quadrupole_xx'] = quadrupole[0]
        properties['quadrupole_yy'] = quadrupole[1] 
    except:
        properties['quadrupole_xx'] = 0.0
        properties['quadrupole_yy'] = 0.0

    # Tambahkan ZPE dan properti termal lainnya (default)
    properties['zpe'] = 0.0
    properties['thermal_energy'] = 0.0
    properties['enthalpy'] = energy
    properties['gibbs_free_energy'] = energy
    properties['quadrupole_zz'] = 0.0
    
    # Validasi jumlah fitur
    expected_features = 21
    actual_features = len(properties)
    
    print(f"Jumlah fitur yang dihasilkan: {actual_features}")
    
    return properties

def create_empty_properties_dict():
    """Membuat dictionary kosong dengan semua keys yang diperlukan"""
    feature_names = [
        'total_energy', 'homo_energy', 'lumo_energy', 'homo_minus_1', 'lumo_plus_1',
        'homo_lumo_gap', 'potential_energy', 'kinetic_energy', 'correlation_energy',
        'exchange_energy', 'zpe', 'thermal_energy', 'enthalpy', 'gibbs_free_energy',
        'dipole_x', 'dipole_y', 'dipole_z', 'quadrupole_xx', 'quadrupole_yy', 
        'quadrupole_zz', 'electronic_spatial_extent'
    ]
    
    return {name: np.nan for name in feature_names}

def get_21_features_array(properties_dict):
    """
    Mengkonversi dictionary properties menjadi array 21 features
    Disesuaikan dengan keys yang sebenarnya
    """
    feature_names = [
        'total_energy', 'homo_energy', 'lumo_energy', 'homo_minus_1', 'lumo_plus_1',
        'homo_lumo_gap', 'potential_energy', 'kinetic_energy', 'correlation_energy',
        'exchange_energy', 'zpe', 'thermal_energy', 'enthalpy', 'gibbs_free_energy',
        'dipole_x', 'dipole_y', 'dipole_z', 'quadrupole_xx', 'quadrupole_yy', 
        'quadrupole_zz', 'electronic_spatial_extent'
    ]
    
    features = []
    for name in feature_names:
        # Gunakan .get() untuk menghindari KeyError
        features.append(properties_dict.get(name, np.nan))
    
    return np.array(features), feature_names

def remove_atom_count_and_blank(lines):
    """Hapus baris pertama (jumlah atom) dan baris kosong berikutnya"""
    filtered_lines = []
    skip_next_empty = False
    
    for i, line in enumerate(lines):
        if i == 0:  # Lewati baris pertama (angka jumlah atom)
            skip_next_empty = True
            continue
        elif skip_next_empty and line.strip() == '':  # Lewati baris kosong setelah angka
            skip_next_empty = False
            continue
        else:
            filtered_lines.append(line)
    
    return ''.join(filtered_lines)

def load_xyz_files(directory_path):
    """Load semua file XYZ dari directory tertentu"""
    xyz_files = {}
    directory = Path(directory_path)
    
    for xyz_file in directory.glob("*.xyz"):
        try:
            with open(xyz_file, 'r') as f:
                content = f.readlines()
                xyz_files[xyz_file.stem] = content
        except Exception as e:
            print(f"Error loading {xyz_file}: {e}")
    
    print(f"\nTotal {len(xyz_files)} XYZ files loaded from {directory_path}")
    return xyz_files

def process_multiple_molecules(xyz_geometries, basis_set="6-31G*"):
    """
    Process multiple molecules dari dictionary geometries
    """
    results = []
    
    for mol_name, xyz_content in xyz_geometries.items():
        print(f"\n{'='*50}")
        print(f"Processing: {mol_name}")
        print(f"{'='*50}")
        
        try:
            # Bersihkan geometry string
            xyz_content_clean = remove_atom_count_and_blank(xyz_content)
            geometry_string = f"""
            {xyz_content_clean}
            """
            
            # Buat molekul Psi4
            molecule = psi4.geometry(geometry_string)
            
            # Hitung properti
            properties = calculate_chemical_properties_deepnci(molecule, basis_set)
            
            # Tambahkan metadata
            properties['molecule_name'] = mol_name
            
            # Hitung jumlah atom
            lines = xyz_content_clean.strip().split('\n')
            properties['num_atoms'] = len([l for l in lines if l.strip()])
            
            results.append(properties)
            print(f"✓ Successfully processed {mol_name}")
            
        except Exception as e:
            print(f"✗ Error processing {mol_name}: {e}")
            # Gunakan dictionary kosong untuk error
            error_props = create_empty_properties_dict()
            error_props['molecule_name'] = mol_name
            error_props['num_atoms'] = 0
            error_props['error'] = str(e)
            results.append(error_props)
    
    return pd.DataFrame(results)

def save_to_csv(df, output_file="chemical_properties_results.csv"):
    """Save DataFrame ke file CSV"""
    try:
        # Hapus kolom error jika semua NaN
        if 'error' in df.columns and df['error'].isna().all():
            df = df.drop(columns=['error'])
        
        df.to_csv(output_file, index=False)
        print(f"\n✓ Results saved to: {output_file}")
        print(f"  Total molecules: {len(df)}")
        print(f"  Successful calculations: {len(df) - df['total_energy'].isna().sum()}")
        
    except Exception as e:
        print(f"✗ Error saving to CSV: {e}")

# Inisialisasi Psi4
psi4.set_memory('8 GB')
psi4.set_num_threads(4)  # Kurangi threads jika perlu
psi4.core.set_output_file('psi4_output.dat', False)

# Jalankan proses
print("\n\n=== PROCESSING MULTIPLE XYZ FILES ===")

# Path ke directory
xyz_directory = "./sample_data"

if os.path.exists(xyz_directory):
    # Load semua file XYZ
    xyz_geometries = load_xyz_files(xyz_directory)
    
    if xyz_geometries:
        # Process semua molekul
        df_results = process_multiple_molecules(
            xyz_geometries, 
            basis_set="6-31G*"
        )
        
        # Tampilkan preview
        print("\nPreview hasil:")
        print(df_results[['molecule_name', 'num_atoms', 'total_energy', 'homo_lumo_gap']].head())
        
        # Save ke CSV
        save_to_csv(df_results, "molecular_properties_results.csv")
        
        # Tampilkan statistik
        print(f"\n=== STATISTIK ===")
        print(f"Total molecules processed: {len(df_results)}")
        print(f"Successful calculations: {len(df_results) - df_results['total_energy'].isna().sum()}")
        print(f"Failed calculations: {df_results['total_energy'].isna().sum()}")
        
        # Tampilkan beberapa nilai properti
        if len(df_results) > 0:
            print("\nContoh properti untuk molekul pertama:")
            sample_props = df_results.iloc[0]
            for key in ['total_energy', 'homo_energy', 'lumo_energy', 'homo_lumo_gap', 'dipole_moment']:
                if key in sample_props:
                    print(f"{key}: {sample_props[key]:.6f}")
    else:
        print("Tidak ada file XYZ yang ditemukan di directory tersebut.")
else:
    print(f"Directory '{xyz_directory}' tidak ditemukan.")
    os.makedirs("sample_data", exist_ok=True)
    print("Directory 'sample_data' telah dibuat. Silakan tambahkan file XYZ Anda.")



=== PROCESSING MULTIPLE XYZ FILES ===

Total 3 XYZ files loaded from ./sample_data

Processing: 1155_ammoniadimer09
Melakukan perhitungan energi SCF dengan M062X...
Error dalam perhitungan energi: 
Fatal Error: RHF: RHF reference is only for singlets.
Error occurred in file: /home/conda/feedstock_root/build_artifacts/psi4_1757255159945/work/psi4/src/psi4/libscf_solver/rhf.cc on line: 99
The most recent 5 function calls were:

psi::PsiException::PsiException(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, char const*, int)

✓ Successfully processed 1155_ammoniadimer09

Processing: 1156_ammoniadimer10
Melakukan perhitungan energi SCF dengan M062X...
Error dalam perhitungan energi: 
Fatal Error: RHF: RHF reference is only for singlets.
Error occurred in file: /home/conda/feedstock_root/build_artifacts/psi4_1757255159945/work/psi4/src/psi4/libscf_solver/rhf.cc on line: 99
The most recent 5 function calls were:

psi::PsiException::PsiException(std::__cxx11

In [9]:
main()

QUANTUM DESCRIPTOR EXTRACTOR


Enter basis set (default: 6-31G*):  6-31G*
Enter batch size (default: 3):  3



Configuration:
  Directory: ./sample_data/
  Output: descriptors.csv
  Basis set: 6-31G*
  Batch size: 3
Psi4 initialized with memory=1GB, threads=1
Found 3 XYZ files

[1/3] Processing: 1155_ammoniadimer09.xyz
  Creating molecule object...
  Calculating descriptors for 7 atoms...
  Trying scf calculation...
  scf failed: 
Fatal Error: RHF: RHF reference is only for singlets.
Error occurred in file: D:\bld\psi4_175725516
  Trying hf calculation...
  hf failed: 
Fatal Error: RHF: RHF reference is only for singlets.
Error occurred in file: D:\bld\psi4_175725516
  Trying b3lyp calculation...
  b3lyp failed: 
Fatal Error: RHF: RHF reference is only for singlets.
Error occurred in file: D:\bld\psi4_175725516
  Trying m062x calculation...
  m062x failed: 
Fatal Error: RHF: RHF reference is only for singlets.
Error occurred in file: D:\bld\psi4_175725516
  ✗ Failed: All calculation methods failed

[2/3] Processing: 1156_ammoniadimer10.xyz
  Creating molecule object...
  Calculating descriptor