# Formation Energy Calculator Demo

This notebook demonstrates how to use the Formation Energy Calculator module.

## 1. Import and Load Data

In [1]:
%reload_ext autoreload
%autoreload 2
from formation_energy import FormationEnergyCalculator, calculate_formation_energy
import pandas as pd
import matplotlib.pyplot as plt
# Cell 1: reload the formation_energy module, then load the data.
import importlib
import sys  # kept: not used in this cell, but other cells may rely on it

import formation_energy

# Reload through importlib instead of deleting the sys.modules entry by hand.
# importlib.reload re-executes the module inside its existing module object,
# so there is never a second, orphaned copy of the module alive alongside the
# one that %autoreload (enabled at the top of this cell) is watching.
importlib.reload(formation_energy)

# Bind the public names once, after the reload, so they refer to the freshly
# executed definitions (the cell previously repeated this import three times).
from formation_energy import FormationEnergyCalculator, calculate_formation_energy

print("✓ Module reloaded!")

# Load the data and print the calculator's debug summary
calc = FormationEnergyCalculator('../data/CO2R_Gas_Data.xlsx')
calc.debug_info()

✓ Module reloaded!
Loaded sheet: 'BEEF-vdW'
No correction_energy column found, using raw_energy as is
DEBUG INFORMATION

DataFrame shape: (21, 10)
Columns: ['surface_name', 'site_name', 'species_name', 'type', 'fugacity', 'raw_energy', 'frequencies', 'num_carbonyl', 'correction', 'corrected_energy']

First 5 rows:
   surface_name site_name species_name    type  fugacity  raw_energy  \
0           NaN       gas          CH4     gas   20467.0 -231.576327   
1           NaN       gas         C2H4     gas   13942.0 -394.990434   
2           NaN       gas           H2     gas   30296.0  -32.942254   
3           NaN       gas       H2_ref     gas  101325.0  -32.942254   
4           NaN       gas          H2O  liquid    3534.0 -496.270267   

                                         frequencies  num_carbonyl  \
0  [54.65811394834241, 71.6372154480809, 85.06283...             0   
1  [49.41703231529869, 70.78991483142337, 820.049...             0   
2  [6.609732955609958, 7.412936415532567,

In [2]:

# Configure how pandas renders frames in this notebook: never truncate
# columns, and show at most 50 rows. set_option accepts (name, value) pairs.
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', 50,
)

## 2. Quick Start - One-Liner

In [5]:
# Calculate formation energies in one step.
# A single call reads the workbook at `filepath`, computes the formation
# energies and writes the result table to `output_filepath` (the captured
# output below reports each of these stages).
# NOTE(review): display=False presumably suppresses rendering of the result
# table in this cell — confirm against the module.
# The returned calculator is kept as `calc`; every later cell reads from it.
calc = calculate_formation_energy(
    filepath='../data/CO2R_Gas_Data.xlsx',
    output_filepath='../data/BEEF-vdW.tsv',
    display=False
)

Loaded sheet: 'BEEF-vdW'
Applied correction_energy to raw_energy
  Correction range: 0.000000 to 0.400000 eV
Step 1: Extracting elements from species...
Found elements: C, H, O

Step 2: Calculating reference energies...
  Found 21 gas/liquid phase entries
  Reference species: CH4, C2H4, H2, H2_ref, H2O, CO, CH3OH, CO2, HCOOH, CH3CH2OH
Reference Energies:
  E_ref(H) = -16.426127 eV
  E_ref(O) = -463.418013 eV
  E_ref(C) = -163.369290 eV

Step 3: Calculating formation energies...

Successfully calculated formation energies for 17/21 entries

Saved results to: ../data/BEEF-vdW.tsv


## 3. View Results

In [None]:
# Get the results table from the calculator. `db` is reused by the plotting
# and ranking cells further down.
# NOTE(review): presumably a pandas DataFrame — it is used with .head(),
# column indexing and .boxplot() in this notebook; confirm against to_db().
db = calc.to_db()

# Display first 10 rows (last expression, so it renders as the cell output)
db.head(10)

## 4. Summary Statistics

In [None]:
# Get statistics grouped by phase type.
# NOTE(review): the return type of get_summary_statistics() is not visible
# here; if it is a DataFrame, ending the cell with a bare `stats` would give
# a richer rendering than print() — confirm before changing.
stats = calc.get_summary_statistics()
print(stats)

## 5. Filter Data by Phase Type

In [None]:
# Gas-phase subset: report how many entries it holds, then preview the
# first 10 rows restricted to the name and energy columns.
gas_data = calc.filter_by_type('gas')
print(f"Gas phase species: {len(gas_data)}")
gas_data.head(10)[['species_name', 'raw_energy', 'formation_energy']]

In [None]:
# Adsorbate subset: report its size and, when it is non-empty, preview the
# first 10 rows using whichever of the wanted columns actually exist.
ads_data = calc.filter_by_type('ads')
print(f"Adsorbate species: {len(ads_data)}")
if len(ads_data) > 0:
    cols = ['species_name', 'surface_name', 'raw_energy', 'formation_energy']
    present_cols = [col for col in cols if col in ads_data.columns]
    preview = ads_data[present_cols].head(10)
    display(preview)

## 6. Visualize Formation Energies

In [None]:
# Two-panel figure of formation energies split by phase type:
# left = overlaid histograms, right = box plot.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_hist, ax_box = axes

# Left panel: one semi-transparent histogram per phase type; types with no
# non-null formation energy are skipped.
for phase_type in db['type'].unique():
    energies = db.loc[db['type'] == phase_type, 'formation_energy'].dropna()
    if energies.empty:
        continue
    ax_hist.hist(energies, alpha=0.6, label=phase_type, bins=20)

ax_hist.set_xlabel('Formation Energy (eV)', fontsize=12)
ax_hist.set_ylabel('Count', fontsize=12)
ax_hist.set_title('Distribution of Formation Energies', fontsize=14)
ax_hist.legend()
ax_hist.grid(alpha=0.3)

# Right panel: box plot per phase type, using only rows that have a value.
db_plot = db.dropna(subset=['formation_energy'])
db_plot.boxplot(column='formation_energy', by='type', ax=ax_box)
ax_box.set_xlabel('Phase Type', fontsize=12)
ax_box.set_ylabel('Formation Energy (eV)', fontsize=12)
ax_box.set_title('Formation Energy by Phase Type', fontsize=14)
fig.suptitle('')  # drop the figure-level title that the grouped boxplot adds

fig.tight_layout()
plt.show()

## 7. Find Most and Least Stable Species

In [None]:
# Rank species by formation energy: the most negative values are the most
# stable. Rows without a formation energy are excluded from the ranking.
valid_data = db.dropna(subset=['formation_energy'])

if len(valid_data) > 0:
    ranking_cols = ['species_name', 'type', 'formation_energy']
    most_stable = valid_data.nsmallest(5, 'formation_energy')
    least_stable = valid_data.nlargest(5, 'formation_energy')

    print("Top 5 Most Stable Species:")
    display(most_stable[ranking_cols])

    print("\nTop 5 Least Stable Species:")
    display(least_stable[ranking_cols])

## 8. Reference Energies

In [None]:
# List the per-element reference energies stored on the calculator,
# one line per element, formatted to six decimals.
print("Reference Energies (eV):")
for symbol, ref_value in calc.ref_energies.items():
    line = f"  E_ref({symbol}) = {ref_value:.6f}"
    print(line)

## 9. Custom Analysis

In [None]:
# Grouped bar chart comparing raw and formation energy for each gas-phase
# species. Rows missing any of the three columns are dropped first, and the
# chart is only drawn when something is left to plot.
gas_comparison = gas_data[['species_name', 'raw_energy', 'formation_energy']].dropna()

if len(gas_comparison) > 0:
    width = 0.35
    x = range(len(gas_comparison))

    # Each species gets two bars, centred half a bar-width either side of
    # its tick position.
    half_width = width / 2
    raw_centers = [pos - half_width for pos in x]
    formation_centers = [pos + half_width for pos in x]

    plt.figure(figsize=(10, 6))
    plt.bar(raw_centers, gas_comparison['raw_energy'],
            width, label='Raw Energy', alpha=0.8)
    plt.bar(formation_centers, gas_comparison['formation_energy'],
            width, label='Formation Energy', alpha=0.8)

    plt.title('Raw vs Formation Energy (Gas Phase)', fontsize=14)
    plt.xlabel('Species', fontsize=12)
    plt.ylabel('Energy (eV)', fontsize=12)
    plt.xticks(x, gas_comparison['species_name'].values, rotation=45, ha='right')
    plt.grid(axis='y', alpha=0.3)
    plt.legend()
    plt.tight_layout()
    plt.show()

## 10. Export Results

In [None]:
# Write the results once per supported output format. Each entry pairs the
# destination path with the format name that calc.save expects.
export_targets = (
    ('../data/output_formation.tsv', 'tsv'),
    ('../data/output_formation.csv', 'csv'),
    ('../data/output_formation.xlsx', 'excel'),
)
for out_path, out_format in export_targets:
    calc.save(out_path, format=out_format)

print("Results saved in TSV, CSV, and Excel formats!")