# Imports

In [None]:
# Imports
import pandas as pd
from pathlib import Path
import os
import re
from typing import Dict, List, Tuple

## LAMMPS Thermo Log Parser

Parse thermodynamic data from LAMMPS simulation log files and extract specific properties into a pandas DataFrame.

### Function: parse_thermo_log()

**Purpose:** Extract thermo data from a LAMMPS log file

**Parameters:**
- `log_file` (str): Path to the LAMMPS log file
- `properties` (list): List of property names to extract (matched case-insensitively against the thermo header, e.g., `['temp', 'press', 'pxx']`)
  - Names that do not appear in the log file's thermo output columns are skipped, so they will be absent from the result

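**Returns:** pandas DataFrame with a `step` column plus the requested properties (only those available in the log). Column names are lowercase and all values are floats. An empty DataFrame is returned if the file is not found.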

**Example usage:**
```python
PROPERTIES_TO_PARSE = ['temp', 'press', 'pxx']  
df = parse_thermo_log('simulation.log', PROPERTIES_TO_PARSE)
```

In [None]:
def parse_thermo_log(log_file, properties) -> pd.DataFrame:
    """
    Parse a LAMMPS log file and extract selected thermo columns.

    A thermo section starts at a header line beginning with ``Step`` and ends
    at the next line containing ``Loop time``. Lines starting with ``WARNING``
    or ``ERROR`` are ignored wherever they appear, so messages interleaved
    with the thermo output do not end the section. Rows from every thermo
    section in the file are concatenated in file order.

    Args:
        log_file: Path to the log file (anything accepted by ``open``).
        properties: Iterable of thermo column names to extract. Matching is
            case-insensitive because header names are lowercased. A single
            string is treated as one property name.

    Returns:
        DataFrame whose columns are lowercase header names: ``step`` plus
        each requested property present in the header. All values are floats.
        Requested properties missing from the log are simply absent. An empty
        DataFrame is returned when the file does not exist (a message is
        printed) or when no thermo rows could be parsed.
    """
    try:
        # errors='replace' keeps a stray undecodable byte from aborting the
        # parse; the numeric thermo rows themselves are plain ASCII.
        log_handle = open(log_file, 'r', errors='replace')
    except FileNotFoundError:
        print(f"Error: File {log_file} not found!")
        return pd.DataFrame()

    data_rows = []
    in_thermo_section = False
    header_cols = []

    with log_handle:
        # A bare string would otherwise be iterated character by character.
        if isinstance(properties, str):
            properties = [properties]

        # Built once instead of per column per row; header names are
        # lowercased below, so the requested names are lowercased too.
        wanted_cols = {'step'} | {p.lower() for p in properties}

        # Stream the file line by line; logs from long runs can be large.
        for raw_line in log_handle:
            line = raw_line.strip()

            # Skip WARNING and ERROR lines but continue parsing
            if line.startswith(('WARNING', 'ERROR')):
                continue

            # Detect header line (contains Step and other properties)
            if line.startswith('Step') or (in_thermo_section and 'Step' in line.split()):
                header_cols = [col.lower() for col in line.split()]
                in_thermo_section = True
                continue

            # End of thermo section (only on Loop time, not warnings)
            if in_thermo_section and 'Loop time' in line:
                in_thermo_section = False
                continue

            if not (in_thermo_section and header_cols):
                continue

            values = line.split()
            if len(values) < len(header_cols):
                # Blank or truncated line: not a complete data row.
                continue

            try:
                row_dict = {
                    col: float(value)
                    for col, value in zip(header_cols, values)
                    if col in wanted_cols
                }
            except ValueError:
                # Non-numeric text inside the thermo section; drop the line.
                continue

            if row_dict:  # Only add if we got some data
                data_rows.append(row_dict)

    return pd.DataFrame(data_rows)

In [None]:
# Specify the log file to analyze
CUSTOM_LOG_FILE = 'Eql.log'  # <-- Change this to your log file

# Pressure-tensor diagonal components to pull from the thermo output
PROPERTIES = ['pxx', 'pyy', 'pzz']
custom_df = parse_thermo_log(Path(CUSTOM_LOG_FILE), PROPERTIES)

if custom_df.empty:
    print(f"⚠ Error: Could not parse {CUSTOM_LOG_FILE}")
else:
    # Check which properties are available
    available = [p for p in PROPERTIES if p in custom_df.columns]
    missing = [p for p in PROPERTIES if p not in custom_df.columns]

    if missing:
        print(f"⚠ Missing properties (not in log): {missing}")

    print(f"✓ Loaded {len(custom_df)} data points")
    print(f"✓ Available properties: {available}")
    print(f"✓ Step range: {custom_df['step'].min():.0f} to {custom_df['step'].max():.0f}")

    # Only compute quantities whose input columns were actually parsed;
    # indexing a missing column would raise KeyError right after the
    # "missing properties" warning above.
    if 'pxx' in available and 'pyy' in available:
        # Per-step mean of pxx and pyy, stored in P_Lateral
        P_Lateral = (custom_df["pxx"] + custom_df["pyy"]) / 2
        print(f'Average of lateral pressure components:{P_Lateral.mean()}')
    else:
        print("⚠ Skipping lateral pressure: both 'pxx' and 'pyy' are required")

    if 'pzz' in available:
        print(f'Average of pzz pressure component:{custom_df["pzz"].mean()}')
    else:
        print("⚠ Skipping pzz average: 'pzz' is not in the log")