In [3]:
import os
import warnings
from dataclasses import dataclass
from typing import List, Dict, Optional

import pandas as pd

@dataclass
class SearchSpace:
	"""Tabular search space built from a scope CSV.

	Attributes:
		features: Columns of ``df`` that are not label columns, in file order.
		feature_values: For each feature, its distinct values sorted by their
			string representation.
		labels: Requested label columns that are actually present in ``df``.
		df: The complete table as read from the CSV (features and labels).
	"""

	features: List[str]
	feature_values: Dict[str, List]
	labels: List[str]
	df: pd.DataFrame

	@staticmethod
	def from_csv(csv_path: str, label_cols: Optional[List[str]] = None) -> "SearchSpace":
		"""Read ``csv_path`` and split its columns into features and labels.

		Args:
			csv_path: Path to the scope CSV.
			label_cols: Names of the label columns. Surrounding whitespace is
				stripped, empty names and duplicates are dropped. ``None`` or an
				empty list means every column is treated as a feature.

		Returns:
			A ``SearchSpace`` whose ``labels`` are the requested columns found in
			the file and whose ``features`` are all remaining columns.

		Raises:
			AssertionError: If ``csv_path`` does not exist.

		Warns:
			UserWarning: If a requested label column is not a column of the CSV.
				The name is ignored, so a column the caller meant as a label may
				end up among the features.
		"""
		# Explicit raise (same type and message as an assert would give) so the
		# check is not stripped when Python runs with -O.
		if not os.path.exists(csv_path):
			raise AssertionError(f"Scope CSV not found: {csv_path}")
		df = pd.read_csv(csv_path)

		# dict.fromkeys de-duplicates while keeping the caller's order.
		stripped = (name.strip() for name in (label_cols or []))
		requested = list(dict.fromkeys(name for name in stripped if name))
		labels = [name for name in requested if name in df.columns]
		unknown = [name for name in requested if name not in df.columns]
		if unknown:
			# Do not fail (callers rely on unknown names being ignored), but make
			# the mismatch visible instead of silently widening the feature set.
			warnings.warn(
				f"Label columns not found in {csv_path} and ignored: {unknown}",
				stacklevel=2,
			)

		features = [col for col in df.columns if col not in labels]
		# Sort by str() so columns with mixed or missing values remain sortable.
		feature_values = {
			col: sorted(pd.unique(df[col]).tolist(), key=str) for col in features
		}
		return SearchSpace(features=features, feature_values=feature_values, labels=labels, df=df)


In [6]:
# No label columns are passed, so every column of the CSV is treated as a feature.
SearchSpace.from_csv(csv_path='../../examples/publication/BMS_yield_cost/data/base_dft.csv')

SearchSpace(features=['base_file_name', 'base_SMILES', 'base_stoichiometry', 'base_number_of_atoms', 'base_charge', 'base_multiplicity', 'base_convergence_criteria', 'base_dipole', 'base_molar_mass', 'base_molar_volume', 'base_electronic_spatial_extent', 'base_homo_energy', 'base_lumo_energy', 'base_electronegativity', 'base_hardness', 'base_electrophilicity', 'base_E_scf', 'base_zero_point_correction', 'base_E_thermal_correction', 'base_H_thermal_correction', 'base_G_thermal_correction', 'base_E_zpe', 'base_E', 'base_H', 'base_G', 'base_ES_root_dipole', 'base_ES_root_molar_volume', 'base_ES_root_electronic_spatial_extent', 'base_ES1_transition', 'base_ES1_osc_strength', 'base_ES1_<S**2>', 'base_ES2_transition', 'base_ES2_osc_strength', 'base_ES2_<S**2>', 'base_ES3_transition', 'base_ES3_osc_strength', 'base_ES3_<S**2>', 'base_ES4_transition', 'base_ES4_osc_strength', 'base_ES4_<S**2>', 'base_ES5_transition', 'base_ES5_osc_strength', 'base_ES5_<S**2>', 'base_ES6_transition', 'base_ES6_

## Run the EDBO LLM with Gemini 2.5 Pro on a scope CSV

Before running, make sure your Gemini API key is set in the `GEMINI_API_KEY` environment variable.

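Command (from the project root; the `--scope` path below is relative, so adjust it to resolve from the directory you run the command in):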

```
python -m edbo.llm.llm \
  --provider gemini \
  --model gemini-2.5-pro \
  --scope ../../examples/publication/Virtual-experimentation/data/data.csv \
  --label-cols I1,I2,I3,I4
```
