<a href="https://colab.research.google.com/github/gw1129/GitHub/blob/master/HOMO_LUMO_Prediction_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔬 전해질 분자의 HOMO / LUMO 예측 실습

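이 노트북에서는 무작위로 생성한 가상의 분자 데이터를 사용해 HOMO / LUMO 에너지를 예측하는 머신러닝 파이프라인을 만들어봅니다. 여기서 HOMO / LUMO 값은 실제 계산값이 아니라 균등분포에서 뽑은 난수이므로, 마지막에 출력되는 MAE와 R²는 파이프라인 동작을 확인하기 위한 것이며 실제 예측 성능을 의미하지 않습니다.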

이 노트북은 다음 단계로 진행됩니다:
1. 라이브러리 설치 및 임포트
2. 데이터 생성 (가상 전해질 분자)
3. RDKit으로 분자 특성 추출 (벡터화)
4. 머신러닝 모델 학습 및 평가

사용 도구: `RDKit`, `scikit-learn`(`RandomForestRegressor`), `pandas`, `NumPy`, `tqdm`

In [None]:
# STEP 1. 라이브러리 설치 및 임포트
!pip install rdkit-pypi tqdm
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from rdkit import Chem
from rdkit.Chem import Descriptors
from tqdm import tqdm

In [None]:
# STEP 2. Generate data (hypothetical electrolyte molecules)
np.random.seed(42)
n_samples = 500

def random_smiles(n_random_atoms=5):
    """Return a random linear-chain SMILES string that starts with a carbon.

    The string is 'C' followed by ``n_random_atoms`` randomly chosen atoms.
    Interior positions are drawn from C/O/N only; fluorine is allowed only in
    the final position. Fluorine forms a single bond, so an 'F' in the middle
    of a chain (e.g. 'CFC') has an invalid valence and RDKit returns ``None``
    for it, which would make the feature-extraction step discard the row.

    Args:
        n_random_atoms: Number of random atoms appended after the leading 'C'.
            Must be at least 1. The default of 5 yields 6-character strings.

    Returns:
        A ``str`` of length ``n_random_atoms + 1``.

    Raises:
        ValueError: If ``n_random_atoms`` is smaller than 1.
    """
    if n_random_atoms < 1:
        raise ValueError("n_random_atoms must be at least 1")
    # Atoms that can sit inside a chain (they accept two single bonds).
    interior = np.random.choice(['C', 'O', 'N'], size=n_random_atoms - 1)
    # The chain end has one bond only, so monovalent fluorine is fine here.
    terminal = np.random.choice(['C', 'O', 'N', 'F'])
    return ''.join(['C', *interior, terminal])

# HOMO / LUMO are sampled uniformly and independently of the SMILES strings:
# they are placeholder targets, not physically meaningful energies.
data = {
    'smiles': [random_smiles() for _ in range(n_samples)],
    'HOMO': np.random.uniform(-11, -5, size=n_samples),
    'LUMO': np.random.uniform(-4, 0, size=n_samples)
}
df = pd.DataFrame(data)
df.head()

In [None]:
# STEP 3. Extract molecular features with RDKit
def compute_features(smiles):
    """Compute five RDKit descriptors for a single SMILES string.

    Args:
        smiles: SMILES string passed to ``Chem.MolFromSmiles``.

    Returns:
        A list ``[MolWt, MolLogP, NumHDonors, NumHAcceptors, TPSA]`` in that
        order, or a list of five ``np.nan`` values when ``Chem.MolFromSmiles``
        returns ``None`` for the input.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is not None:
        descriptor_fns = (
            Descriptors.MolWt,
            Descriptors.MolLogP,
            Descriptors.NumHDonors,
            Descriptors.NumHAcceptors,
            Descriptors.TPSA,
        )
        return [fn(molecule) for fn in descriptor_fns]
    # Unparseable SMILES: one NaN per descriptor so the row can be dropped later.
    return [np.nan] * 5

feature_names = ['MolWt', 'LogP', 'HDonors', 'HAcceptors', 'TPSA']
features = [compute_features(smi) for smi in tqdm(df['smiles'])]

# Reuse df's own index so each feature row lines up with its molecule even when
# df's index is not 0..n-1 (e.g. this cell is re-run after rows were dropped).
feature_df = pd.DataFrame(features, columns=feature_names, index=df.index)

# Drop any feature columns left over from a previous run of this cell before
# concatenating, so re-running never produces duplicate columns. Rows whose
# SMILES could not be parsed carry NaN features and are removed by dropna().
df = pd.concat(
    [df.drop(columns=feature_names, errors='ignore'), feature_df],
    axis=1,
).dropna()
df.head()

In [None]:
# STEP 4. Train and evaluate the machine-learning models
# NOTE: the HOMO / LUMO targets in STEP 2 are drawn from uniform distributions
# independently of the molecules, so an R² close to zero (or negative) is the
# expected outcome here; the cell demonstrates the workflow, not predictive skill.
y_HOMO = df['HOMO']
y_LUMO = df['LUMO']
X = df[feature_names]

# A single split call keeps the features and both targets on the same rows.
(
    X_train, X_test,
    y_HOMO_train, y_HOMO_test,
    y_LUMO_train, y_LUMO_test,
) = train_test_split(X, y_HOMO, y_LUMO, test_size=0.2, random_state=42)

# One independent random forest per target, both with a fixed seed.
model_HOMO = RandomForestRegressor(random_state=42)
model_LUMO = RandomForestRegressor(random_state=42)
model_HOMO.fit(X_train, y_HOMO_train)
model_LUMO.fit(X_train, y_LUMO_train)

pred_HOMO = model_HOMO.predict(X_test)
pred_LUMO = model_LUMO.predict(X_test)

# Report MAE then R² for HOMO, followed by the same pair for LUMO.
for mae_label, r2_label, y_true, y_pred in (
    ("HOMO MAE:", "HOMO R²:", y_HOMO_test, pred_HOMO),
    ("LUMO MAE:", "LUMO R²:", y_LUMO_test, pred_LUMO),
):
    print(mae_label, mean_absolute_error(y_true, y_pred))
    print(r2_label, r2_score(y_true, y_pred))