In [1]:
cd ~/MultiFidelity-ProcessOpt/Perovskites/

/cephfs/volumes/hpc_home/k23070952/a774a32b-b8f8-42f8-b661-406a5dd49fd8/MultiFidelity-ProcessOpt/Perovskites


In [2]:
import torch

if torch.cuda.is_available():
    print("CUDA(GPU) 사용 가능합니다.")
    device = torch.device("cuda")
else:
    print("CUDA(GPU) 사용 불가, CPU 사용합니다.")
    device = torch.device("cpu")


CUDA(GPU) 사용 가능합니다.


In [3]:
#!/usr/bin/env python

import json
import pickle
import numpy as np
import pandas as pd
from copy import deepcopy

from olympus.datasets import Dataset
from olympus.objects import (
	ParameterContinuous,
	ParameterDiscrete, 
	ParameterCategorical, 
	ParameterVector
)
from olympus.campaigns import ParameterSpace, Campaign

from atlas.planners.multi_fidelity.planner import MultiFidelityPlanner



In [4]:
# config
dataset = Dataset(kind='perovskites')
NUM_RUNS = 2
# BUDGET = 30
COST_BUDGET = 50 # 200.
NUM_INIT_DESIGN = 10
NUM_CHEAP = 8

# lookup table
# organic --> cation --> anion --> bandgap_hse06/bandgap_gga
LOOKUP = pickle.load(open('0.Data/lookup_table.pkl', 'rb'))
# print(lookup.keys())
# print(lookup['Ethylammonium']['Ge']['F'].keys())



In [5]:


def measure_from_label(label_arr, s, label_maps, LOOKUP):
    """
    label_arr: [organic_label, cation_label, anion_label]
    s: 0.1 or 1.0 (fidelity)
    label_maps: {'organic': {...}, 'cation': {...}, 'anion': {...}}
    LOOKUP: 전체 lookup table
    """
    # 1. label_maps 역변환 사전 생성
    reverse_maps = {
        "organic": {v: k for k, v in label_maps["organic"].items()},
        "cation": {v: k for k, v in label_maps["cation"].items()},
        "anion": {v: k for k, v in label_maps["anion"].items()},
    }
    # 2. label에서 원래 카테고리명으로 변환
    organic = reverse_maps["organic"][int(label_arr[0])]
    cation = reverse_maps["cation"][int(label_arr[1])]
    anion = reverse_maps["anion"][int(label_arr[2])]

    # 3. 기존 measure 함수와 동일
    if s == 1.0:
        measurement = np.amin(
            LOOKUP[organic.capitalize()][cation][anion]['bandgap_hse06']
        )
    elif s == 0.1:
        measurement = np.amin(
            LOOKUP[organic.capitalize()][cation][anion]['bandgap_gga']
        )
    else:
        raise ValueError("s(fidelity)는 0.1 또는 1.0만 가능합니다.")
    return measurement

def get_min_hse06_bandgap(param_space):
	organic_options = [o.capitalize() for o in param_space[1].options]
	cation_options = [o.capitalize() for o in param_space[2].options]
	anion_options = [o.capitalize() for o in param_space[3].options]

	hse06_bandgaps = []
	for organic_option in organic_options:
		for cation_option in cation_options:
			for anion_option in anion_options:
				hse06_bandgaps.append(
					np.amin(
						LOOKUP[organic_option][cation_option][anion_option]['bandgap_hse06']
					)
				)
	min_hse06_bandgap = np.amin(hse06_bandgaps)
	return min_hse06_bandgap

def compute_cost(params):
	costs = params[:,0].astype(float)
	return np.sum(costs)



In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from copy import deepcopy

class TransferLearningDNN:
    def __init__(self, input_dim, hidden_dim=64, device='cuda'):
        self.model = self._build_model(input_dim, hidden_dim).to(device)
        self.input_dim = input_dim
        self.device = device
        self.pretrain_losses = []
        self.finetune_losses = []

    def _build_model(self, input_dim, hidden_dim):
        # 심플한 2층 DNN
        return nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1))

    def pretrain(self, X_low, y_low, epochs=50, lr=1e-3, verbose=False):
        # low-fidelity 데이터로 선학습
        self.model = self.model.float()
        self.pretrain_losses = []
        X_low, y_low = np.asarray(X_low), np.asarray(y_low).flatten()
        X_tensor = torch.tensor(X_low, dtype=torch.float32).to(self.device)
        y_tensor = torch.tensor(y_low, dtype=torch.float32).to(self.device)
        optimizer = optim.Adam(self.model.parameters(), lr=lr)
        loss_fn = nn.MSELoss()
        # print(X_low[:3])
        # print(X_low.dtype)
        # print(X_tensor.dtype)

        self.model.train()
        for epoch in range(epochs):
            optimizer.zero_grad()
            pred = self.model(X_tensor).squeeze()
            loss = loss_fn(pred, y_tensor)
            loss.backward()
            optimizer.step()
            self.pretrain_losses.append(loss.item())
            if verbose and (epoch+1) % 50 == 0:
                print(f'[Pretrain] Epoch {epoch+1}: Loss {loss.item():.4f}')

    def finetune(self, X_high, y_high, epochs=50, lr=1e-4, verbose=False):
        self.finetune_losses = []
        self.model = self.model.float()
        # high-fidelity 데이터로 파인튜닝
        X_high, y_high = np.asarray(X_high), np.asarray(y_high).flatten()
        X_tensor = torch.tensor(X_high, dtype=torch.float32).to(self.device)
        y_tensor = torch.tensor(y_high, dtype=torch.float32).to(self.device)
        optimizer = optim.Adam(self.model.parameters(), lr=lr)
        loss_fn = nn.MSELoss()
        self.model.train()
        for epoch in range(epochs):
            optimizer.zero_grad()
            pred = self.model(X_tensor).squeeze()
            loss = loss_fn(pred, y_tensor)
            loss.backward()
            optimizer.step()
            self.finetune_losses.append(loss.item())
            if verbose and (epoch+1) % 20 == 0:
                print(f'[Finetune] Epoch {epoch+1}: Loss {loss.item():.4f}')

    def predict(self, X):
        # (GA 등에서 호출) 입력 X에 대해 예측값 리턴
        # print('predict input shape:', X.shape)
        X = np.asarray(X)
        
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        self.model.eval()
        with torch.no_grad():
            # y_pred = self.model(X_tensor).cpu().numpy().flatten()
            y_pred = self.model(X_tensor).detach().cpu().numpy().flatten()
        return y_pred

    def get_fitness_func(self, y_best=None, xi=0.01):
        # pymoo 같은 GA에서 쓰기 위한 objective 함수 래퍼
        # 여기선 EI(예상 개선량) 형태로 예시 구현
        def fitness(x):
            mu = self.predict(np.array([x]))[0]
            if y_best is not None:
                return -(y_best - mu - xi)  # maximize EI → minimize -EI
            else:
                return mu  # 단순 예측값 최적화
        return fitness


In [7]:
param_space = {
    "organic": [
        "ethylammonium", "propylammonium", "butylammonium", "isopropylammonium",
        "dimethylammonium", "acetamidinium", "methylammonium", "guanidinium",
        "hydroxylammonium", "formamidinium", "tetramethylammonium", "hydrazinium",
        "ammonium", "trimethylammonium", "azetidinium", "imidazolium"
    ],
    "cation": ["Ge", "Sn", "Pb"],
    "anion": ["F", "Cl", "Br", "I"]
}
label_maps = {
    key: {val: idx for idx, val in enumerate(vals, 1)}   # 1부터 시작하고 싶으면 enumerate(..., 1)
    for key, vals in param_space.items()
}
label_maps

{'organic': {'ethylammonium': 1,
  'propylammonium': 2,
  'butylammonium': 3,
  'isopropylammonium': 4,
  'dimethylammonium': 5,
  'acetamidinium': 6,
  'methylammonium': 7,
  'guanidinium': 8,
  'hydroxylammonium': 9,
  'formamidinium': 10,
  'tetramethylammonium': 11,
  'hydrazinium': 12,
  'ammonium': 13,
  'trimethylammonium': 14,
  'azetidinium': 15,
  'imidazolium': 16},
 'cation': {'Ge': 1, 'Sn': 2, 'Pb': 3},
 'anion': {'F': 1, 'Cl': 2, 'Br': 3, 'I': 4}}

In [9]:
import numpy as np

def sample_param_space(param_space, n_samples, random_state=None):
    rng = np.random.default_rng(random_state)
    param_names = list(param_space.keys())
    samples = []
    for _ in range(n_samples):
        sample = {}
        for key, opts in param_space.items():
            if isinstance(opts, (list, tuple)) and len(opts) > 0 and isinstance(opts[0], (str, int)):  # 범주형
                sample[key] = rng.choice(opts)
            elif isinstance(opts, tuple) and len(opts) == 2 and all(isinstance(x, (int, float)) for x in opts):  # 연속형
                low, high = opts
                sample[key] = float(rng.uniform(low, high))
            else:
                raise ValueError(f"Unknown parameter type for {key}: {opts}")
        samples.append(sample)
    return samples


In [10]:
# NUM_INIT_DESIGN = 10  # 원하는 개수로 지정

# param_space는 이전 단계에서 정의한 딕셔너리 그대로 사용

init_samples = sample_param_space(param_space, NUM_INIT_DESIGN, random_state=42)
for s in init_samples:
    print(s)


{'organic': 'propylammonium', 'cation': 'Pb', 'anion': 'Br'}
{'organic': 'guanidinium', 'cation': 'Sn', 'anion': 'I'}
{'organic': 'propylammonium', 'cation': 'Pb', 'anion': 'F'}
{'organic': 'propylammonium', 'cation': 'Sn', 'anion': 'I'}
{'organic': 'hydrazinium', 'cation': 'Pb', 'anion': 'Br'}
{'organic': 'ammonium', 'cation': 'Sn', 'anion': 'F'}
{'organic': 'trimethylammonium', 'cation': 'Sn', 'anion': 'Br'}
{'organic': 'acetamidinium', 'cation': 'Ge', 'anion': 'I'}
{'organic': 'ammonium', 'cation': 'Sn', 'anion': 'Cl'}
{'organic': 'trimethylammonium', 'cation': 'Sn', 'anion': 'Cl'}


In [11]:
def assign_fidelities(n_samples, high_ratio=0.2, random_state=None):
    """
    전체 n_samples 중 high_ratio 만큼만 high-fidelity(1.0), 나머지는 low-fidelity(0.1)로 할당
    """
    rng = np.random.default_rng(random_state)
    n_high = max(1, int(round(n_samples * high_ratio)))
    n_low = n_samples - n_high
    fids = [1.0]*n_high + [0.1]*n_low
    rng.shuffle(fids)
    return fids

measurements = []
for params, s in zip(init_samples, assign_fidelities(NUM_INIT_DESIGN, high_ratio=0.2, random_state=42)):
    if s == 1.0:
        measurement = np.amin(
            LOOKUP[params['organic'].capitalize()][params['cation']][params['anion']]['bandgap_hse06']
        )
    else:
        measurement = np.amin(
            LOOKUP[params['organic'].capitalize()][params['cation']][params['anion']]['bandgap_gga']
        )
    # 관측값 저장
    measurements.append({"params": params, "s": s, "measurement": measurement})
measurements

[{'params': {'organic': 'propylammonium', 'cation': 'Pb', 'anion': 'Br'},
  's': 0.1,
  'measurement': 2.1145},
 {'params': {'organic': 'guanidinium', 'cation': 'Sn', 'anion': 'I'},
  's': 0.1,
  'measurement': 1.0856},
 {'params': {'organic': 'propylammonium', 'cation': 'Pb', 'anion': 'F'},
  's': 1.0,
  'measurement': 5.2155},
 {'params': {'organic': 'propylammonium', 'cation': 'Sn', 'anion': 'I'},
  's': 0.1,
  'measurement': 1.3516},
 {'params': {'organic': 'hydrazinium', 'cation': 'Pb', 'anion': 'Br'},
  's': 0.1,
  'measurement': 2.3859},
 {'params': {'organic': 'ammonium', 'cation': 'Sn', 'anion': 'F'},
  's': 0.1,
  'measurement': 3.8068},
 {'params': {'organic': 'trimethylammonium', 'cation': 'Sn', 'anion': 'Br'},
  's': 0.1,
  'measurement': 1.7405},
 {'params': {'organic': 'acetamidinium', 'cation': 'Ge', 'anion': 'I'},
  's': 0.1,
  'measurement': 1.6986},
 {'params': {'organic': 'ammonium', 'cation': 'Sn', 'anion': 'Cl'},
  's': 1.0,
  'measurement': 2.5818},
 {'params': {

In [47]:
len(measurements)

10

In [14]:

# 1. 라벨맵 생성
label_maps = {
    key: {val: idx for idx, val in enumerate(vals, 1)}   # 1부터 시작
    for key, vals in param_space.items()
}

# 2. df 변환 및 라벨 적용
df = pd.DataFrame([
    {**obs['params'], 's': obs['s'], 'measurement': obs['measurement']}
    for obs in measurements
])
for col in ['organic', 'cation', 'anion']:
    df[col + '_label'] = df[col].map(label_maps[col])

# 3. 모델 입력/출력 분리
ini_X = df[['organic_label', 'cation_label', 'anion_label', 's']].values
ini_y = df['measurement'].values

print(ini_X[:5])
print(ini_y[:5])

# s 값에 따라 분할
ini_X_low = ini_X[df['s'] == 0.1]
ini_y_low = ini_y[df['s'] == 0.1]

ini_X_high = ini_X[df['s'] == 1.0]
ini_y_high = ini_y[df['s'] == 1.0]


[[ 2.   3.   3.   0.1]
 [ 8.   2.   4.   0.1]
 [ 2.   3.   1.   1. ]
 [ 2.   2.   4.   0.1]
 [12.   3.   3.   0.1]]
[2.1145 1.0856 5.2155 1.3516 2.3859]


In [18]:

# ini_X_low, ini_X_high에서 s를 제외한 3개 컬럼만 남기기
ini_X_low = ini_X_low[:, :3]   # (N_low, 3)
ini_X_high = ini_X_high[:, :3] # (N_high, 3)

# 모델 학습
model = TransferLearningDNN(input_dim=ini_X_low.shape[1], hidden_dim=64, device='cpu')

if len(ini_X_low) > 0:
    model.pretrain(ini_X_low, ini_y_low, epochs=100, lr=1e-3, verbose=True)

if len(ini_X_high) > 0:
    model.finetune(ini_X_high, ini_y_high, epochs=50, lr=1e-3, verbose=True)


[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
float64
torch.float32
[Pretrain] Epoch 50: Loss 0.4414
[Pretrain] Epoch 100: Loss 0.3513
[Finetune] Epoch 20: Loss 1.1981
[Finetune] Epoch 40: Loss 0.0249


In [19]:
from pymoo.core.problem import Problem
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.optimize import minimize
import numpy as np

# 1. 최적화 대상 DNN을 objective로 쓰는 Problem 정의
class DNNProblem(Problem):
    def __init__(self, dnn, param_bounds):
        super().__init__(n_var=len(param_bounds), n_obj=1, xl=[b[0] for b in param_bounds], xu=[b[1] for b in param_bounds])
        self.dnn = dnn

    def _evaluate(self, X, out, *args, **kwargs):
        # DNN의 predict에 여러 샘플 X를 넣으면 shape=(n_pop, n_dim)
        y_pred = self.dnn.predict(X)
        out["F"] = y_pred

# # 2. 입력 범위 정의 (각 feature별 min/max)
# # (예: [organic_label, cation_label, anion_label, s])
# param_bounds = [
#     [1, 16],   # organic_label
#     [1, 3],    # cation_label
#     [1, 4],    # anion_label
# ]

# problem = DNNProblem(model, param_bounds)
# algorithm = GA(pop_size=32)

# # 3. GA로 최적화 실행
# res = minimize(
#     problem,
#     algorithm,
#     ('n_gen', 40), # 세대수/반복수, 늘릴수록 탐색 강함
#     verbose=True
# )

# # 4. 최적화된 추천점
# next_x = res.X
# print("추천점(입력):", next_x)
# print("모델 예측값:", model.predict([next_x]))

# # 실험적 추천을 위한 s(fidelity) 값도 next_x[-1]로 구분 가능
# # 정수 label로 변환 (round)
# next_x_label = [int(round(val)) for val in next_x]
# print("정수 label 추천점:", next_x_label)

In [20]:
def append_measurement_to_data(existing_X_low, existing_y_low,
                               existing_X_high, existing_y_high,
                               label_arr, s, label_maps, LOOKUP):
    """
    label_arr: [organic_label, cation_label, anion_label]
    s: 0.1 or 1.0 (fidelity)
    기존 데이터에 label_arr을 s에 따라 붙여주는 함수
    """
    # 1. 측정값 구하기
    measurement = measure_from_label(label_arr, s, label_maps, LOOKUP)
    
    # 2. 데이터 추가 (s는 X에 넣지 않으므로 3개만!)
    label_arr = np.array(label_arr, dtype=np.float32).reshape(1, -1)
    measurement = np.array([measurement], dtype=np.float32)

    if s == 0.1:
        existing_X_low = np.vstack([existing_X_low, label_arr]) if existing_X_low.size else label_arr
        existing_y_low = np.concatenate([existing_y_low, measurement]) if existing_y_low.size else measurement
    elif s == 1.0:
        existing_X_high = np.vstack([existing_X_high, label_arr]) if existing_X_high.size else label_arr
        existing_y_high = np.concatenate([existing_y_high, measurement]) if existing_y_high.size else measurement
    else:
        raise ValueError("s(fidelity)는 0.1 또는 1.0만 가능합니다.")

    return existing_X_low, existing_y_low, existing_X_high, existing_y_high

# # 추천 label이 [2, 3, 3], s=1.0이라고 가정
# ini_X_low, ini_y_low, ini_X_high, ini_y_high = append_measurement_to_data(
#     ini_X_low, ini_y_low, ini_X_high, ini_y_high,
#     next_x_label, 1.0, label_maps, LOOKUP
# )

In [21]:
import time

### 1. 필수 함수 및 설정
# (여기 measure_from_label, append_measurement_to_data 등 위에서 만든 함수 모두 선언)

### 2. 초기 데이터 생성
NUM_INIT_DESIGN = 10
init_samples = sample_param_space(param_space, NUM_INIT_DESIGN, random_state=42)
init_fids = assign_fidelities(NUM_INIT_DESIGN, high_ratio=0.2, random_state=42)

measurements = []
for params, s in zip(init_samples, init_fids):
    measurement = measure_from_label(
        [label_maps['organic'][params['organic']],
         label_maps['cation'][params['cation']],
         label_maps['anion'][params['anion']]],
        s, label_maps, LOOKUP
    )
    measurements.append({"params": params, "s": s, "measurement": measurement})

# 데이터프레임 변환 및 라벨 적용
df = pd.DataFrame([
    {**obs['params'], 's': obs['s'], 'measurement': obs['measurement']}
    for obs in measurements
])
for col in ['organic', 'cation', 'anion']:
    df[col + '_label'] = df[col].map(label_maps[col])

ini_X = df[['organic_label', 'cation_label', 'anion_label', 's']].values
ini_y = df['measurement'].values

# s 값에 따라 분할 (X에서 s를 제거)
ini_X_low = ini_X[df['s'] == 0.1][:, :3]
ini_y_low = ini_y[df['s'] == 0.1]
ini_X_high = ini_X[df['s'] == 1.0][:, :3]
ini_y_high = ini_y[df['s'] == 1.0]

### 3. 루프 시작
timing_data = []
N_ITER = 10 # 원하는 반복 횟수
for iter_ in range(N_ITER):
    print(f"\n==== Iteration {iter_+1} ====")
    iter_start = time.time()   # ★★★ 타이밍 시작

    # (1) 모델 학습/업데이트
    model = TransferLearningDNN(input_dim=3, hidden_dim=64, device='cuda')
    if len(ini_X_low) > 0:
        model.pretrain(ini_X_low, ini_y_low, epochs=100, lr=1e-3, verbose=False)
    if len(ini_X_high) > 0:
        model.finetune(ini_X_high, ini_y_high, epochs=50, lr=1e-3, verbose=False)

    # (2) GA 추천점 최적화
    param_bounds = [
        [1, 16],   # organic_label
        [1, 3],    # cation_label
        [1, 4],    # anion_label
    ]
    problem = DNNProblem(model, param_bounds)
    algorithm = GA(pop_size=800)  # population 800명
    res = minimize(problem, algorithm, ('n_gen', 1000), verbose=False)  # 1000세대

    next_x = res.X
    next_x_label = [int(round(val)) for val in next_x]
    print("추천 label:", next_x_label)

    # (3) Fidelity 할당 (예시: 8:1 비율로 선택)
    s = 0.1 if (iter_ % 9 != 0) else 1.0

    # (4) 실제 실험값(측정값) 얻기 및 데이터에 추가
    ini_X_low, ini_y_low, ini_X_high, ini_y_high = append_measurement_to_data(
        ini_X_low, ini_y_low, ini_X_high, ini_y_high,
        next_x_label, s, label_maps, LOOKUP
    )
    
    print(f"측정값: {measure_from_label(next_x_label, s, label_maps, LOOKUP):.4f} (fidelity={s})")

    iter_end = time.time()   # ★★★ 타이밍 끝
    time_taken = iter_end - iter_start
    print(f"Iteration {iter_+1} 소요 시간: {time_taken:.2f} 초")
    timing_data.append([0, iter_+1, time_taken])  # 0은 run_ix (실험 반복 번호)

# 전체 타이밍 데이터 저장
timing_df = pd.DataFrame(timing_data, columns=['run_ix', 'iter', 'time_taken'])
timing_df.to_csv('TL_timing_results.csv', index=False)



==== Iteration 1 ====
[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
float64
torch.float32
추천 label: [7, 1, 1]
측정값: 4.7182 (fidelity=1.0)
Iteration 1 소요 시간: 52.82 초

==== Iteration 2 ====
[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
float64
torch.float32
추천 label: [8, 1, 3]
측정값: 2.0910 (fidelity=0.1)
Iteration 2 소요 시간: 50.61 초

==== Iteration 3 ====
[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
float64
torch.float32
추천 label: [3, 1, 1]
측정값: 4.3305 (fidelity=0.1)
Iteration 3 소요 시간: 51.03 초

==== Iteration 4 ====
[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
float64
torch.float32
추천 label: [3, 1, 1]
측정값: 4.3305 (fidelity=0.1)
Iteration 4 소요 시간: 51.72 초

==== Iteration 5 ====
[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
float64
torch.float32
추천 label: [9, 1, 1]
측정값: 4.1513 (fidelity=0.1)
Iteration 5 소요 시간: 51.77 초

==== Iteration 6 ====
[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
float64
torch.float32
추천 label: [8, 1, 1]
측정값: 4.2847 (fidelity=0.1)
Iteration 6 소요 시간: 51.64 초

==== Iteration 7 ====
[[2. 3. 3.]
 [8. 2. 4.]
 [2. 2. 4.]]
floa

In [50]:
measurements = []
for params, s in zip(init_samples, init_fids):
    measurement = measure_from_label(
        [label_maps['organic'][params['organic']],
         label_maps['cation'][params['cation']],
         label_maps['anion'][params['anion']]],
        s, label_maps, LOOKUP
    )
    measurements.append({"params": params, "s": s, "measurement": measurement})

# 데이터프레임 변환 및 라벨 적용
df = pd.DataFrame([
    {**obs['params'], 's': obs['s'], 'measurement': obs['measurement']}
    for obs in measurements
])
for col in ['organic', 'cation', 'anion']:
    df[col + '_label'] = df[col].map(label_maps[col])

ini_X = df[['organic_label', 'cation_label', 'anion_label', 's']].values
ini_y = df['measurement'].values

# s 값에 따라 분할 (X에서 s를 제거)
ini_X_low = ini_X[df['s'] == 0.1][:, :3]
ini_y_low = ini_y[df['s'] == 0.1]
ini_X_high = ini_X[df['s'] == 1.0][:, :3]
ini_y_high = ini_y[df['s'] == 1.0]

timing_data = []
cost_data = []
best_so_far_curve = []
total_cost = 0.0
best_so_far = np.inf
min_hse06_bandgap = 1.5249
COST_BUDGET = 50
N_ITER = 10 # 원하는 반복 횟수
iter_ = 0
all_results = []
while total_cost < COST_BUDGET:
# for iter_ in range(N_ITER):
    iter_ += 1
    print(f"\n==== Iteration {iter_} ====")
    iter_start = time.time()   # ★★★ 타이밍 시작
    print(len())
    # (1) 모델 학습/업데이트
    model = TransferLearningDNN(input_dim=3, hidden_dim=64, device='cpu')
    if len(ini_X_low) > 0:
        model.pretrain(ini_X_low, ini_y_low, epochs=100, lr=1e-3, verbose=False)
    if len(ini_X_high) > 0:
        model.finetune(ini_X_high, ini_y_high, epochs=50, lr=1e-3, verbose=False)

    # X_full shape: (N, 3)
    y_pred_full = model.predict(X_full)
    result_row = {
        "iter": iter_,
        "ini_X_low": ini_X_low.tolist(),
        "ini_y_low": ini_y_low.tolist(),
        "ini_X_high": ini_X_high.tolist(),
        "ini_y_high": ini_y_high.tolist(),
        "y_pred_full": y_pred_full.tolist()
    }
    all_results.append(result_row)

    # (2) GA 추천점 최적화
    param_bounds = [
        [1, 16],   # organic_label
        [1, 3],    # cation_label
        [1, 4],    # anion_label
    ]
    problem = DNNProblem(model, param_bounds)
    algorithm = GA(pop_size=80)
    res = minimize(problem, algorithm, ('n_gen', 100), verbose=False)
    next_x = res.X
    next_x_label = [int(round(val)) for val in next_x]
    print("추천 label:", next_x_label)

    # (3) Fidelity 할당 (예시: 8:1 비율로 선택)
    s = 0.1 if (iter_ % 8 != 0) else 1.0
    print(s)
    # (4) 실제 실험값(측정값) 얻기 및 데이터에 추가
    measurement = measure_from_label(next_x_label, s, label_maps, LOOKUP)
    ini_X_low, ini_y_low, ini_X_high, ini_y_high = append_measurement_to_data(
        ini_X_low, ini_y_low, ini_X_high, ini_y_high,
        next_x_label, s, label_maps, LOOKUP
    )
    
    print(f"측정값: {measurement:.4f} (fidelity={s})")

    iter_end = time.time()   # ★★★ 타이밍 끝
    time_taken = iter_end - iter_start
    
    timing_data.append([0, iter_, time_taken])  # 0은 run_ix (실험 반복 번호)

    # ★ 누적 실험 비용 기록
    total_cost += s
    cost_data.append([0, iter_, total_cost])  # run_ix, iter, cumulative cost
    print(f"Iteration {iter_} 소요 시간: {time_taken:.2f} 초 total_cost: {total_cost}" )
    # ★ best-so-far (regret curve) 기록
    if s == 1:
        if measurement < best_so_far:
            best_so_far = measurement
    best_so_far_curve.append([0, iter_, s,  best_so_far])  # run_ix, iter, best-so-far

    if s == 1.0 and np.isclose(measurement, min_hse06_bandgap, atol=1e-6):
        print('found the min hse06 bandgap!')
        break

    if iter_>10:
        break


# 전체 타이밍 데이터 저장
timing_df = pd.DataFrame(timing_data, columns=['run_ix', 'iter', 'time_taken'])
timing_df.to_csv('TL_timing_results.csv', index=False)

# 누적 실험비용 저장
cost_df = pd.DataFrame(cost_data, columns=['run_ix', 'iter', 'cumulative_cost'])
cost_df.to_csv('TL_cumulative_cost.csv', index=False)

# best-so-far curve 저장 (regret curve)
best_so_far_df = pd.DataFrame(best_so_far_curve, columns=['run_ix', 'iter', 's', 'best_so_far'])
best_so_far_df.to_csv('TL_best_so_far_curve.csv', index=False)

df_results = pd.DataFrame(all_results)
df_results.to_csv("TL_all_iter_results.csv", index=False)


==== Iteration 1 ====


TypeError: len() takes exactly one argument (0 given)

In [38]:
import itertools
import numpy as np

# 파라미터 스페이스 정의
organics = param_space['organic']
cations = param_space['cation']
anions = param_space['anion']

all_combinations = list(itertools.product(organics, cations, anions))
print("총 경우의 수:", len(all_combinations))
X_full = []
for o, c, a in all_combinations:
    o_label = label_maps['organic'][o]
    c_label = label_maps['cation'][c]
    a_label = label_maps['anion'][a]
    X_full.append([o_label, c_label, a_label])
X_full = np.array(X_full, dtype=np.float32)
X_full


총 경우의 수: 192


array([[ 1.,  1.,  1.],
       [ 1.,  1.,  2.],
       [ 1.,  1.,  3.],
       [ 1.,  1.,  4.],
       [ 1.,  2.,  1.],
       [ 1.,  2.,  2.],
       [ 1.,  2.,  3.],
       [ 1.,  2.,  4.],
       [ 1.,  3.,  1.],
       [ 1.,  3.,  2.],
       [ 1.,  3.,  3.],
       [ 1.,  3.,  4.],
       [ 2.,  1.,  1.],
       [ 2.,  1.,  2.],
       [ 2.,  1.,  3.],
       [ 2.,  1.,  4.],
       [ 2.,  2.,  1.],
       [ 2.,  2.,  2.],
       [ 2.,  2.,  3.],
       [ 2.,  2.,  4.],
       [ 2.,  3.,  1.],
       [ 2.,  3.,  2.],
       [ 2.,  3.,  3.],
       [ 2.,  3.,  4.],
       [ 3.,  1.,  1.],
       [ 3.,  1.,  2.],
       [ 3.,  1.,  3.],
       [ 3.,  1.,  4.],
       [ 3.,  2.,  1.],
       [ 3.,  2.,  2.],
       [ 3.,  2.,  3.],
       [ 3.,  2.,  4.],
       [ 3.,  3.,  1.],
       [ 3.,  3.,  2.],
       [ 3.,  3.,  3.],
       [ 3.,  3.,  4.],
       [ 4.,  1.,  1.],
       [ 4.,  1.,  2.],
       [ 4.,  1.,  3.],
       [ 4.,  1.,  4.],
       [ 4.,  2.,  1.],
       [ 4.,  2.

In [43]:
reverse_label_maps = {
    key: {v: k for k, v in value.items()}
    for key, value in label_maps.items()
}
import pandas as pd

rows = []
for row in X_full:
    o_label, c_label, a_label = [int(x) for x in row]
    organic = reverse_label_maps['organic'][o_label]
    cation = reverse_label_maps['cation'][c_label]
    anion = reverse_label_maps['anion'][a_label]
    rows.append({
        'organic': organic,
        'cation': cation,
        'anion': anion,
        'organic_label': o_label,
        'cation_label': c_label,
        'anion_label': a_label
    })
df_X_full = pd.DataFrame(rows)
df_X_full.to_csv("X_full_with_labels.csv", index=False)
