In [9]:
import os, json, random
import numpy as np
import torch

# Seed Python's, NumPy's and PyTorch's random number generators with the same value.
SEED = 0
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

# Output folders all live under ROOT ("." is the current working directory;
# on Colab this is normally fine as-is).
ROOT = "."
ARTIFACT_DIR, IMG_DIR, LOG_DIR = (
    os.path.join(ROOT, _sub) for _sub in ("artifacts", "images", "logs")
)
# Create the folders up front; exist_ok makes re-running this cell harmless.
for _out_dir in (ARTIFACT_DIR, IMG_DIR, LOG_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib, os, json

In [11]:
# Location of the input CSV. Defaults to the original Colab path; set the
# DATA_PATH environment variable to point the notebook at a file elsewhere.
DATA_PATH = os.environ.get("DATA_PATH", "/content/sample_data.csv")

In [12]:
# Load the dataset; the "utf-8-sig" codec skips a leading BOM if the file has one.
df = pd.read_csv(filepath_or_buffer=DATA_PATH, encoding="utf-8-sig")

In [13]:
# Prediction targets: systolic blood pressure, diastolic blood pressure, pulse.
target_cols = ["収縮期血圧", "拡張期血圧", "脈拍"]

# Every remaining column is an input feature; both frames are independent copies of df.
x = df.drop(columns=target_cols).copy()
t = df.loc[:, target_cols].copy()

In [14]:
# Persist the feature column order next to the other artifacts.
# ensure_ascii=False writes non-ASCII column names as-is instead of \u escapes.
feature_names = [*x.columns]
feature_names_path = os.path.join(ARTIFACT_DIR, "feature_names.json")
with open(feature_names_path, "w", encoding="utf-8") as f:
    json.dump(feature_names, f, ensure_ascii=False, indent=2)

In [15]:
# Hold out 20% of all rows as the test set, then take 25% of the remaining 80%
# as the validation set: 0.25 * 0.8 = 0.2, so the overall split is 60/20/20.
TEST_FRACTION = 0.2
VAL_FRACTION_OF_REMAINING = 0.25

x_train_full, x_test, t_train_full, t_test = train_test_split(
    x, t, test_size=TEST_FRACTION, random_state=SEED
)
x_train, x_val, t_train, t_val = train_test_split(
    x_train_full, t_train_full, test_size=VAL_FRACTION_OF_REMAINING, random_state=SEED
)

# Sanity check: every row lands in exactly one of the three partitions.
assert len(x_train) + len(x_val) + len(x_test) == len(x)

In [16]:
# Standardize features and targets. Both scalers are fitted on the training
# partition only, then applied unchanged to the validation and test partitions.
scaler_x = StandardScaler()
scaler_x.fit(x_train)
scaler_t = StandardScaler()
scaler_t.fit(t_train)


def _scaled_float32(scaler, frame):
    """Apply an already-fitted scaler to `frame` and cast the result to float32."""
    return scaler.transform(frame).astype("float32")


X_train, X_val, X_test = (
    _scaled_float32(scaler_x, _part) for _part in (x_train, x_val, x_test)
)
T_train, T_val, T_test = (
    _scaled_float32(scaler_t, _part) for _part in (t_train, t_val, t_test)
)

# Save the scaled arrays, then the fitted scalers, into the artifact directory.
for _fname, _array in (
    ("X_train.npy", X_train),
    ("X_val.npy", X_val),
    ("X_test.npy", X_test),
    ("T_train.npy", T_train),
    ("T_val.npy", T_val),
    ("T_test.npy", T_test),
):
    np.save(os.path.join(ARTIFACT_DIR, _fname), _array)

for _fname, _scaler in (("scaler_x.pkl", scaler_x), ("scaler_t.pkl", scaler_t)):
    joblib.dump(_scaler, os.path.join(ARTIFACT_DIR, _fname))

# Report the shape of each partition (features, targets).
for _label, _features, _targets in (
    ("Train:", X_train, T_train),
    ("Val:  ", X_val, T_val),
    ("Test: ", X_test, T_test),
):
    print(_label, _features.shape, _targets.shape)

Train: (285, 10) (285, 3)
Val:   (95, 10) (95, 3)
Test:  (95, 10) (95, 3)
