In [1]:
import numpy as np
from sklearn.datasets import fetch_california_housing
import os
from sklearn.model_selection import KFold, train_test_split

In [2]:
# Fetch the California housing dataset through scikit-learn's loader.
housing_data = fetch_california_housing()

# List the top-level entries of the returned object so the available fields
# ('data', 'target', 'feature_names', ...) are visible before they are used.
dataset_keys = housing_data.keys()
print(dataset_keys)

dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])


In [3]:
# Pull the feature matrix (X) and the target array (y) out of the loaded dataset.
X, y = housing_data['data'], housing_data['target']

# Sanity check: both arrays should have the same number of rows.
print(X.shape, y.shape)

(20640, 8) (20640,)


## 交差検証を使う場合のデータの保存
1. cfg.data.dir_nameで指定しているディレクトリ上にfoldごとにデータを保存する
2. input（特徴量）は `fold_{i}_X.npy` と保存する。(iはfold番号)
3. target（ラベル・正解値）は `fold_{i}_y.npy` と保存する。

In [4]:
# Destination directory for the per-fold arrays; created up front
# (exist_ok=True makes this a no-op when it is already there).
save_dirs = "./features/raw"
os.makedirs(save_dirs, exist_ok=True)

# Shuffled 5-way split with a fixed seed so fold membership is reproducible.
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Each fold file holds only the rows selected by the second index array that
# split() yields for that iteration; the first index array is not needed here.
for i, (_, holdout_rows) in enumerate(kf.split(X)):
    np.save(f"{save_dirs}/fold_{i}_X.npy", X[holdout_rows])
    np.save(f"{save_dirs}/fold_{i}_y.npy", y[holdout_rows])


## 交差検証を使わない場合（train_test か train_valid か train_valid_test）のデータの保存
1. cfg.data.dir_nameで指定しているディレクトリ上にセットごとにデータを保存する
2. input（特徴量）は `{mode}_X.npy` と保存する。(modeは{train or valid or test})
3. target（ラベル・正解値）は `{mode}_y.npy` と保存する。
4. 下の例はtrain_valid_testに分割

In [5]:
# Destination directory for the train / valid / test arrays.
save_dirs = "./features/raw"

# Create the directory here as well, so this cell works even when the K-fold
# cell was not run first: np.save does not create missing parent directories.
os.makedirs(save_dirs, exist_ok=True)

# Two-stage split with a fixed seed:
#   1) hold out 20% of all rows as the test set;
#   2) take 25% of the remaining 80% as the validation set (0.25 x 0.8 = 0.2).
# Result: 60% train, 20% valid, 20% test.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)

# Save each set as `{mode}_X.npy` / `{mode}_y.npy`, mode in {train, valid, test}.
splits = {
    "train": (X_train, y_train),
    "valid": (X_valid, y_valid),
    "test": (X_test, y_test),
}
for mode, (split_X, split_y) in splits.items():
    np.save(f"{save_dirs}/{mode}_X.npy", split_X)
    np.save(f"{save_dirs}/{mode}_y.npy", split_y)