# SafeStep

## 1. 라이브러리 임포트

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# Path configuration: input CSV locations and output artifact locations.
TRAIN_CSV = 'data/train/train.csv'
TEST_CSV = 'data/test/test.csv'
MODEL_PATH = 'models/sensor_model.h5'
SCALER_PATH = 'models/scaler.pkl'
# Create the directory that will hold the saved model up front (no error if
# it already exists). With the defaults above, SCALER_PATH shares this directory.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)


## 2. 데이터 로드 및 전처리

In [None]:
def load_and_preprocess(train_csv: str, test_csv: str, scaler_path: str = SCALER_PATH):
    """
    Load the train/test CSV files, standardize the features, and save the scaler.

    In both files the last column is used as the label and every other column
    as a feature. The scaler is fitted on the training features only and then
    applied to the test features, so BOTH returned feature arrays are already
    standardized and must not be passed through the scaler again.

    Args:
        train_csv: Path to the training CSV file.
        test_csv: Path to the test CSV file.
        scaler_path: Where the fitted scaler is pickled. The parent directory
            is created if it does not exist.

    Returns:
        X_train, y_train, X_test, y_test, scaler
    """
    import pickle

    # Load the raw tables.
    df_train = pd.read_csv(train_csv)
    df_test = pd.read_csv(test_csv)

    # Split features (all but the last column) from labels (last column).
    X_train = df_train.iloc[:, :-1].values
    y_train = df_train.iloc[:, -1].values
    X_test = df_test.iloc[:, :-1].values
    y_test = df_test.iloc[:, -1].values

    # Standardize: fit on the training data only, then reuse the same
    # statistics for the test data.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Persist the scaler. Make sure the target directory exists first so a
    # custom scaler_path does not fail with FileNotFoundError; a bare file
    # name has an empty dirname, which os.makedirs would reject.
    scaler_dir = os.path.dirname(scaler_path)
    if scaler_dir:
        os.makedirs(scaler_dir, exist_ok=True)
    with open(scaler_path, 'wb') as f:
        pickle.dump(scaler, f)

    return X_train, y_train, X_test, y_test, scaler


## 3. 모델 학습

In [None]:
def train_model(
    X_train: np.ndarray,
    y_train: np.ndarray,
    model_path: str = MODEL_PATH,
    epochs: int = 20,
    batch_size: int = 32
) -> tf.keras.Model:
    """
    Train a small dense classifier on preprocessed data and save it.

    The network is Dense(64, relu) -> Dense(32, relu) -> Dense(softmax),
    compiled with the Adam optimizer and sparse categorical cross-entropy.

    Args:
        X_train: 2-D feature array; its second dimension sets the input width.
        y_train: Integer class labels, one per row of X_train.
        model_path: File path the trained model is saved to.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.

    Returns:
        The trained model.

    Raises:
        ValueError: If y_train is empty.
    """
    labels = np.asarray(y_train)
    if labels.size == 0:
        raise ValueError("y_train is empty; cannot train a model without labels")

    # sparse_categorical_crossentropy uses each label as an index into the
    # softmax output, so the output width must be greater than the largest
    # label value. Counting unique labels alone is too small when labels are
    # not exactly 0..K-1 (e.g. 1-based or with gaps). For contiguous 0-based
    # labels both quantities are equal, so the architecture is unchanged.
    num_classes = max(len(np.unique(labels)), int(labels.max()) + 1)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
    model.save(model_path)
    print(f"Model saved to {model_path}")
    return model


## 4. 모델 로드 및 예측

In [None]:
def load_model_from_path(model_path: str = MODEL_PATH) -> tf.keras.Model:
    """
    Load a previously saved Keras model from disk.

    Args:
        model_path: Location of the saved model.

    Returns:
        The loaded model.

    Raises:
        FileNotFoundError: If nothing exists at model_path.
    """
    if os.path.exists(model_path):
        return tf.keras.models.load_model(model_path)
    raise FileNotFoundError(f"Model file not found: {model_path}")

def predict_step(model: tf.keras.Model, data_list: list[float], scaler: StandardScaler) -> int:
    """
    Predict the class index for a single sample.

    The sample is scaled inside this function, so data_list must hold RAW
    (not yet standardized) feature values.

    Args:
        model: Trained classifier used for prediction.
        data_list: Feature values of one sample.
        scaler: Fitted scaler applied to the sample before prediction.

    Returns:
        Index of the highest-scoring class.
    """
    # Shape the flat feature list into a single-row batch.
    sample = np.array(data_list, dtype=float)
    single_row = sample.reshape(1, -1)
    scaled_row = scaler.transform(single_row)
    scores = model.predict(scaled_row)
    best_class = np.argmax(scores, axis=1)[0]
    return int(best_class)


## 5. 학습·예측·평가

In [None]:
if __name__ == '__main__':
    # Load the data; the returned feature arrays are already standardized.
    X_train, y_train, X_test, y_test, scaler = load_and_preprocess(TRAIN_CSV, TEST_CSV)
    # Train the model.
    model = train_model(X_train, y_train, epochs=10, batch_size=16)
    # Predict on the test set. X_test was already transformed by the scaler in
    # load_and_preprocess, so it is fed to the model directly: routing it
    # through predict_step would apply the scaler a second time and evaluate
    # the model on wrongly scaled inputs. One batched call also avoids a
    # separate model.predict invocation per row.
    probabilities = model.predict(X_test)
    preds = np.argmax(probabilities, axis=1)

    # Report evaluation metrics.
    acc = accuracy_score(y_test, preds)
    cm = confusion_matrix(y_test, preds)
    print(f'Accuracy: {acc:.4f}')
    print('Confusion Matrix:\n', cm)
