In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 데이터 로딩 및 전처리
data_dir = "C:/Users/wdd45/OneDrive/바탕 화면/딥러닝응용/human/UCI HAR Dataset/UCI HAR Dataset"

In [2]:
def load_HAR_data(data_dir):
    train_signals, train_labels, test_signals, test_labels = [], [], [], []
    
    for signal_type in ["train", "test"]:
        for signal in ["Inertial Signals/total_acc_x_", "Inertial Signals/body_acc_x_", "Inertial Signals/body_gyro_x_"]:
            filename = f"{data_dir}/{signal_type}/{signal}{signal_type}.txt"
            data = pd.read_csv(filename, delim_whitespace=True, header=None).values
            if "train" in signal_type:
                train_signals.append(data)
            else:
                test_signals.append(data)

        labels_filename = f"{data_dir}/{signal_type}/y_{signal_type}.txt"
        labels = pd.read_csv(labels_filename, header=None, names=["Activity"]).values
        if "train" in signal_type:
            train_labels.append(labels)
        else:
            test_labels.append(labels)

    train_signals = np.transpose(np.array(train_signals), (1, 2, 0))
    train_labels = train_labels[0]
    test_signals = np.transpose(np.array(test_signals), (1, 2, 0))
    test_labels = test_labels[0]

    train_labels = train_labels - 1
    test_labels = test_labels - 1

    train_signals = (train_signals - np.mean(train_signals)) / np.std(train_signals)
    test_signals = (test_signals - np.mean(test_signals)) / np.std(test_signals)

    return train_signals, train_labels, test_signals, test_labels

train_signals, train_labels, test_signals, test_labels = load_HAR_data(data_dir)

In [8]:
# 데이터를 훈련 및 검증 세트로 분할
X_train, X_val, y_train, y_val = train_test_split(train_signals, train_labels, test_size=0.2, random_state=42)


In [9]:
# 3D 배열을 2D 배열로 변환
X_train_2d = X_train.reshape(X_train.shape[0], -1)
test_signals_2d = test_signals.reshape(test_signals.shape[0], -1)

In [11]:
# 랜덤 포레스트 모델
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_2d, y_train)
y_pred_rf = rf_model.predict(test_signals_2d)
test_accuracy_rf = accuracy_score(test_labels, y_pred_rf)
print(f"Random Forest Test accuracy: {test_accuracy_rf * 100:.2f}%")

  This is separate from the ipykernel package so we can avoid doing imports until


Random Forest Test accuracy: 85.61%


In [12]:
# 로지스틱 회귀 모델
lr_model = LogisticRegression(max_iter=1000, random_state=42)
lr_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_lr = lr_model.predict(test_signals.reshape(test_signals.shape[0], -1))
test_accuracy_lr = accuracy_score(test_labels, y_pred_lr)
print(f"Logistic Regression Test accuracy: {test_accuracy_lr * 100:.2f}%")

  y = column_or_1d(y, warn=True)


Logistic Regression Test accuracy: 51.03%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [13]:
# 의사결정 트리 모델
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_dt = dt_model.predict(test_signals.reshape(test_signals.shape[0], -1))
test_accuracy_dt = accuracy_score(test_labels, y_pred_dt)
print(f"Decision Tree Test accuracy: {test_accuracy_dt * 100:.2f}%")

Decision Tree Test accuracy: 69.53%


In [14]:
# 서포트 벡터 머신 모델
svm_model = SVC(random_state=42)
svm_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_svm = svm_model.predict(test_signals.reshape(test_signals.shape[0], -1))
test_accuracy_svm = accuracy_score(test_labels, y_pred_svm)
print(f"SVM Test accuracy: {test_accuracy_svm * 100:.2f}%")

  y = column_or_1d(y, warn=True)


SVM Test accuracy: 79.64%
