In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [21]:
import pandas as pd
import numpy as np

In [13]:
path = "/content/drive/MyDrive/Colab Notebooks/WISDM.csv"

df = pd.read_csv(path, sep=';')

In [54]:
print(df.shape)
print(df.columns.tolist())
print(df.head(5))

(1048575, 5)
['activity', 'x', 'y', 'z', 'label_id']
  activity         x          y         z  label_id
0  Jogging -0.694638  12.680544  0.503953         1
1  Jogging  5.012288  11.264028  0.953424         1
2  Jogging  4.903325  10.882658 -0.081722         1
3  Jogging -0.612916  18.496431  3.023717         1
4  Jogging -1.184970  12.108489  7.205164         1


In [55]:
names = ["idx", "user", "activity", "timestamp", "x", "y", "z"]
df = pd.read_csv(
    path,
    header=None,
    names=names,
    sep=r"[;,]",
    engine="python",
    skip_blank_lines=True
)

for c in ["x","y","z"]:
    df[c] = pd.to_numeric(df[c], errors="coerce")
df["activity"] = df["activity"].astype(str).str.strip()
df = df.dropna(subset=["x","y","z","activity"]).reset_index(drop=True)

In [56]:
df = df.drop(columns=["user","timestamp","idx"], errors="ignore")

print("=== cleaned shape ===", df.shape)
print(df.head(3))
print("\nlabel counts:")
print(df["activity"].value_counts())

=== cleaned shape === (1048575, 4)
  activity         x          y         z
0  Jogging -0.694638  12.680544  0.503953
1  Jogging  5.012288  11.264028  0.953424
2  Jogging  4.903325  10.882658 -0.081722

label counts:
activity
Walking       406775
Jogging       325975
Upstairs      118589
Downstairs     97813
Sitting        54876
Standing       44547
Name: count, dtype: int64


In [70]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df["label_id"] = le.fit_transform(df["activity"])
id2name = {i:n for i,n in enumerate(le.classes_)}

NORMAL_NAME  = "Sitting"
ANOMALY_NAME = "Jogging"

if NORMAL_NAME not in le.classes_ or ANOMALY_NAME not in le.classes_:
    raise ValueError(f"'{NORMAL_NAME}' or '{ANOMALY_NAME}' no label. real={list(le.classes_)}")

normal_id  = le.transform([NORMAL_NAME])[0]
anomaly_id = le.transform([ANOMALY_NAME])[0]

In [71]:
df_train = df[df["label_id"] == normal_id].copy()
df_test  = df[df["label_id"].isin([normal_id, anomaly_id])].copy()

X_train = df_train[["x","y","z"]].to_numpy(dtype=np.float32)
X_test  = df_test[["x","y","z"]].to_numpy(dtype=np.float32)
y_test  = (df_test["label_id"].to_numpy() == anomaly_id).astype(int)  # 비정상=1

In [72]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

In [60]:
import numpy as np

def make_windows(X, y, win=128, stride=64):
    xs, ys = [], []
    for s in range(0, len(X) - win + 1, stride):
        xs.append(X[s:s+win].reshape(-1))
        ys.append(y[s + win - 1])
    return np.stack(xs), np.array(ys)

In [74]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
in_dim = 3

class AE(nn.Module):
    def __init__(self, in_dim, h1=32, h2=8):
        super().__init__()
        self.enc = nn.Sequential(
            nn.Linear(in_dim, h1), nn.ReLU(),
            nn.Linear(h1, h2)
        )
        self.dec = nn.Sequential(
            nn.Linear(h2, h1), nn.ReLU(),
            nn.Linear(h1, in_dim)
        )
    def forward(self, x):
        z  = self.enc(x)
        xh = self.dec(z)
        return xh, z

In [76]:
model = AE(in_dim, h1=32, h2=8).to(device)
crit  = nn.L1Loss()
opt   = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

BATCH, EPOCHS = 1024, 12

train_ds = TensorDataset(torch.from_numpy(X_train_s.astype(np.float32)))
train_ld = DataLoader(train_ds, batch_size=BATCH, shuffle=True, drop_last=True)

model.train()
for ep in range(1, EPOCHS+1):
    tot = 0.0
    for (xb,) in train_ld:
        xb = xb.to(device)
        xh, _ = model(xb)
        loss = crit(xh, xb)
        opt.zero_grad(); loss.backward(); opt.step()
        tot += loss.item() * xb.size(0)
    print(f"[{ep:02d}/{EPOCHS}] MAE={tot/len(train_ds):.6f}")


[01/12] MAE=0.679673
[02/12] MAE=0.409795
[03/12] MAE=0.180321
[04/12] MAE=0.041293
[05/12] MAE=0.013345
[06/12] MAE=0.009309
[07/12] MAE=0.007451
[08/12] MAE=0.006436
[09/12] MAE=0.005720
[10/12] MAE=0.006134
[11/12] MAE=0.005142
[12/12] MAE=0.004738


In [78]:
model.eval()
with torch.no_grad():
    XN = torch.from_numpy(X_train_s.astype(np.float32)).to(device)
    XN_hat,_ = model(XN)
    recon_train = (XN_hat - XN).abs().mean(dim=1).cpu().numpy()

    XT = torch.from_numpy(X_test_s.astype(np.float32)).to(device)
    XH,_ = model(XT)
    recon_test = (XH - XT).abs().mean(dim=1).cpu().numpy()

th = np.percentile(recon_train, 95)  # 정상(Sitting) 분포 95퍼 기준
y_pred = (recon_test > th).astype(int)

print("threshold:", th)
print(classification_report(y_test, y_pred, target_names=[f"normal({NORMAL_NAME})", f"anomaly({ANOMALY_NAME})"]))
print("AUROC(두개 클래스 분리):", roc_auc_score(y_test, recon_test))

threshold: 0.0113174515
                  precision    recall  f1-score   support

 normal(Sitting)       0.96      0.95      0.95     54876
anomaly(Jogging)       0.99      0.99      0.99    325975

        accuracy                           0.99    380851
       macro avg       0.98      0.97      0.97    380851
    weighted avg       0.99      0.99      0.99    380851

AUROC(두개 클래스 분리): 0.9969057507008208
