In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
DATA_DIR = "/content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET"

In [3]:
def subject_file_name(subject_id):
    return DATA_DIR + "/mHealth_subject" + str(subject_id) + ".log"

def parse_line(line):
    line = line.strip()
    if line == "":
        return None, None #빈줄 무시
    parts = line.split()
    label_str = parts[-1]
    feature_strs = parts[:-1]

    features = []
    for v in feature_strs:
        if v == "":
            continue
        features.append(float(v))

    label = int(label_str)

    return features, label

In [4]:
X = []
y = []
subjects = []

for sid in range(1, 11):
    file_path = subject_file_name(sid)
    print("Reading subject", sid, "from", file_path)

    try:
        f = open(file_path, "r")
    except:
        print("err")
        continue

    line_count = 0
    sample_count = 0

    while True:
        line = f.readline()
        if not line:
            break

        line_count += 1
        features, label = parse_line(line)

        if features is None:
            continue

        X.append(features)
        y.append(label)
        subjects.append(sid)
        sample_count += 1

    f.close()
    print("  lines:", line_count, " -> samples:", sample_count)


Reading subject 1 from /content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET/mHealth_subject1.log
  lines: 161280  -> samples: 161280
Reading subject 2 from /content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET/mHealth_subject2.log
  lines: 130561  -> samples: 130561
Reading subject 3 from /content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET/mHealth_subject3.log
  lines: 122112  -> samples: 122112
Reading subject 4 from /content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET/mHealth_subject4.log
  lines: 116736  -> samples: 116736
Reading subject 5 from /content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET/mHealth_subject5.log
  lines: 119808  -> samples: 119808
Reading subject 6 from /content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET/mHealth_subject6.log
  lines: 98304  -> samples: 98304
Reading subject 7 from /content/drive/MyDrive/Colab Notebooks/MHEALTHDATASET/mHealth_subject7.log
  lines: 104448  -> samples: 104448
Reading subject 8 from /content/drive/MyDrive/Colab Notebooks/MH

In [5]:
print("총 샘플:", len(X))
print("피처(feature dimension):", len(X[0]))

총 샘플: 1215745
피처(feature dimension): 23


In [6]:
unique_labels = []
for v in y:
    if v not in unique_labels:
        unique_labels.append(v)

print("라벨 종류:", unique_labels)

라벨 종류: [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 5]


In [7]:
def make_folds():
    return [[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]]

def split_by_subjects(X, y, subjects, test_subjects):
    X_train = []
    y_train = []
    X_test  = []
    y_test  = []

    for i in range(len(X)):
        s = subjects[i]
        if s in test_subjects:
            X_test.append(X[i])
            y_test.append(y[i])
        else:
            X_train.append(X[i])
            y_train.append(y[i])

    return X_train, y_train, X_test, y_test

In [8]:
folds = make_folds()
for idx in range(len(folds)):
    ts = folds[idx]
    X_tr, y_tr, X_te, y_te = split_by_subjects(X, y, subjects, ts)
    print("Fold", idx+1, " | test subjects =", ts,
          " | train samples:", len(X_tr),
          " | test samples:", len(X_te))

Fold 1  | test subjects = [1, 2]  | train samples: 923904  | test samples: 291841
Fold 2  | test subjects = [3, 4]  | train samples: 976897  | test samples: 238848
Fold 3  | test subjects = [5, 6]  | train samples: 997633  | test samples: 218112
Fold 4  | test subjects = [7, 8]  | train samples: 982273  | test samples: 233472
Fold 5  | test subjects = [9, 10]  | train samples: 982273  | test samples: 233472


In [9]:
def get_labels_union(y_true, y_pred):
    labels = []

    for v in y_true:
      if v not in labels:
        labels.append(v)

    for v in y_pred:
      if v not in labels:
        labels.append(v)

    return labels

Macro, F1 calc

In [10]:
def macro_f1_score(y_true, y_pred):
  labels = get_labels_union(y_true, y_pred)

  TP = {}
  FP = {}
  FN = {}
  for c in labels:
      TP[c] = 0
      FP[c] = 0
      FN[c] = 0

  for i in range(len(y_true)):
      t = y_true[i]
      p = y_pred[i]

      for c in labels:
        if p == c and t == c:
          TP[c] += 1
        elif p == c and t != c:
          FP[c] += 1
        elif p != c and t == c:
          FN[c] += 1

  f1_sum = 0.0
  count = 0

  for c in labels:
    tp = TP[c]
    fp = FP[c]
    fn = FN[c]

    if tp == 0 and fp == 0 and fn == 0:
      continue

    # precision, recall
    if tp + fp == 0:
        precision = 0.0
    else:
        precision = tp / float(tp + fp)

    if tp + fn == 0:
        recall = 0.0
    else:
        recall = tp / float(tp + fn)

    if precision == 0.0 and recall == 0.0:
        f1 = 0.0
    else:
        f1 = 2.0 * precision * recall / (precision + recall)

    f1_sum += f1
    count += 1

    if count == 0:
        return 0.0

    return f1_sum / float(count)

In [11]:
WINDOW = 128
STRIDE = 64

def create_windows(X, y, subjects, window_size=128, stride=64):
    N = len(X)
    D = len(X[0])

    Xw = []
    yw = []
    sw = []

    i = 0
    while i + window_size <= N:
        win = []
        for t in range(window_size):
            win.append(X[i + t])

        mid = i + window_size // 2
        label = y[mid]
        subject_id = subjects[mid]

        Xw.append(win)
        yw.append(label)
        sw.append(subject_id)

        i += stride
    return Xw, yw, sw

Xw, yw, sw = create_windows(X, y, subjects, window_size=WINDOW, stride=STRIDE)
print("윈도우 개수:", len(Xw))
print("윈도우 shape:", len(Xw[0]), "x", len(Xw[0][0]))

윈도우 개수: 18995
윈도우 shape: 128 x 23


FORWARD

In [12]:
class Conv1D:
    def __init__(self, in_channels, out_channels, kernel_size=3):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size

        self.W = []
        for f in range(out_channels):
            kernel = []
            for k in range(kernel_size):
                row = []
                for c in range(in_channels):
                    row.append(((f+1)*(k+2)*(c+3) % 17) * 0.001)
                kernel.append(row)
            self.W.append(kernel)

        self.b = [0.0] * out_channels

        self.last_x = None
        self.grad_W = None
        self.grad_b = None

    def forward(self, x):
        self.last_x = x

        T = len(x)
        K = self.kernel_size
        C = self.in_channels
        F = self.out_channels

        out_T = T - K + 1

        out = []
        for t in range(out_T):
            row = []
            for f in range(F):
                s = self.b[f]
                kernel = self.W[f]

                for k in range(K):
                    xi = x[t + k]
                    wi = kernel[k]
                    for c in range(C):
                        s += xi[c] * wi[c]
                row.append(s)
            out.append(row)

        return out

    def backward(self, grad_out):
        x = self.last_x
        T = len(x)
        C = self.in_channels
        F = self.out_channels
        K = self.kernel_size

        out_T = T - K + 1

        self.grad_W = []
        for f in range(F):
            kernel_grad = []
            for k in range(K):
                kernel_grad.append([0.0] * C)
            self.grad_W.append(kernel_grad)

        self.grad_b = [0.0] * F

        grad_x = []
        for t in range(T):
            grad_x.append([0.0] * C)

        for t in range(out_T):
            for f in range(F):
                go = grad_out[t][f]

                self.grad_b[f] += go

                for k in range(K):
                    xi = x[t + k]
                    for c in range(C):
                        self.grad_W[f][k][c] += xi[c] * go

                for k in range(K):
                    wi = self.W[f][k]
                    for c in range(C):
                        grad_x[t + k][c] += wi[c] * go
        return grad_x

ReLU

In [13]:
class ReLU:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        T = len(x)
        C = len(x[0])

        out = []
        self.mask = []

        for t in range(T):
            row = []
            mask_row = []
            for c in range(C):
                if x[t][c] > 0:
                    row.append(x[t][c])
                    mask_row.append(1)
                else:
                    row.append(0.0)
                    mask_row.append(0)
            out.append(row)
            self.mask.append(mask_row)

        return out

    def backward(self, grad_out):
        T = len(grad_out)
        C = len(grad_out[0])

        grad_in = []

        for t in range(T):
            row = []
            for c in range(C):
                if self.mask[t][c] == 1:
                    row.append(grad_out[t][c])
                else:
                    row.append(0.0)
            grad_in.append(row)

        return grad_in

In [14]:
relu = ReLU()
x = [[-1, 2], [3, -4]]

out = relu.forward(x)
print("forward:", out)
grad_out = [[1,1],[1,1]]
gx = relu.backward(grad_out)
print("backward:", gx)

forward: [[0.0, 2], [3, 0.0]]
backward: [[0.0, 1], [1, 0.0]]


GAP

In [15]:
class GAP:
    def __init__(self):
        self.T = None
        self.C = None

    def forward(self, x):
        self.T = len(x)
        self.C = len(x[0])

        out = []

        for c in range(self.C):
            s = 0.0
            for t in range(self.T):
                s += x[t][c]
            out.append(s / float(self.T))
        return out

    def backward(self, grad_out):
        grad_in = []
        for t in range(self.T):
            row = []
            for c in range(self.C):
                row.append(grad_out[c] / float(self.T))
            grad_in.append(row)
        return grad_in

In [16]:
gap = GAP()

x = [
    [1, 4],
    [2, 5],
    [3, 6]
]

out = gap.forward(x)
print("GAP forward:", out)
grad_out = [3, 6]

gx = gap.backward(grad_out)
print("GAP backward:", gx)

GAP forward: [2.0, 5.0]
GAP backward: [[1.0, 2.0], [1.0, 2.0], [1.0, 2.0]]


FC(full connection)

loss = -log( exp(score[label]) / sum(exp(score)) )

In [17]:
def my_exp(x):
    return 1.0 + x + (x*x)/2.0 + (x*x*x)/6.0

def softmax(scores):
    mx = scores[0]
    for s in scores:
        if s > mx:
            mx = s

    exps = []
    ssum = 0.0
    for s in scores:
        e = my_exp(s - mx)
        exps.append(e)
        ssum += e

    out = []
    for e in exps:
        out.append(e / ssum)
    return out


def cross_entropy(softmax_probs, target_label):
    p = softmax_probs[target_label]
    if p <= 1e-12:
        p = 1e-12
    return - (0.0 + my_log(p))

def my_log(x):
    y = x - 1.0
    return y - (y*y)/2.0 + (y*y*y)/3.0

## FC layer
class FC:
    def __init__(self, in_dim, out_dim):
        self.in_dim = in_dim
        self.out_dim = out_dim

        self.W = []
        for i in range(in_dim):
            row = []
            for j in range(out_dim):
                row.append( ( (i * 97 + j * 13) % 100 ) / 5000.0 - 0.01 )
            self.W.append(row)

        self.b = []
        for j in range(out_dim):
            self.b.append(0.0)

        self.x = None
        self.grad_W = None
        self.grad_b = None

    def forward(self, x):
        self.x = x

        out = []
        for j in range(self.out_dim):
            s = self.b[j]
            for i in range(self.in_dim):
                s += self.W[i][j] * x[i]
            out.append(s)
        return out

    def backward(self, grad_out):
        self.grad_W = []
        for i in range(self.in_dim):
            row = []
            for j in range(self.out_dim):
                row.append(self.x[i] * grad_out[j])
            self.grad_W.append(row)

        self.grad_b = []
        for j in range(self.out_dim):
            self.grad_b.append(grad_out[j])

        grad_in = []
        for i in range(self.in_dim):
            s = 0.0
            for j in range(self.out_dim):
                s += self.W[i][j] * grad_out[j]
            grad_in.append(s)
        return grad_in

SoftMax, SGD

In [18]:
def softmax_cross_entropy_backward(softmax_probs, target_label):
    num_classes = len(softmax_probs)
    grad_scores = []

    for j in range(num_classes):
        if j == target_label:
            grad_scores.append(softmax_probs[j] - 1.0)
        else:
            grad_scores.append(softmax_probs[j])

    return grad_scores

def sgd_update(params, grads, lr):
    if isinstance(params[0], list):
        if isinstance(params[0][0], list):
            for i in range(len(params)):
                for j in range(len(params[i])):
                    for k_dim in range(len(params[i][j])):
                        params[i][j][k_dim] -= lr * grads[i][j][k_dim]
        else:
            for i in range(len(params)):
                for j in range(len(params[i])):
                    params[i][j] -= lr * grads[i][j]
    else:
        for i in range(len(params)):
            params[i] -= lr * grads[i]

CNN ASSEMBLE

In [19]:
class CNNModel:
    def __init__(self, in_channels=23, num_classes=13, hidden_channels=16):
        self.conv1 = Conv1D(in_channels, hidden_channels, kernel_size=3)
        self.relu1 = ReLU()
        self.conv2 = Conv1D(hidden_channels, hidden_channels, kernel_size=3)
        self.relu2 = ReLU()
        self.gap = GAP()
        self.fc = FC(hidden_channels, num_classes)

        self.last_softmax = None
        self.last_scores = None
        self.last_label = None

    def forward(self, x, label):
        out = self.conv1.forward(x)
        out = self.relu1.forward(out)
        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.gap.forward(out)

        scores = self.fc.forward(out)

        probs = softmax(scores)

        loss = -my_log(probs[label])

        self.last_label = label
        self.last_scores = scores
        self.last_softmax = probs

        return loss, probs

    def backward(self):
        label = self.last_label
        probs = self.last_softmax
        scores = self.last_scores

        grad_scores = softmax_cross_entropy_backward(probs, label)
        grad_fc = self.fc.backward(grad_scores)
        grad_gap = self.gap.backward(grad_fc)
        grad_relu2 = self.relu2.backward(grad_gap)
        grad_conv2 = self.conv2.backward(grad_relu2)
        grad_relu1 = self.relu1.backward(grad_conv2)
        grad_conv1 = self.conv1.backward(grad_relu1)

    def step(self, lr):
        sgd_update(self.conv1.W, self.conv1.grad_W, lr)
        sgd_update(self.conv1.b, self.conv1.grad_b, lr)

        sgd_update(self.conv2.W, self.conv2.grad_W, lr)
        sgd_update(self.conv2.b, self.conv2.grad_b, lr)

        sgd_update(self.fc.W, self.fc.grad_W, lr)
        sgd_update(self.fc.b, self.fc.grad_b, lr)

In [20]:
model = CNNModel()

In [21]:
sample_x = Xw[0]
sample_label = yw[0]

loss, probs = model.forward(sample_x, sample_label)
model.backward()
model.step(lr=0.001)

print("loss:", loss)
print("probs:", probs)

loss: 1.6112880895942328
probs: [0.07692308793857167, 0.07692307955698345, 0.07692307324465489, 0.07692306780359363, 0.07692307053065872, 0.07692307630715792, 0.07692308208365757, 0.07692308786015764, 0.0769230855774381, 0.07692307817602541, 0.07692307044788838, 0.07692306816516936, 0.07692307230804304]


In [22]:
def make_batches(Xw, yw, batch_size):
    batches = []
    N = len(Xw)

    i = 0
    while i < N:
        bx = Xw[i:i+batch_size]
        by = yw[i:i+batch_size]
        batches.append((bx, by))
        i += batch_size

    return batches

In [23]:
def split_windows_by_subjects(Xw, yw, sw, test_subjects):
    X_train = []
    y_train = []
    X_test = []
    y_test = []

    for i in range(len(Xw)):
        subj = sw[i]
        if subj in test_subjects:
            X_test.append(Xw[i])
            y_test.append(yw[i])
        else:
            X_train.append(Xw[i])
            y_train.append(yw[i])

    return X_train, y_train, X_test, y_test

In [28]:
def train_one_epoch(model, batches, lr):
    total_loss = 0.0
    count = 0
    total_samples = sum(len(bx) for (bx, by) in batches)

    processed = 0

    for (bx, by) in batches:
        for i in range(len(bx)):
            x = bx[i]
            label = by[i]

            loss, probs = model.forward(x, label)
            model.backward()
            model.step(lr)

            total_loss += loss
            count += 1
            processed += 1

            bar_size = 20
            ratio = processed / total_samples
            filled = int(ratio * bar_size)
            bar = "[" + "=" * filled + ">" + "." * (bar_size - filled) + "]"

            print(
                f"\r {processed}/{total_samples} {bar} {ratio*100:5.1f}%",
                end=""
            )

    print()
    return total_loss / count

def predict(model, x):
    _, probs = model.forward(x, 0)
    best = 0
    best_val = probs[0]
    for i in range(len(probs)):
        if probs[i] > best_val:
            best_val = probs[i]
            best = i
    return best

def evaluate_model(model, Xw_test, yw_test):
    preds = []
    for i in range(len(Xw_test)):
        pred = predict(model, Xw_test[i])
        preds.append(pred)
    return macro_f1_score(yw_test, preds)

In [29]:
def run_5fold_CNN(Xw, yw, sw, epochs=1, batch_size=8, lr=0.001):
    folds = [[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]]
    fold_f1_scores = []

    for fold_idx in range(5):
        test_subjects = folds[fold_idx]

        Xw_train = []
        yw_train = []
        Xw_test = []
        yw_test = []

        for i in range(len(Xw)):
            if sw[i] in test_subjects:
                Xw_test.append(Xw[i])
                yw_test.append(yw[i])
            else:
                Xw_train.append(Xw[i])
                yw_train.append(yw[i])

        print("===== Fold", fold_idx+1, "test subjects =", test_subjects, "=====")
        print("train windows:", len(Xw_train), " | test windows:", len(Xw_test))

        batches = make_batches(Xw_train, yw_train, batch_size)
        model = CNNModel()

        for ep in range(epochs):
            avg_loss = train_one_epoch(model, batches, lr)
            print("Epoch", ep+1, "avg_loss:", avg_loss)

        f1 = evaluate_model(model, Xw_test, yw_test)
        print("Fold F1:", f1)
        fold_f1_scores.append(f1)
    mean_f1 = sum(fold_f1_scores) / len(fold_f1_scores)

    print("====================================")
    print("Fold F1 scores:", fold_f1_scores)
    print("Mean F1:", mean_f1)

    return fold_f1_scores, mean_f1

In [30]:
fold_scores, meanf = run_5fold_CNN(Xw, yw, sw, epochs = 1,batch_size = 8,lr = 0.001)

===== Fold 1 test subjects = [1, 2] =====
train windows: 14435  | test windows: 4560
Epoch 1 avg_loss: nan
Fold F1: 0.8619915148490143
===== Fold 2 test subjects = [3, 4] =====
train windows: 15263  | test windows: 3732
Epoch 1 avg_loss: nan
Fold F1: 0.8264150943396227
===== Fold 3 test subjects = [5, 6] =====
train windows: 15587  | test windows: 3408
Epoch 1 avg_loss: nan
Fold F1: 0.8215767634854771
===== Fold 4 test subjects = [7, 8] =====
train windows: 15347  | test windows: 3648
Epoch 1 avg_loss: nan
Fold F1: 0.8307692307692308
===== Fold 5 test subjects = [9, 10] =====
train windows: 15348  | test windows: 3647
Epoch 1 avg_loss: nan
Fold F1: 0.8299647096567212
Fold F1 scores: [0.8619915148490143, 0.8264150943396227, 0.8215767634854771, 0.8307692307692308, 0.8299647096567212]
Mean F1: 0.8341434626200133
