# EWC

In [44]:
import tensorflow as tf
from tensorflow.keras.optimizers.legacy import Adamax
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.metrics import Mean
from tensorflow.keras.utils import to_categorical

In [45]:
import import_ipynb
import pandas as pd
import numpy as np

In [46]:
from copy import deepcopy

In [47]:
import utils
from utils import *

In [48]:
import Model 
from Model import *

In [49]:
def compute_ewc_penalty(model, fisher_matrix, optimal_weights, lamb):
    """Return the EWC quadratic penalty for the model's current weights.

    Evaluates ``(lamb / 2) * sum_k reduce_sum(F_k * (theta_k - theta_star_k) ** 2)``
    where the three sequences are walked in lockstep.

    Args:
        model: Object exposing ``trainable_weights``.
        fisher_matrix: Per-weight Fisher terms, in the same order as
            ``model.trainable_weights``.
        optimal_weights: Anchor values for each weight, in the same order.
        lamb: Strength of the penalty.

    Returns:
        The scalar penalty term.
    """
    per_weight_terms = (
        tf.reduce_sum(fisher * ((theta - anchor) ** 2))
        for fisher, theta, anchor in zip(fisher_matrix, model.trainable_weights, optimal_weights)
    )
    # Start from the integer 0, exactly like a manual running total would.
    return sum(per_weight_terms, 0) * (lamb / 2)

In [50]:
def ewc_loss(model, fisher_matrix, lamb):
    """Build a loss callable: categorical cross-entropy plus the EWC penalty.

    The model's trainable weights are deep-copied at the moment this factory
    is called; that snapshot is the anchor the penalty pulls towards.

    Args:
        model: Model whose ``trainable_weights`` are regularised.
        fisher_matrix: Per-weight Fisher terms passed on to
            ``compute_ewc_penalty``.
        lamb: Penalty strength passed on to ``compute_ewc_penalty``.

    Returns:
        A function ``loss_fn(y_true, y_pred)`` suitable for ``model.compile``.
    """
    anchor_weights = deepcopy(model.trainable_weights)

    def loss_fn(y_true, y_pred):
        # Predictions are treated as probabilities (from_logits=False).
        cross_entropy_term = CategoricalCrossentropy(from_logits=False)(y_true, y_pred)
        penalty_term = compute_ewc_penalty(model, fisher_matrix, anchor_weights, lamb=lamb)
        return cross_entropy_term + penalty_term

    return loss_fn

In [51]:
def compute_fisher_matrix(model, data, num_sample=10, verbose=True):
    """Estimate the diagonal Fisher information of ``model`` from ``data``.

    For each of ``num_sample`` randomly chosen inputs (drawn without
    replacement) a class is sampled from the model's own predictive
    distribution, the gradient of that class's log-probability with respect to
    every trainable weight is taken, and the squared gradients are averaged.

    Inputs are fed one at a time (batch of size 1) to keep memory usage low.

    Args:
        model: Keras model whose call returns class probabilities.
            Assumes the output for one input has shape ``(1, num_classes)``
            -- TODO confirm for models other than the one used here.
        data: Indexable collection of inputs supporting ``len()`` and
            ``data[i]``.
        num_sample: Number of inputs to use. Clamped to ``len(data)``.
        verbose: When True, print the weight shapes and Fisher statistics.

    Returns:
        A list of tensors, one per entry of ``model.trainable_weights`` and of
        the same shape, holding the mean squared gradient.

    Raises:
        ValueError: If ``data`` is empty or ``num_sample`` is not positive.
    """
    weights = model.trainable_weights

    n_available = len(data)
    if n_available == 0:
        raise ValueError("compute_fisher_matrix needs at least one sample in `data`")
    # Sampling without replacement cannot draw more items than exist.
    n_used = min(int(num_sample), n_available)
    if n_used <= 0:
        raise ValueError("num_sample must be a positive integer")

    if verbose:
        print("🔍 [DEBUG] Initial weights shape:")
        for i, w in enumerate(weights):
            print(f" - Weight {i}: {w.shape}")

    squared_grad_sum = [tf.zeros_like(tensor) for tensor in weights]

    # Randomly pick the inputs used for the estimate.
    indices = np.random.choice(n_available, size=n_used, replace=False)

    for idx in indices:
        x = tf.expand_dims(data[idx], axis=0)

        # Trainable variables are watched by the tape automatically.
        with tf.GradientTape() as tape:
            probs = model(x, training=False)
            # Clip before the log: a probability that underflows to 0 would give
            # -inf and turn every gradient into NaN.
            log_probs = tf.math.log(tf.clip_by_value(probs, 1e-12, 1.0))
            # The Fisher information is an expectation over the model's own
            # predictive distribution, so score ONE sampled class. Differentiating
            # the whole log-probability vector would sum over all classes instead.
            sampled_class = tf.random.categorical(log_probs, num_samples=1)[0, 0]
            log_likelihood = tf.gather(log_probs[0], sampled_class)

        gradients = tape.gradient(log_likelihood, weights)
        # A weight that does not influence the output yields None; it adds nothing.
        squared_grad_sum = [
            acc if grad is None else acc + tf.square(grad)
            for acc, grad in zip(squared_grad_sum, gradients)
        ]

    fisher_matrix = [tensor / n_used for tensor in squared_grad_sum]

    if verbose:
        print("\n✅ [DEBUG] Fisher matrix shapes:")
        for i, f in enumerate(fisher_matrix):
            print(f" - Fisher {i}: {f.shape}, mean={float(tf.reduce_mean(f)):.4f}, std={float(tf.math.reduce_std(f)):.4f}")

    return fisher_matrix

In [52]:
# (Revised) No batching is done inside this function; the dataset is consumed as given.
def evaluate(model, test_set):
    """Return the categorical accuracy of ``model`` over ``test_set``.

    Args:
        model: Model providing ``predict_on_batch``.
        test_set: Iterable yielding ``(inputs, labels)`` pairs.

    Returns:
        The accumulated accuracy as a NumPy scalar.
    """
    accuracy_metric = tf.keras.metrics.CategoricalAccuracy(name='accuracy')
    for inputs, targets in test_set:
        predictions = model.predict_on_batch(inputs)
        accuracy_metric.update_state(targets, predictions)
    return accuracy_metric.result().numpy()

In [53]:

def train_loop(model, OPTIMIZER, data, test_size,
                first_task = 44, inc_task = 5, first_epochs = 30, inc_epochs = 5,
                  lamb=0, num_sample=10):
    """Train ``model`` task by task with an EWC-regularised loss.

    The first task is selected with ``split_by_label(data, 0, first_task)`` and
    trained with plain categorical cross-entropy. Each later task takes the
    next ``inc_task`` labels and is trained with ``ewc_loss``. After each
    incremental task the model is evaluated on the held-out data of all
    *earlier* tasks, and only then is that task's held-out split added to the
    evaluation pool.

    Args:
        model: Keras model to train in place.
        OPTIMIZER: Optimizer instance reused for every ``compile`` call.
        data: Full dataset accepted by the ``split_by_label`` helper.
        test_size: Held-out fraction forwarded to ``split_train_test``.
        first_task: Upper label bound of the first task.
        inc_task: Number of labels added per incremental task.
        first_epochs: Epochs for the first task.
        inc_epochs: Epochs for each incremental task.
        lamb: EWC penalty strength (0 disables the penalty).
        num_sample: Inputs used per Fisher estimate.

    Returns:
        None. Progress and accuracies are printed.
    """

    def _prepare_arrays(frame):
        # Convert a data frame into (N, length, 1) inputs and one-hot labels.
        seq, label = split_data_label(frame)
        seq = np.stack(seq.values)[..., np.newaxis]
        label = to_categorical(label.values, num_classes=MAX_LABEL)
        return seq, label

    first_part = split_by_label(data, 0, first_task)
    train, test = split_train_test(first_part, test_size=test_size, random_state=11)

    fisher_matrix = None
    i = 0
    # NOTE(review): train_loop_joint uses `<=` here; confirm which bound is intended.
    while (first_task + i * inc_task) < MAX_LABEL:

        if i == 0:
            model.compile(loss=CategoricalCrossentropy(from_logits=False), optimizer=OPTIMIZER, metrics=["accuracy"])

            train_seq, train_label = _prepare_arrays(train)

            history = model.fit(x=train_seq, y=train_label, epochs=first_epochs, verbose=1)
            print(f"   Task_0 training accuracy: {history.history['accuracy'][-1]:.4f}")

            # Fisher estimate for the first task.
            fisher_matrix = compute_fisher_matrix(model, train_seq, num_sample=num_sample)

        else:
            # Data for the current incremental task.
            inc_part = split_by_label(data, first_task + (i-1) * inc_task + 1, first_task + i * inc_task )
            train, inc_test = split_train_test(inc_part, test_size=test_size, random_state=11)

            # Debug output: statistics of the weights that ewc_loss is about to
            # snapshot as its anchor (read directly; no extra copy is needed).
            print("🧠 [DEBUG] Optimal weights (after task):")
            for index, w in enumerate(model.trainable_weights):
                print(f" - Weight {index}: {w.shape}, mean={tf.reduce_mean(w):.4f}, std={tf.math.reduce_std(w):.4f}")

            model.compile(loss=ewc_loss(model, fisher_matrix, lamb=lamb), optimizer=OPTIMIZER, metrics=["accuracy"])

            train_seq, train_label = _prepare_arrays(train)

            history = model.fit(x=train_seq, y=train_label, epochs=inc_epochs, verbose=1)
            print(f"   Task_{i} training accuracy: {history.history['accuracy'][-1]:.4f}")

            # Accuracy on the accumulated held-out data of earlier tasks
            # (the key figure for judging how well EWC prevents forgetting).
            test_seq, test_label = _prepare_arrays(test)
            test_ = tf.data.Dataset.from_tensor_slices((test_seq, test_label))
            test_ = test_.batch(32)  # same batch size as the default used by fit

            inc_accuracy = evaluate(model, test_)
            print(f"Task ~{i-1} accuracy after training on Task_{i}: {inc_accuracy:.4f}")

            # Add the current task's held-out split to the evaluation pool.
            test = accumulate_data(test, inc_test)

            # Accumulate Fisher information over all tasks seen so far. The
            # penalty is anchored at the latest weights, so keeping only the
            # latest task's Fisher would leave parameters that matter solely
            # to older tasks unprotected.
            task_fisher = compute_fisher_matrix(model, train_seq, num_sample=num_sample)
            fisher_matrix = [seen + new for seen, new in zip(fisher_matrix, task_fisher)]

        i = i + 1

In [54]:

def train_loop_nonbase(model, OPTIMIZER, data, test_size,
                first_task = 44, inc_task = 5, first_epochs = 30, inc_epochs = 5,
                  lamb=0, num_sample=10):
    """Baseline incremental training with plain cross-entropy (no EWC).

    Tasks are formed exactly as in ``train_loop``: the first task comes from
    ``split_by_label(data, 0, first_task)`` and each later task takes the next
    ``inc_task`` labels. Every task is trained with categorical cross-entropy
    only. After each incremental task the model is evaluated on the held-out
    data of all earlier tasks before that task's held-out split is added.

    Args:
        model: Keras model to train in place.
        OPTIMIZER: Optimizer instance reused for every ``compile`` call.
        data: Full dataset accepted by the ``split_by_label`` helper.
        test_size: Held-out fraction forwarded to ``split_train_test``.
        first_task: Upper label bound of the first task.
        inc_task: Number of labels added per incremental task.
        first_epochs: Epochs for the first task.
        inc_epochs: Epochs for each incremental task.
        lamb: Unused; kept so the signature matches ``train_loop``.
        num_sample: Unused; kept so the signature matches ``train_loop``.

    Returns:
        None. Progress and accuracies are printed.
    """

    def _prepare_arrays(frame):
        # Convert a data frame into (N, length, 1) inputs and one-hot labels.
        seq, label = split_data_label(frame)
        seq = np.stack(seq.values)[..., np.newaxis]
        label = to_categorical(label.values, num_classes=MAX_LABEL)
        return seq, label

    first_part = split_by_label(data, 0, first_task)
    train, test = split_train_test(first_part, test_size=test_size, random_state=11)

    # No Fisher matrix is computed here: the loss never uses one, so estimating
    # it after every task was pure overhead.
    i = 0
    # NOTE(review): train_loop_joint uses `<=` here; confirm which bound is intended.
    while (first_task + i * inc_task) < MAX_LABEL:

        if i == 0:
            model.compile(loss=CategoricalCrossentropy(from_logits=False), optimizer=OPTIMIZER, metrics=["accuracy"])

            train_seq, train_label = _prepare_arrays(train)

            history = model.fit(x=train_seq, y=train_label, epochs=first_epochs, verbose=1)
            print(f"   Task_0 training accuracy: {history.history['accuracy'][-1]:.4f}")

        else:
            # Data for the current incremental task.
            inc_part = split_by_label(data, first_task + (i-1) * inc_task + 1, first_task + i * inc_task )
            train, inc_test = split_train_test(inc_part, test_size=test_size, random_state=11)

            model.compile(loss=CategoricalCrossentropy(from_logits=False), optimizer=OPTIMIZER, metrics=["accuracy"])

            train_seq, train_label = _prepare_arrays(train)

            history = model.fit(x=train_seq, y=train_label, epochs=inc_epochs, verbose=1)
            print(f"   Task_{i} training accuracy: {history.history['accuracy'][-1]:.4f}")

            # Accuracy on the accumulated held-out data of earlier tasks.
            test_seq, test_label = _prepare_arrays(test)
            test_ = tf.data.Dataset.from_tensor_slices((test_seq, test_label))
            test_ = test_.batch(32)  # same batch size as the default used by fit

            inc_accuracy = evaluate(model, test_)
            print(f"Task ~{i-1} accuracy after training on Task_{i}: {inc_accuracy:.4f}")

            # Add the current task's held-out split to the evaluation pool.
            test = accumulate_data(test, inc_test)

        i = i + 1

In [55]:
def train_loop_joint(model, OPTIMIZER, data, test_size,
                first_task = 44, inc_task = 5, first_epochs = 30, inc_epochs = 5,
                  lamb=0, num_sample=10):
    """Joint-training reference: retrain on all data seen so far at every step.

    The first task comes from ``split_by_label(data, 0, first_task)``. For each
    later task the new training split is appended to the accumulated training
    set and the model is trained on the whole of it with categorical
    cross-entropy. After each incremental step the model is evaluated on the
    held-out data of the earlier tasks before the new held-out split is added.

    Args:
        model: Keras model to train in place.
        OPTIMIZER: Optimizer instance reused for every ``compile`` call.
        data: Full dataset accepted by the ``split_by_label`` helper.
        test_size: Held-out fraction forwarded to ``split_train_test``.
        first_task: Upper label bound of the first task.
        inc_task: Number of labels added per incremental task.
        first_epochs: Epochs for the first task.
        inc_epochs: Epochs for each incremental step.
        lamb: Unused; kept so the signature matches ``train_loop``.
        num_sample: Unused; kept so the signature matches ``train_loop``.

    Returns:
        None. Progress and accuracies are printed.
    """

    def _prepare_arrays(frame):
        # Convert a data frame into (N, length, 1) inputs and one-hot labels.
        seq, label = split_data_label(frame)
        seq = np.stack(seq.values)[..., np.newaxis]
        label = to_categorical(label.values, num_classes=MAX_LABEL)
        return seq, label

    first_part = split_by_label(data, 0, first_task)
    train, test = split_train_test(first_part, test_size=test_size, random_state=11)

    # No Fisher matrix is computed here: the loss never uses one, so estimating
    # it after every step was pure overhead.
    i = 0
    # NOTE(review): the other loops use `<` here; confirm which bound is intended.
    while (first_task + i * inc_task) <= MAX_LABEL:

        if i == 0:
            model.compile(loss=CategoricalCrossentropy(from_logits=False), optimizer=OPTIMIZER, metrics=["accuracy"])

            train_seq, train_label = _prepare_arrays(train)

            history = model.fit(x=train_seq, y=train_label, epochs=first_epochs, verbose=2)
            print(f"   First_task training accuracy: {history.history['accuracy'][-1]:.4f}")

        else:
            # Data for the current incremental task, appended to everything seen so far.
            inc_part = split_by_label(data, first_task + (i-1) * inc_task + 1, first_task + i * inc_task )
            inc_train, inc_test = split_train_test(inc_part, test_size=test_size, random_state=11)
            train = accumulate_data(train, inc_train)

            model.compile(loss=CategoricalCrossentropy(from_logits=False), optimizer=OPTIMIZER, metrics=["accuracy"])

            train_seq, train_label = _prepare_arrays(train)

            history = model.fit(x=train_seq, y=train_label, epochs=inc_epochs, verbose=2)
            print(f"   {i}_task training accuracy: {history.history['accuracy'][-1]:.4f}")

            # Accuracy on the accumulated held-out data of earlier tasks.
            test_seq, test_label = _prepare_arrays(test)
            test_ = tf.data.Dataset.from_tensor_slices((test_seq, test_label))
            test_ = test_.batch(32)  # same batch size as the default used by fit

            inc_accuracy = evaluate(model, test_)
            # At this point the model has been trained on tasks up to i, while
            # `test` still holds only tasks up to i-1; the message states exactly that.
            print(f"Task ~{i-1} accuracy after training on Task ~{i}: {inc_accuracy:.4f}")

            # Add the current task's held-out split to the evaluation pool.
            test = accumulate_data(test, inc_test)

        i = i + 1

# test

In [56]:
# Module-level constant read by the training loops (one-hot width and loop bound).
MAX_LABEL = 95

# NOTE(review): unpickling can execute arbitrary code; only load this file from a trusted source.
data = pd.read_pickle('mon_data.pkl')
print(data.shape)

(19000, 2)


In [57]:
# Build the model
model = DFNet.build(input_shape=(10000, 1), classes=MAX_LABEL)
# Configure the optimizer (`learning_rate` replaces the deprecated `lr` alias, which emits a warning)
OPTIMIZER = Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
train_loop(model, OPTIMIZER, data, test_size=0.2, first_task = 39, inc_task = 10, first_epochs = 50, inc_epochs = 10, lamb=0.1, num_sample=100)

Epoch 1/50


  super().__init__(name, **kwargs)


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
   Task_0 training accuracy: 0.9505
🔍 [DEBUG] Initial weights shape:
 - Weight 0: (8, 1, 32)
 - Weight 1: (32,)
 - Weight 2: (32,)
 - Weight 3: (32,)
 - Weight 4: (8, 32, 32)
 - Weight 5: (32,)
 - Weight 6: (32,)
 - Weight 7: (32,)
 - Weight 8: (8, 32, 64)
 - Weight 9: (64,)
 - Weight 10: (64,)
 - Weight 11: (64,)
 - Weight 12: (8, 64, 64)
 - Weight 13: (64,)
 - Weight 14: (64,)
 - Weight 15: (64,)
 - Weight 16: (8, 

In [15]:
# Build the model
model = DFNet.build(input_shape=(10000, 1), classes=MAX_LABEL)
# Configure the optimizer (`learning_rate` replaces the deprecated `lr` alias, which emits a warning)
OPTIMIZER = Adamax(learning_rate=0.0002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
train_loop_nonbase(model, OPTIMIZER, data, test_size=0.2, first_task = 39, inc_task = 10, first_epochs = 50, inc_epochs = 10, lamb=0, num_sample=50)

Epoch 1/50


  super().__init__(name, **kwargs)


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
   Task_0 training accuracy: 0.7664
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
   Task_1 training accuracy: 0.0000
Task ~0 accuracy after training on Task_1: 0.0456
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
   Task_2 training accuracy: 0.0000
Task ~1 accuracy after training on Task_

In [16]:
# Joint
# Build the model
model = DFNet.build(input_shape=(10000, 1), classes=MAX_LABEL)
# Configure the optimizer (`learning_rate` replaces the deprecated `lr` alias, which emits a warning)
OPTIMIZER = Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
train_loop_joint(model, OPTIMIZER, data, test_size=0.2, first_task = 39, inc_task = 10, first_epochs = 50, inc_epochs = 10, lamb=0, num_sample=100)

Epoch 1/50


  super().__init__(name, **kwargs)


200/200 - 14s - loss: 3.8412 - accuracy: 0.0864 - 14s/epoch - 71ms/step
Epoch 2/50
200/200 - 12s - loss: 3.2819 - accuracy: 0.1431 - 12s/epoch - 58ms/step
Epoch 3/50
200/200 - 11s - loss: 3.0243 - accuracy: 0.1838 - 11s/epoch - 57ms/step
Epoch 4/50
200/200 - 11s - loss: 2.7955 - accuracy: 0.2414 - 11s/epoch - 57ms/step
Epoch 5/50
200/200 - 11s - loss: 2.6375 - accuracy: 0.2722 - 11s/epoch - 57ms/step
Epoch 6/50
200/200 - 11s - loss: 2.4461 - accuracy: 0.3209 - 11s/epoch - 57ms/step
Epoch 7/50
200/200 - 11s - loss: 2.2863 - accuracy: 0.3636 - 11s/epoch - 57ms/step
Epoch 8/50
200/200 - 11s - loss: 2.1523 - accuracy: 0.4011 - 11s/epoch - 57ms/step
Epoch 9/50
200/200 - 11s - loss: 2.0190 - accuracy: 0.4259 - 11s/epoch - 57ms/step
Epoch 10/50
200/200 - 11s - loss: 1.8516 - accuracy: 0.4811 - 11s/epoch - 57ms/step
Epoch 11/50
200/200 - 11s - loss: 1.7139 - accuracy: 0.5219 - 11s/epoch - 57ms/step
Epoch 12/50
200/200 - 11s - loss: 1.5980 - accuracy: 0.5545 - 11s/epoch - 57ms/step
Epoch 13/50


In [17]:
# Nonbase: runs the EWC loop with lamb=0, so the penalty term is multiplied by zero
from tensorflow.keras.optimizers.legacy import Adam

# Build the model
model = DFNet.build(input_shape=(10000, 1), classes=MAX_LABEL)
# Configure the optimizer (`learning_rate` replaces the deprecated `lr` alias, which emits a warning)
OPTIMIZER = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
train_loop(model, OPTIMIZER, data, test_size=0.2, first_task = 19, inc_task = 5, first_epochs = 50, inc_epochs = 10, lamb=0, num_sample=100)

Epoch 1/50


  super().__init__(name, **kwargs)


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
   Task_0 training accuracy: 0.9606
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
   Task_1 training accuracy: 0.9100
Task ~0 accuracy after training on Task_1: 0.0000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
   Task_2 training accuracy: 0.7912
Task ~1 accuracy after training on Task_

In [18]:
# Joint
from tensorflow.keras.optimizers.legacy import Adam

# Build the model
model = DFNet.build(input_shape=(10000, 1), classes=MAX_LABEL)
# Configure the optimizer (`learning_rate` replaces the deprecated `lr` alias, which emits a warning)
OPTIMIZER = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
train_loop_joint(model, OPTIMIZER, data, test_size=0.2, first_task = 19, inc_task = 5, first_epochs = 50, inc_epochs = 10, lamb=0, num_sample=100)

Epoch 1/50


  super().__init__(name, **kwargs)


100/100 - 8s - loss: 3.1071 - accuracy: 0.1534 - 8s/epoch - 81ms/step
Epoch 2/50
100/100 - 6s - loss: 2.4469 - accuracy: 0.2609 - 6s/epoch - 56ms/step
Epoch 3/50
100/100 - 6s - loss: 2.1326 - accuracy: 0.3375 - 6s/epoch - 55ms/step
Epoch 4/50
100/100 - 6s - loss: 1.8926 - accuracy: 0.4131 - 6s/epoch - 55ms/step
Epoch 5/50
100/100 - 6s - loss: 1.7309 - accuracy: 0.4731 - 6s/epoch - 56ms/step
Epoch 6/50
100/100 - 6s - loss: 1.5253 - accuracy: 0.5272 - 6s/epoch - 56ms/step
Epoch 7/50
100/100 - 6s - loss: 1.3574 - accuracy: 0.5791 - 6s/epoch - 56ms/step
Epoch 8/50
100/100 - 6s - loss: 1.2562 - accuracy: 0.6072 - 6s/epoch - 56ms/step
Epoch 9/50
100/100 - 6s - loss: 1.1152 - accuracy: 0.6594 - 6s/epoch - 56ms/step
Epoch 10/50
100/100 - 6s - loss: 1.0259 - accuracy: 0.6869 - 6s/epoch - 56ms/step
Epoch 11/50
100/100 - 6s - loss: 0.9367 - accuracy: 0.7081 - 6s/epoch - 56ms/step
Epoch 12/50
100/100 - 6s - loss: 0.8603 - accuracy: 0.7350 - 6s/epoch - 56ms/step
Epoch 13/50
100/100 - 6s - loss: 0.7