In [1]:
import numpy as np
from scipy.stats import bernoulli
import tqdm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Problem parameters shared by every cell below.
N = 1000  # number of data points arriving per experiment
J = 50    # number of experiments (batches of N points)
numfeat = 50  # number of features per class
C = 2 # number of classes
total_seed = 10 # number of independent runs; seeds 0 .. total_seed-1 are used

# The model learns a feature once it is active in at least `gamma` of the
# data points that still have a non-zero gradient.
gamma = 50

# A data point gets zero gradient (is counted as learned) once at least `tau`
# of the features active in it have already been learned by the model.
tau = 3

In [None]:
# construct train data
def get_train_data(N, J, C, numfeat, seed=0):
    """Sample the binary training set for every class.

    Parameters
    ----------
    N, J : int
        Points per experiment and number of experiments; each class receives
        ``int(N * J / C)`` points.
    C : int
        Number of classes.
    numfeat : int
        Number of features per class.
    seed : int
        Seed for the global NumPy RNG (reset on every call).

    Returns
    -------
    X : ndarray of shape (C, numfeat, int(N * J / C))
        0/1 feature indicators; ``X[c, :, i]`` is the i-th point of class c.
    p : list of C ndarrays of shape (numfeat,)
        Per-class feature probabilities, drawn uniformly from [0, 0.2) and
        sorted in descending order.
    """
    np.random.seed(seed)
    per_class = int(N * J * 1/C)
    X = np.zeros((C, numfeat, per_class))

    p = []
    for cls in range(C):
        # Draw the class' feature probabilities, most frequent feature first.
        feat_prob = np.sort(0.2 * np.random.rand(numfeat))[::-1]
        p.append(feat_prob)
        # One Bernoulli draw per feature for each point of this class.
        for col in range(per_class):
            X[cls, :, col] = bernoulli.rvs(feat_prob)

    return X, p

def get_sort_indexes(arr):
    """Return the positions of ``arr`` ordered from largest to smallest value.

    Equal values keep their original relative order (``sorted`` is stable,
    also with ``reverse=True``).
    """
    ranked = sorted(enumerate(arr), key=lambda pair: pair[1], reverse=True)
    return [position for position, _ in ranked]

# construct test data
def get_test_data(data_num, C, numfeat, prob_lst, seed=0):
    """Sample a binary test set from the given per-class feature probabilities.

    Parameters
    ----------
    data_num : int
        Total number of test points; each class receives ``data_num // C``.
    C : int
        Number of classes.
    numfeat : int
        Number of features per class.
    prob_lst : sequence of C array-likes of length numfeat
        ``prob_lst[c]`` holds the Bernoulli probability of every feature of
        class c.
    seed : int
        The global NumPy RNG is reset to ``seed * 100 + 100``.

    Returns
    -------
    ndarray of shape (C, numfeat, data_num // C) with 0/1 entries.
    """
    np.random.seed(100 * (seed + 1))

    per_class = data_num // C
    X = np.zeros((C, numfeat, per_class))
    for cls in range(C):
        for col in range(per_class):
            X[cls, :, col] = bernoulli.rvs(prob_lst[cls])

    return X

# Calculate Test accuracy
def cal_testacc(test_data, learn_lst, C, data_num=10000, tau=2):
    """Estimate test accuracy from the set of learned features.

    A test point of class c is counted as correct when at least ``tau`` of
    its active features are marked in ``learn_lst[c]``; otherwise it is
    scored by a random guess, i.e. a Bernoulli(1/C) draw from the global
    NumPy RNG.

    Parameters
    ----------
    test_data : ndarray of shape (C, numfeat, n_per_class)
    learn_lst : ndarray of shape (C, numfeat)
        1 where the feature has been learned, 0 otherwise.
    C : int
        Number of classes.
    data_num : int
        Denominator of the accuracy. It is used as given and is not derived
        from ``test_data``, so it must match the number of test points.
    tau : int
        Minimum number of learned active features for a sure hit.

    Returns
    -------
    Number of correct points divided by ``data_num``.
    """
    correct = 0
    for cls in range(C):
        learned_mask = learn_lst[cls]
        for sample in test_data[cls].T:
            learned_hits = np.sum(sample * learned_mask)
            # Sure hit when enough learned features fire, else a coin flip.
            correct += 1 if learned_hits >= tau else bernoulli.rvs(1/C)
    return correct / data_num

In [None]:
# Slots along the last axis of the result arrays, one per training method.
RANDOM, WARM, IDEAL = 0, 1, 2
METHOD_LABELS = ('Random', 'Warm', 'Ideal')

Learn_F = np.zeros((total_seed, J, C, numfeat, 3))  # features learned for the 3 methods
Learn_X = np.zeros((total_seed, J, C, int(N * (J+1) * 1/C), 3))    # data points learned for the 3 methods
Memorize_X = np.zeros((total_seed, J, C, int(N * (J+1) * 1/C), 3)) # data points memorized for the 3 methods
Nzero_X = np.zeros((total_seed, J, 3))           # number of non-zero gradient data points for the 3 methods
prob_lst = np.zeros((total_seed, C, numfeat)) # save prob_lst to sample test_data from same distribution

# The three methods share one training loop and differ only in
#   * the pool of candidate points at the start of experiment j
#       Random / Ideal: every point seen so far, Warm: only the new batch;
#   * what is inherited from experiment j-1
#       Random: nothing, Warm: learned features, learned points and memorized
#       points, Ideal: learned features and learned points only;
#   * how the pool size at the start of the experiment is recorded
#       Random: N * (j+1), Warm / Ideal: pool size after removing the points
#       already explained by the inherited features.
for seed in tqdm.tqdm(range(total_seed)):
    # One data stream per seed, shared by all three methods.
    X, p = get_train_data(N, J, C, numfeat, seed)
    prob_lst[seed] = p

    for method, label in enumerate(METHOD_LABELS):
        for j in range(J):
            # Views into the result arrays: writes below land in Learn_F / Learn_X.
            learn_f = Learn_F[seed, j, :, :, method]   # (C, numfeat)
            learn_x = Learn_X[seed, j, :, :, method]   # (C, points per class)

            # Per-class list of point indices that still have a non-zero gradient.
            N_c = int(N * (j+1) * 1/C)
            first_idx = int(N * j * 1/C) if method == WARM else 0
            nzero_idx = np.tile(np.arange(first_idx, N_c), (C, 1)).tolist()

            # Convey the previous experiment's information.
            if j >= 1 and method != RANDOM:
                learn_f[...] = Learn_F[seed, j-1, :, :, method]
                learn_x[...] = Learn_X[seed, j-1, :, :, method]
                if method == WARM:
                    Memorize_X[seed, j, :, :, method] = Memorize_X[seed, j-1, :, :, method]

            step = 0
            while True:
                # Make zero gradient: a point with at least `tau` learned active
                # features is marked as learned and leaves the pool. Random starts
                # every experiment with no learned features, so it skips this on
                # the first pass; afterwards it runs once per newly learned feature.
                if step > 0 or method != RANDOM:
                    for c in range(C):
                        for i in nzero_idx[c]:
                            active_feats = np.where(X[c, :, i] == 1)[0]
                            if np.sum(learn_f[c, active_feats]) >= tau:
                                learn_x[c, i] = 1

                        learn_idx = np.where(learn_x[c] == 1)[0]
                        nzero_idx[c] = list(set(nzero_idx[c]) - set(learn_idx))

                # Record the pool size at the start of the experiment.
                if step == 0:
                    if method == RANDOM:
                        Nzero_X[seed, j, method] = N * (j+1)
                    else:
                        Nzero_X[seed, j, method] = sum(len(pool) for pool in nzero_idx)
                step += 1

                # Occurrences of every (class, feature) among the remaining points.
                fcount = np.zeros((C, numfeat))
                for c in range(C):
                    fcount[c] = np.sum(X[c, :, nzero_idx[c]], axis=0)

                # Stop once no unlearned feature is frequent enough to be learned.
                if np.all(fcount[learn_f == 0] < gamma):
                    break

                # Learn the most frequent unlearned feature, then recount, because
                # learning one feature changes the pool.
                for flat in get_sort_indexes(fcount.flatten()):
                    feat = (flat // numfeat, flat % numfeat)   # (class, feature)
                    if learn_f[feat] == 1:
                        continue
                    if fcount[feat] >= gamma:
                        learn_f[feat] = 1
                        break

            # Whatever is left in the pool could not be explained by learned
            # features and is recorded as memorized.
            for c in range(C):
                Memorize_X[seed, j, c, nzero_idx[c], method] = 1

            print(f'\nX at start of the {j+1}th experiment ({label}):', int(Nzero_X[seed, j, method]))
            print(f'Learned Features at {j+1}th experiment ({label}):', np.sum(learn_f == 1))
            print(f'Learned X at {j+1}th experiment ({label}):', np.sum(learn_x == 1))
            print(f'Memorized X at {j+1}th experiment ({label}):', np.sum(Memorize_X[seed, j, :, :, method] == 1))

In [None]:
# Calculate test accuracy for each method

TEST_DATA_NUM = 10000  # total number of test points, split evenly over the C classes

test_acc = np.zeros((total_seed, J, 3))
for seed in tqdm.tqdm(range(total_seed)):
    # The last argument of get_test_data is the RNG seed. Pass the run's own
    # seed (the original passed C here, which gave every run the same random
    # stream) so that each run is evaluated on an independently drawn test set
    # from its own feature probabilities.
    test_data = get_test_data(TEST_DATA_NUM, C, numfeat, prob_lst[seed], seed)
    for method in range(3):
        for j in range(J):
            # Learn_F[seed, j, :, :, method] is the (C, numfeat) learned-feature
            # mask of this method after experiment j.
            test_acc[seed, j, method] = cal_testacc(test_data, Learn_F[seed, j, :, :, method], C, TEST_DATA_NUM, tau)

In [None]:
# Plot Figure

# Line colour per method; the order matches the last axis of the result arrays.
METHOD_COLORS = {'Random': 'green', 'Warm': 'navy', 'Ideal': 'red'}


def plot_method_curves(axis, per_seed, ylabel, ylabel_fontsize, show_legend=False):
    """Plot mean +/- one standard deviation over seeds for the three methods.

    Parameters
    ----------
    axis : matplotlib Axes
        Panel to draw on.
    per_seed : ndarray of shape (n_seeds, n_experiments, 3)
        Metric per seed and experiment; last axis ordered Random, Warm, Ideal.
    ylabel : str
        Y-axis label (also used as the value column name of the plotted frame).
    ylabel_fontsize : int
        Font size of the y-axis label.
    show_legend : bool
        Draw an untitled method legend on this panel.
    """
    mean = per_seed.mean(axis=0)
    std = per_seed.std(axis=0)
    experiments = np.arange(1, mean.shape[0] + 1)

    # Long format: one row per (experiment, method) for seaborn's `hue`.
    curves = (
        pd.DataFrame(mean, columns=list(METHOD_COLORS))
        .assign(**{'Number of experiments': experiments})
        .melt(id_vars='Number of experiments', var_name='Method', value_name=ylabel)
    )
    sns.lineplot(data=curves, x='Number of experiments', y=ylabel, hue='Method', ax=axis,
                 legend='full' if show_legend else False, palette=list(METHOD_COLORS.values()))

    # Shaded band of one standard deviation around each mean curve.
    for k, color in enumerate(METHOD_COLORS.values()):
        axis.fill_between(experiments, mean[:, k] - std[:, k], mean[:, k] + std[:, k],
                          alpha=0.1, color=color)

    axis.set_xlabel('Number of Experiments', fontsize=20)
    axis.set_ylabel(ylabel, fontsize=ylabel_fontsize)

    if show_legend:
        legend = axis.get_legend()
        legend.set_title('')
        plt.setp(legend.get_texts(), fontsize=20)


fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# Learned features per (seed, experiment, method), summed over classes and features.
learned_features = np.sum(Learn_F == 1, axis=(2, 3))

plot_method_curves(ax[0], test_acc * 100, 'Test Accuracy (%)', 20, show_legend=True)
plot_method_curves(ax[1], learned_features, 'Number of Total Learned Features', 15)
plot_method_curves(ax[2], Nzero_X, 'Number of Non-Zero Gradient Data', 15)

plt.tight_layout()

plt.show()