## 実験4 : $p_l$ に対する $p_u$ の割合による性能

@fig_test_with_various_labeled に示すように，ラベル付きデータがテストデータの20%以上のときにエラー率が低くなるので，ラベル付きデータの割合を20%に固定する．

その上で，ラベル無しデータのラベル付きデータに対する割合を0%から50%刻みで400%（$\frac{100-20}{20}\times 100$）まで変えてみる．結果を @fig_test_with_various_unlabeled に示す．

$p_u=0$ のときは，半教師あり学習ではなく，教師あり学習のC-SVMに等しい．


In [None]:
#(5)
#| echo: false
def counter2hms(c):
    """Split a duration in seconds into whole hours, minutes and seconds.

    Args:
        c: Duration in seconds (int or float).

    Returns:
        Tuple ``(hours, minutes, seconds)`` with each component truncated
        to ``int``.
    """
    hours, remainder = divmod(c, 60 * 60)
    minutes, seconds = divmod(remainder, 60)
    return int(hours), int(minutes), int(seconds)

In [None]:
#(9)
#| label: fig_test_with_various_unlabeled_OvO
#| fig-cap: "Testing with Various Unlabeled Data Proportion (OvO)"
#| output: true
from qns3vm import QN_S3VM_OVO
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import random
from matplotlib import style
from matplotlib import pyplot as plt
style.use(['science','ieee'])
from time import sleep, perf_counter
from s3vm_pines.module import train_test_split3, labeled_unlabeled_sample, colored_map
from IndianPines import load, recategorize17to10_csv

# Experiment 4: sweep the ratio of unlabeled to labeled training data while
# the labeled proportion stays fixed, train a QN_S3VM_OVO classifier for each
# ratio, save per-ratio map figures and finally plot accuracy / F1 vs. ratio.
#
# NOTE(review): `np` and `classification_error` are used below but are not
# imported in this cell; presumably an earlier cell provides them -- confirm.
pines = load(pca=20,recategorize_rule=recategorize17to10_csv)

train_test_status, train_test_status_name = train_test_split3()

rg = random.Random()

# Labeled proportion is fixed; the unlabeled amount is expressed as a
# multiple of it.
prop_train_l = 0.2
# NOTE(review): np.arange excludes the stop value, so with prop_train_l = 0.2
# the sweep is expected to be 0.5, 1.0, ..., 3.5 (neither 0 nor 4.0 is
# included) -- confirm this matches the intended 0%..400% range.
prop_train_u_list = np.arange(50,(1-prop_train_l)/(prop_train_l)*100,50)/100

seed_l = None
seed_u = None
lam = 0.008765625
lamU = 1e-7 # close to 0 means (almost) fully supervised

p_labeled = prop_train_l

# Per-ratio metrics, appended in the same order as prop_train_u_list.
acc = list()
err = list()
f1 = list()
# One figure per ratio, three axes per figure (labeled / unlabeled / predicted).
fig8 = [0 for _ in prop_train_u_list]
ax8 = [0 for _ in prop_train_u_list] * 3
t_start = perf_counter()
t_old = t_start
for i, prop_train_u in enumerate(prop_train_u_list):
    print('='*3 + f'{prop_train_u = }' + '='*3)
    p_unlabeled = p_labeled * prop_train_u
    l_u_t_conf = dict(
        p_labeled = p_labeled,
        p_unlabeled = p_unlabeled,
        train_test_status = train_test_status,
        seed_labeled = seed_l,
        seed_unlabeled = seed_u
    )
    l_u_t_status, l_u_t_status_name = labeled_unlabeled_sample(**l_u_t_conf)

    # Report how many samples fall into each status code.
    for j, s in enumerate(l_u_t_status_name):
        n = l_u_t_status[l_u_t_status==j].shape[0]
        print(f'{j}:{s}:{n}')

    # Status codes as used here: 3 = labeled train, 4 = unlabeled train, 1 = test.
    X_train_l = pines.features[l_u_t_status==3].tolist()
    L_train_l = pines.target[l_u_t_status==3].tolist()
    X_train_u = pines.features[l_u_t_status==4].tolist()
    L_train_u = pines.target[l_u_t_status==4].tolist()
    X_test = pines.features[l_u_t_status==1].tolist()
    L_test = pines.target[l_u_t_status==1].tolist()

    fig8[i] = plt.figure(figsize=(6.4*2,4.8*4), constrained_layout=True)

    ax8[3*i+0] = fig8[i].add_subplot(4,2,1)
    colored_map(ax8[3*i+0], pines.target[l_u_t_status==3], pines.coordinates[l_u_t_status==3], recategorize_rule=recategorize17to10_csv, gt_gic=False)
    ax8[3*i+0].set_title(f'labeled train area {prop_train_u =}')

    ax8[3*i+1] = fig8[i].add_subplot(4,2,3)
    colored_map(ax8[3*i+1], pines.target[l_u_t_status==4], pines.coordinates[l_u_t_status==4], recategorize_rule=recategorize17to10_csv, gt_gic=False)
    ax8[3*i+1].set_title(f'unlabeled train area {prop_train_u =}')

    # Pass the unlabeled-loss weight defined above (`lamU`).
    clf = QN_S3VM_OVO(X_train_l, L_train_l, X_train_u, random_generator=rg, lam=lam, lamU=lamU)
    clf.train()
    preds = clf.predict(X_test)

    ax8[3*i+2] = fig8[i].add_subplot(2,1,2)
    colored_map(ax8[3*i+2], preds, pines.coordinates[l_u_t_status==1], recategorize_rule=recategorize17to10_csv, gt_gic=False)
    # Title goes on the prediction axes, not on the unlabeled-area axes.
    ax8[3*i+2].set_title(f'predicted label for test data {prop_train_u =}')

    fig8[i].tight_layout(rect=[0,0,1,0.96])
    fig8name=f'kotaro_exp04_fig08_{i:0d}.png'
    fig8[i].savefig(fig8name)

    err_ = classification_error(preds,L_test)
    err.append(err_)
    acc_ = accuracy_score(L_test, preds)
    # Store the scalar score (not the list itself) so it can be plotted.
    acc.append(acc_)
    f1_ = f1_score(L_test, preds, average="micro")
    f1.append(f1_)

    # Progress report: this lap's duration, time elapsed before this lap,
    # and an estimate of the remaining time based on the average lap.
    t_now = perf_counter()
    t_lap = t_now-t_old
    t_ave = (t_now-t_start)/(i+1)
    t_rest = t_ave * (len(prop_train_u_list)-(i+1))
    # perf_counter() has an undefined reference point, so only differences
    # are meaningful; report elapsed time relative to t_start.
    ho,mo,so = counter2hms(t_old-t_start)
    hl,ml,sl = counter2hms(t_lap)
    hr,mr,sr = counter2hms(t_rest)
    print(f'lap {i+1} | {hl:2d}:{ml:2d}:{sl:2d} + {ho:2d}:{mo:2d}:{so:2d}, est {hr:2d}:{mr:2d}:{sr:2d}')
    t_old = t_now

# Summary plot: accuracy and micro-F1 against the unlabeled/labeled ratio.
# NOTE(review): the axis labels say percent, but the plotted x values are
# ratios and the scores are fractions -- confirm the intended units.
fig9 = plt.figure()
ax9 = fig9.add_subplot(1,1,1)
ax9.plot(prop_train_u_list, acc, 'o', label='accuracy score')
ax9.plot(prop_train_u_list, f1, 's--', label='f1 score')
ax9.legend()
ax9.set_xlabel(r'Amount of Unlabeled Data [%%]')
ax9.set_ylabel(r'Accuracy Score [%%]')
ax9.set_title('Testing with Various Unlabeled Data Proportion')
fig9.tight_layout(rect=[0,0,1,0.96])
fig9.savefig("kotaro_exp04_fig09.png")