# Sample of Semi-supervised Learning Methods

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
SAMPLE_X = torch.rand(8, 1, 32)
SAMPLE_Y = torch.randint(2, [8])
EPOCH = 20
MASK = torch.tensor([True, True, False, False, False, False, False, False])


In [4]:
class CuteModel(nn.Module):
    def __init__(self):
        super(CuteModel, self).__init__()
        self.enc_layer1 = nn.Conv1d(1, 8, kernel_size=5, stride=2)
        self.enc_layer2 = nn.Conv1d(8, 8, kernel_size=5, stride=2)
        
        self.decoder = nn.Linear(8, 6)
        
    def forward(self, x):
        x = self.enc_layer1(x)
        x = self.enc_layer2(x)
        
        x = x.mean(dim=-1)
        
        x = self.decoder(x)
        return x

## 1. Pseudo-Labeling
either on probability or cross entropy
temporary vs permanently
threshold

here

In [5]:
model = CuteModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

THRES = 0.25  # this is very low value for tutorial. You should use higher value(0.7~)

for e in range(EPOCH):
    p = model(SAMPLE_X)
    pred = torch.argmax(p, dim=1)
    
    prob = F.softmax(p, dim=-1)
    PSEUDO = (~MASK) & (prob.max(dim=-1)[0]>THRES)
    print(PSEUDO)
    p_pseudo = torch.cat([p[MASK], p[PSEUDO]], dim=0)
    y_pseudo = torch.cat([SAMPLE_Y[MASK], pred[PSEUDO]], dim=0)
    
    if PSEUDO.any() or M.any():
        loss = nn.CrossEntropyLoss()(p_pseudo, y_pseudo)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, False, False, False, False, False, False])
tensor([False, False, Fal

## 2. Entropy Minimization

In [6]:
def HLoss(x):
    b = F.softmax(x, dim=1) * F.log_softmax(x, dim=1)
    b = -1.0 * b.mean()
    return b

In [7]:
model = CuteModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

BETA = 10

for e in range(EPOCH):
    p = model(SAMPLE_X)
    pred = torch.argmax(p, dim=1)
    
    class_loss = nn.CrossEntropyLoss()(p[MASK], SAMPLE_Y[MASK])
    entropy_loss = HLoss(p[~MASK])
    
    loss = class_loss + BETA*entropy_loss
    print('[total] : %.5f,\t[CrossEntropy] : %.5f,\t[Entropy] : %.5f'%(loss.item(), class_loss.item(), entropy_loss.item()))
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

[total] : 4.88817,	[CrossEntropy] : 1.97144,	[Entropy] : 0.29167
[total] : 4.87417,	[CrossEntropy] : 1.95788,	[Entropy] : 0.29163
[total] : 4.86012,	[CrossEntropy] : 1.94452,	[Entropy] : 0.29156
[total] : 4.84598,	[CrossEntropy] : 1.93134,	[Entropy] : 0.29146
[total] : 4.83171,	[CrossEntropy] : 1.91829,	[Entropy] : 0.29134
[total] : 4.81731,	[CrossEntropy] : 1.90534,	[Entropy] : 0.29120
[total] : 4.80271,	[CrossEntropy] : 1.89246,	[Entropy] : 0.29103
[total] : 4.78790,	[CrossEntropy] : 1.87960,	[Entropy] : 0.29083
[total] : 4.77283,	[CrossEntropy] : 1.86675,	[Entropy] : 0.29061
[total] : 4.75747,	[CrossEntropy] : 1.85386,	[Entropy] : 0.29036
[total] : 4.74179,	[CrossEntropy] : 1.84092,	[Entropy] : 0.29009
[total] : 4.72577,	[CrossEntropy] : 1.82791,	[Entropy] : 0.28979
[total] : 4.70935,	[CrossEntropy] : 1.81479,	[Entropy] : 0.28946
[total] : 4.69252,	[CrossEntropy] : 1.80156,	[Entropy] : 0.28910
[total] : 4.67524,	[CrossEntropy] : 1.78817,	[Entropy] : 0.28871
[total] : 4.65748,	[Cross

## 3. Consistency Regularization
you can give perturbation by temporal shift, adding noise, adversarial training, etc.
It is important that the perturbation should be realistic

In [8]:
model = CuteModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

BETA = 500

for e in range(EPOCH):
    pp = torch.randint(1,16,[1])
    X_original = SAMPLE_X[:,:,:16]        # here only used the half of the input
    X_perturbed = SAMPLE_X[:,:,pp:pp+16]  # perturbation : shifting
    
    p_original = model(X_original)
    p_perturbed = model(X_perturbed)
    
    class_loss = nn.CrossEntropyLoss()(p_original[MASK], SAMPLE_Y[MASK])
    dist_loss = nn.MSELoss()(p_original[~MASK], p_perturbed[~MASK])
    
    loss = class_loss + BETA*dist_loss
    print('[total] : %.5f,\t[CrossEntropy] : %.5f,\t[Distance] : %.5f'%(loss.item(), class_loss.item(), dist_loss.item()))
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

[total] : 6.10509,	[CrossEntropy] : 1.60278,	[Distance] : 0.00900
[total] : 5.05081,	[CrossEntropy] : 1.60062,	[Distance] : 0.00690
[total] : 5.79912,	[CrossEntropy] : 1.59822,	[Distance] : 0.00840
[total] : 4.26081,	[CrossEntropy] : 1.59619,	[Distance] : 0.00533
[total] : 3.30770,	[CrossEntropy] : 1.59389,	[Distance] : 0.00343
[total] : 3.77460,	[CrossEntropy] : 1.59199,	[Distance] : 0.00437
[total] : 6.87218,	[CrossEntropy] : 1.59002,	[Distance] : 0.01056
[total] : 5.72307,	[CrossEntropy] : 1.58821,	[Distance] : 0.00827
[total] : 4.98811,	[CrossEntropy] : 1.58618,	[Distance] : 0.00680
[total] : 3.13149,	[CrossEntropy] : 1.58424,	[Distance] : 0.00309
[total] : 4.53216,	[CrossEntropy] : 1.58231,	[Distance] : 0.00590
[total] : 4.53355,	[CrossEntropy] : 1.58044,	[Distance] : 0.00591
[total] : 3.19068,	[CrossEntropy] : 1.57846,	[Distance] : 0.00322
[total] : 3.91504,	[CrossEntropy] : 1.57644,	[Distance] : 0.00468
[total] : 3.52021,	[CrossEntropy] : 1.57452,	[Distance] : 0.00389
[total] : 