In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import math
import copy
import scipy.io as scio
from PIL import Image
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.semi_supervised import LabelPropagation, LabelSpreading

In [None]:
# Global switch between CPU and CUDA execution; `device` is derived from it.
use_gpu = False
device = torch.device('cpu') if not use_gpu else torch.device('cuda')

In [None]:
# ResNet-50 loaded with torchvision's pretrained weights, placed on `device`.
model_resnet = torchvision.models.resnet50(pretrained=True).to(device)
# Feature extractor: every child module of the ResNet except the last one.
# The child modules are shared with `model_resnet` (they are not copied), so
# any weight update made through `model_resnet` is also seen by `model_resFeats`.
model_resFeats = nn.Sequential(*list(model_resnet.children())[:-1])

In [26]:
# Location of the CIFAR-10 .mat file read by samplesForOneExperiment().
# The default is the original machine-specific path; set the CIFAR10_MAT_PATH
# environment variable to run the notebook elsewhere without editing this cell.
data_path = os.environ.get('CIFAR10_MAT_PATH', 'C:/Users/admin/Desktop/data/cifar10.mat')
# Mini-batch size used by train_model().
batch_size = 8
# Number of training samples that keep their ground-truth label.
numLabels = 800
# Number of label-spreading / fine-tuning rounds in the main loop.
num_iteration = 20

In [27]:
# Preprocessing applied to every PIL image before it reaches the network:
# resize to 224x224, convert to a tensor, then normalize each of the 3 channels.
data_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

In [28]:
def samplesForOneExperiment(data_path,numLabels):
    """Load the .mat dataset and draw one labeled / unlabeled / test split.

    Args:
        data_path: Path of a .mat file containing the arrays 'xTr', 'yTr',
            'xTe' and 'yTe'.
        numLabels: Number of training samples that keep their label.

    Returns:
        Tuple ``(x_labeled, y_labeled, x_unlabeled, y_unlabeled, x_test, y_test)``,
        all converted to float arrays.
    """
    mat = scio.loadmat(data_path)
    x_train, y_train, x_test, y_test = (
        mat[key].astype('float') for key in ('xTr', 'yTr', 'xTe', 'yTe')
    )

    # First split: 35000 training samples are set aside and not used afterwards.
    x_pool, _, y_pool, _ = train_test_split(
        x_train, y_train, test_size=35000, random_state=42, shuffle=True)

    # Second split: keep `numLabels` labeled samples, the rest become unlabeled.
    num_unlabeled = len(y_pool) - numLabels
    x_lab, x_unl, y_lab, y_unl = train_test_split(
        x_pool, y_pool, test_size=num_unlabeled, random_state=500, shuffle=True)

    return x_lab, y_lab, x_unl, y_unl, x_test, y_test

In [29]:
def CNN_feature_extractor(model,dataX):
    """Run every image in `dataX` through `model` and stack the outputs.

    Each row of `dataX` is reshaped to a 3x32x32 image, converted to a PIL
    image, preprocessed with the module-level `data_transforms`, and passed
    through `model` one image at a time in eval mode.

    Args:
        model: A torch module used as a feature extractor.
        dataX: NumPy array that can be reshaped to (-1, 3, 32, 32).

    Returns:
        A NumPy array with one row of (squeezed) features per image. A single
        input image yields shape (1, d), consistent with the multi-image case.
        For zero images an empty list is returned, as before.
    """
    model.eval()
    x_images = dataX.reshape((-1,3,32,32)).transpose((0,2,3,1))
    num_image = x_images.shape[0]
    if num_image == 0:
        return []

    # Feed inputs on whatever device the model's weights live on.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    feats = []
    # Inference only: disabling autograd avoids building a graph per image.
    with torch.no_grad():
        for i in range(num_image):
            image = Image.fromarray(np.uint8(x_images[i,:,:,:]))
            batch = torch.unsqueeze(data_transforms(image),0).to(target_device)
            feat_tmp = model(batch).detach().cpu().numpy()
            feats.append(np.squeeze(feat_tmp))

    # Stack once at the end instead of re-copying the whole array per image.
    return np.vstack(feats)

In [30]:
def _cifar_rows_to_tensor(flat_images):
    """Convert rows of flattened 3x32x32 images into a (N, 3, 224, 224) float tensor."""
    x_images = flat_images.reshape((-1,3,32,32)).transpose((0,2,3,1))
    num_image = x_images.shape[0]
    batch = torch.zeros(num_image,3,224,224,dtype=torch.float)
    for i in range(num_image):
        batch[i,:,:,:] = data_transforms(Image.fromarray(np.uint8(x_images[i,:,:,:])))
    return batch


def train_model(xTr,yTr,model,criterion, optimizer, scheduler, num_epochs=25):
    """Fine-tune `model` on (xTr, yTr) and return it with its best weights loaded.

    Relies on the module-level `batch_size`, `data_transforms`, `Image` and
    `random_mini_batches`.

    Args:
        xTr: NumPy array whose rows can be reshaped to 3x32x32 images.
        yTr: Labels for `xTr`; converted to int64 class indices per batch.
        model: The torch module to train (updated in place).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of passes over the data; 0 leaves the weights untouched.

    Returns:
        `model`, with the weights of the epoch that reached the highest
        accuracy on the training data itself (no validation set is used).

    Raises:
        ValueError: If training is requested (num_epochs > 0) on an empty set.
    """
    since = time.time()
    dataset_sizes = len(yTr)
    if num_epochs > 0 and dataset_sizes == 0:
        raise ValueError('train_model needs at least one sample when num_epochs > 0')

    # Send batches to wherever the model's weights live.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    model.train()
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 30)

        model.train()  # make sure the model is in training mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over the data. Seeding with the epoch index gives a different
        # shuffle each epoch (epoch 0 still uses seed 0, as before).
        mini_batches = random_mini_batches(xTr,yTr,mini_batch_size=batch_size,seed=epoch)
        for inputs, labels in mini_batches:
            inputs = _cifar_rows_to_tensor(inputs).to(target_device)
            labels = torch.from_numpy(np.asarray(labels)).long().to(target_device)

            # Reset the parameter gradients.
            optimizer.zero_grad()
            # Forward pass.
            outputs = model(inputs)
            _, preds = torch.max(outputs.detach(), 1)
            loss = criterion(outputs, labels)
            # Backward pass and optimization step.
            loss.backward()
            optimizer.step()
            # Statistics, kept as plain Python numbers.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += int(torch.sum(preds == labels).item())

        # Step the scheduler after this epoch's optimizer updates, not before.
        scheduler.step()

        epoch_loss = running_loss / dataset_sizes
        epoch_acc = running_corrects / dataset_sizes

        print('Loss: {:.4f} Acc: {:.4f}'.format( epoch_loss, epoch_acc))

        # Keep a deep copy of the best weights seen so far.
        if  epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # NOTE: despite the label, this is accuracy on the training data.
    print('Best val Acc: {:4f}'.format(best_acc))
    # Load the weights of the best epoch.
    model.load_state_dict(best_model_wts)
    return model

In [31]:
def random_mini_batches(X,Y,mini_batch_size= 16, seed=0):
    """Shuffle (X, Y) together and cut them into consecutive mini-batches.

    Args:
        X: NumPy array with one sample per row (axis 0 is the sample axis).
        Y: NumPy array of labels aligned with `X` along axis 0. Singleton
            non-leading axes are dropped, so an (m, 1) column becomes (m,).
        mini_batch_size: Maximum number of samples per batch.
        seed: Seed for the shuffle; the same seed gives the same permutation
            as ``np.random.seed(seed); np.random.permutation(m)``.

    Returns:
        List of ``(batch_X, batch_Y)`` tuples. The last batch holds the
        remainder when the sample count is not a multiple of the batch size.

    Raises:
        ValueError: If `mini_batch_size` is not positive.
    """
    if mini_batch_size <= 0:
        raise ValueError('mini_batch_size must be a positive integer')

    m = X.shape[0]

    # Use a private generator so the global NumPy RNG state is left untouched.
    permutation = np.random.RandomState(seed).permutation(m)
    shuffled_X = X[permutation]
    shuffled_Y = Y[permutation]

    # Drop singleton label axes but always keep the sample axis, so a single
    # sample (m == 1) still yields a sliceable 1-D label array.
    kept_dims = tuple(d for d in shuffled_Y.shape[1:] if d != 1)
    shuffled_Y = shuffled_Y.reshape((m,) + kept_dims)

    # Stepping by the batch size covers both the full batches and the remainder.
    return [
        (shuffled_X[start:start + mini_batch_size],
         shuffled_Y[start:start + mini_batch_size])
        for start in range(0, m, mini_batch_size)
    ]

In [32]:
# Replace the classification head with a fresh 10-way linear layer.
num_inFeats = model_resnet.fc.in_features
# The rest of the network was moved to `device` when it was loaded; the new
# layer is created on the CPU, so it has to be moved explicitly as well.
model_resnet.fc = nn.Linear(num_inFeats, 10).to(device)
criterion = nn.CrossEntropyLoss()
# All parameters are optimized (the whole network is fine-tuned).
optimizer_ft = optim.SGD(model_resnet.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [33]:
xTr,yTr,xUn,yUn,xTe,yTe = samplesForOneExperiment(data_path,numLabels)

for iter1 in range(num_iteration):
    # Extract data features, which takes a lot of time. This is redone every
    # round because the network weights change during fine-tuning.
    xTr_features = CNN_feature_extractor(model_resFeats,xTr)
    xUn_features = CNN_feature_extractor(model_resFeats,xUn)

    # SSL: obtain pseudo-labels for the unlabeled samples. Labeled rows come
    # first; every row after the first `numLabels` is marked unlabeled (-1).
    dataX = np.vstack((xTr_features,xUn_features)).astype(np.float64)
    dataY = np.vstack((yTr,yUn))
    numSamples = dataY.shape[0]
    ind_unlabeled = np.arange(numLabels,numSamples)
    dataY[numLabels:] = -1
    cls = LabelSpreading(kernel='rbf', gamma=0.003, max_iter=150)
    scaled_features = preprocessing.scale(dataX)
    cls.fit(scaled_features,dataY.ravel())
    predicted_labels = cls.transduction_[ind_unlabeled]
    ssl_accuracy = metrics.accuracy_score(yUn,predicted_labels)
    print("Semi-Supervised Learning Accuracy:%f"%ssl_accuracy)

    # Fine-tune the network using the given labels and the pseudo-labels.
    model_resnet = train_model(xTr,yTr,model_resnet, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=1)
    # NOTE(review): num_epochs=0 means this call runs no training epoch, so the
    # pseudo-labels never update the weights - confirm whether that is intended.
    model_resnet = train_model(xUn,predicted_labels,model_resnet, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=0)



Semi-Supervised Learning Accuracy:0.876901
Epoch 0/0
------------------------------
Loss: 0.9745 Acc: 0.7212

Training complete in 5m 44s
Best val Acc: 0.721250
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.890352
Epoch 0/0
------------------------------
Loss: 0.2550 Acc: 0.9362

Training complete in 5m 58s
Best val Acc: 0.936250
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900282
Epoch 0/0
------------------------------
Loss: 0.0713 Acc: 0.9912

Training complete in 5m 58s
Best val Acc: 0.991250
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.899789
Epoch 0/0
------------------------------
Loss: 0.0307 Acc: 0.9987

Training complete in 5m 59s
Best val Acc: 0.998750
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.899930
Epoch 0/0
------------------------------
Loss: 0.0152 Acc: 1.0000

Training complete in 5m 58s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900352
Epoch 0/0
------------------------------
Loss: 0.0102 Acc: 1.0000

Training complete in 5m 59s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900282
Epoch 0/0
------------------------------
Loss: 0.0081 Acc: 1.0000

Training complete in 5m 55s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0071 Acc: 1.0000

Training complete in 5m 55s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900000
Epoch 0/0
------------------------------
Loss: 0.0070 Acc: 1.0000

Training complete in 5m 55s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.899930
Epoch 0/0
------------------------------
Loss: 0.0069 Acc: 1.0000

Training complete in 5m 54s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900000
Epoch 0/0
------------------------------
Loss: 0.0068 Acc: 1.0000

Training complete in 5m 56s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0067 Acc: 1.0000

Training complete in 5m 55s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0066 Acc: 1.0000

Training complete in 5m 54s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900141
Epoch 0/0
------------------------------
Loss: 0.0065 Acc: 1.0000

Training complete in 5m 54s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900000
Epoch 0/0
------------------------------
Loss: 0.0064 Acc: 1.0000

Training complete in 5m 56s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0064 Acc: 1.0000

Training complete in 5m 54s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0064 Acc: 1.0000

Training complete in 5m 55s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0064 Acc: 1.0000

Training complete in 5m 54s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0064 Acc: 1.0000

Training complete in 5m 55s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000




Semi-Supervised Learning Accuracy:0.900070
Epoch 0/0
------------------------------
Loss: 0.0064 Acc: 1.0000

Training complete in 5m 53s
Best val Acc: 1.000000
Training complete in 0m 0s
Best val Acc: 0.000000
