In [None]:
#import packages
import numpy as np
import random
import torch
import os
from PIL import Image
from torch.utils import data
from torchvision import transforms as T
import torchvision.transforms.functional as tf
from torch.utils.data import DataLoader
from math import sqrt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
import torchvision
import matplotlib.pyplot as plt

In [None]:
def seed_everything(SEED=42):
    """Seed every random number generator used in this notebook.

    Python's ``random`` module, NumPy's global generator and PyTorch's CPU and
    CUDA generators all receive the same seed. The cuDNN autotuner is then
    switched on.

    Args:
        SEED: Integer seed handed to each generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(SEED)
    # Keep the autotuner enabled as long as all inputs have the same size.
    torch.backends.cudnn.benchmark = True


SEED = 42
seed_everything(SEED=SEED)

In [None]:
# customize dataset
class LipTrainDataset(data.Dataset):
    """Training split served as (image, binary mask) pairs.

    File locations are derived from ``lip_train_set.csv``: the first
    comma-separated field of each row is an image file name, and the mask is
    the ``.png`` file with the same stem inside ``train_segmentations``.
    """

    def __init__(self, file_path=None, transform1=None, transform2=None, table=None, max_samples=300):
        """Store the configuration and index the image/mask files.

        Args:
            file_path: Root folder of the dataset, e.g. '../input/singleperson'.
            transform1: Optional callable applied to the RGB image.
            transform2: Optional callable applied to the binary mask.
            table: Optional 256-entry lookup table handed to ``Image.point`` to
                binarise the mask. When omitted, value 0 maps to 0 and every
                other value maps to 1.
            max_samples: Upper bound on the number of CSV rows that are read.
                The default of 300 keeps trial runs small; ``None`` reads
                every row.
        """
        self.file_path = file_path
        self.transform1 = transform1
        self.transform2 = transform2
        if table is None:
            table = [0] + [1] * 255
        self.table = list(table)
        self.max_samples = max_samples
        # Collect the paths of all usable samples under the given folder.
        self.init_all_data(file_path)

    def init_all_data(self, file_path):
        """Fill ``self.images`` and ``self.labels`` with aligned file paths.

        Args:
            file_path: Root folder of the dataset.
        """
        self.images = []
        self.labels = []

        csv_path = os.path.join(file_path, 'TrainVal_pose_annotations', 'lip_train_set.csv')
        # Read through a context manager so the file handle is always released.
        with open(csv_path, 'r') as csv_file:
            rows = [line.rstrip('\n') for line in csv_file]
        rows = [row for row in rows if row.strip()]
        # Slicing clamps to the number of rows actually present in the CSV.
        if self.max_samples is not None:
            rows = rows[:self.max_samples]

        image_dir = os.path.join(file_path, 'TrainVal_images', 'TrainVal_images', 'train_images')
        label_dir = os.path.join(file_path, 'TrainVal_parsing_annotations', 'TrainVal_parsing_annotations',
                                 'TrainVal_parsing_annotations', 'train_segmentations')

        pairs = []
        for row in rows:
            image_name = row.split(',')[0]
            label_name = os.path.splitext(image_name)[0] + '.png'
            image_path = os.path.join(image_dir, image_name)
            label_path = os.path.join(label_dir, label_name)
            # Keep a sample only when both files open, so __getitem__ cannot
            # hit a missing or unreadable mask later on.
            if self.is_valid_image(image_path) and self.is_valid_image(label_path):
                pairs.append((image_path, label_path))

        # Sort the pairs together so every image stays aligned with its mask.
        pairs.sort()
        self.images = [image_path for image_path, _ in pairs]
        self.labels = [label_path for _, label_path in pairs]
        return None

    def is_valid_image(self, img_path):
        """Report whether ``img_path`` can be opened as an image.

        Args:
            img_path: Path of the file to check.

        Returns:
            True when ``Image.open`` succeeds, False when it raises.
        """
        try:
            # The context manager closes the file handle right after the check.
            with Image.open(img_path):
                return True
        except Exception:
            return False

    def __getitem__(self, idx):
        """Load the sample stored at position ``idx``.

        Args:
            idx: Integer index of the requested sample.

        Returns:
            A tuple ``(image, label)``: the image converted to RGB and the mask
            binarised through ``self.table``, each passed through its
            transform when one was supplied.
        """
        # Force RGB so that grayscale files do not yield single-channel inputs.
        with Image.open(self.images[idx]) as raw_image:
            image = raw_image.convert('RGB')
        # Map the annotation values through the lookup table into a mode '1' mask.
        with Image.open(self.labels[idx]) as raw_label:
            label = raw_label.point(self.table, '1')

        if self.transform1 is not None:
            image = self.transform1(image)
        if self.transform2 is not None:
            label = self.transform2(label)

        return image, label

    def __len__(self):
        """Return the number of indexed samples (used by ``DataLoader``)."""
        return len(self.images)
    
    
class LipValDataset(data.Dataset):
    """Validation split served as (image, binary mask) pairs.

    File locations are derived from ``lip_val_set.csv``: the first
    comma-separated field of each row is an image file name, and the mask is
    the ``.png`` file with the same stem inside ``val_segmentations``.
    """

    def __init__(self, file_path=None, transform1=None, transform2=None, table=None, max_samples=300):
        """Store the configuration and index the image/mask files.

        Args:
            file_path: Root folder of the dataset.
            transform1: Optional callable applied to the RGB image.
            transform2: Optional callable applied to the binary mask.
            table: Optional 256-entry lookup table handed to ``Image.point`` to
                binarise the mask. When omitted, value 0 maps to 0 and every
                other value maps to 1.
            max_samples: Upper bound on the number of CSV rows that are read
                (default 300); ``None`` reads every row.
        """
        self.file_path = file_path
        self.transform1 = transform1
        self.transform2 = transform2
        if table is None:
            table = [0] + [1] * 255
        self.table = list(table)
        self.max_samples = max_samples
        # Collect the paths of all usable samples under the given folder.
        self.init_all_data(file_path)

    def init_all_data(self, file_path):
        """Fill ``self.images`` and ``self.labels`` with aligned file paths.

        Args:
            file_path: Root folder of the dataset.
        """
        self.images = []
        self.labels = []

        csv_path = os.path.join(file_path, 'TrainVal_pose_annotations', 'lip_val_set.csv')
        # Read through a context manager so the file handle is always released.
        with open(csv_path, 'r') as csv_file:
            rows = [line.rstrip('\n') for line in csv_file]
        rows = [row for row in rows if row.strip()]
        # Slicing clamps to the number of rows actually present in the CSV.
        if self.max_samples is not None:
            rows = rows[:self.max_samples]

        image_dir = os.path.join(file_path, 'TrainVal_images', 'TrainVal_images', 'val_images')
        label_dir = os.path.join(file_path, 'TrainVal_parsing_annotations', 'TrainVal_parsing_annotations',
                                 'TrainVal_parsing_annotations', 'val_segmentations')

        pairs = []
        for row in rows:
            image_name = row.split(',')[0]
            label_name = os.path.splitext(image_name)[0] + '.png'
            image_path = os.path.join(image_dir, image_name)
            label_path = os.path.join(label_dir, label_name)
            # Keep a sample only when both files open, so __getitem__ cannot
            # hit a missing or unreadable mask later on.
            if self.is_valid_image(image_path) and self.is_valid_image(label_path):
                pairs.append((image_path, label_path))

        # Sort the pairs together so every image stays aligned with its mask.
        pairs.sort()
        self.images = [image_path for image_path, _ in pairs]
        self.labels = [label_path for _, label_path in pairs]
        return None

    def is_valid_image(self, img_path):
        """Report whether ``img_path`` can be opened as an image.

        Args:
            img_path: Path of the file to check.

        Returns:
            True when ``Image.open`` succeeds, False when it raises.
        """
        try:
            # The context manager closes the file handle right after the check.
            with Image.open(img_path):
                return True
        except Exception:
            return False

    def __getitem__(self, idx):
        """Load the sample stored at position ``idx``.

        Args:
            idx: Integer index of the requested sample.

        Returns:
            A tuple ``(image, label)``: the image converted to RGB and the mask
            binarised through ``self.table``, each passed through its
            transform when one was supplied.
        """
        # Force RGB so that grayscale files do not yield single-channel inputs.
        with Image.open(self.images[idx]) as raw_image:
            image = raw_image.convert('RGB')
        # Map the annotation values through the lookup table into a mode '1' mask.
        with Image.open(self.labels[idx]) as raw_label:
            label = raw_label.point(self.table, '1')

        if self.transform1 is not None:
            image = self.transform1(image)
        if self.transform2 is not None:
            label = self.transform2(label)
        return image, label

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.images)
    
class LipTestDataset(data.Dataset):
    """Test split: images only, listed by id in ``Testing_images/test_id.txt``."""

    def __init__(self, file_path=None, transform=None):
        """Store the configuration and index the test images.

        Args:
            file_path: Root folder of the dataset.
            transform: Optional callable applied to each RGB image.
        """
        self.file_path = file_path
        self.transform = transform
        # Collect the paths of all usable images under the given folder.
        self.init_all_data(file_path)

    def init_all_data(self, file_path):
        """Fill ``self.images`` with the path of every readable test image.

        Each non-empty line of ``test_id.txt`` is treated as an image id; the
        image file is that id with a ``.jpg`` suffix.

        Args:
            file_path: Root folder of the dataset.
        """
        self.images = []

        id_path = os.path.join(file_path, 'Testing_images', 'test_id.txt')
        # Read through a context manager so the file handle is always released.
        with open(id_path, 'r') as id_file:
            image_ids = [line.rstrip('\n') for line in id_file]

        image_dir = os.path.join(file_path, 'Testing_images', 'Testing_images', 'testing_images')
        for image_id in image_ids:
            # A blank line would otherwise turn into a bogus '.jpg' path.
            if not image_id.strip():
                continue
            image_path = os.path.join(image_dir, image_id + '.jpg')
            if self.is_valid_image(image_path):
                self.images.append(image_path)
        return None

    def is_valid_image(self, img_path):
        """Report whether ``img_path`` can be opened as an image.

        Args:
            img_path: Path of the file to check.

        Returns:
            True when ``Image.open`` succeeds, False when it raises.
        """
        try:
            # The context manager closes the file handle right after the check.
            with Image.open(img_path):
                return True
        except Exception:
            return False

    def __getitem__(self, idx):
        """Load the image stored at position ``idx``.

        Args:
            idx: Integer index of the requested image.

        Returns:
            The image converted to RGB, passed through ``self.transform`` when
            one was supplied.
        """
        # Force RGB so that grayscale files do not yield single-channel inputs.
        with Image.open(self.images[idx]) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)
        return image

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

In [None]:
# Only deterministic preprocessing is used here. Random operations such as a
# random crop are left out on purpose: image and mask go through separate
# pipelines, so a random operation would not be applied identically to both.
# (Another version, kept in a separate file, applies paired random operations.)
transform_train = T.Compose([
    T.Resize([256, 256]),
    T.CenterCrop([224, 224]),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# The mask is NOT normalised: it must stay in {0, 1}. The training cells
# threshold the network output at 0.5 and test the resulting 0/1 prediction for
# equality with the mask; rescaling the mask to {-1, 1} would make every
# background pixel count as wrong.
transform_label = T.Compose([
    T.Resize([256, 256]),
    T.CenterCrop([224, 224]),
    T.ToTensor(),
])
# transform_test = T.Compose([
#         T.Resize([224, 224]),
#         T.ToTensor(),
#         T.Normalize(mean= [0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#         ])  
        

In [None]:
# Training split: shuffled mini-batches of 32 samples.
traindataset = LipTrainDataset(
    '../input/singleperson',
    transform1=transform_train,
    transform2=transform_label,
)
train_loader = DataLoader(
    traindataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

# Validation split: same preprocessing as training, mini-batches of 64 samples.
valdataset = LipValDataset(
    '../input/singleperson',
    transform1=transform_train,
    transform2=transform_label,
)
val_loader = DataLoader(
    valdataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
# testdataset = LipTestDataset('../input/singleperson',transform=transform_test)
# test_loader = DataLoader(testdataset, batch_size=64, shuffle=True, num_workers=2)

In [None]:
import torch.nn as nn

from math import sqrt
class Double_Conv_Block(nn.Module):
    """Two consecutive 3x3 convolution -> batch-norm -> ReLU stages.

    Both convolutions use stride 1 and padding 1, so the spatial size of the
    input is kept and no cropping is needed.
    """

    def __init__(self, input_channel, output_channel):
        """
        Args:
            input_channel: Number of channels entering the first convolution.
            output_channel: Number of channels produced by both convolutions.
        """
        super().__init__()
        stages = []
        # The first stage maps input_channel -> output_channel, the second
        # stage keeps output_channel.
        for stage_in in (input_channel, output_channel):
            stages.append(nn.Conv2d(stage_in, output_channel, kernel_size=3, stride=1, padding=1, bias=True))
            stages.append(nn.BatchNorm2d(output_channel))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, input_channel):
        """Apply both stages to the tensor passed as ``input_channel``."""
        features = self.conv(input_channel)
        return features

class Up_Conv_Block(nn.Module):
    """Upsampling step: 2x upsample, then 3x3 convolution -> batch-norm -> ReLU."""

    def __init__(self, input_channel, output_channel):
        """
        Args:
            input_channel: Number of channels of the incoming feature map.
            output_channel: Number of channels produced by the convolution.
        """
        super().__init__()
        enlarge = nn.Upsample(scale_factor=2)
        project = nn.Conv2d(input_channel, output_channel, kernel_size=3, stride=1, padding=1, bias=True)
        normalise = nn.BatchNorm2d(output_channel)
        activate = nn.ReLU(inplace=True)
        self.up = nn.Sequential(enlarge, project, normalise, activate)

    def forward(self, input_channel):
        """Upsample and convolve the tensor passed as ``input_channel``."""
        upsampled = self.up(input_channel)
        return upsampled


class U_Net(nn.Module):
    """Encoder-decoder network with skip connections.

    The encoder applies five double-convolution stages (64, 128, 256, 512 and
    1024 channels) separated by 2x2 max-pooling. The decoder upsamples four
    times; after each upsampling the matching encoder feature map is
    concatenated along the channel axis and passed through another
    double-convolution stage. A final 1x1 convolution maps the 64 decoder
    channels to ``output_ch`` channels; no activation is applied to it.
    """

    def __init__(self, img_ch=3, output_ch=1):
        """
        Args:
            img_ch: Number of input channels (3 for RGB images).
            output_ch: Number of output channels (1 for this project).
        """
        super(U_Net, self).__init__()

        self.Maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.Conv1 = Double_Conv_Block(input_channel=img_ch, output_channel=64)
        self.Conv2 = Double_Conv_Block(input_channel=64, output_channel=128)
        self.Conv3 = Double_Conv_Block(input_channel=128, output_channel=256)
        self.Conv4 = Double_Conv_Block(input_channel=256, output_channel=512)
        self.Conv5 = Double_Conv_Block(input_channel=512, output_channel=1024)

        self.Up5 = Up_Conv_Block(input_channel=1024, output_channel=512)
        self.Up_Conv_Block5 = Double_Conv_Block(input_channel=1024, output_channel=512)

        self.Up4 = Up_Conv_Block(input_channel=512, output_channel=256)
        self.Up_Conv_Block4 = Double_Conv_Block(input_channel=512, output_channel=256)

        self.Up3 = Up_Conv_Block(input_channel=256, output_channel=128)
        self.Up_Conv_Block3 = Double_Conv_Block(input_channel=256, output_channel=128)

        self.Up2 = Up_Conv_Block(input_channel=128, output_channel=64)
        self.Up_Conv_Block2 = Double_Conv_Block(input_channel=128, output_channel=64)

        self.Conv_1x1 = nn.Conv2d(64, output_ch, kernel_size=1, stride=1, padding=0)

        # Weight initialisation: convolution weights are drawn from a normal
        # distribution with std sqrt(2 / (kernel_h * kernel_w * out_channels))
        # and biases are zeroed; batch-norm layers start as the identity
        # (weight 1, bias 0).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0.0, sqrt(2. / n))
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    @staticmethod
    def _match_spatial_size(upsampled, skip):
        """Resize ``upsampled`` to the height/width of ``skip`` when they differ.

        Max-pooling floors odd sizes, so after a 2x upsampling the decoder map
        can be one pixel smaller than the encoder map it is concatenated with.
        Maps whose sizes already agree are returned untouched.
        """
        if upsampled.shape[-2:] != skip.shape[-2:]:
            upsampled = F.interpolate(upsampled, size=skip.shape[-2:])
        return upsampled

    def forward(self, x):
        """Run the network on a batch ``x`` of shape (N, img_ch, H, W).

        Returns:
            A tensor of shape (N, output_ch, H, W).
        """
        # Encoder: double convolution, then 2x2 max-pooling before the next stage.
        x1 = self.Conv1(x)

        x2 = self.Maxpool(x1)
        x2 = self.Conv2(x2)

        x3 = self.Maxpool(x2)
        x3 = self.Conv3(x3)

        x4 = self.Maxpool(x3)
        x4 = self.Conv4(x4)

        x5 = self.Maxpool(x4)
        x5 = self.Conv5(x5)

        # Decoder: upsample, concatenate the encoder map, double convolution.
        d5 = self._match_spatial_size(self.Up5(x5), x4)
        d5 = torch.cat((x4, d5), dim=1)
        d5 = self.Up_Conv_Block5(d5)

        d4 = self._match_spatial_size(self.Up4(d5), x3)
        d4 = torch.cat((x3, d4), dim=1)
        d4 = self.Up_Conv_Block4(d4)

        d3 = self._match_spatial_size(self.Up3(d4), x2)
        d3 = torch.cat((x2, d3), dim=1)
        d3 = self.Up_Conv_Block3(d3)

        d2 = self._match_spatial_size(self.Up2(d3), x1)
        d2 = torch.cat((x1, d2), dim=1)
        d2 = self.Up_Conv_Block2(d2)

        d1 = self.Conv_1x1(d2)

        return d1

        

In [None]:
# Build the network on the GPU when CUDA is available and on the CPU otherwise,
# instead of failing outright on machines without CUDA.
model = U_Net().to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))


In [None]:
# Tensors are moved to the GPU when CUDA is available, otherwise to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Number of passes over the training set.
num_epochs = 70

# Mean-squared error between the network output and the mask.
criterion = nn.MSELoss()

# Plain SGD with momentum over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)
#scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0= 5, T_mult=1, eta_min=0, last_epoch=-1)

In [None]:
# Per-epoch history of loss and pixel accuracy for both splits.
trainloss = []
valloss = []
train_acc = []
val_acc = []
# (step, learning rate) recorded at every optimisation step.
lrs = []
iters = len(train_loader)

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    # ---- training pass ----
    model.train()
    running_loss = 0.0
    running_corrects = 0
    running_pixels = 0

    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        lrs.append((step, optimizer.param_groups[0]['lr']))
        loss.backward()
        optimizer.step()
        #scheduler.step(epoch + step / iters)

        # Threshold on the tensor's own device; no NumPy round-trip and no
        # hard-coded .cuda() call.
        pred_mask = outputs.detach() > 0.5
        # Compare boolean masks, so the count depends only on which side of
        # 0.5 each value lies, not on the exact numbers used for the mask.
        true_mask = labels > 0.5

        # criterion returns a batch mean; weight it by the batch size.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += (pred_mask == true_mask).sum().item()
        running_pixels += true_mask.numel()

    trainloss.append(running_loss / len(traindataset))
    # Divide by the number of pixels actually compared rather than assuming a
    # square, single-channel output.
    train_acc.append(running_corrects / max(running_pixels, 1))

    # ---- validation pass ----
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    running_pixels = 0

    with torch.no_grad():
        for step, (inputs, labels) in enumerate(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            pred_mask = outputs > 0.5
            true_mask = labels > 0.5

            running_loss += loss.item() * inputs.size(0)
            running_corrects += (pred_mask == true_mask).sum().item()
            running_pixels += true_mask.numel()

    valloss.append(running_loss / len(valdataset))
    val_acc.append(running_corrects / max(running_pixels, 1))

    print('Train Loss: {:.4f} Train Acc: {:.4f} Val Loss: {:.4f} Val Acc: {:.4f}'.format(
    trainloss[-1], train_acc[-1],valloss[-1], val_acc[-1]))
print()


In [None]:
EPOCH = 40
train_num = len(traindataset)
val_num = len(valdataset)
# Per-epoch history: mean training loss, validation accuracy, training accuracy.
losses = []
eval_accs = []
train_accs = []
# Best validation accuracy seen so far; it must exist before the first comparison.
best_acc = 0.0
for epoch in range(EPOCH):
    # ---- train ----
    model.train()
    running_loss = 0.0
    correct_pixels = 0
    total_pixels = 0
    num_batches = len(train_loader)
    # The loop variable is called `batch` so that it does not overwrite the
    # imported `data` module that the dataset classes are built on.
    for step, batch in enumerate(train_loader, start=0):
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        # Threshold on the tensor's own device and compare boolean masks, so
        # the count depends only on which side of 0.5 each value lies.
        pred_mask = logits.detach() > 0.5
        true_mask = labels > 0.5
        correct_pixels += (pred_mask == true_mask).sum().item()
        total_pixels += true_mask.numel()

        running_loss += loss.item()

        # Text progress bar for the current epoch.
        rate = (step + 1) / num_batches
        done = "*" * int(rate * 50)
        todo = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.4f}".format(int(rate*100), done, todo, loss.item()), end="")
    print()
    train_accs.append(correct_pixels / max(total_pixels, 1))

    # ---- validate ----
    torch.cuda.empty_cache()
    model.eval()
    correct_pixels = 0
    total_pixels = 0
    with torch.no_grad():
        for val_data in val_loader:
            val_images, val_labels = val_data
            outputs = model(val_images.to(device))
            val_labels_dev = val_labels.to(device)

            pred_mask = outputs > 0.5
            correct_pixels += (pred_mask == (val_labels_dev > 0.5)).sum().item()
            total_pixels += val_labels_dev.numel()

            # Keep the most recent batch for later visualisation.
            preds = pred_mask.float()
            last_eval = {'image': val_images, 'mask': val_labels, 'output': outputs, 'pred': preds}
    val_accurate = correct_pixels / max(total_pixels, 1)
    eval_accs.append(val_accurate)
    if val_accurate > best_acc:
        best_acc = val_accurate

    # Average over the number of batches; dividing by the last step index is
    # off by one and fails when there is a single batch.
    mean_loss = running_loss / max(num_batches, 1)
    print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f train_accuracy: %.3f   ' %
          (epoch + 1, mean_loss, val_accurate, train_accs[-1]))
    losses.append(mean_loss)

print('Finished Training')

In [None]:
def visualize_performance(dic):
    """Plot a grid of up to eight samples for every entry of ``dic``.

    This can be used to visualise the validation batch kept from the last epoch.

    Args:
        dic: Mapping from a title string to a 4-D tensor. The entry stored
            under the key 'image' is un-normalised with the mean and standard
            deviation used by the image transform before it is shown.
    """
    for k, v in dic.items():
        grid = torchvision.utils.make_grid(v[:8], nrow=4)
        # detach().cpu() works wherever the tensor lives; no GPU is needed
        # just to draw a plot.
        im = grid.detach().cpu().numpy().transpose((1, 2, 0))
        if k == 'image':
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            im = std * im + mean
        # imshow expects float data in the range [0, 1].
        im = np.clip(im, 0, 1)
        plt.imshow(im)

        plt.title(k)
        plt.pause(0.001)

In [None]:
# Show the image / mask / output / pred grids kept from the last validation batch.
visualize_performance(last_eval)

In [None]:
# Training loss per epoch, as collected in `losses`.
x = np.arange(len(losses))
fig, ax = plt.subplots()
ax.plot(x, losses)
#plt.plot(x,eval_losses)
ax.set_title('Loss ')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
#plt.legend
plt.show()

In [None]:
# Training and validation accuracy per epoch.
x = np.arange(len(train_accs))

fig, ax = plt.subplots()
for curve, curve_name in ((train_accs, 'train'), (eval_accs, 'validate')):
    ax.plot(x, curve, label=curve_name)
ax.set_title('Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
# NOTE(review): this cell draws the same accuracy figure as the cell before it.
x = np.arange(len(train_accs))
fig, ax = plt.subplots()
ax.plot(x, train_accs, label='train')
ax.plot(x, eval_accs, label='validate')
ax.set(title='Accuracy', xlabel='Epoch', ylabel='Accuracy')
ax.legend()
plt.show()