In [12]:
import paddle
import paddle.nn.functional as F
import numpy as np
import pandas as pd
import os
from paddle import nn
from PIL import Image
from paddle.distributed import fleet, get_rank
from visualdl import LogWriter
import matplotlib.pyplot as plt
from paddle.vision.transforms import *
import copy

IMAGE_SIZE = 224
BATCH_SIZE = 5
# EPOCH_NUM = 1

print(paddle.__version__)

2.4.1


In [25]:
def transform(img):
    # 基本变换
    imgs = np.array([img,rotate(img,90),rotate(img,180),rotate(img,270)])
    temp = []
    # 提高亮度
    for img in imgs:
        temp.append(adjust_brightness(img,1))
        temp.append(adjust_brightness(img,3))
        temp.append(adjust_brightness(img,5))
        temp.append(adjust_contrast(img,3))
        temp.append(adjust_contrast(img,1.7))
        temp.append(adjust_contrast(img,-2))
        temp.append(adjust_hue(img,0.5))
    imgs = np.concatenate((imgs, np.array(temp)))
    return imgs

In [23]:
# 定义数据集
class MyDataset(paddle.io.Dataset):
    def __init__(self, img_dir='data/PALM-Training400/', csv_dir='data/Classification.csv') -> None:
        super(MyDataset, self).__init__()
        if csv_dir is None:
            self.csvfile = None
            self.filedir = os.listdir(img_dir)
        else:
            self.csvfile = pd.read_csv(csv_dir)
            self.imgpath = img_dir
        pass
    def __len__(self):
        if self.csvfile is None:
            return len(self.filedir)
        else:
            return len(self.csvfile)
        pass
    def __getitem__(self, idx):
        if self.csvfile is None:
            img = np.reshape(transform(np.array(Image.open(self.imgpath+os.sep+self.filedir[idx]).resize((IMAGE_SIZE,IMAGE_SIZE))).astype('float32')),(3,32,IMAGE_SIZE,IMAGE_SIZE))/256.
            lab = self.filedir[idx]
        else:
            img = np.reshape(transform(np.array(Image.open(self.imgpath+os.sep+self.csvfile['imgName'][idx]).resize((IMAGE_SIZE,IMAGE_SIZE))).astype('float32')),(3,32,IMAGE_SIZE,IMAGE_SIZE))/256.
            lab = np.array(self.csvfile['Label'][idx]).astype('float32')
        return img,lab
    pass
mydataset = MyDataset()

In [11]:
# 定义网络结构
def vgg_block(num_convs, in_channels, out_channels):
    net = [nn.Conv3D(in_channels=in_channels,out_channels=out_channels,kernel_size=3,padding=1),nn.ReLU()]
    for i in range(num_convs-1):
        net.append(nn.Conv3D(out_channels=out_channels,in_channels=out_channels,kernel_size=3,stride=1,padding=1))
        net.append(nn.ReLU())
    net.append(nn.MaxPool3D(kernel_size=2))
    return nn.Sequential(*net)

def vgg_stack(num_convs,channels):
    net = []
    for n,c in zip(num_convs,channels):
        in_c = c[0]
        out_c = c[1]
        net.append(vgg_block(n,in_c,out_c))
    return nn.Sequential(*net)

class VGG(paddle.nn.Layer):
    def __init__(self,vgg_net) -> None:
        super(VGG,self).__init__()
        self.conv = vgg_stack(vgg_net[0],vgg_net[1])
        self.line = nn.Sequential(
            nn.Linear(512*7*7,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,1)
        )
        
    def forward(self,x):
        x = self.conv(x)
        x = paddle.flatten(x, 1, -1)
        x = self.line(x)
        return x

net=VGG([[2,2,3,3,3], [[3,64],[64,128],[128,256],[256,512],[512,512]]])

In [12]:
def train_pm(model, optimizer, train_loader, valid_loader):
    # 开启0号GPU训练
    paddle.device.set_device('gpu:0')

    print('start training ... ')
    model.train()
    print('===')
    for batch_id, data in enumerate(train_loader()):
        print('===')
        print('\r{}/16'.format(batch_id+1),end='')
        x_data, y_data = data
        img = paddle.to_tensor(x_data)
        label = paddle.reshape(paddle.to_tensor(y_data),(-1,1))
        # 运行模型前向计算，得到预测值
        return(img)
        logits = model(img)
        loss = F.binary_cross_entropy_with_logits(logits, label)
        avg_loss = paddle.mean(loss)

        if batch_id % 16 == 15:
            print("epoch: {}, batch_id: {}, loss is: {:.4f}".format(epoch, batch_id, float(avg_loss.numpy())))
        # 反向传播，更新权重，清除梯度
        avg_loss.backward()
        optimizer.step()
        optimizer.clear_grad()

    # model.eval()
    # accuracies = []
    # losses = []
    # for batch_id, data in enumerate(valid_loader()):
    #     x_data, y_data = data
    #     img = paddle.to_tensor(x_data)
    #     label = paddle.reshape(paddle.to_tensor(y_data),(-1,1))
    #     # 运行模型前向计算，得到预测值
    #     logits = model(img)
    #     # 二分类，sigmoid计算后的结果以0.5为阈值分两个类别
    #     # 计算sigmoid后的预测概率，进行loss计算
    #     pred = F.sigmoid(logits)
    #     loss = F.binary_cross_entropy_with_logits(logits, label)
    #     # 计算预测概率小于0.5的类别
    #     pred2 = pred * (-1.0) + 1.0
    #     # 得到两个类别的预测概率，并沿第一个维度级联
    #     pred = paddle.concat([pred2, pred], axis=1)
    #     acc = paddle.metric.accuracy(pred, paddle.cast(label, dtype='int64'))

    #     accuracies.append(acc.numpy())
    #     losses.append(loss.numpy())
    # print("[validation] accuracy/loss: {:.4f}/{:.4f}".format(np.mean(accuracies), np.mean(losses)))
    # model.train()

    # paddle.save(model.state_dict(), 'palmp.pdparams')
    # paddle.save(optimizer.state_dict(), 'palmp.pdopt')

In [13]:
opt = paddle.optimizer.SGD(learning_rate=0.001, parameters=net.parameters())

In [14]:
for i in range(5):
    valid_dataset = copy.deepcopy(mydataset)
    valid_dataset.csvfile=mydataset.csvfile[i*160:i*160+160]
    valid_dataloader=paddle.io.DataLoader(
        valid_dataset,
        drop_last=True,
        batch_size=BATCH_SIZE)
    train_dataset = copy.deepcopy(mydataset)
    train_dataset.csvfile=mydataset.csvfile[0:i*160].append(mydataset.csvfile[i*160+160:])
    train_dataloader=paddle.io.DataLoader(
        train_dataset,
        drop_last=True,
        batch_size=BATCH_SIZE)
    # train_pm(net, opt, train_dataloader, valid_dataloader)

In [15]:
a = net(paddle.randn((5,3,32,224,224)))

In [16]:
a

Tensor(shape=[5, 1], dtype=float32, place=Place(gpu:0), stop_gradient=False,
       [[ 4.01642990],
        [-4.20592785],
        [-4.84662724],
        [-2.21960235],
        [-2.93191361]])

In [17]:
# a = train_pm(net, opt, train_dataloader, valid_dataloader)

In [None]:
dataloader = paddle.io.DataLoader(
    mydataset
)

In [16]:
for x in dataloader():
    print(x)

NameError: name 'dataloader' is not defined

In [26]:
for x in mydataset:
    print(x)

error: OpenCV(4.7.0) /io/opencv/modules/core/src/lut.cpp:366: error: (-215:Assertion failed) (lutcn == cn || lutcn == 1) && _lut.total() == 256 && _lut.isContinuous() && (depth == CV_8U || depth == CV_8S) in function 'LUT'
