In [3]:
# 导入需要的包
import torch
import pandas as pd
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from d2l import torch as d2l
import torchvision.transforms as transforms
from PIL import Image
from torch.utils import data
from tqdm import tqdm

In [4]:
# 读取csv数据
train_data = pd.read_csv('../data/train.csv')
train_data[:6]

Unnamed: 0,image,label
0,images/0.jpg,maclura_pomifera
1,images/1.jpg,maclura_pomifera
2,images/2.jpg,maclura_pomifera
3,images/3.jpg,maclura_pomifera
4,images/4.jpg,maclura_pomifera
5,images/5.jpg,maclura_pomifera


In [5]:
#给类别排序
labels = sorted(list(set(train_data['label'])))
labels_len = len(labels)
print(labels_len)
labels[:5]

176


['abies_concolor',
 'abies_nordmanniana',
 'acer_campestre',
 'acer_ginnala',
 'acer_griseum']

In [6]:
label_to_num = dict(zip(labels, range(labels_len)))#标签和数字对应
print(label_to_num)
num_to_label = dict(zip(range(labels_len),labels))#数字和标签对应
print(num_to_label)

{'abies_concolor': 0, 'abies_nordmanniana': 1, 'acer_campestre': 2, 'acer_ginnala': 3, 'acer_griseum': 4, 'acer_negundo': 5, 'acer_palmatum': 6, 'acer_pensylvanicum': 7, 'acer_platanoides': 8, 'acer_pseudoplatanus': 9, 'acer_rubrum': 10, 'acer_saccharinum': 11, 'acer_saccharum': 12, 'aesculus_flava': 13, 'aesculus_glabra': 14, 'aesculus_hippocastamon': 15, 'aesculus_pavi': 16, 'ailanthus_altissima': 17, 'albizia_julibrissin': 18, 'amelanchier_arborea': 19, 'amelanchier_canadensis': 20, 'amelanchier_laevis': 21, 'asimina_triloba': 22, 'betula_alleghaniensis': 23, 'betula_jacqemontii': 24, 'betula_lenta': 25, 'betula_nigra': 26, 'betula_populifolia': 27, 'broussonettia_papyrifera': 28, 'carpinus_betulus': 29, 'carpinus_caroliniana': 30, 'carya_cordiformis': 31, 'carya_glabra': 32, 'carya_ovata': 33, 'carya_tomentosa': 34, 'castanea_dentata': 35, 'catalpa_bignonioides': 36, 'catalpa_speciosa': 37, 'cedrus_atlantica': 38, 'cedrus_deodara': 39, 'cedrus_libani': 40, 'celtis_occidentalis': 41

In [7]:
# 创建一个继承pytorch的dataset的数据类
class LeavesData(Dataset):
    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.2, resize_height=224, resize_width=224):
        """
        Args:
            csv_path (string): csv 文件路径
            img_path (string): 图像文件所在路径
            mode (string): 训练模式还是测试模式
            valid_ratio (float): 验证集比例
        """
        
        # 因为可能每张图片的大小尺寸不一致，故统一调整图片尺寸大小
        self.resize_height = resize_height
        self.resize_width = resize_width

        self.file_path = file_path
        self.mode = mode

        # 读取 csv 文件
        # 利用pandas读取csv文件
        self.data_info = pd.read_csv(csv_path, header=None)  #header=None是去掉表头部分
        # 计算 length
        self.data_len = len(self.data_info.index) - 1
        self.train_len = int(self.data_len * (1 - valid_ratio))
        
        if mode == 'train':#训练集
            # 第一列包含图像文件的名称
            self.train_image = self.data_info.iloc[1:self.train_len, 0].to_numpy()  #self.data_info.iloc[1:,0]表示读取第一列，从第二行开始到train_len
            # 第二列是图像的 label
            self.train_label = self.data_info.iloc[1:self.train_len, 1].to_numpy()
            self.image_arr = self.train_image 
            self.label_arr = self.train_label
        elif mode == 'valid':#测试集
            self.valid_image = self.data_info.iloc[self.train_len:, 0].to_numpy()
            self.valid_label = self.data_info.iloc[self.train_len:, 1].to_numpy()
            self.image_arr = self.valid_image
            self.label_arr = self.valid_label
        elif mode == 'test':
            self.test_image = self.data_info.iloc[1:, 0].to_numpy()
            self.image_arr = self.test_image
            
        self.real_len = len(self.image_arr)

        print('Finished reading the {} set of Leaves Dataset ({} samples found)'
              .format(mode, self.real_len))

    def __getitem__(self, index):
        # 从 image_arr中得到索引对应的文件名
        single_image_name = self.image_arr[index]

        # 读取图像文件
        img_data = Image.open(self.file_path + single_image_name)

        #设置好需要转换的变量，还可以包括一系列的nomarlize等等操作
        if self.mode == 'train':
            transform = transforms.Compose([
                transforms.Resize((self.resize_height, self.resize_width)),
                transforms.RandomHorizontalFlip(p=0.5),   #随机水平翻转 选择一个概率
                transforms.ToTensor()
            ])
        else:
            # valid和test不做数据增强
            transform = transforms.Compose([
                transforms.Resize((self.resize_height, self.resize_width)),
                transforms.ToTensor()
            ])
        
        img_data = transform(img_data)
        
        if self.mode == 'test':
            return img_data
        else:
            # 得到图像的 string label
            label = self.label_arr[index]
            # number label
            number_label = label_to_num[label]

            return img_data, number_label  #返回每一个index对应的图片数据和对应的label

    def __len__(self):
        return self.real_len

In [8]:
train_path = '../data/train.csv'
test_path = '../data/test.csv'
# csv文件中已经images的路径了，因此这里只到上一级目录
img_path = '../data/'

train_dataset = LeavesData(train_path, img_path, mode='train')
val_dataset = LeavesData(train_path, img_path, mode='valid')
test_dataset = LeavesData(test_path, img_path, mode='test')
print(train_dataset)
print(val_dataset)
print(test_dataset)

Finished reading the train set of Leaves Dataset (14681 samples found)
Finished reading the valid set of Leaves Dataset (3672 samples found)
Finished reading the test set of Leaves Dataset (8800 samples found)
<__main__.LeavesData object at 0x000001AA6D473370>
<__main__.LeavesData object at 0x000001AA43020D30>
<__main__.LeavesData object at 0x000001AA43298460>


In [9]:
def load_data_leave_img(train_dataset,val_dataset,test_dataset,batch_size):
    """下载叶子图片的数据集，然后将其加载到内存中"""
    return (data.DataLoader(train_dataset, batch_size, shuffle=True,
                            num_workers=0),
            data.DataLoader(val_dataset, batch_size, shuffle=False,
                            num_workers=0),
            data.DataLoader(test_dataset, batch_size, shuffle=False,
                            num_workers=0))

In [8]:
# train_iter,val_iter,test_iter = load_data_leave_img(train_dataset,val_dataset,test_dataset,128)

In [10]:
# 创建一个resnet模型
class Residual(nn.Module):  #残差块
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)

def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk


class ResNetModel(nn.Module):
    def __init__(self, num_classes):
        super(ResNetModel, self).__init__()
        self.b1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.b2 = nn.Sequential(*resnet_block(64, 64, 3, first_block=True))
        self.b3 = nn.Sequential(*resnet_block(64, 128, 4))
        self.b4 = nn.Sequential(*resnet_block(128, 256, 6))
        self.b5 = nn.Sequential(*resnet_block(256, 512, 3))
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.b1(x)
        x = self.b2(x)
        x = self.b3(x)
        x = self.b4(x)
        x = self.b5(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

In [11]:
# 看一下是在cpu还是GPU上
def get_device():
    return 'cuda' if torch.cuda.is_available() else 'cpu'
device = get_device()
print(device)

cuda


In [12]:
net = ResNetModel(176)

In [13]:


def train(net,train_iter,val_iter,learning_rate,num_epochs,weight_decay,device):
    #初始化参数
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)

    # 将模型移到相应device上
    net = net.to(device)
    net.device = device
    #交叉熵损失
    loss = nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate, weight_decay = weight_decay)

    best_acc = 0.0
    for epoch in range(num_epochs):
        #模型训练模式
        net.train()
        #训练损失和训练精度
        train_loss = []
        train_accs = []
        for batch in tqdm(train_iter):
            imgs, labels = batch
            #将图片和标签都移到正确的device上
            imgs = imgs.to(device)
            labels = labels.to(device)
            labels_hat = net(imgs)
            l = loss(labels_hat, labels)
            #更新梯度为0
            optimizer.zero_grad()
            #计算参数的梯度
            l.backward()
            #根据梯度更新参数
            optimizer.step()
            
            # 计算当前批量的精确度
            acc = (labels_hat.argmax(dim=-1) == labels).float().mean()

            # Record the loss and accuracy.
            train_loss.append(l.item())
            train_accs.append(acc)
        #计算整体的训练损失和精确度
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)

        #打印结果
        print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")


        #模型检测模式
        net.eval()
        
        valid_loss = []
        valid_accs = []


        for batch in tqdm(val_iter):
            imgs, labels = batch

            imgs = imgs.to(device)
            labels = labels.to(device)

            with torch.no_grad():
                labels_hat = net(imgs)
                
            l = loss(labels_hat, labels)

            acc = (labels_hat.argmax(dim=-1) == labels.to(device)).float().mean()

            valid_loss.append(l.item())
            valid_accs.append(acc)
        
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)

        print(f"[ Valid | {epoch + 1:03d}/{num_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

        if valid_acc > best_acc :
            best_acc = valid_acc
            torch.save(net.state_dict(), 'model_state_dict.pth')
            print('saving model with acc {:.3f}'.format(best_acc))


In [15]:
lr, num_epochs, batch_size, weight_decay= 3e-4, 30, 16,1e-3
train_iter,val_iter,_ = load_data_leave_img(train_dataset,val_dataset,test_dataset,batch_size)
train(net,train_iter,val_iter,lr,num_epochs,weight_decay,get_device())

100%|██████████| 918/918 [02:56<00:00,  5.19it/s]


[ Train | 001/030 ] loss = 3.79166, acc = 0.13246


100%|██████████| 230/230 [00:17<00:00, 13.22it/s]


[ Valid | 001/030 ] loss = 3.54311, acc = 0.16277


100%|██████████| 918/918 [02:55<00:00,  5.23it/s]


[ Train | 002/030 ] loss = 2.64976, acc = 0.30679


100%|██████████| 230/230 [00:17<00:00, 13.46it/s]


[ Valid | 002/030 ] loss = 5.38220, acc = 0.16386


100%|██████████| 918/918 [02:57<00:00,  5.19it/s]


[ Train | 003/030 ] loss = 2.07124, acc = 0.42490


100%|██████████| 230/230 [00:18<00:00, 12.63it/s]


[ Valid | 003/030 ] loss = 2.79527, acc = 0.30598


100%|██████████| 918/918 [02:56<00:00,  5.20it/s]


[ Train | 004/030 ] loss = 1.75155, acc = 0.50405


100%|██████████| 230/230 [00:17<00:00, 12.91it/s]


[ Valid | 004/030 ] loss = 1.68830, acc = 0.50842


100%|██████████| 918/918 [02:56<00:00,  5.21it/s]


[ Train | 005/030 ] loss = 1.49862, acc = 0.56727


100%|██████████| 230/230 [00:17<00:00, 12.96it/s]


[ Valid | 005/030 ] loss = 1.46665, acc = 0.56440


100%|██████████| 918/918 [02:55<00:00,  5.23it/s]


[ Train | 006/030 ] loss = 1.34656, acc = 0.60474


100%|██████████| 230/230 [00:17<00:00, 13.39it/s]


[ Valid | 006/030 ] loss = 1.45674, acc = 0.56196


100%|██████████| 918/918 [02:56<00:00,  5.21it/s]


[ Train | 007/030 ] loss = 1.21041, acc = 0.64300


100%|██████████| 230/230 [00:17<00:00, 13.22it/s]


[ Valid | 007/030 ] loss = 1.51796, acc = 0.56658


100%|██████████| 918/918 [02:55<00:00,  5.23it/s]


[ Train | 008/030 ] loss = 1.10301, acc = 0.67400


100%|██████████| 230/230 [00:17<00:00, 13.31it/s]


[ Valid | 008/030 ] loss = 1.51952, acc = 0.54701


100%|██████████| 918/918 [02:55<00:00,  5.24it/s]


[ Train | 009/030 ] loss = 1.01121, acc = 0.70202


100%|██████████| 230/230 [00:16<00:00, 13.53it/s]


[ Valid | 009/030 ] loss = 3.49329, acc = 0.30978


100%|██████████| 918/918 [02:53<00:00,  5.29it/s]


[ Train | 010/030 ] loss = 0.93514, acc = 0.72212


100%|██████████| 230/230 [00:17<00:00, 13.30it/s]


[ Valid | 010/030 ] loss = 3.81665, acc = 0.29212


100%|██████████| 918/918 [02:55<00:00,  5.22it/s]


[ Train | 011/030 ] loss = 0.85872, acc = 0.74894


100%|██████████| 230/230 [00:16<00:00, 13.54it/s]


[ Valid | 011/030 ] loss = 1.33647, acc = 0.60842


100%|██████████| 918/918 [02:56<00:00,  5.21it/s]


[ Train | 012/030 ] loss = 0.81681, acc = 0.75459


100%|██████████| 230/230 [00:17<00:00, 13.09it/s]


[ Valid | 012/030 ] loss = 0.89691, acc = 0.73071


100%|██████████| 918/918 [02:57<00:00,  5.17it/s]


[ Train | 013/030 ] loss = 0.75982, acc = 0.77316


100%|██████████| 230/230 [00:17<00:00, 13.08it/s]


[ Valid | 013/030 ] loss = 1.05834, acc = 0.68478


100%|██████████| 918/918 [02:56<00:00,  5.21it/s]


[ Train | 014/030 ] loss = 0.71629, acc = 0.78618


100%|██████████| 230/230 [00:17<00:00, 12.84it/s]


[ Valid | 014/030 ] loss = 1.40142, acc = 0.59022


100%|██████████| 918/918 [02:55<00:00,  5.23it/s]


[ Train | 015/030 ] loss = 0.69437, acc = 0.79430


100%|██████████| 230/230 [00:17<00:00, 12.93it/s]


[ Valid | 015/030 ] loss = 1.38621, acc = 0.61522


100%|██████████| 918/918 [02:57<00:00,  5.17it/s]


[ Train | 016/030 ] loss = 0.66074, acc = 0.80451


100%|██████████| 230/230 [00:17<00:00, 13.12it/s]


[ Valid | 016/030 ] loss = 3.26134, acc = 0.33179


100%|██████████| 918/918 [02:57<00:00,  5.18it/s]


[ Train | 017/030 ] loss = 0.64986, acc = 0.80543


100%|██████████| 230/230 [00:17<00:00, 13.22it/s]


[ Valid | 017/030 ] loss = 0.81534, acc = 0.74348


100%|██████████| 918/918 [02:56<00:00,  5.21it/s]


[ Train | 018/030 ] loss = 0.60690, acc = 0.82458


100%|██████████| 230/230 [00:17<00:00, 13.16it/s]


[ Valid | 018/030 ] loss = 1.04390, acc = 0.69348


100%|██████████| 918/918 [03:00<00:00,  5.09it/s]


[ Train | 019/030 ] loss = 0.56935, acc = 0.83479


100%|██████████| 230/230 [00:18<00:00, 12.58it/s]


[ Valid | 019/030 ] loss = 1.09194, acc = 0.67609


100%|██████████| 918/918 [02:58<00:00,  5.16it/s]


[ Train | 020/030 ] loss = 0.55142, acc = 0.83711


100%|██████████| 230/230 [00:18<00:00, 12.77it/s]


[ Valid | 020/030 ] loss = 1.30091, acc = 0.63207


100%|██████████| 918/918 [02:54<00:00,  5.25it/s]


[ Train | 021/030 ] loss = 0.53699, acc = 0.84243


100%|██████████| 230/230 [00:17<00:00, 13.50it/s]


[ Valid | 021/030 ] loss = 0.76970, acc = 0.76603


100%|██████████| 918/918 [02:55<00:00,  5.22it/s]


[ Train | 022/030 ] loss = 0.52060, acc = 0.84513


100%|██████████| 230/230 [00:18<00:00, 12.32it/s]


[ Valid | 022/030 ] loss = 1.16219, acc = 0.67772


100%|██████████| 918/918 [02:58<00:00,  5.16it/s]


[ Train | 023/030 ] loss = 0.48762, acc = 0.86026


100%|██████████| 230/230 [00:17<00:00, 12.97it/s]


[ Valid | 023/030 ] loss = 1.07650, acc = 0.68043


100%|██████████| 918/918 [02:57<00:00,  5.17it/s]


[ Train | 024/030 ] loss = 0.50619, acc = 0.85384


100%|██████████| 230/230 [00:17<00:00, 13.47it/s]


[ Valid | 024/030 ] loss = 0.79749, acc = 0.76114


100%|██████████| 918/918 [02:54<00:00,  5.26it/s]


[ Train | 025/030 ] loss = 0.47116, acc = 0.86546


100%|██████████| 230/230 [00:17<00:00, 13.48it/s]


[ Valid | 025/030 ] loss = 1.32125, acc = 0.63179


100%|██████████| 918/918 [02:54<00:00,  5.26it/s]


[ Train | 026/030 ] loss = 0.45808, acc = 0.86942


100%|██████████| 230/230 [00:17<00:00, 13.52it/s]


[ Valid | 026/030 ] loss = 0.83380, acc = 0.75435


100%|██████████| 918/918 [02:54<00:00,  5.26it/s]


[ Train | 027/030 ] loss = 0.44923, acc = 0.87088


100%|██████████| 230/230 [00:17<00:00, 13.25it/s]


[ Valid | 027/030 ] loss = 0.69865, acc = 0.78668


100%|██████████| 918/918 [02:54<00:00,  5.27it/s]


[ Train | 028/030 ] loss = 0.42915, acc = 0.87964


100%|██████████| 230/230 [00:16<00:00, 13.54it/s]


[ Valid | 028/030 ] loss = 2.18438, acc = 0.48071


100%|██████████| 918/918 [02:54<00:00,  5.27it/s]


[ Train | 029/030 ] loss = 0.42424, acc = 0.88148


100%|██████████| 230/230 [00:17<00:00, 13.50it/s]


[ Valid | 029/030 ] loss = 0.58950, acc = 0.81603
saving model with acc 0.816


100%|██████████| 918/918 [02:55<00:00,  5.24it/s]


[ Train | 030/030 ] loss = 0.42002, acc = 0.88243


100%|██████████| 230/230 [00:17<00:00, 13.38it/s]


[ Valid | 030/030 ] loss = 1.12573, acc = 0.68832


In [12]:
saveFileName = './submission.csv'

In [None]:

model = ResNetModel(176)

_,_,test_iter = load_data_leave_img(train_dataset,val_dataset,test_dataset,64)
model = model.to(device)
model.load_state_dict(torch.load('model_state_dict.pth'))

model.eval()


predictions = []

for batch in tqdm(test_iter): 
    imgs = batch
    with torch.no_grad():
        logits = model(imgs.to(device))
    
    # Take the class with greatest logit as prediction and record it.
    predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

preds = []
for i in predictions:
    preds.append(num_to_label[i])

test_data = pd.read_csv('../data/test.csv')
test_data['label'] = pd.Series(preds)
submission = pd.concat([test_data['image'], test_data['label']], axis=1)
submission.to_csv(saveFileName, index=False)