In [1]:
import torch
import torch.utils.data as Data
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data.dataloader import DataLoader

In [2]:
class PdDataset(Data.Dataset): # 创建自己的类：MyDataset,这个类是继承的torch.utils.data.Dataset
    def __init__(self, root, csvfile, transform=None, target_transform=None): # 初始化需要传入的参数
        super(PdDataset,self).__init__()
        fh = open(root + csvfile, 'r') #按照传入的路径和txt名，打开文本读取内容
        csvs = [] # 创建空列表
        for line in fh: # 按行循环txt文本中的内容
            line = line.rstrip() # 删除本行string字符串末尾的指定字符
            words = line.split() # 通过指定分隔符对字符串进行切片
            csvs.append((words[0],words[1])) # 把txt里的内容读入csv列表保存，[0]为文件路径，[1]是label
        
        self.csvs = csvs
        self.transform = transform
        self.target_transform = target_transform
        
    def __getitem__(self, index): #按照索引读取每个元素的具体内容
        fn, label = self.csvs[index] #fn和label分别获得csvs[index]也即是刚才每行中word[0]和word[1]的信息
        csv = pd.read_csv(fn, header=None)
        csv = torch.from_numpy(csv.values)
        csv = csv.permute(1, 0).float()
        return csv, label # return返回哪些内容，在训练时循环读取每个batch时就能获得哪些内容
 
    def __len__(self): #返回数据集的长度，也就是多少个文件，要和loader的长度作区分
        return len(self.csvs)

In [3]:
root = "d:\\Codes\\keyan\\peidian\\"
#根据自己定义的PdDataset创建数据集
train_data=PdDataset(root, "test_list.txt")
test_data=PdDataset(root, "train_list.txt")

In [4]:
#调用DataLoader创建dataloader，loader的长度是有多少个batch，和batch_size有关
train_loader = DataLoader(dataset=train_data, batch_size=4, shuffle=True, num_workers=0)
test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=False, num_workers=0)

In [5]:
import torch.nn as nn
import torch.nn.functional as F

In [6]:
device = torch.device("cuda")

In [7]:
class CNNNet(nn.Module):
    def __init__(self):
        super(CNNNet, self).__init__()
        self.conv1 = nn.Conv1d(3, 100, 10)
        self.pool1 = nn.MaxPool1d(3)
        self.conv2 = nn.Conv1d(100, 100, 10)
        self.pool2 = nn.MaxPool1d(3)
        self.conv3 = nn.Conv1d(100, 160, 10)
        self.pool3 = nn.MaxPool1d(3)
        self.conv4 = nn.Conv1d(160, 160, 10)
        self.drop1 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(160*1, 48)
        self.fc2 = nn.Linear(48, 32)
        self.fc3 = nn.Linear(32, 5)
    
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self.drop1(F.relu(self.conv4(x)))
        x = x.view(-1, 160*1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [8]:
net = CNNNet()
net = net.to(device)

In [9]:
print(net)

CNNNet(
  (conv1): Conv1d(3, 100, kernel_size=(10,), stride=(1,))
  (pool1): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(100, 100, kernel_size=(10,), stride=(1,))
  (pool2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(100, 160, kernel_size=(10,), stride=(1,))
  (pool3): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv1d(160, 160, kernel_size=(10,), stride=(1,))
  (drop1): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=160, out_features=48, bias=True)
  (fc2): Linear(in_features=48, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=5, bias=True)
)


In [10]:
import torch.optim as optim

In [11]:
criterion = nn.CrossEntropyLoss()
optimizier = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [12]:
def tuple2tensor_char(x):
    return torch.tensor(list(map(int, x)))

In [13]:
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = tuple2tensor_char(labels)
        labels = labels.to(device)
        optimizier.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizier.step()

        running_loss += loss.item()

        print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 2000))
        running_loss = 0.0
    
print('finish')

[1,     1] loss: 0.001
[1,     2] loss: 0.001
[1,     3] loss: 0.001
[1,     4] loss: 0.002
[1,     5] loss: 0.001
[1,     6] loss: 0.001
[1,     7] loss: 0.001
[1,     8] loss: 0.001
[1,     9] loss: 0.001
[1,    10] loss: 0.001
[1,    11] loss: 0.001
[1,    12] loss: 0.001
[1,    13] loss: 0.001
[1,    14] loss: 0.000
[1,    15] loss: 0.001
[1,    16] loss: 0.001
[1,    17] loss: 0.001
[1,    18] loss: 0.001
[1,    19] loss: 0.001
[1,    20] loss: 0.001
[1,    21] loss: 0.001
[1,    22] loss: 0.001
[1,    23] loss: 0.001
[1,    24] loss: 0.001
[1,    25] loss: 0.001
[2,     1] loss: 0.001
[2,     2] loss: 0.001
[2,     3] loss: 0.001
[2,     4] loss: 0.001
[2,     5] loss: 0.001
[2,     6] loss: 0.001
[2,     7] loss: 0.001
[2,     8] loss: 0.001
[2,     9] loss: 0.001
[2,    10] loss: 0.001
[2,    11] loss: 0.001
[2,    12] loss: 0.001
[2,    13] loss: 0.001
[2,    14] loss: 0.001
[2,    15] loss: 0.001
[2,    16] loss: 0.001
[2,    17] loss: 0.001
[2,    18] loss: 0.001
[2,    19] 

In [14]:
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        csvs, labels = data
        csvs = csvs.to(device)
        labels = tuple2tensor_char(labels)
        labels = labels.to(device)
        outputs = net(csvs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
 
print('Accuracy of the network on the test csvs: %d %%' % (100 * correct / total))

Accuracy of the network on the test csvs: 65 %


In [15]:
class_correct = list(0. for i in range(5))
class_total = list(0. for i in range(5))
with torch.no_grad():
    for data in test_loader:
        csvs, labels = data
        csvs = csvs.to(device)
        labels = tuple2tensor_char(labels)
        labels = labels.to(device)
        outputs = net(csvs)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
 
classes = ('0', '1', '2', '3', '4')
for i in range(5):
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of     0 : 62 %
Accuracy of     1 : 100 %
Accuracy of     2 :  0 %
Accuracy of     3 : 71 %
Accuracy of     4 : 91 %
