pytorch深度学习实践

https://www.bilibili.com/video/BV1Y7411d7Ys?p=1&vd_source=6d033c01bacc1b94de92d9ff542bdb52


https://liuii.github.io

用PyTorch实现线性回归

In [None]:
# 1、算预测值
# 2、算loss
# 3、梯度设为0，并反向传播
# 4、梯度更新


import torch

x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])


# 构造线性模型,后面都是使用这样的模板
# 至少实现两个函数，__init__构造函数和forward()前馈函数
# backward()会根据我们的计算图自动构建
# 可以继承Functions来构建自己的计算块
class LinerModel(torch.nn.Module):
    """Single-feature linear regression model: ``y_pred = w * x + b``.

    Follows the usual ``torch.nn.Module`` template: sub-modules are created
    in ``__init__`` and the forward computation lives in ``forward``; the
    backward pass is derived by autograd from that forward computation.
    """

    def __init__(self):
        # The parent Module must be initialised before registering layers.
        super().__init__()
        # One input feature mapped to one output feature. The layer owns a
        # learnable weight and (bias defaults to True) a learnable bias.
        self.linear = torch.nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Return the linear layer's prediction for ``x``."""
        return self.linear(x)


model = LinerModel()
# Squared-error loss summed over the batch. ``reduction='sum'`` is the
# supported spelling of the deprecated ``size_average=False``.
criterion = torch.nn.MSELoss(reduction='sum')
# SGD optimiser; model.parameters() hands it every learnable weight.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(100):
    # 1. forward pass, 2. loss
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    # Print the plain Python number rather than the tensor repr.
    print(epoch, loss.item())

    # 3. clear stale gradients, then back-propagate
    optimizer.zero_grad()
    loss.backward()
    # 4. update the parameters using the gradients and the learning rate
    optimizer.step()

# weight is stored as a 1x1 matrix; .item() extracts its single value.
print('w=', model.linear.weight.item())
print('b=', model.linear.bias.item())

# Evaluate on an unseen input; no autograd graph is needed for inference.
x_test = torch.Tensor([4.0])
with torch.no_grad():
    y_test = model(x_test)
print('y_pred=', y_test)



逻辑斯蒂回归

In [None]:
# 逻辑斯蒂回归
import torch.nn
import torch.nn.functional as F

x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[0], [0], [1]])


class LogisticRegressionModel(torch.nn.Module):
    """Single-feature logistic regression: ``sigmoid(w * x + b)``."""

    def __init__(self):
        super(LogisticRegressionModel, self).__init__()
        # One input feature mapped to one output value.
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        """Return the sigmoid of the linear output, a value in (0, 1)."""
        # torch.sigmoid is used instead of F.sigmoid, which emits a
        # deprecation warning on several PyTorch releases.
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred


model = LogisticRegressionModel()
# Binary cross-entropy loss summed over the batch. ``reduction='sum'`` is
# the supported spelling of the deprecated ``size_average=False``.
criterion = torch.nn.BCELoss(reduction='sum')
# SGD optimiser; model.parameters() hands it every learnable weight.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    # Print the plain Python number rather than the tensor repr.
    print(epoch, loss.item())

    # Clear stale gradients, then back-propagate.
    optimizer.zero_grad()
    loss.backward()
    # Update the parameters using the gradients and the learning rate.
    optimizer.step()

# weight is stored as a 1x1 matrix; .item() extracts its single value.
print('w=', model.linear.weight.item())
print('b=', model.linear.bias.item())

# Evaluate on an unseen input; no autograd graph is needed for inference.
x_test = torch.Tensor([4.0])
with torch.no_grad():
    y_test = model(x_test)
print('y_pred=', y_test)



处理多维特征的输入

In [2]:
import numpy as np
import torch

xy = np.loadtxt('diabetes.csv.gz', delimiter=',', dtype=np.float32)
x_data = torch.from_numpy(xy[:, :-1])
y_data = torch.from_numpy(xy[:, [-1]])
# mean, std = torch.mean(x_data), torch.std(x_data)
# x_data = (x_data-mean)/std
# mean, std = torch.mean(y_data), torch.std(y_data)
# y_data = (y_data-mean)/std
# x_data = torch.nn.functional.normalize(x_data, dim=0)
# y_data = torch.nn.functional.normalize(y_data, dim=0)



class Model(torch.nn.Module):
    """Three sigmoid-activated linear layers: 8 -> 4 -> 2 -> 1 features."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 4)
        self.linear2 = torch.nn.Linear(4, 2)
        self.linear3 = torch.nn.Linear(2, 1)

        # A single Sigmoid module is reused after every linear layer.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Pass ``x`` through the three layers, applying sigmoid after each."""
        hidden1 = self.sigmoid(self.linear1(x))
        hidden2 = self.sigmoid(self.linear2(hidden1))
        return self.sigmoid(self.linear3(hidden2))


model = Model()
# Binary cross-entropy averaged over the batch. ``reduction='mean'`` is the
# supported spelling of the deprecated ``size_average=True``.
criterion = torch.nn.BCELoss(reduction='mean')
# Gradient-descent optimiser over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    # Forward pass: the whole dataset is fed as one batch.
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())

    # Backward pass: clear stale gradients, then back-propagate.
    optimizer.zero_grad()
    loss.backward()

    # Parameter update
    optimizer.step()


0 0.6563534736633301
1 0.6555374264717102
2 0.6547828912734985
3 0.6540851593017578
4 0.6534399390220642
5 0.6528429388999939
6 0.6522905826568604
7 0.6517795324325562
8 0.6513063311576843
9 0.6508685350418091
10 0.6504629850387573
11 0.6500874757766724
12 0.6497397422790527
13 0.6494176387786865
14 0.6491191983222961
15 0.6488426923751831
16 0.6485865712165833
17 0.6483490467071533
18 0.6481289267539978
19 0.6479249000549316
20 0.6477357149124146
21 0.6475602388381958
22 0.6473976373672485
23 0.6472467184066772
24 0.6471067667007446
25 0.6469769477844238
26 0.6468565464019775
27 0.6467447876930237
28 0.6466410756111145
29 0.6465448141098022
30 0.6464555263519287
31 0.6463726758956909
32 0.6462957859039307
33 0.6462243795394897
34 0.6461580395698547
35 0.6460965275764465
36 0.6460393071174622
37 0.6459861993789673
38 0.6459369659423828
39 0.6458911299705505
40 0.6458486914634705
41 0.6458092331886292
42 0.6457726359367371
43 0.6457385420799255
44 0.645706832408905
45 0.6456774473190308

加载数据集

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset  # Dataset是一个抽象类，只能被继承，不能实例化
from torch.utils.data import DataLoader  # 可以直接实例化

'''
四步：准备数据集-设计模型-构建损失函数和优化器-周期训练
'''


class DiabetesDataset(Dataset):
    """Map-style dataset backed by a comma-separated numeric file.

    Each row is one sample: every column except the last is a feature and
    the last column is the label. The whole file is loaded into memory when
    the dataset is constructed.
    """

    def __init__(self, filepath):
        table = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        # Indexing the label column with a list keeps it two-dimensional.
        features, targets = table[:, :-1], table[:, [-1]]
        self.len = table.shape[0]
        self.x_data = torch.from_numpy(features)
        self.y_data = torch.from_numpy(targets)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples (rows) in the file."""
        return self.len


dataset = DiabetesDataset('diabetes.csv.gz')
# DataLoader arguments: the dataset to draw from, the mini-batch size, whether
# to shuffle the samples, and how many worker processes load data in parallel.
# NOTE(review): multi-process loading behaves differently on Windows and
# Linux, so code iterating this loader is normally placed inside a function or
# an ``if __name__ == '__main__':`` guard — confirm for the target platform.
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=2)


class Model(torch.nn.Module):
    """Three sigmoid-activated linear layers: 8 -> 6 -> 4 -> 1 features."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        # nn.Sigmoid is a parameter-free module (the functional API offers
        # the same operation as a plain function); one instance is reused.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Pass ``x`` through the three layers, applying sigmoid after each."""
        hidden1 = self.sigmoid(self.linear1(x))
        hidden2 = self.sigmoid(self.linear2(hidden1))
        return self.sigmoid(self.linear3(hidden2))


model = Model()
# Binary cross-entropy averaged over the batch. ``reduction='mean'`` is the
# supported spelling of the deprecated ``size_average=True``.
criterion = torch.nn.BCELoss(reduction='mean')
# Stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Nested loop: the outer loop counts epochs, the inner one walks mini-batches.
for epoch in range(100):
    # enumerate(..., 0) numbers the batches starting from 0.
    for i, (inputs, labels) in enumerate(train_loader, 0):
        # Forward pass
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.item())
        # Backward pass: clear stale gradients, then back-propagate.
        optimizer.zero_grad()
        loss.backward()
        # Parameter update
        optimizer.step()



多分类问题

In [None]:
import torch
from torchvision import transforms  # 对图像进行处理的工具
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F  # 使用激活函数relu()的包
import torch.optim as optim  # 优化器的包

batch_size = 64
# Preprocessing pipeline applied to every image.
transform = transforms.Compose([
    # Convert the PIL image (H x W x C, values in [0, 255]) into a float
    # tensor of shape (C x H x W) with values in [0.0, 1.0].
    transforms.ToTensor(),
    # Normalise the tensor with the given mean and standard deviation
    # (presumably the MNIST statistics — verify if the dataset changes).
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(root='dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

test_dataset = datasets.MNIST(root='dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


class Net(torch.nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10."""

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        """Return the raw scores of the 10 output units for each sample."""
        # Flatten every sample to 784 values; -1 lets view() work out the
        # number of rows so that the element count is preserved.
        x = x.view(-1, 784)
        for hidden_layer in (self.l1, self.l2, self.l3, self.l4):
            x = F.relu(hidden_layer(x))
        # No activation on the last layer: the raw scores are returned.
        return self.l5(x)


model = Net()
# Cross-entropy loss
criterion = torch.nn.CrossEntropyLoss()
# model.parameters() passes every parameter of the model to the optimiser.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # SGD with a momentum factor


def train(epoch):
    """Train the module-level ``model`` for one pass over ``train_loader``.

    Prints the mean loss of each completed group of 300 batches. ``epoch``
    is the zero-based epoch index and is only used in that printout.
    """
    report_every = 300
    running_loss = 0.0
    # Each item yielded by the loader is an (inputs, target) pair.
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # forward + backward + update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % report_every == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / report_every))
            running_loss = 0.0


def test():
    """Print the accuracy of the module-level ``model`` on ``test_loader``."""
    hits = 0
    seen = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            # Each output row holds one score per class; the index of the
            # row maximum (second result of torch.max) is the prediction.
            predicted = torch.max(outputs.data, dim=1)[1]
            # labels.size(0) is the number of samples in this batch.
            seen += labels.size(0)
            # .sum() gives a tensor; .item() turns it into a Python number.
            hits += (predicted == labels).sum().item()
    print('Accuracy on test set:%d %%' % (100 * hits / seen))


if __name__ == '__main__':
    # Alternate one training pass and one evaluation pass for 10 epochs.
    for epoch in range(10):
        train(epoch)
        test()



卷积神经网络
简单的构建

In [None]:
import torch

# Channel counts of the convolution (called n and m in the course diagram,
# which is not reproduced in this file).
in_channels = 5
out_channels = 10
width = height = 100  # image size
kernel_size = 3  # side length of the convolution kernel
batch_size = 1  # number of samples in the batch

# Random mini-batch holding one 5 x 100 x 100 tensor.
input = torch.randn(batch_size, in_channels, width, height)

# Conv2d applies a 2-D convolution over an input made of several planes.
conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size)

output = conv_layer(input)

# Shapes of the input, the output and the layer's weight, one per line.
print(input.shape, output.shape, conv_layer.weight.shape, sep='\n')


运行结果：

In [None]:
torch.Size([1, 5, 100, 100])
torch.Size([1, 10, 98, 98])
torch.Size([10, 5, 3, 3])

padding

In [None]:
import torch

# 5 x 5 single-channel image reshaped to (batch, channel, height, width).
input = torch.Tensor([[3, 4, 6, 5, 7],
                      [2, 4, 6, 8, 2],
                      [1, 6, 7, 8, 4],
                      [9, 7, 4, 6, 2],
                      [3, 7, 5, 4, 1]]).view(1, 1, 5, 5)

# padding=1 keeps the 5 x 5 spatial size; bias=False drops the bias term.
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)

kernel = torch.Tensor([[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]]).view(1, 1, 3, 3)
# Overwrite the randomly initialised weights with the fixed kernel.
conv_layer.weight.data = kernel.data

output = conv_layer(input)
print(output)


运行结果：

In [None]:
tensor([[[[ 91., 168., 224., 215., 127.],
          [114., 211., 295., 262., 149.],
          [192., 259., 282., 214., 122.],
          [194., 251., 253., 169.,  86.],
          [ 96., 112., 110.,  68.,  31.]]]], grad_fn=<ThnnConv2DBackward>)

Layer-stride步长

In [None]:
import torch

# 5 x 5 single-channel image reshaped to (batch, channel, height, width).
input = torch.Tensor([[3, 4, 6, 5, 7],
                      [2, 4, 6, 8, 2],
                      [1, 6, 7, 8, 4],
                      [9, 7, 4, 6, 2],
                      [3, 7, 5, 4, 1]]).view(1, 1, 5, 5)

# stride=2 moves the kernel two pixels at a time; bias=False drops the bias.
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, stride=2, bias=False)

kernel = torch.Tensor([[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]]).view(1, 1, 3, 3)
# Overwrite the randomly initialised weights with the fixed kernel.
conv_layer.weight.data = kernel.data

output = conv_layer(input)
print(output)



运行结果：

In [None]:
tensor([[[[211., 262.],
          [251., 169.]]]], grad_fn=<ThnnConv2DBackward>)

Max Pooling Layer最大池化层（最大池化层是没有权重的）

In [None]:
import torch

# 4 x 4 single-channel image reshaped to (batch, channel, height, width).
input = torch.Tensor([[3, 9, 6, 5],
                      [2, 4, 6, 8],
                      [1, 6, 2, 1],
                      [3, 7, 4, 6]]).view(1, 1, 4, 4)

# kernel_size=2: each output value is the maximum of one 2 x 2 window.
maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)

output = maxpooling_layer(input)
print(output)



运行结果：

In [None]:
tensor([[[[9., 8.],
          [7., 6.]]]])

In [None]:
import torch
from torchvision import transforms  # 对图像进行处理的工具
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F  # 使用激活函数relu()的包
import torch.optim as optim  # 优化器的包

batch_size = 64
# Preprocessing pipeline applied to every image.
transform = transforms.Compose([
    # Convert the PIL image (H x W x C, values in [0, 255]) into a float
    # tensor of shape (C x H x W) with values in [0.0, 1.0].
    transforms.ToTensor(),
    # Normalise the tensor with the given mean and standard deviation
    # (presumably the MNIST statistics — verify if the dataset changes).
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(root='dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

test_dataset = datasets.MNIST(root='dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Net(torch.nn.Module):
    """Small CNN: two conv + max-pool + ReLU stages, then one linear layer."""

    def __init__(self):
        super().__init__()
        # Two convolution layers
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # One pooling layer, shared by both stages
        self.pooling = torch.nn.MaxPool2d(2)
        # Fully connected output layer taking 320 flattened features
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return the raw scores of the 10 output units for each sample."""
        # x.size(0) is the number of samples in the batch.
        n_samples = x.size(0)
        # Each stage: convolution, then pooling, then ReLU.
        for conv in (self.conv1, self.conv2):
            x = F.relu(self.pooling(conv(x)))
        # Flatten each sample so it can be fed to the linear layer.
        flat = x.view(n_samples, -1)
        # No activation here: the loss used with this network is
        # cross-entropy, which takes the raw scores.
        return self.fc(flat)

model = Net()
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model to the selected device.
model.to(device)
# Cross-entropy loss
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # SGD with a momentum factor


def train(epoch):
    """Train the module-level ``model`` for one pass over ``train_loader``.

    Prints the mean loss of each completed group of 300 batches. ``epoch``
    is the zero-based epoch index and is only used in that printout.
    """
    report_every = 300
    running_loss = 0.0
    # Each item yielded by the loader is an (inputs, target) pair.
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        inputs = inputs.to(device)
        target = target.to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # forward + backward + update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % report_every == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / report_every))
            running_loss = 0.0


def test():
    """Print the accuracy of the module-level ``model`` on ``test_loader``."""
    hits = 0
    seen = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Each output row holds one score per class; the index of the
            # row maximum (second result of torch.max) is the prediction.
            predicted = torch.max(outputs.data, dim=1)[1]
            # labels.size(0) is the number of samples in this batch.
            seen += labels.size(0)
            # .sum() gives a tensor; .item() turns it into a Python number.
            hits += (predicted == labels).sum().item()
    print('Accuracy on test set:%d %%' % (100 * hits / seen))


if __name__ == '__main__':
    # Alternate one training pass and one evaluation pass for 10 epochs.
    for epoch in range(10):
        train(epoch)
        test()



卷积神经网络（高级）GoogLeNet

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms  # 对图像进行处理的工具
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F  # 使用激活函数relu()的包
import torch.optim as optim  # 优化器的包

batch_size = 64
# Preprocessing pipeline applied to every image.
transform = transforms.Compose([
    # Convert the PIL image (H x W x C, values in [0, 255]) into a float
    # tensor of shape (C x H x W) with values in [0.0, 1.0].
    transforms.ToTensor(),
    # Normalise the tensor with the given mean and standard deviation
    # (presumably the MNIST statistics — verify if the dataset changes).
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(root='dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

test_dataset = datasets.MNIST(root='dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


class InceptionA(nn.Module):
    """Inception-style block with four parallel branches.

    The branch outputs (16, 24, 24 and 24 channels) are concatenated along
    the channel dimension, so the block emits 88 channels. Every branch uses
    kernel/padding combinations that keep the spatial size unchanged.
    """

    def __init__(self, in_channels):
        super().__init__()
        # Branch 1: a single 1x1 convolution producing 16 channels.
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        # Branch 2: 1x1 convolution followed by a padded 5x5 convolution.
        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        # Branch 3: 1x1 convolution followed by two padded 3x3 convolutions.
        self.branch3x3_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        # Branch 4: 1x1 convolution applied after average pooling.
        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate their outputs."""
        out_1x1 = self.branch1x1(x)
        out_5x5 = self.branch5x5_2(self.branch5x5_1(x))
        out_3x3 = self.branch3x3_3(self.branch3x3_2(self.branch3x3_1(x)))
        pooled = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        out_pool = self.branch_pool(pooled)

        # dim=1 is the channel dimension of a (batch, channel, H, W) tensor.
        return torch.cat((out_1x1, out_5x5, out_3x3, out_pool), dim=1)


class Net(nn.Module):
    """CNN that follows each conv + max-pool stage with an InceptionA block."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        # conv2 consumes the 88 channels produced by the first InceptionA.
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)

        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)

        self.mp = nn.MaxPool2d(2)
        # 1408 flattened features feed the 10-way output layer.
        self.fc = nn.Linear(1408, 10)

    def forward(self, x):
        """Return the raw scores of the 10 output units for each sample."""
        n_samples = x.size(0)
        stage1 = self.incep1(F.relu(self.mp(self.conv1(x))))
        stage2 = self.incep2(F.relu(self.mp(self.conv2(stage1))))
        # Flatten each sample before the fully connected layer.
        flat = stage2.view(n_samples, -1)
        return self.fc(flat)


model = Net()
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model to the selected device.
model.to(device)
# Cross-entropy loss
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # SGD with a momentum factor


def train(epoch):
    """Train the module-level ``model`` for one pass over ``train_loader``.

    Prints the mean loss of each completed group of 300 batches. ``epoch``
    is the zero-based epoch index and is only used in that printout.
    """
    report_every = 300
    running_loss = 0.0
    # Each item yielded by the loader is an (inputs, target) pair.
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        inputs = inputs.to(device)
        target = target.to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # forward + backward + update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % report_every == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / report_every))
            running_loss = 0.0


def test():
    """Print the accuracy of the module-level ``model`` on ``test_loader``."""
    hits = 0
    seen = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Each output row holds one score per class; the index of the
            # row maximum (second result of torch.max) is the prediction.
            predicted = torch.max(outputs.data, dim=1)[1]
            # labels.size(0) is the number of samples in this batch.
            seen += labels.size(0)
            # .sum() gives a tensor; .item() turns it into a Python number.
            hits += (predicted == labels).sum().item()
    print('Accuracy on test set:%d %%' % (100 * hits / seen))


if __name__ == '__main__':
    # Alternate one training pass and one evaluation pass for 10 epochs.
    for epoch in range(10):
        train(epoch)
        test()



Residual net残差结构块

定义的该层输入和输出的大小是一样的

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms  # 对图像进行处理的工具
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F  # 使用激活函数relu()的包
import torch.optim as optim  # 优化器的包

batch_size = 64
# Preprocessing pipeline applied to every image.
transform = transforms.Compose([
    # Convert the PIL image (H x W x C, values in [0, 255]) into a float
    # tensor of shape (C x H x W) with values in [0.0, 1.0].
    transforms.ToTensor(),
    # Normalise the tensor with the given mean and standard deviation
    # (presumably the MNIST statistics — verify if the dataset changes).
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(root='dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

test_dataset = datasets.MNIST(root='dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


class ResidualBlock(nn.Module):
    """Residual block: ``relu(x + conv2(relu(conv1(x))))``.

    Both convolutions keep the channel count and, with kernel 3 and
    padding 1, the spatial size, so the output has the same shape as the
    input and can be added to it.
    """

    def __init__(self, channels):
        super().__init__()
        self.channels = channels
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self, x):
        """Return the activated sum of ``x`` and the convolved branch."""
        branch = self.conv2(F.relu(self.conv1(x)))
        # Skip connection: add the input back before the final activation.
        return F.relu(x + branch)


class Net(nn.Module):
    """CNN that follows each conv + ReLU + max-pool stage with a ResidualBlock."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.mp = nn.MaxPool2d(2)

        # One residual block per stage, matching that stage's channel count.
        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)

        # 512 flattened features feed the 10-way output layer.
        self.fc = nn.Linear(512, 10)

    def forward(self, x):
        """Return the raw scores of the 10 output units for each sample."""
        n_samples = x.size(0)
        stage1 = self.rblock1(self.mp(F.relu(self.conv1(x))))
        stage2 = self.rblock2(self.mp(F.relu(self.conv2(stage1))))
        # Flatten each sample before the fully connected layer.
        flat = stage2.view(n_samples, -1)
        return self.fc(flat)
model = Net()
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model to the selected device.
model.to(device)
# Cross-entropy loss
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # SGD with a momentum factor


def train(epoch):
    """Train the module-level ``model`` for one pass over ``train_loader``.

    Prints the mean loss of each completed group of 300 batches. ``epoch``
    is the zero-based epoch index and is only used in that printout.
    """
    report_every = 300
    running_loss = 0.0
    # Each item yielded by the loader is an (inputs, target) pair.
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        inputs = inputs.to(device)
        target = target.to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # forward + backward + update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % report_every == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / report_every))
            running_loss = 0.0


def test():
    """Print the accuracy of the module-level ``model`` on ``test_loader``."""
    hits = 0
    seen = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Each output row holds one score per class; the index of the
            # row maximum (second result of torch.max) is the prediction.
            predicted = torch.max(outputs.data, dim=1)[1]
            # labels.size(0) is the number of samples in this batch.
            seen += labels.size(0)
            # .sum() gives a tensor; .item() turns it into a Python number.
            hits += (predicted == labels).sum().item()
    print('Accuracy on test set:%d %%' % (100 * hits / seen))


if __name__ == '__main__':
    # Alternate one training pass and one evaluation pass for 10 epochs.
    for epoch in range(10):
        train(epoch)
        test()

RNNCell

In [None]:
import torch

batch_size = 1  # number of sequences in a batch
seq_len = 3  # number of time steps (x1, x2, x3)
input_size = 4  # dimension of each input vector
hidden_size = 2  # dimension of each hidden-state vector

cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Random sequence laid out as (seq_len, batch_size, input_size) and an
# all-zero initial hidden state of shape (batch_size, hidden_size).
dataset = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(batch_size, hidden_size)

banner = '=' * 20
# Iterating the first dimension yields one time step per iteration.
for idx, input in enumerate(dataset):
    print(banner, idx, banner)
    print('Input size:', input.shape)

    # The new hidden state is fed back into the cell at the next step.
    hidden = cell(input, hidden)

    print('Outputs size:', hidden.shape)
    print(hidden)



RNN

In [None]:
import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1
cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

# Input layout: (seq_len, batch_size, input_size)
inputs = torch.randn(seq_len, batch_size, input_size)
# Initial hidden state layout: (num_layers, batch_size, hidden_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

# The RNN walks the whole sequence in one call and returns the per-step
# outputs together with the final hidden state.
out, hidden = cell(inputs, hidden)

for caption, value in (('Output size:', out.shape),
                       ('Output:', out),
                       ('Hidden size: ', hidden.shape),
                       ('Hidden: ', hidden)):
    print(caption, value)


使用RNNcell

In [2]:
import torch

batch_size = 1
input_size = 4
hidden_size = 4

# Character vocabulary and the index sequences used as input and target.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [2, 3, 2, 0, 1]

# Row i of the lookup table is the one-hot vector of class i.
one_hot_lookup = [[1 if col == row else 0 for col in range(4)]
                  for row in range(4)]

# One-hot encode the inputs and reshape to (seq_len, batch_size, input_size).
x_one_hot = [one_hot_lookup[x] for x in x_data]
inputs = torch.tensor(x_one_hot).view(-1, batch_size, input_size).float()

# The targets are one-hot encoded and reshaped in the same way.
y_one_hot = [one_hot_lookup[y] for y in y_data]
labels = torch.tensor(y_one_hot).view(-1, batch_size, input_size).float()


class Net(torch.nn.Module):
    """Thin wrapper around a single ``torch.nn.RNNCell``.

    Args:
        input_size: dimension of each input vector.
        hidden_size: dimension of the hidden state.
        batch_size: batch size used when building the initial hidden state.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        super(Net, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.rnncell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Advance the cell by one time step and return the new hidden state."""
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape (batch_size, hidden_size)."""
        # The tensor must be returned: previously it was only assigned to a
        # local variable, so callers received None instead of a state.
        return torch.zeros(self.batch_size, self.hidden_size)


net = Net(input_size, hidden_size, batch_size)

# Loss and optimiser. The labels built earlier in this cell are float
# one-hot vectors, so they are passed to the loss as per-class targets.
criterion = torch.nn.CrossEntropyLoss()
Optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

for epoch in range(15):
    # The loss is accumulated over the time steps of the sequence.
    loss = 0
    net.zero_grad()
    hidden = net.init_hidden()
    print('Predicted string: ', end='')
    # zip pairs each time step's input with its label.
    for input, label in zip(inputs, labels):
        hidden = net(input, hidden)
        loss = loss + criterion(hidden, label)
        # Index of the largest hidden value is the predicted character.
        idx = hidden.max(dim=1)[1]
        print(idx2char[idx.item()], end='')

    # Back-propagate through all time steps, then update the weights.
    loss.backward()
    Optimizer.step()
    print(", Epoch: [%d/15] loss = %.4f" % (epoch+1, loss.item()))

Predicted string: ehhhe, Epoch: [1/15] loss = 8.4598
Predicted string: ehlhl, Epoch: [2/15] loss = 7.2599
Predicted string: lhlel, Epoch: [3/15] loss = 6.3341
Predicted string: lhlel, Epoch: [4/15] loss = 5.4571
Predicted string: loleh, Epoch: [5/15] loss = 4.5418
Predicted string: loleh, Epoch: [6/15] loss = 3.6996
Predicted string: loleh, Epoch: [7/15] loss = 3.2128
Predicted string: loleh, Epoch: [8/15] loss = 3.0056
Predicted string: loleh, Epoch: [9/15] loss = 2.9081
Predicted string: loleh, Epoch: [10/15] loss = 2.8354
Predicted string: loleh, Epoch: [11/15] loss = 2.7565
Predicted string: loleh, Epoch: [12/15] loss = 2.6594
Predicted string: loleh, Epoch: [13/15] loss = 2.5422
Predicted string: loleh, Epoch: [14/15] loss = 2.4102
Predicted string: loleh, Epoch: [15/15] loss = 2.2700


使用RNN

In [1]:
import torch

input_size = 4
hidden_size = 3
batch_size = 1
num_layers = 1
seq_len = 5
# Character tables: idx2char_1 for the input indices, idx2char_2 for the
# (three-class) output indices.
idx2char_1 = ['e', 'h', 'l', 'o']
idx2char_2 = ['h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [2, 0, 1, 2, 1]
# y_data = [3, 1, 2, 2, 3]
# Row i of the lookup table is the one-hot vector of input class i.
one_hot_lookup = [[1 if col == row else 0 for col in range(4)]
                  for row in range(4)]

x_one_hot = [one_hot_lookup[x] for x in x_data]

# Inputs laid out as (seq_len, batch_size, input_size).
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
# Labels are class indices, one per time step, as a 1-D integer tensor.
labels = torch.LongTensor(y_data)

class Model(torch.nn.Module):
    """RNN whose per-step outputs are flattened for a cross-entropy loss.

    Args:
        input_size: dimension of each input vector.
        hidden_size: dimension of the hidden state (and of each output).
        batch_size: batch size used to build the initial hidden state.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super().__init__()
        self.batch_size = batch_size  # needed to build the initial state
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = torch.nn.RNN(input_size=input_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers)

    def forward(self, input):
        """Run the RNN from a zero state; return (seq_len * batch, hidden)."""
        h0_shape = (self.num_layers, self.batch_size, self.hidden_size)
        out, _ = self.rnn(input, torch.zeros(*h0_shape))
        # Merge the sequence and batch dimensions so the result can be
        # compared against a flat vector of labels.
        return out.view(-1, self.hidden_size)

net = Model(input_size, hidden_size, batch_size, num_layers)


criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# RNN input layout:  (seq_len, batch_size, input_size)
# RNN output layout: (seq_len, batch_size, hidden_size), flattened by the
# model to (seq_len * batch_size, hidden_size)
# labels: one class index per flattened row
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Index of the largest output in each row is the predicted class.
    idx = outputs.max(dim=1)[1]
    idx = idx.data.numpy()
    predicted = ''.join([idx2char_2[x] for x in idx])
    print('Predicted string: ', predicted, end = '')
    print(", Epoch [%d/15] loss = %.3f" % (epoch+1, loss.item()))

Predicted string:  hhhhh, Epoch [1/15] loss = 1.204
Predicted string:  hhllh, Epoch [2/15] loss = 1.105
Predicted string:  lhlll, Epoch [3/15] loss = 1.026
Predicted string:  lhlll, Epoch [4/15] loss = 0.968
Predicted string:  ohlll, Epoch [5/15] loss = 0.918
Predicted string:  ohlll, Epoch [6/15] loss = 0.873
Predicted string:  ohlll, Epoch [7/15] loss = 0.829
Predicted string:  ohlll, Epoch [8/15] loss = 0.788
Predicted string:  ohlll, Epoch [9/15] loss = 0.750
Predicted string:  oolll, Epoch [10/15] loss = 0.714
Predicted string:  oolol, Epoch [11/15] loss = 0.683
Predicted string:  oolol, Epoch [12/15] loss = 0.655
Predicted string:  oolol, Epoch [13/15] loss = 0.626
Predicted string:  oolol, Epoch [14/15] loss = 0.594
Predicted string:  ohlol, Epoch [15/15] loss = 0.557


embedding and linear layer

In [None]:
import torch

input_size = 4
num_class = 4
hidden_size = 8
embedding_size = 10
batch_size = 1
num_layers = 2
seq_len = 5

idx2char_1 = ['e', 'h', 'l', 'o']
idx2char_2 = ['h', 'l', 'o']

x_data = [[1, 0, 2, 2, 3]]
y_data = [3, 1, 2, 2, 3]

# inputs: character indices fed to the model, shape (batch_size, seq_len)
inputs = torch.LongTensor(x_data)
# labels: target class indices for the loss, shape (batch_size * seq_len,)
labels = torch.LongTensor(y_data)

class Model(torch.nn.Module):
    """Embedding -> batch-first RNN -> linear classifier.

    All sizes are read from module-level settings (``input_size``,
    ``embedding_size``, ``hidden_size``, ``num_layers``, ``num_class``).
    """

    def __init__(self):
        super().__init__()
        # Maps each input index to a dense vector.
        self.emb = torch.nn.Embedding(input_size, embedding_size)

        # batch_first=True: the RNN takes (batch, seq_len, features).
        self.rnn = torch.nn.RNN(input_size=embedding_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)

        # Projects every hidden state to one score per class.
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Return class scores with shape (batch * seq_len, num_class)."""
        # Zero initial state; x.size(0) is the batch size.
        h0 = torch.zeros(num_layers, x.size(0), hidden_size)
        embedded = self.emb(x)
        rnn_out, _ = self.rnn(embedded, h0)
        scores = self.fc(rnn_out)
        return scores.view(-1, num_class)

net = Model()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

for epoch in range(15):
    # Clear stale gradients, run the model and compute the loss.
    optimizer.zero_grad()
    outputs = net(inputs)

    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Index of the largest score in each row is the predicted character.
    idx = outputs.max(dim=1)[1]
    idx = idx.data.numpy()
    predicted = ''.join([idx2char_1[x] for x in idx])
    print('Predicted string: ', predicted, end = '')
    print(", Epoch [%d/15] loss = %.3f" % (epoch+1, loss.item()))

In [None]:
'''
根据名字识别他所在的国家
人名字符长短不一，最长的10个字符，所以处理成10维输入张量，都是英文字母刚好可以映射到ASCII上
Maclean ->  ['M', 'a', 'c', 'l', 'e', 'a', 'n'] ->  [ 77 97 99 108 101 97 110]  ->  [ 77 97 99 108 101 97 110 0 0 0]
共有18个国家，设置索引为0-17
训练集和测试集的表格文件都是第一列人名，第二列国家
'''
import torch
import  time
import csv
import gzip
from  torch.utils.data import DataLoader
import datetime
import matplotlib.pyplot as plt
import numpy as np

# Parameters
HIDDEN_SIZE = 100   # embedding width and GRU hidden size of the classifier
BATCH_SIZE = 256    # samples per DataLoader batch
N_LAYER = 2         # number of stacked GRU layers
N_EPOCHS = 100      # training epochs
N_CHARS = 128       # embedding vocabulary size (names are encoded with ord())
# Use the GPU only when one is actually present, so the script also runs on
# CPU-only machines instead of failing on the first .to("cuda:0") call.
USE_GPU = torch.cuda.is_available()

class NameDataset():
    """Name/country pairs read from a gzip-compressed CSV file.

    Each CSV row is expected to hold the person's name in the first column
    and the country in the second. Countries are mapped to integer ids by
    their position in the sorted list of distinct country names.
    """

    def __init__(self, is_train_set=True):
        """Load 'names_train.csv.gz' or 'names_test.csv.gz' from the CWD."""
        filename = 'names_train.csv.gz' if is_train_set else 'names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:    # 'rt': decompress and read as text
            rows = list(csv.reader(f))
        self.names = [row[0] for row in rows]           # first column: names
        self.len = len(self.names)                      # number of samples
        self.countries = [row[1] for row in rows]       # second column: country names
        # Distinct country names in sorted order; the position is the class id.
        # sorted() already returns a list, so no extra list() wrapper is needed.
        self.country_list = sorted(set(self.countries))
        self.country_dict = self.getCountryDict()       # country name -> class id
        self.country_num = len(self.country_list)       # number of classes

    def __getitem__(self, index):
        """Return (name, country id) for the sample at ``index``."""
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def getCountryDict(self):
        """Build the country name -> class id mapping from ``country_list``."""
        return {country_name: idx
                for idx, country_name in enumerate(self.country_list)}

    def idx2country(self, index):
        """Return the country name for class id ``index``.

        Raises:
            IndexError: if ``index`` is outside the range of known countries.
        """
        # country_list is a list: it must be indexed, not called.
        return self.country_list[index]

    def getCountrysNum(self):
        """Return the number of distinct countries."""
        return self.country_num
 
# Training data is reshuffled every epoch; test data keeps its file order.
trainset = NameDataset()
trainloader = DataLoader(trainset, shuffle=True, batch_size=BATCH_SIZE)
testset = NameDataset(False)
testloader = DataLoader(testset, shuffle=False, batch_size=BATCH_SIZE)

# Number of classes the classifier has to output.
N_COUNTRY = trainset.getCountrysNum()
 
def create_tensor(tensor):
    """Return ``tensor`` moved to "cuda:0" when USE_GPU is set, else unchanged."""
    if not USE_GPU:
        return tensor
    return tensor.to(torch.device("cuda:0"))
 
class RNNClassifier(torch.nn.Module):
    """Character-level GRU classifier: embedding -> (bi)GRU -> linear layer.

    Args:
        input_size: vocabulary size of the embedding table.
        hidden_size: embedding width and GRU hidden size.
        output_size: number of target classes.
        n_layers: number of stacked GRU layers.
        bidirectional: run the GRU in both directions when True.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        # hidden_size and n_layers are kept to size the initial hidden state.
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = 2 if bidirectional else 1

        # (seq_len, batch) indices -> (seq_len, batch, hidden_size) vectors.
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        # Arguments: input width, hidden width, layer count, direction flag.
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional=bidirectional)
        # A bidirectional GRU yields two final hidden vectors per sample,
        # so the linear layer input is hidden_size * n_directions wide.
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def forward(self, input, seq_lengths):
        """Return class scores of shape (batch, output_size).

        Args:
            input: LongTensor of padded character codes, (batch, seq_len),
                with rows ordered by decreasing true length.
            seq_lengths: true length of each row, in the same order.
        """
        input = input.t()               # (batch, seq) -> (seq, batch) for the embedding/GRU
        batch_size = input.size(1)
        hidden = self._init_hidden(batch_size)
        embedding = self.embedding(input)

        # pack_padded_sequence requires the lengths on the CPU, even when the
        # data itself lives on the GPU.
        seq_lengths = seq_lengths.cpu()
        # Packing lets the GRU skip the zero padding. It needs the batch sorted
        # by decreasing length, which the caller is responsible for.
        gru_input = torch.nn.utils.rnn.pack_padded_sequence(embedding, seq_lengths)

        _, hidden = self.gru(gru_input, hidden)
        if self.n_directions == 2:
            # Concatenate the last two slices of the final hidden state
            # (one per direction).
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            hidden_cat = hidden[-1]
        return self.fc(hidden_cat)

    def _init_hidden(self, batch_size):
        """Return a zero initial hidden state on the same device as the model.

        The device is taken from the embedding weights rather than from a
        global flag, so the state always matches wherever the module lives.
        """
        return torch.zeros(self.n_layers * self.n_directions,
                           batch_size,
                           self.hidden_size,
                           device=self.embedding.weight.device)
 
#classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
 
#对名字的处理需要先把每个名字按字符都变成ASCII码
def name2list(name):
    """Return (list of code points of ``name``, number of characters)."""
    codes = list(map(ord, name))
    return codes, len(codes)
 
def make_tensors(names, countries):
    """Turn a batch of names and country ids into padded, length-sorted tensors.

    Returns:
        (seq_tensor, seq_lengths, countries): the zero-padded character codes
        with shape (batch, longest name), the true lengths in decreasing
        order, and the country ids permuted the same way. Each tensor is
        passed through create_tensor().
    """
    encoded = [name2list(name) for name in names]
    name_sequences = [codes for codes, _ in encoded]
    seq_lengths = torch.LongTensor([length for _, length in encoded])
    countries = countries.long()

    # Start from an all-zero (batch, longest name) matrix and copy each
    # name's codes into the front of its row; the rest stays as padding.
    seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()
    for row, codes in enumerate(name_sequences):
        seq_tensor[row, :len(codes)] = torch.LongTensor(codes)

    # pack_padded_sequence needs the batch ordered by decreasing length,
    # so sort the lengths and apply the same permutation to data and labels.
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)
    sorted_names = seq_tensor[perm_idx]
    sorted_countries = countries[perm_idx]

    return (create_tensor(sorted_names),
            create_tensor(seq_lengths),
            create_tensor(sorted_countries))
 
def trainModel():
    """Run one training epoch over trainloader and return the summed batch loss.

    Uses the module-level classifier, criterion and optimizer. The running
    loss is printed once, at batch number len(trainset) // BATCH_SIZE.
    """
    report_at = len(trainset) // BATCH_SIZE
    total_loss = 0

    for i, (names, countries) in enumerate(trainloader, 1):
        optimizer.zero_grad()
        # Padded codes, sorted lengths and matching country ids for this batch.
        inputs, seq_lengths, target = make_tensors(names, countries)
        loss = criterion(classifier(inputs, seq_lengths), target)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        if i == report_at:
            print(f'loss={total_loss / (i * len(inputs))}')

    return total_loss
 
def testModel():
    """Score the module-level classifier on testloader; return accuracy in [0, 1]."""
    total = len(testset)
    correct = 0

    # Gradients are not needed while scoring.
    with torch.no_grad():
        for names, countries in testloader:
            inputs, seq_lengths, target = make_tensors(names, countries)
            output = classifier(inputs, seq_lengths)
            # Index of the highest score per row is the predicted class.
            _, pred = output.max(dim=1, keepdim=True)
            hits = pred.eq(target.view_as(pred))
            correct += hits.sum().item()

        percent = '%.2f' % (100 * correct / total)
        print(f'Test set: Accuracy {correct}/{total} {percent}%')
    return correct / total
 
if __name__ == '__main__':
    print("Train for %d epochs..." % N_EPOCHS)
    start = time.time()

    # classifier, criterion and optimizer are module-level names that
    # trainModel() and testModel() read.
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
    if USE_GPU:
        device = torch.device('cuda:0')
        classifier.to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

    # One training pass followed by one evaluation pass per epoch.
    acc_list = []
    for epoch in range(1, N_EPOCHS + 1):
        print('%d / %d:' % (epoch, N_EPOCHS))
        trainModel()
        acc_list.append(testModel())
    end = time.time()
    # Elapsed wall-clock time, truncated to whole seconds.
    print(datetime.timedelta(seconds=(end - start) // 1))

    # Plot test accuracy against the epoch number.
    epoch = np.arange(1, len(acc_list) + 1, 1)
    acc_list = np.array(acc_list)
    plt.plot(epoch, acc_list)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.grid()
    plt.show()
 

shelter_animals实战

In [None]:
'''在美国，每年大约有760万伴侣动物被动物收容所收容。大多数动物是被它们的主人主动放弃，而另一些则是由于种种的意外情况而进入收容所。最终，有些动物足够幸运找到了新的归宿，但另一些不那么幸运的则最终被安乐死。美国每年大约有２７０万的猫狗被执行安乐死。
　　这次的比赛使用的是来自Austin的动物收容所的数据，其中包括动物的品种，颜色，性别和年龄，要求参赛者预测每只动物的最终结局。这些结局包括：被领养、死亡、安乐死、归还所有者和转移。其中训练集和测试集是随机划分的。
　　最后输出测试集中每个动物的每一种结局的可能性即可。
'''
import os
import shutil
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

# Prefer the GPU, but fall back to the CPU so the script still runs on
# machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train = pd.read_csv('shelter_animals_train.csv')
# print("Shape:", train.shape)

test = pd.read_csv('shelter_animals_test.csv')
# print("Shape:", test.shape)

# Counter(train['OutcomeType'])
# Counter(train['Name']).most_common(5)
# Features: everything except the target, its subtype and the row identifier.
train_X = train.drop(columns=['OutcomeType', 'OutcomeSubtype', 'AnimalID'])
Y = train['OutcomeType']
test_X = test
# Stack train and test rows so both are encoded with the same categories.
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
# (row indices are kept as-is, exactly like append did).
stacked_df = pd.concat([train_X, test_X.drop(columns=['ID'])])
stacked_df = stacked_df.drop(columns=['DateTime'])

# Drop every column with more than 10000 missing values.
for col in stacked_df.columns:
    if stacked_df[col].isnull().sum() > 10000:
        # print("dropping", col, stacked_df[col].isnull().sum())
        stacked_df = stacked_df.drop(columns=[col])
# Fill the remaining gaps ("NA" for text, 0 otherwise), then replace every
# value by an integer code.
for col in stacked_df.columns:
    if stacked_df.dtypes[col] == "object":
        stacked_df[col] = stacked_df[col].fillna("NA")
    else:
        stacked_df[col] = stacked_df[col].fillna(0)
    stacked_df[col] = LabelEncoder().fit_transform(stacked_df[col])
# making all variables categorical
for col in stacked_df.columns:
    stacked_df[col] = stacked_df[col].astype('category')
# Split the stacked frame back by position. The boundary is the number of
# training rows, taken from the data instead of a hard-coded row count.
n_train_rows = len(train_X)
X = stacked_df.iloc[:n_train_rows]
test_processed = stacked_df.iloc[n_train_rows:]
# check if shape[0] matches original
# print("train shape: ", X.shape, "orignal: ", train.shape)
# print("test shape: ", test_processed.shape, "original: ", test.shape)
# Replace the outcome names by integer class ids.
Y = LabelEncoder().fit_transform(Y)
# sanity check to see numbers match and matching with previous counter to create target dictionary
# print(Counter(train['OutcomeType']))
# print(Counter(Y))
# Outcome name -> class id, written down by hand from the check above.
target_dict = dict(
    Return_to_owner=3,
    Euthanasia=2,
    Adoption=0,
    Transfer=4,
    Died=1,
)
# Hold out 10% of the training rows for validation (fixed seed).
splits = train_test_split(X, Y, test_size=0.10, random_state=0)
X_train, X_val, y_train, y_val = splits

# Columns with more than two categories get an embedding; the dict maps the
# column name to its category count.
category_counts = {n: len(col.cat.categories) for n, col in X.items()}
embedded_cols = {n: count for n, count in category_counts.items() if count > 2}
embedded_col_names = embedded_cols.keys()
len(X.columns) - len(embedded_cols)  # number of numerical columns
# One (category count, embedding width) pair per embedded column; the width
# is about half the category count, capped at 50.
embedding_sizes = [(n_categories, min(50, (n_categories + 1) // 2))
                   for n_categories in embedded_cols.values()]

print("X:", type(X), X.shape)
print("Y:", type(Y), Y.shape)


class ShelterOutcomeDataset(Dataset):
    """Dataset yielding (categorical codes, numerical features, label) per row.

    Args:
        X: DataFrame with one row per sample.
        Y: labels aligned with the rows of ``X`` by position (array, list or
            Series).
        embedded_col_names: names of the columns of ``X`` that are fed to
            embedding layers; all other columns count as numerical.
    """

    def __init__(self, X, Y, embedded_col_names):
        # Materialise the names so any iterable (e.g. dict keys) is a valid
        # pandas indexer.
        embedded_col_names = list(embedded_col_names)
        self.X1 = X.loc[:, embedded_col_names].values.astype(np.int64)  # categorical columns
        self.X2 = X.drop(columns=embedded_col_names).values.astype(np.float32)  # numerical columns
        # Store labels as an ndarray so __getitem__ indexes by position; a
        # pandas Series would be looked up by index label instead.
        self.y = np.asarray(Y)

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the sample at position ``idx``."""
        return self.X1[idx], self.X2[idx], self.y[idx]


# Wrap the train/validation splits as datasets.
train_ds = ShelterOutcomeDataset(X_train, y_train, embedded_col_names)
valid_ds = ShelterOutcomeDataset(X_val, y_val, embedded_col_names)

train_data_size, valid_data_size = len(train_ds), len(valid_ds)

# Both loaders serve shuffled batches of 512 rows.
train_dataloader = DataLoader(train_ds, shuffle=True, batch_size=512)
valid_ds_dataloader = DataLoader(valid_ds, shuffle=True, batch_size=512)


class ShelterOutcomeModel(nn.Module):
    """MLP over concatenated categorical embeddings and numerical features.

    Args:
        embedding_sizes: one (category count, embedding width) pair per
            categorical column.
        n_cont: number of numerical feature columns.
    """

    def __init__(self, embedding_sizes, n_cont):
        super().__init__()
        # One embedding table per categorical column.
        self.embeddings = nn.ModuleList(
            [nn.Embedding(categories, size) for categories, size in embedding_sizes]
        )
        # Total width of all embedding vectors placed side by side.
        self.n_emb = sum(table.embedding_dim for table in self.embeddings)
        self.n_cont = n_cont
        self.lin1 = nn.Linear(self.n_emb + self.n_cont, 200)
        self.lin2 = nn.Linear(200, 70)
        self.lin3 = nn.Linear(70, 5)    # five output scores
        self.bn1 = nn.BatchNorm1d(self.n_cont)
        self.bn2 = nn.BatchNorm1d(200)
        self.bn3 = nn.BatchNorm1d(70)
        self.emb_drop = nn.Dropout(0.6)
        self.drops = nn.Dropout(0.3)

    def forward(self, x_cat, x_cont):
        """Return scores of shape (batch, 5).

        Column i of ``x_cat`` is looked up in embedding table i; ``x_cont``
        holds the numerical features.
        """
        looked_up = [table(x_cat[:, col]) for col, table in enumerate(self.embeddings)]
        embedded = self.emb_drop(torch.cat(looked_up, 1))
        # Numerical features are batch-normalised before joining the embeddings.
        features = torch.cat([embedded, self.bn1(x_cont)], 1)
        # Two hidden stages: linear -> ReLU -> dropout -> batch norm.
        hidden = self.bn2(self.drops(F.relu(self.lin1(features))))
        hidden = self.bn3(self.drops(F.relu(self.lin2(hidden))))
        return self.lin3(hidden)


# Every column that is not embedded is fed to the model as a numerical
# feature, so derive the count from the data instead of hard-coding it.
n_cont = len(X.columns) - len(embedded_cols)
model = ShelterOutcomeModel(embedding_sizes, n_cont)
model = model.to(device)

loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.to(device)

learning_rate = 1e-3
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=0.0)


def train(epoch):
    """Run one training epoch over train_dataloader.

    Uses the module-level model, loss_func and optimizer. Prints the mean
    batch loss since the previous report every 5 batches.

    Args:
        epoch: zero-based epoch number, used only in the progress output.
    """
    # Make sure dropout and batch-norm are in training mode.
    model.train()
    running_loss = 0.0
    batches_since_report = 0
    for batch_idx, data in enumerate(train_dataloader, 0):
        # Each batch: categorical codes, numerical features, labels.
        x1, x2, y = data
        # Move the batch to the device the model lives on.
        x1, x2, y = x1.to(device), x2.to(device), y.to(device)
        optimizer.zero_grad()
        # forward + backward + update
        outputs = model(x1, x2)
        loss = loss_func(outputs, y.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_since_report += 1
        if batch_idx % 5 == 0:
            # Average over the batches actually accumulated, not a fixed
            # constant, so the printed value is a true mean batch loss.
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / batches_since_report))
            running_loss = 0.0
            batches_since_report = 0


def test():
    """Score the module-level model on valid_ds_dataloader.

    Returns:
        Accuracy as a percentage (0-100).
    """
    correct = 0
    total = 0
    # Evaluate with dropout disabled and batch-norm using its running
    # statistics; the previous mode is restored afterwards so a following
    # training pass is unaffected.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed while scoring.
        with torch.no_grad():
            for data in valid_ds_dataloader:
                x1, x2, y = data
                # Move the batch to the device the model lives on.
                x1, x2, y = x1.to(device), x2.to(device), y.to(device)
                outputs = model(x1, x2)
                # The index of the highest score in each row is the prediction.
                _, predicted = torch.max(outputs.data, dim=1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
    finally:
        model.train(was_training)
    print('Accuracy on test set:%d %%' % (100 * correct / total))
    return 100 * correct / total


if __name__ == '__main__':
    # Train for 15 epochs, recording the validation accuracy after each one.
    total_accuracy = []
    for epoch in range(15):
        train(epoch)
        total_accuracy.append(test())

    # Accuracy-per-epoch curve.
    figure = plt.figure(figsize=(8, 8))
    plt.plot(range(15), total_accuracy)
    plt.title("ShelterAnimals")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.grid(visible=True)
    plt.show()
