## pytorch NN 预测

In [1]:
import torch
torch.__version__

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

#划分特征/标签
labels = np.array(dff['USEFULBOTECOUNT'])
features = dff.drop('USEFULVOTECOUNT',axis=1)
features = np.array(features)

#特征标准化
from sklearn.preprocessing import StandardScaler
input_features = StandardScaler.fit_transform(features)

#构建网络模型
x = torch.tensor(input_features, dtype=float)
y = torch.tensor(labels, dtype=float)

#权重参数初始化
weights = torch.randn((14,128), dtype=float, requires_grad=True)
biases = torch.randn(128, dtype=float, requires_grad=True)
weights2 = torch.randn((128,1), dtype=float, requires_grad=True)
biases2 = torch.randn(1, dtype=float, requires_grad=True)

learning_rate = 0.01
losses = []

for i in range(1000):
    #第一层隐藏层
    hidden = x.mm(weights)+biases    # 'mm' 矩阵乘法
    #激活函数
    hidden = torch.relu(hidden)
    #output
    predictions = hidden.mm(weights2)+biases2
    
    #定义损失函数（MSE）
    loss = torch.mean((y-predictions)**2)
    losses.append(loss.data.numpy())
    
    #打印损失
    if (i % 100)==0:
        print('loss: '+loss)
        
    #计算反向传播
    loss.backward()  #计算出梯度值
    
    #更新参数
    weights.data.add_(-learning_rate * weights.grad.data)
    biases.data.add_(-learning_rate * biases.grad.data)
    weights2.data.add_(-learning_rate * weights2.grad.data)
    biases2.data.add_(-learning_rate * biases2.grad.data)
    
    #每次迭代清空梯度，否则会叠加
    weights.grad.data.zero_
    biases.grad.data.zero_
    weights2.grad.data.zero_
    biases2.grad.data.zero_

## pytorch简化代码训练网络模型

In [None]:
input_size = input_features.shape[1]
hidden_size = 128
output_size = 1
batch_size = 16
my_nn = torch.nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size, output_size)
)
cost = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)

#训练网络
losses = []
for i in range(1000):
    batch_loss = []
    
    # mini-batch 训练方法   mini-batch/full-batch
    for start in range(0,len(input_features),batch_size):
        end = start + batch_size if (start + batch_size) < len(input_features) else len(input_features)
        xx = torch.tensor(input_features[start:end], dtype=float, requires_grad=True)
        yy = torch.tendor(labels[start:end], dtype=float, requires_grad=True)
        prediction = my_nn(xx)
        loss = cost(prediction, yy)
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        batch_loss.append(loss.data.numpy())
    #打印损失
    if i%100==0:
        losses.append(np.mean(batch_loss))
        print(i, np.mean(batch_loss))

---
## pytorch分类任务

In [None]:
import torch

In [None]:
#训练集、测试集数据转换成 tensor格式
x_train, x_test, y_train, y_test = map(torch.tensor, (x_train, x_test, y_train, y_test))

#### torch.nn.functional  /   torch.nn.module

In [2]:
import torch.nn.functional as F   #用于调用loss function / activation function
loss_func = F.cross_entropy

## 创建model
- 必须继承nn.Module，且在其构造函数中必须调用nn.Module的构造函数
- 无需写反向传播函数，nn.Module能通过autograd自动实现反向传播
- module中的可学习参数可以通过named_parameters()或parameters()返回迭代器

In [4]:
from torch import nn

class my_NN(nn.Module):  #继承nn.module
    def __init__(self):
        super().__init__()  #调用nn.module的构造函数
        self.hidden1 = nn.Linear(784,128)  #定义model中包含哪些层
        self.hidden2 = nn.Linear(128,256)
        self.out = nn.Linear(256,10)
    
    def forward(self,x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = self.out(x)
        return x

In [5]:
net = my_NN()  #权重已经初始化
print(net)

my_NN(
  (hidden1): Linear(in_features=784, out_features=128, bias=True)
  (hidden2): Linear(in_features=128, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)


In [9]:
for name, parameter in net.named_parameters():
    print(name,'\n',parameter,'\n',parameter.size())

hidden1.weight 
 Parameter containing:
tensor([[-0.0056,  0.0296, -0.0335,  ...,  0.0202, -0.0137,  0.0168],
        [ 0.0125,  0.0133, -0.0286,  ...,  0.0050,  0.0168,  0.0008],
        [ 0.0147, -0.0203, -0.0082,  ..., -0.0263,  0.0242, -0.0193],
        ...,
        [-0.0062, -0.0166, -0.0218,  ..., -0.0331, -0.0214, -0.0257],
        [-0.0134, -0.0304,  0.0166,  ...,  0.0029,  0.0006,  0.0099],
        [-0.0024,  0.0241, -0.0030,  ..., -0.0234, -0.0107, -0.0136]],
       requires_grad=True) 
 torch.Size([128, 784])
hidden1.bias 
 Parameter containing:
tensor([-0.0100,  0.0046,  0.0219, -0.0171,  0.0312,  0.0146, -0.0134,  0.0352,
        -0.0342,  0.0211,  0.0323,  0.0349,  0.0006,  0.0222,  0.0188, -0.0019,
         0.0265,  0.0149,  0.0169,  0.0187, -0.0110,  0.0289,  0.0291,  0.0285,
        -0.0120, -0.0062, -0.0140, -0.0206,  0.0301,  0.0059,  0.0067,  0.0280,
        -0.0321,  0.0344,  0.0232,  0.0120,  0.0340,  0.0198, -0.0153, -0.0171,
         0.0260, -0.0073,  0.0054, -0.

## 使用TensorDataset和DataLoader来优化

In [None]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)

def get_data(train_dataset, test_dataset, bs):
    return (DataLoader(train_dataset, batch_size=bs, shuffle=True), DataLoader(test_dataset, batch_size=bs*2))

In [None]:
from torch import optim 
def get_model():
    model = my_NN()
    return model, optim.SGD(model.parameters(),lr=0.001)

In [None]:
import numpy as np

def fit(steps, model, loss_func, opt, train_dl, test_dl):
    for step in range(steps):
        model.train()
        for xb,yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

In [None]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

## CNN

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms   #数据预处理    data augmentation
from torchvision import datasets
from torchvision import models
import numpy as np

In [2]:
#定义超参数
input_size = 28   #图像总尺寸（28*28*1）
num_classes = 10   #标签种类数
num_epochs = 3   #训练总循环周期
batch_size = 64   

#训练集、测试集
train_datasets = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_datasets = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

#构建batch数据
train_loader = torch.utils.data.DataLoader(dataset=train_datasets, batch_size=batch_size, shuffle=True)  #把x,y都分开了，（data，target） 生成器generator（迭代器）
test_loader = torch.utils.data.DataLoader(dataset=test_datasets, batch_size=batch_size, shuffle=True)

In [None]:
for (data,target) in train_loader:
    print(type(data))

In [3]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()          #输入（28*28*1）
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,         
                kernel_size=5,
                stride=1,
                padding=2
            ),                         #经过第一层conv，（28-5+2*2）/1+1=28，（28*28*16）
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)   #maxpooling，（14*14*16）
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16,32,5,1,2),       #经过第二层conv，（14-5+2*2）/1+1=14，（14*14*32）
            nn.ReLU(),
            nn.MaxPool2d(2)               #maxpooling，（7*7*32）
        )                        
        self.out = nn.Linear(32*7*7, 10)
    
    def forward(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0),-1)   # flatten操作, view()类似reshape(), 重构tensor, 结果为（batch_size, 7*7*32）
        output = self.out(x)
        return output

In [4]:
#准确率作为评估标准
def accuracy(predictions, labels):
    pred = torch.max(predictions.data, 1)[1]
    rights = pred.eq(labels.data.view_as(pred)).sum()
    return rights, len(labels)

In [None]:
for data,target in train_loader:
    print(data,end=',')

In [11]:
net = CNN()
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)  #优化器，普通随机梯度下降，Adam

#训练循环
for epoch in range(num_epochs):
    #保留当前epoch结果
    train_rights = []
    
    for data,target in train_loader:   #data -> x ; target -> y
        net.train()
        output = net(data)
        
        loss = loss_func(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        right = accuracy(output, target)
        train_rights.append(right)

In [12]:
train_rights

[(tensor(63), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(62), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(61), 64),
 (tensor(63), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(62), 64),
 (tensor(64), 64),
 (tensor(63), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(62), 64),
 (tensor(62), 64),
 (tensor(63), 64),
 (tensor(64), 64),
 (tensor(63), 64),
 (tensor(64), 64),
 (tensor(62), 64),
 (tensor(62), 64),
 (tensor(64), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(62), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(62), 64),
 (tensor(62), 64),
 (tensor(63), 64),
 (tensor(64), 64),
 (tensor(64), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(63), 64),
 (tensor(63)

In [7]:
net = CNN()
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)  #优化器，普通随机梯度下降，Adam

#训练循环
for epoch in range(num_epochs):
    #保留当前epoch结果
    train_rights = []
    
    for batch_index, (data,target) in enumerate(train_loader):   #data -> x ; target -> y
        net.train()
        output = net(data)
        
        loss = loss_func(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        right = accuracy(output, target)
        train_rights.append(right)
        
        if (batch_index % 100)==0:
            net.eval()
            val_rights = []
            
            for (data,target) in enumerate(test_loader):
                output = net(data)
                right = accuracy(output,target)
                val_rights.append(right)        

TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not int

In [16]:
print(len(train_datasets),len(test_datasets))

60000 10000


---
## 数据预处理
- data_transforms中指定了所有图像预处理操作
- data augmentation 数据增强

In [21]:
data_transforms = {
    'train': transforms.Compose([transforms.RandomRotation(45),  #随即旋转，-45~45°之间
        transforms.CenterCrop(224),  #从中心开始裁剪
        transforms.RandomHorizontalFlip(p=0.5),  #随机水平翻转，选择一个概率
        transforms.RandomVerticalFlip(p=0.5),  #随机垂直翻转
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),  #亮度，对比度，饱和度，色相
        transforms.RandomGrayscale(p=0.025),  
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])  #均值，标准差
    ]),
    'valid': transforms.Compose([transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
    ])    
}

In [22]:
data_transforms

{'train': Compose(
     RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)
     CenterCrop(size=(224, 224))
     RandomHorizontalFlip(p=0.5)
     RandomVerticalFlip(p=0.5)
     ColorJitter(brightness=[0.8, 1.2], contrast=[0.9, 1.1], saturation=[0.9, 1.1], hue=[-0.1, 0.1])
     RandomGrayscale(p=0.025)
     ToTensor()
     Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 ),
 'valid': Compose(
     Resize(size=256, interpolation=bilinear)
     CenterCrop(size=(224, 224))
     ToTensor()
     Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 )}

## 加载resnet预训练网络

In [25]:
from torchvision import models
model = models.resnet152()
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# 使用 CPU / GPU 进行训练

In [2]:
feature_extract = True

In [6]:
def initialize_model(model_name, num_classes, feature_extract):   # featrue_extract -> 是否要冻结预训练模型的哪些层true/false
    model_ft = None
    input_size = 0
    
    if model_name == 'resnet':
        model_ft = models.resnet152(pretrained = True)
        set_parameters_requires_grad(model_ft, feature_extract)   #冻结预训练模型
        num_ftrs = model_ft.fc.in_features  
        model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes), nn.LogSoftmax(dim=1))   #重构全连接层
        input_size = 224
    
    return model_ft, input_size

In [7]:
def set_parameters_requires_grad(model, feature_extracting):  #迁移学习， 选择冻结预训练模型的哪些层， feature_extracting = True/False
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

### 模型初始化，显示哪些层需要训练，params_to_update（需要训练的参数）

In [10]:
model_ft, input_size = initialize_model('resnet', 102, feature_extract)

#GPU计算
model_ft = model_ft.to(device)  #device

#模型保存
filename = 'checkpoint.pth'

#是否训练所有层
params_to_update = model_ft.parameters()
print('Params to learn: ')
if feature_extract: 
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print('\t', name)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            print('\t', name)

Params to learn: 
	 fc.0.weight
	 fc.0.bias


In [11]:
model_ft

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

### 优化器设置

In [None]:
optimizer_ft = optim.Adam(params_to_update, lr=1e-2)


### 训练模型

In [None]:
def train_model(model, dataloader, criterion, optimizer, num_epochs=25, is_inception=False, filename=filename):
    