# 用pytorch写一个逻辑回归模型 - 二分类
模型结果如下：
![逻辑回归模型](imgs/逻辑回归模型.jpg)

In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.autograd import Variable
from tensorboardX import SummaryWriter
import matplotlib.pyplot as plt

## 加载数据集

In [2]:
df = pd.read_csv("../datasets/二分类数据集/data.csv")
df.head()

Unnamed: 0,1.0000,2.6487,4.5192
0,1.0,1.5438,2.4443
1,1.0,1.899,4.2409
2,1.0,2.4711,5.8097
3,1.0,3.359,6.4423
4,1.0,3.2406,5.8097


In [3]:
X, y = torch.tensor(df.values[:, 1:], dtype=torch.float32), torch.tensor(df.values[:, 0], dtype=torch.float32)

In [None]:
X.shape, y.shape

(torch.Size([99, 2]), torch.Size([99]))

## 定义模型

In [None]:
class LogisticRegression(nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.lr = nn.Linear(2, 1)
        self.sm = nn.Sigmoid()
    
    def forward(self, x):
        x = self.lr(x)
        x = self.sm(x)
        return x

In [None]:
logistic_model = LogisticRegression()
writer = SummaryWriter(log_dir="./lr_ckps/")

if torch.cuda.is_available():
    logistic_model.cuda()
    
# 定义损失函数和优化器
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(logistic_model.parameters(), lr=1e-3, momentum=0.9)

for epoch in range(10000):
    if torch.cuda.is_available():
        x_data = Variable(X).cuda()
        y_data = Variable(y).cuda()
    else:
        x_data = Variable(X)
        y_data = Variable(y)
        
    out = logistic_model(x_data)
    loss = criterion(out, y_data)
    print_loss = loss.data.item()
    mask = out.ge(0.5).float()  # 以0.5为阈值进行分类
    correct = (mask == y_data).sum()  # 计算正确预测的样本个数x
    acc = correct.item() / x_data.size(0)  # 计算精度
    optimizer.zero_grad()  # 将梯度更换为0
    loss.backward()  # 反向传播计算梯度
    optimizer.step()  # 更新梯度
    # 每隔20轮打印一下当前的误差和精度
    if (epoch + 1) % 20 == 0:
        print('*'*10)
        print('epoch {}'.format(epoch+1)) # 训练轮数
        print('loss is {:.4f}'.format(print_loss))  # 误差
        print('acc is {:.4f}'.format(acc))  # 精度
        writer.add_scalar("data/loss", print_loss, epoch)
        writer.add_scalar("data/acc", acc, epoch)
        
# 结果可视化
w0, w1 = logistic_model.lr.weight[0]
w0 = float(w0.item())
w1 = float(w1.item())
b = float(logistic_model.lr.bias.item())
plot_x = np.arange(-7, 7, 0.1)
plot_y = (-w0 * plot_x - b) / w1
plt.scatter(X.data.numpy()[:, 0], X.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.plot(plot_x, plot_y)
plt.show()
writer.close()

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


**********
epoch 20
loss is 0.4194
acc is 49.2828
**********
epoch 40
loss is 0.3461
acc is 49.5657
**********
epoch 60
loss is 0.3040
acc is 49.5051
**********
epoch 80
loss is 0.2866
acc is 49.5051
**********
epoch 100
loss is 0.2713
acc is 49.5051
**********
epoch 120
loss is 0.2584
acc is 49.5051
**********
epoch 140
loss is 0.2470
acc is 49.5051
**********
epoch 160
loss is 0.2370
acc is 49.5051
**********
epoch 180
loss is 0.2280
acc is 49.5051
**********
epoch 200
loss is 0.2200
acc is 49.5051
**********
epoch 220
loss is 0.2127
acc is 49.5051
**********
epoch 240
loss is 0.2060
acc is 49.5051
**********
epoch 260
loss is 0.2000
acc is 49.5051
**********
epoch 280
loss is 0.1944
acc is 49.5051
**********
epoch 300
loss is 0.1892
acc is 49.5051
**********
epoch 320
loss is 0.1845
acc is 49.5051
**********
epoch 340
loss is 0.1800
acc is 49.5051
**********
epoch 360
loss is 0.1759
acc is 49.5051
**********
epoch 380
loss is 0.1720
acc is 49.5051
**********
epoch 400
loss is 0.1684

**********
epoch 3220
loss is 0.0644
acc is 49.5152
**********
epoch 3240
loss is 0.0642
acc is 49.5152
**********
epoch 3260
loss is 0.0640
acc is 49.5152
**********