# Logistic regression 逻辑回归模型 

In [4]:
import torch

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline

## 1.读取数据

In [6]:
data = pd.read_csv('credit-a.csv', header=None)

In [7]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0,30.83,0.000,0,0,9,0,1.25,0,0,1,1,0,202,0.0,-1
1,1,58.67,4.460,0,0,8,1,3.04,0,0,6,1,0,43,560.0,-1
2,1,24.50,0.500,0,0,8,1,1.50,0,1,0,1,0,280,824.0,-1
3,0,27.83,1.540,0,0,9,0,3.75,0,0,5,0,0,100,3.0,-1
4,0,20.17,5.625,0,0,9,0,1.71,0,1,0,1,2,120,0.0,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
648,0,21.08,10.085,1,1,11,1,1.25,1,1,0,1,0,260,0.0,1
649,1,22.67,0.750,0,0,0,0,2.00,1,0,2,0,0,200,394.0,1
650,1,25.25,13.500,1,1,13,7,2.00,1,0,1,0,0,200,1.0,1
651,0,17.92,0.205,0,0,12,0,0.04,1,1,0,1,0,280,750.0,1


In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 653 entries, 0 to 652
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       653 non-null    int64  
 1   1       653 non-null    float64
 2   2       653 non-null    float64
 3   3       653 non-null    int64  
 4   4       653 non-null    int64  
 5   5       653 non-null    int64  
 6   6       653 non-null    int64  
 7   7       653 non-null    float64
 8   8       653 non-null    int64  
 9   9       653 non-null    int64  
 10  10      653 non-null    int64  
 11  11      653 non-null    int64  
 12  12      653 non-null    int64  
 13  13      653 non-null    int64  
 14  14      653 non-null    float64
 15  15      653 non-null    int64  
dtypes: float64(4), int64(12)
memory usage: 81.8 KB


In [9]:
X = data.iloc[:, :-1]  # 取出特征值，iloc[第x1行:第x2行, 第y1列:第y2列]

In [10]:
Y = data.iloc[:, -1]  # 取出目标值

In [11]:
Y = Y.replace(-1, 0)  # 把-1替换为0

## 2.数据预处理

In [12]:
X = torch.from_numpy(X.values.astype(np.float32))

In [13]:
Y = torch.from_numpy(Y.values.reshape(-1, 1).astype(np.float32))

In [14]:
X.shape

torch.Size([653, 15])

In [15]:
Y.shape

torch.Size([653, 1])

## 3.初始化模型、损失函数、目标函数

In [16]:
from torch import nn

In [17]:
model = nn.Sequential(
    nn.Linear(15, 1),  # 线性层，输入特征数15，输出数1
    nn.Sigmoid() # Sigmoid激活层
)

<img src="https://ss1.bdstatic.com/70cFvXSh_Q1YnxGkpoWK1HF6hhy/it/u=1883846040,893574025&fm=15&gp=0.jpg", width="50%">
<img src="https://5b0988e595225.cdn.sohucs.com/images/20181019/a4fe1ff6079142908d7ec4b97fbfb01c.jpeg", width="30%">
$$Sigmoid函数$$

In [18]:
loss_fn = nn.BCELoss()  # 二元交叉熵损失函数

In [19]:
opt = torch.optim.Adam(model.parameters(), lr=0.0001)

## 4.训练模型

In [20]:
batches = 16
num_of_batch = 653//16

In [27]:
epoches = 1000

In [28]:
for epoch in range(epoches):
    for batch in range(num_of_batch):
        start = batches*batch  # 起始序号
        end = start + batches  # 结束序号
        x = X[start: end]
        y = Y[start: end]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        opt.zero_grad()
        loss.backward()
        opt.step()

In [29]:
# 模型状态
model.state_dict()

OrderedDict([('0.weight',
              tensor([[-1.7034e-01, -1.1006e-02,  5.5152e-03,  1.8256e-01,  1.9852e-01,
                       -1.4826e-02,  1.7177e-01, -1.3228e-01,  3.1783e+00,  3.3447e-01,
                       -1.5465e-01, -2.4882e-01, -7.4810e-02,  8.1937e-04, -3.3655e-04]])),
             ('0.bias', tensor([-0.1647]))])

## 5.检查结果

In [30]:
# 正确率
((model(X).data.numpy() > 0.5).astype("int") == Y.numpy()).mean()

0.8728943338437979