# Logistic Regression

In [1]:
# Setup: put the local MFlow checkout on the import path so that
# `mflow` can be imported by the cells below.
import sys

MFLOW_ROOT = "E:/dataFiles/github/MFlow"  # machine-specific location of the MFlow sources
sys.path.append(MFLOW_ROOT)

In [2]:
# Data generation: build a synthetic two-class data set with three
# normally distributed features per sample and a +1 / -1 class label.
import numpy as np

# Male samples, labelled +1.
male = dict(
    height=np.random.normal(171, 6, 500),  # height
    weight=np.random.normal(70, 10, 500),  # weight
    bfr=np.random.normal(16, 2, 500),  # body fat rate
    label=[1] * 500,  # class label
)

# Female samples, labelled -1.
female = dict(
    height=np.random.normal(158, 5, 500),
    weight=np.random.normal(57, 8, 500),
    bfr=np.random.normal(22, 2, 500),
    label=[-1] * 500,
)

# Training matrix: one row per sample, columns are
# (height, weight, bfr, label); male rows first, then female rows.
train_data = np.array(
    [
        np.concatenate((male[key], female[key]))
        for key in ("height", "weight", "bfr", "label")
    ]
).T
# Shuffle the rows in place so the two classes are interleaved.
np.random.shuffle(train_data)

print(train_data.shape)
print(train_data)

(1000, 4)
[[164.26310835  55.93403323  21.15938099  -1.        ]
 [167.9436747   65.03795909  16.83634029   1.        ]
 [174.96691709  75.2623049   18.12819751   1.        ]
 ...
 [160.19576109  59.49484938  21.38799803  -1.        ]
 [177.08489204  66.9367337   10.30163415   1.        ]
 [162.64241359  54.22057884  20.37421834  -1.        ]]


In [3]:
# Training: fit a logistic-regression classifier on `train_data` with the
# MFlow computation-graph library and report training accuracy per epoch.
from mflow import core, ops, opts

# Hyperparameters
lr = 0.0001
epoch = 50
batch_size = 16

with core.NameScope("LogisticRegression"):
    # Graph inputs (fed one sample at a time) and trainable parameters.
    x = core.Variable(size=(3, 1), trainable=False)
    y = core.Variable(size=(1, 1), trainable=False)
    w = core.Variable(size=(1, 3), trainable=True)
    b = core.Variable(size=(1, 1), trainable=True)
    # Model definition: linear score w @ x + b. The logistic output is only
    # read during evaluation; the loss is built on y * pred because the
    # labels are encoded as -1 / +1.
    pred = ops.Add(ops.MatMal(w, x), b)
    predicter = ops.Logistic(pred)
    loss = ops.loss.LogLoss(ops.Multiply(y, pred))
    adam = opts.Adam(core.DefaultGraph, loss, lr)
    # Training loop
    for ep in range(1, epoch + 1):
        bs_idx = 0  # number of samples stepped since the last update
        # One pass over the training set.
        for data in train_data:
            # np.asmatrix replaces np.mat, which was removed in NumPy 2.0;
            # np.mat was an alias of it, so the values fed are unchanged.
            x.setValue(np.asmatrix(data[:-1]).T)
            y.setValue(np.asmatrix(data[-1]))
            # NOTE(review): step() presumably accumulates this sample's
            # gradient and update() applies the accumulated batch -- confirm
            # against mflow.opts.
            adam.step()
            bs_idx += 1
            if bs_idx == batch_size:
                adam.update()
                bs_idx = 0
        # len(train_data) need not be a multiple of batch_size; apply the
        # trailing partial batch so its samples are not left pending when
        # the epoch is evaluated.
        if bs_idx:
            adam.update()
        # Evaluate on the training set after each epoch.
        preds = []
        for data in train_data:
            x.setValue(np.asmatrix(data[:-1]).T)
            predicter.forward()
            preds.append(predicter.value[0, 0])  # logistic output for this sample
        # Threshold at 0.5, then map 0/1 to -1/+1 to match the label encoding.
        preds = (np.array(preds) > 0.5).astype("int") * 2 - 1
        acc = np.mean(train_data[:, -1] == preds)
        print("Epoch: {:d}, acc: {:.3f}.".format(ep, acc))

Epoch: 1, acc: 0.500.
Epoch: 2, acc: 0.508.
Epoch: 3, acc: 0.567.
Epoch: 4, acc: 0.646.
Epoch: 5, acc: 0.710.
Epoch: 6, acc: 0.762.
Epoch: 7, acc: 0.798.
Epoch: 8, acc: 0.822.
Epoch: 9, acc: 0.846.
Epoch: 10, acc: 0.861.
Epoch: 11, acc: 0.869.
Epoch: 12, acc: 0.880.
Epoch: 13, acc: 0.885.
Epoch: 14, acc: 0.888.
Epoch: 15, acc: 0.891.
Epoch: 16, acc: 0.897.
Epoch: 17, acc: 0.900.
Epoch: 18, acc: 0.900.
Epoch: 19, acc: 0.905.
Epoch: 20, acc: 0.906.
Epoch: 21, acc: 0.905.
Epoch: 22, acc: 0.910.
Epoch: 23, acc: 0.911.
Epoch: 24, acc: 0.913.
Epoch: 25, acc: 0.914.
Epoch: 26, acc: 0.913.
Epoch: 27, acc: 0.916.
Epoch: 28, acc: 0.916.
Epoch: 29, acc: 0.920.
Epoch: 30, acc: 0.921.
Epoch: 31, acc: 0.921.
Epoch: 32, acc: 0.922.
Epoch: 33, acc: 0.925.
Epoch: 34, acc: 0.926.
Epoch: 35, acc: 0.928.
Epoch: 36, acc: 0.928.
Epoch: 37, acc: 0.929.
Epoch: 38, acc: 0.929.
Epoch: 39, acc: 0.929.
Epoch: 40, acc: 0.929.
Epoch: 41, acc: 0.929.
Epoch: 42, acc: 0.930.
Epoch: 43, acc: 0.932.
Epoch: 44, acc: 0.93