# Logit模型

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats, optimize
import pandas as pd
import statsmodels.api as sm # 使用 statsmodels 包实现 Logit 模型

In [2]:
# 设置随机种子以确保结果可重现
np.random.seed(42)

# 生成自变量：学习时间（小时）
n_samples = 50  # 样本量
study_hours_new = np.random.uniform(1, 8, n_samples)

# 生成线性预测值
true_beta0 = -3.5  # 真实截距
true_beta1 = 0.8   # 真实斜率
linear_pred = true_beta0 + true_beta1 * study_hours_new

# 计算通过概率
pass_prob = 1 / (1 + np.exp(-linear_pred))

# 生成二元因变量：是否通过考试
pass_exam_new = np.random.binomial(1, pass_prob) # 生成服从二项分布的随机数的函数

# 创建DataFrame
df_logit = pd.DataFrame({
    'study_hours': study_hours_new,
    'pass_exam': pass_exam_new,
    'pass_prob': pass_prob
})

df_logit.head()

Unnamed: 0,study_hours,pass_exam,pass_prob
0,3.621781,1,0.353755
1,7.655,1,0.93239
2,6.123958,0,0.802052
3,5.190609,0,0.657571
4,2.09213,0,0.13868


In [9]:
# 准备数据
X_new = sm.add_constant(study_hours_new)
y_new = pass_exam_new

# 拟合 Logit 模型
logit_model_new = sm.Logit(y_new, X_new)

# 使用不同方法拟合并比较结果
result_newton_new = logit_model_new.fit(method='newton', disp=0)
result_bfgs_new = logit_model_new.fit(method='bfgs', disp=0)

# 输出两种方法的结果比较
print("\n=== Newton方法结果 ===")
print(result_newton_new.summary())

print("\n=== BFGS方法结果 ===")
print(result_bfgs_new.summary())



=== Newton方法结果 ===
                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                   50
Model:                          Logit   Df Residuals:                       48
Method:                           MLE   Df Model:                            1
Date:                Wed, 09 Apr 2025   Pseudo R-squ.:                  0.3385
Time:                        01:45:30   Log-Likelihood:                -22.501
converged:                       True   LL-Null:                       -34.015
Covariance Type:            nonrobust   LLR p-value:                 1.596e-06
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -4.1170      1.109     -3.712      0.000      -6.291      -1.943
x1             0.8870      0.240      3.700      0.000       0.417       1.357

=== BFGS方法结果 ===
              

- **模型：**
  $$P(y_i=1|x_i) = \frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}$$

- **似然函数：**
  $$L(\beta_0,\beta_1) = \prod_{i=1}^{50} \left[\frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}\right]^{y_i}\left[\frac{1}{1+e^{\beta_0+\beta_1 x_i}}\right]^{1-y_i}$$

- **对数似然：**
  $$\ln L = \sum_{i=1}^{50} \{y_i(\beta_0+\beta_1 x_i) - \ln(1+e^{\beta_0+\beta_1 x_i})\}$$

- **梯度（一阶导数）：**
  $$\frac{\partial \ln L}{\partial \beta_0} = \sum_{i=1}^{50} \left[y_i - \frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}\right] = \sum_{i=1}^{50} (y_i - p_i)$$
  
  $$\frac{\partial \ln L}{\partial \beta_1} = \sum_{i=1}^{50} \left[y_i x_i - \frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}x_i\right] = \sum_{i=1}^{50} (y_i - p_i)x_i$$

  其中 $p_i = \frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}$ 是第 $i$ 个观测的预测概率。

In [6]:
# 优化的Logit模型对数似然函数（向量化实现）
def neg_log_likelihood_optimized(params, x, y):
    beta0, beta1 = params
    linear_pred = beta0 + beta1 * x
    
    # 直接按照对数似然公式计算: Σ[y_i(β₀+β₁x_i) - ln(1+e^(β₀+β₁x_i))]
    log_likelihood = np.sum(y * linear_pred - np.log(1 + np.exp(linear_pred)))
    # print(f"参数: beta0={beta0:.4f}, beta1={beta1:.4f}, 对数似然: {log_likelihood:.4f}")
    return -log_likelihood

# 计算梯度以加速优化
def neg_log_likelihood_grad(params, x, y):
    beta0, beta1 = params
    linear_pred = beta0 + beta1 * x
    p = 1 / (1 + np.exp(-linear_pred))  # 计算预测概率
    # 梯度计算
    grad_beta0 = -np.sum(y - p)
    grad_beta1 = -np.sum((y - p) * x)
    return np.array([grad_beta0, grad_beta1])

# 使用DataFrame中的数据
x = df_logit['study_hours'].values
y = df_logit['pass_exam'].values

# 初始猜测值
initial_guess = [0, 0]


result_optimized = optimize.minimize(
    neg_log_likelihood_optimized, 
    initial_guess, 
    args=(x, y),
    method='L-BFGS-B',
    jac=neg_log_likelihood_grad,
    options={'disp': True}
)

# 提取最优参数
beta0_opt, beta1_opt = result_optimized.x
print("\n优化方法最终结果:")
print(f"beta0 = {beta0_opt:.4f}")
print(f"beta1 = {beta1_opt:.4f}")
print(f"最大对数似然值 = {-result_optimized.fun:.4f}")
print(f"收敛状态: {result_optimized.success}")
print(f"迭代次数: {result_optimized.nit}")



优化方法最终结果:
beta0 = -4.1171
beta1 = 0.8870
最大对数似然值 = -22.5005
收敛状态: True
迭代次数: 12


- **梯度（一阶导数）：**
  $$\frac{\partial \ln L}{\partial \beta_0} = \sum_{i=1}^{50} \left[y_i - \frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}\right] = \sum_{i=1}^{50} (y_i - p_i)$$
  
  $$\frac{\partial \ln L}{\partial \beta_1} = \sum_{i=1}^{50} \left[y_i x_i - \frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}x_i\right] = \sum_{i=1}^{50} (y_i - p_i)x_i$$

  其中 $p_i = \frac{e^{\beta_0+\beta_1 x_i}}{1+e^{\beta_0+\beta_1 x_i}}$ 是第 $i$ 个观测的预测概率。

**Hessian矩阵（二阶导数）：**
  $$\frac{\partial^2 \ln L}{\partial \beta_0^2} = -\sum_{i=1}^{50} p_i(1-p_i)$$
  
  $$\frac{\partial^2 \ln L}{\partial \beta_0 \partial \beta_1} = -\sum_{i=1}^{50} p_i(1-p_i)x_i$$
  
  $$\frac{\partial^2 \ln L}{\partial \beta_1 \partial \beta_0} = -\sum_{i=1}^{50} p_i(1-p_i)x_i$$
  
  $$\frac{\partial^2 \ln L}{\partial \beta_1^2} = -\sum_{i=1}^{50} p_i(1-p_i)x_i^2$$

- **信息矩阵（Fisher Information Matrix）：**
  $$I(\beta) = -E\left[\frac{\partial^2 \ln L}{\partial \beta \partial \beta'}\right] = \begin{bmatrix} 
  \sum_{i=1}^{50} p_i(1-p_i) & \sum_{i=1}^{50} p_i(1-p_i)x_i \\
  \sum_{i=1}^{50} p_i(1-p_i)x_i & \sum_{i=1}^{50} p_i(1-p_i)x_i^2
  \end{bmatrix}$$

- **参数估计的协方差矩阵：**
  $$Var(\hat{\beta}) = I(\hat{\beta})^{-1}$$

- **标准误：**
  $$SE(\hat{\beta}_0) = \sqrt{[I(\hat{\beta})^{-1}]_{11}}$$
  
  $$SE(\hat{\beta}_1) = \sqrt{[I(\hat{\beta})^{-1}]_{22}}$$

  其中 $[I(\hat{\beta})^{-1}]_{jj}$ 表示信息矩阵逆矩阵的第 $j$ 个对角线元素。

In [12]:
# 计算标准误
def compute_standard_errors(params, x, y):
    beta0, beta1 = params
    linear_pred = beta0 + beta1 * x
    p = 1 / (1 + np.exp(-linear_pred))
    
    # 计算信息矩阵（Fisher Information Matrix）
    # 对于Logit模型，信息矩阵是 X'DX，其中D是对角矩阵，对角线元素为p_i(1-p_i)
    w = p * (1 - p)  # 权重
    
    # 构建设计矩阵X
    X = np.column_stack((np.ones_like(x), x))
    
    # 计算信息矩阵
    info_matrix = np.zeros((2, 2))
    for i in range(len(x)):
        x_i = X[i].reshape(-1, 1)  # 转为列向量
        info_matrix += w[i] * (x_i @ x_i.T)  # 外积
    
    # 计算协方差矩阵（信息矩阵的逆）
    cov_matrix = np.linalg.inv(info_matrix)
    
    # 标准误是协方差矩阵对角线元素的平方根
    se_beta0 = np.sqrt(cov_matrix[0, 0])
    se_beta1 = np.sqrt(cov_matrix[1, 1])
    
    return se_beta0, se_beta1, cov_matrix

# 计算最优参数的标准误
se_beta0, se_beta1, cov_matrix = compute_standard_errors([beta0_opt, beta1_opt], x, y)

# 输出结果
print("\n参数估计的标准误:")
print(f"beta0的标准误 = {se_beta0:.4f}")
print(f"beta1的标准误 = {se_beta1:.4f}")

# 计算z统计量和p值
z_beta0 = beta0_opt / se_beta0
z_beta1 = beta1_opt / se_beta1


# 创建结果汇总表
results_summary = pd.DataFrame({
    '参数': ['beta0', 'beta1'],
    '估计值': [beta0_opt, beta1_opt],
    '标准误': [se_beta0, se_beta1],
    'z值': [z_beta0, z_beta1],
})

# 设置浮点数显示格式为小数点后四位
pd.set_option('display.float_format', '{:.4f}'.format)

print("\n结果汇总表:")
results_summary


参数估计的标准误:
beta0的标准误 = 1.1090
beta1的标准误 = 0.2397

结果汇总表:


Unnamed: 0,参数,估计值,标准误,z值
0,beta0,-4.1171,1.109,-3.7123
1,beta1,0.887,0.2397,3.6998
