# LOGISTIC REGRESSION

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's legacy global RNG so that random draws made afterwards
# (e.g. with np.random.randn) are reproducible from a fresh kernel.
np.random.seed(2)

# One row of feature values; each column is one training sample.
X = np.array([[0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 
              2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50]])
# Binary class label (0 or 1) for each sample, aligned column-by-column with X.
y = np.array([[0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]])

# Reshape the data.
# Stack a row of ones on top of X (the bias/intercept input), then transpose
# so that every row is one sample of the form [1, x].
X = np.concatenate((np.ones((1, X.shape[1])), X), axis = 0).T
# Turn the labels into a column vector; -1 lets NumPy infer the number of
# samples instead of hard-coding it, so the cell keeps working if data changes.
y = y.reshape(-1, 1)

Tạo hàm tính sigmoid và hàm mất mát

In [None]:
def sigmoid(z):
  """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

  The value is computed as exp(-log(1 + exp(-z))), where the inner
  log(1 + exp(-z)) is evaluated by np.logaddexp(0, -z). That routine never
  forms exp() of a large positive number, so very negative inputs no longer
  trigger an overflow RuntimeWarning, while small tail probabilities are
  still represented (they are not rounded to exactly 0 prematurely).

  Args:
    z: A scalar or NumPy array of logits.

  Returns:
    The sigmoid of `z`, with the same shape as `z`; values lie in [0, 1].
  """
  return np.exp(-np.logaddexp(0, -z))

def cost_func(X, y, w):
  """Return the mean binary cross-entropy of a logistic model.

  With z = X @ w and h = sigmoid(z), the cost is
      -(1/m) * sum(y * log(h) + (1 - y) * log(1 - h)).
  Instead of forming h and taking its log (which yields -inf, and then NaN
  through 0 * -inf, once h rounds to exactly 0 or 1), the two log terms are
  computed directly from the logits:
      -log(h)     == logaddexp(0, -z)
      -log(1 - h) == logaddexp(0,  z)
  so the result stays finite for arbitrarily large |z|.

  Args:
    X: Design matrix with one sample per row (m rows).
    y: Column vector of 0/1 labels with m rows.
    w: Column vector of weights, one per column of X.

  Returns:
    The scalar cost (element [0, 0] of the 1x1 matrix product).
  """
  m = X.shape[0]
  z = X @ w
  neg_log_h = np.logaddexp(0, -z)            # -log(sigmoid(z))
  neg_log_one_minus_h = np.logaddexp(0, z)   # -log(1 - sigmoid(z))
  cost = (1/m) * (y.T @ neg_log_h + (1 - y).T @ neg_log_one_minus_h)
  return cost[0, 0]

Tiến hành training

In [None]:
epochs = 350   # number of full-batch update steps
eta = 0.05     # learning rate (step size)

# Random initial weights: one per column of X, so the shape follows the
# design matrix instead of being hard-coded.
w = np.random.randn(X.shape[1], 1)

for i in range(epochs):
  # Full-batch update. X.T @ (y - sigmoid(X @ w)) is the gradient of the
  # log-likelihood summed over all samples, so adding it moves w uphill in
  # likelihood (equivalently, downhill in cross-entropy cost).
  w += eta * X.T @ (y - sigmoid(X@w))
  # The cost is only needed for progress logging, so evaluate it only on the
  # iterations that are actually printed.
  if i % 10 == 0:
    print(cost_func(X, y, w))

# Cost of the final weights, kept available under the same name as before.
total_cost = cost_func(X, y, w)

print("Finished!")

In ra dự đoán

In [62]:
# Show the model's predicted probability of class 1 for each row of X.
print(sigmoid(np.matmul(X, w)))

Vẽ biểu đồ hiển thị kết quả

In [63]:
# Plot the training points, the fitted sigmoid curve and the decision boundary.
# The explicit figure/axes objects are used instead of the implicit pyplot
# "current axes" so every call clearly targets this one plot.
fig, ax = plt.subplots(figsize=(10, 6))

# Column 1 of X holds the raw feature values (column 0 is the bias input).
x_original = X[:, 1]

# Flatten the label column once and build one boolean mask per class.
y_flat = y.ravel()
is_class_0 = y_flat == 0
is_class_1 = y_flat == 1

# Dense grid of x values, with a bias column added, for drawing the curve.
x_range = np.linspace(0, 6, 100)
X_range = np.column_stack([np.ones(100), x_range])
y_pred_range = sigmoid(X_range @ w)

# Training data: each point is drawn at its true label (0 or 1).
ax.scatter(x_original[is_class_0], y_flat[is_class_0],
           color='red', marker='o', s=100, label='Class 0', alpha=0.7)
ax.scatter(x_original[is_class_1], y_flat[is_class_1],
           color='blue', marker='s', s=100, label='Class 1', alpha=0.7)

# Fitted probability curve.
ax.plot(x_range, y_pred_range, 'green', linewidth=2, label='Logistic Regression')

# Decision boundary: the x where w0 + w1*x = 0, i.e. predicted probability 0.5.
# decision_boundary_x stays a 1-element array (as before); axvline is given a
# plain float because its x argument is documented as a scalar.
decision_boundary_x = -w[0] / w[1]
boundary_x = float(decision_boundary_x[0])
ax.axvline(x=boundary_x, color='orange', linestyle='--',
           linewidth=2, label=f'Decision Boundary (x={boundary_x:.2f})')

# Reference line at probability 0.5.
ax.axhline(y=0.5, color='gray', linestyle=':', alpha=0.5)

# Labels and formatting.
ax.set_xlabel('X', fontsize=12)
ax.set_ylabel('Probability', fontsize=12)
ax.set_title('Logistic Regression Results', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_ylim(-0.1, 1.1)
ax.set_xlim(0, 6)

# Annotate the plot with the learned weights (position is in data coordinates).
ax.text(0.5, 0.8, f'Final weights:\nw0 = {w[0][0]:.3f}\nw1 = {w[1][0]:.3f}',
        bbox=dict(boxstyle="round,pad=0.3", facecolor="lightblue", alpha=0.7))

fig.tight_layout()
plt.show()