<a href="https://colab.research.google.com/github/f8sle/report/blob/main/report0418.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# 1. Load the data
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Social_Network_Ads.csv')

# 2. Preprocessing / feature extraction
X = data[['Age', 'EstimatedSalary']].to_numpy()
y = data['Purchased'].to_numpy()

# 3. Train/test split
# Split the raw features first so the scaler never sees the held-out rows.
# The row partition depends only on the sample count and random_state, so it
# is the same partition as splitting already-scaled features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the standardization on the training rows only and reuse those statistics
# for the test rows; fitting on the full dataset would leak test-set mean and
# variance into the training features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix in the same (training-fitted) scale, kept so that any
# code relying on X_scaled keeps working.
X_scaled = scaler.transform(X)

# 4. Logistic regression model
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The exponential is only ever evaluated at ``-|z|``, so it cannot overflow
    for inputs of large magnitude. The naive form overflows ``exp`` when ``z``
    is very negative and emits a RuntimeWarning.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``,
        with values in [0, 1].
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer/bool input: promote so the result is a float, as before.
        z = z.astype(float)

    decay = np.exp(-np.abs(z))  # always in [0, 1], never overflows
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwrap a 0-d array back to a scalar

def h(x1, x2, w0, w1, w2):
    """Two-feature hypothesis: sigmoid of ``w0 + w1 * x1 + w2 * x2``.

    Args:
        x1: First feature value(s).
        x2: Second feature value(s).
        w0: Intercept term.
        w1: Weight applied to ``x1``.
        w2: Weight applied to ``x2``.

    Returns:
        Whatever ``sigmoid`` returns for the linear combination.
    """
    return sigmoid(w0 + w1 * x1 + w2 * x2)

def log_likelihood_single(X_b, y, w):
    """Return the summed negative log-likelihood of the labels under ``w``.

    Despite the name, the value returned is a loss (lower is better): the
    negated sum of ``y*log(p) + (1-y)*log(1-p)`` over all rows.

    Args:
        X_b: Design matrix passed to ``np.dot`` together with ``w``.
        y: Labels, combined element-wise with the predicted probabilities.
        w: Weight vector.

    Returns:
        The summed loss as returned by ``np.sum``.
    """
    eps = 1e-15
    # Keep probabilities strictly inside (0, 1) so neither log() sees 0.
    probs = np.clip(sigmoid(np.dot(X_b, w)), eps, 1 - eps)
    per_sample = y * np.log(probs) + (1 - y) * np.log(1 - probs)
    return -np.sum(per_sample)

#5 Gradient descent
def gradient_descent(X, y, learning_rate=0.01, n_iterations=1000):
    """Fit logistic-regression weights with batch gradient descent.

    A column of ones is prepended to ``X`` so that ``w[0]`` is the intercept.
    Weights start at zero. After every update the loss from
    ``log_likelihood_single`` is recorded, and it is printed on every 100th
    iteration (starting with iteration 0).

    Args:
        X: 2-D feature matrix (``X.shape`` must unpack into two values).
        y: Labels, subtracted element-wise from the predicted probabilities.
        learning_rate: Step size applied to the averaged gradient.
        n_iterations: Number of update steps to run.

    Returns:
        Tuple ``(w, loss_history)``: the final weight vector of length
        ``X.shape[1] + 1`` and the list of per-iteration losses.
    """
    n_samples, n_features = X.shape
    bias_column = np.ones((n_samples, 1))
    X_b = np.hstack([bias_column, X])
    w = np.zeros(n_features + 1)
    loss_history = []

    for iteration in range(n_iterations):
        # Prediction error for the current weights.
        residual = sigmoid(X_b.dot(w)) - y
        # Gradient of the averaged loss, then one descent step.
        step = (1 / n_samples) * X_b.T.dot(residual)
        w -= learning_rate * step

        # Loss is measured with the freshly updated weights.
        loss = log_likelihood_single(X_b, y, w)
        loss_history.append(loss)

        if iteration % 100 == 0:
            print(f"반복 {iteration}, 손실: {loss:.6f}")

    return w, loss_history



#6 Visualization
def plot_decision_boundary(X, y, w, title):
    """Scatter the samples and draw the model's 0.5-probability boundary.

    The boundary is obtained by evaluating ``sigmoid`` of the linear model on
    a regular grid covering the data (padded by 0.5 on each side) and
    contouring that probability surface at 0.5.

    Args:
        X: 2-D array; column 0 is plotted on the x-axis, column 1 on the y-axis.
        y: Per-sample values used to colour the scatter points.
        w: Weights ``[w0, w1, w2]`` with the intercept first.
        title: Title placed on the figure.
    """
    plt.figure(figsize=(8, 6))

    # Use a fixed number of grid points per axis so the mesh size does not
    # depend on the feature scale (a fixed step would explode on unscaled data).
    grid_points = 500
    x1_min, x1_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    x2_min, x2_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx1, xx2 = np.meshgrid(
        np.linspace(x1_min, x1_max, grid_points),
        np.linspace(x2_min, x2_max, grid_points),
    )
    # Leading column of ones pairs with the intercept w[0].
    X_grid = np.c_[np.ones(xx1.size), xx1.ravel(), xx2.ravel()]

    # Contour the probabilities themselves: the 0.5 level set is the exact
    # boundary, whereas contouring a thresholded 0/1 field gives a staircase.
    probabilities = sigmoid(np.dot(X_grid, w)).reshape(xx1.shape)
    plt.contour(xx1, xx2, probabilities, levels=[0.5], colors='red')

    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap='coolwarm', s=30)

    plt.xlabel('Age')
    plt.ylabel('Estimated_Salary')
    plt.title(title)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.show()



if __name__ == "__main__":
    # Train the hand-written model on the training split.
    w, history = gradient_descent(
        X_train,
        y_train,
        learning_rate=0.1,
        n_iterations=1000,
    )

    # Report the learned intercept and the two feature weights.
    print("\nfinal param:")
    print(f"gradient param: w0={w[0].item():.4f}, w1={w[1].item():.4f}, w2={w[2].item():.4f}")

    # Show the resulting decision boundary over the training points.
    plot_decision_boundary(X_train, y_train, w, 'Logistic_Regression')
#7 scikit-learn
# Fit the library implementation on the same training split for comparison.
clf = LogisticRegression().fit(X_train, y_train)

print("\nscikit-learn param:")
print(f"w0={clf.intercept_[0]:.4f}")
print(f"w1={clf.coef_[0][0]:.4f}, w2={clf.coef_[0][1]:.4f}")

#8 Visualization
# Pack intercept and coefficients as [w0, w1, w2], the layout the plotting
# helper expects.
w_sklearn = np.concatenate((clf.intercept_, clf.coef_.ravel()))
plot_decision_boundary(X_train, y_train, w_sklearn, 'Logistic_Regression (scikit-learn)')
