In [None]:
# # TensorFlow 설치
# %pip install tensorflow

# # PyTorch 설치 (기본 CPU 버전)
# %pip install torch torchvision torchaudio

# PyTorch로 만든 모델을 scikit-learn처럼 편하게 학습/평가/튜닝할 때 사용
# %pip install skorch

#### 데이터 불러오기

In [None]:
# # 데이터 불러오기 1 : 기존 수치형 데이터
# import pandas as pd
# url = "C:\\Users\\Playdata2\\Downloads\\tree_model_preprocessed.csv"
# df = pd.read_csv(url)
# df.head()

In [None]:
# Load dataset 2: numeric features after log transformation.
import pandas as pd
# NOTE(review): machine-specific absolute Windows path; the cell only runs
# where this file exists at exactly this location.
url = "C:\\Users\\Playdata2\\Downloads\\re_log_model_preprocessed.csv"
df = pd.read_csv(url)
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Total number of non-missing cells: per-column non-NA counts, summed.
df.count().sum()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Frequency of each distinct value in the target column 'churn'.
df['churn'].value_counts()

#### 레이어층 2, 뉴런조합별 성능 출력
```
64-32-1
32-16-1
16-8-1
```

#### 레이어층 3, 뉴런조합별 성능 출력
- 피라미드형
    ```
    128 - 64 - 32 - 1
    64 - 32 - 16 - 1
    32 - 16 - 8 - 1
    ```
- 늘렸다 줄이기
- 줄였다 늘리기
```
과적합이 일어날 때 Dropout을 사용하지만,
Dropout을 적용해서 성능 수치가 올라가는 경우도 있다고 하니 Dropout 사용하기
```

##### 피라미드 형

In [None]:
# 포함 사항
# PyTorch + skorch 기반
# GPU 자동 사용
# ColumnScaler 파이프라인
# GridSearchCV 탐색 항목
# layers (뉴런 수)
# activation (ReLU, Tanh)
# optimizer (Adam, SGD)
# lr (learning rate)
# weight_decay (규제 L2)
# batch_size
# max_epochs
# scaler (Standard, MinMax, Robust)
# criterion (BCELoss, BCEWithLogitsLoss)
# BCEWithLogitsLoss 사용 시 출력층 Sigmoid 제거
# 테스트 데이터 classification metrics 출력
# 최적 하이퍼파라미터 출력

In [None]:
# ======================================
# 필요 라이브러리
# ======================================
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np

# ======================================
# Automatic device selection
# ======================================
# Use the GPU whenever PyTorch reports CUDA as available; otherwise stay on CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using device:", device)

# ======================================
# Data preparation
# ======================================
# Target vector: the 'churn' column as int64.
y = df['churn'].values.astype(np.int64)
# Feature matrix: every remaining column as float32.
X = df.drop(columns='churn').values.astype(np.float32)
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ======================================
# PyTorch model definition
# ======================================
class ChurnModel(nn.Module):
    """Fully connected network with one output unit and a configurable hidden stack.

    Args:
        layers: Widths of the hidden layers, in order. Any iterable of ints
            works (list or tuple); the default is an immutable tuple so it
            cannot be shared and modified across instances.
        activation: Zero-argument callable (for example ``nn.ReLU``) that
            creates the activation module placed after every hidden
            ``nn.Linear``.
        use_sigmoid: When True, ``nn.Sigmoid`` is appended after the output
            unit. Pass False when the loss works on raw logits
            (``nn.BCEWithLogitsLoss``).
        input_dim: Number of input features. When None, the width is read
            from the module-level ``X_train`` (``X_train.shape[1]``), which
            is what this class always did before the parameter existed.
    """

    def __init__(self, layers=(64, 32, 16), activation=nn.ReLU,
                 use_sigmoid=True, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback: infer the width from the
            # training matrix defined at module level.
            input_dim = X_train.shape[1]

        modules = []
        in_features = input_dim
        for width in layers:
            modules.append(nn.Linear(in_features, width))
            modules.append(activation())
            in_features = width
        # Single output unit for the binary target.
        modules.append(nn.Linear(in_features, 1))
        if use_sigmoid:
            modules.append(nn.Sigmoid())
        self.network = nn.Sequential(*modules)

    def forward(self, X):
        """Run ``X`` through the stack; the last dimension of the result is 1."""
        return self.network(X)

# ======================================
# Scaler definition
# ======================================
class ColumnScaler(BaseEstimator, TransformerMixin):
    """Pipeline step that delegates scaling to a swappable scaler object.

    Exposing the scaler as the constructor parameter ``scaler`` lets a
    parameter search replace it through ``<step>__scaler``.

    Args:
        scaler: Object providing ``fit(X)`` and ``transform(X)``. When None,
            a fresh ``StandardScaler`` is created for this instance.
    """

    def __init__(self, scaler=None):
        # A default written as ``scaler=StandardScaler()`` is evaluated once,
        # when the class is defined, so every ColumnScaler() would share (and
        # refit) the same scaler object. Build one per instance instead.
        self.scaler = StandardScaler() if scaler is None else scaler

    def fit(self, X, y=None):
        """Fit the wrapped scaler on ``X``; ``y`` is accepted and ignored."""
        self.scaler.fit(X)
        return self

    def transform(self, X):
        """Return ``X`` transformed by the wrapped scaler."""
        return self.scaler.transform(X)

# ======================================
# skorch wrapper
# ======================================
# ChurnModel has a single output unit, so the binary-specific skorch wrapper
# is used instead of NeuralNetClassifier:
#   * NeuralNetClassifier.predict takes an argmax across output columns,
#     which is always 0 for a one-column output;
#   * the BCE losses require float targets with the same shape as the model
#     output, so int64 targets of shape (n,) against an (n, 1) output raise
#     a ValueError on the first batch.
from skorch import NeuralNetBinaryClassifier

net = NeuralNetBinaryClassifier(
    module=ChurnModel,
    max_epochs=10,
    lr=0.001,
    batch_size=32,
    optimizer=optim.Adam,
    criterion=nn.BCEWithLogitsLoss,  # consumes raw logits
    module__use_sigmoid=False,       # the loss applies the sigmoid itself
    iterator_train__shuffle=True,
    device=device,
    verbose=1,
)

# ======================================
# Pipeline
# ======================================
pipeline = Pipeline([
    ('scaler', ColumnScaler()),
    ('net', net),
])

# ======================================
# GridSearchCV parameters
# ======================================
param_grid = {
    'net__module__layers': [[128, 64, 32], [64, 32, 16], [32, 16, 8]],
    'net__module__activation': [nn.ReLU, nn.Tanh],
    'net__optimizer': [optim.Adam, optim.SGD],
    'net__lr': [0.001, 0.01, 0.1],
    'net__optimizer__weight_decay': [0.0, 0.0001, 0.001],
    'net__batch_size': [32, 64, 128],
    'net__max_epochs': [10, 20, 30],
    # Loss and output layer must agree, so they are pinned as one matched
    # pair instead of being crossed independently (BCELoss on raw logits and
    # BCEWithLogitsLoss on sigmoid outputs are both invalid pairings).
    # Sigmoid followed by BCELoss is the same objective as BCEWithLogitsLoss,
    # so no distinct model is dropped from the search.
    'net__criterion': [nn.BCEWithLogitsLoss],
    'net__module__use_sigmoid': [False],
    'scaler__scaler': [StandardScaler(), MinMaxScaler(), RobustScaler()],
}

# ======================================
# Run GridSearchCV
# ======================================
# NOTE(review): n_jobs=-1 runs the candidate fits in parallel workers that
# all use `device`; with a single GPU, n_jobs=1 may be the safer choice.
grid = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=2)
# BCEWithLogitsLoss needs floating-point targets; the 0/1 labels are cast
# here so the int64 `y_train` used elsewhere stays untouched.
grid_result = grid.fit(X_train, y_train.astype(np.float32))

# ======================================
# Predictions on the held-out test data
# ======================================
y_pred = grid_result.predict(X_test)

# ======================================
# Classification metrics
# ======================================
# Each output template is paired with its scorer so the four headline
# metrics are computed and printed in the same order through one call site.
for _template, _scorer in (
    ("Accuracy: {:.4f}", accuracy_score),
    ("Precision: {:.4f}", precision_score),
    ("Recall: {:.4f}", recall_score),
    ("F1 Score: {:.4f}", f1_score),
):
    print(_template.format(_scorer(y_test, y_pred)))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))

# ======================================
# Best hyperparameters
# ======================================
# Header and parameter dict are emitted on separate lines by one print call.
print("Best Parameters Found:", grid_result.best_params_, sep="\n")

##### 줄였다 늘리기

In [None]:
# ======================================
# 필요 라이브러리
# ======================================
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np

# ======================================
# Automatic device selection
# ======================================
# Use the GPU whenever PyTorch reports CUDA as available; otherwise stay on CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using device:", device)

# ======================================
# Data preparation
# ======================================
# Target vector: the 'churn' column as int64.
y = df['churn'].values.astype(np.int64)
# Feature matrix: every remaining column as float32.
X = df.drop(columns='churn').values.astype(np.float32)
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ======================================
# PyTorch model definition
# ======================================
class ChurnModel(nn.Module):
    """Fully connected network with one output unit and a configurable hidden stack.

    Args:
        layers: Widths of the hidden layers, in order. Any iterable of ints
            works (list or tuple); the default is an immutable tuple so it
            cannot be shared and modified across instances.
        activation: Zero-argument callable (for example ``nn.ReLU``) that
            creates the activation module placed after every hidden
            ``nn.Linear``.
        use_sigmoid: When True, ``nn.Sigmoid`` is appended after the output
            unit. Pass False when the loss works on raw logits
            (``nn.BCEWithLogitsLoss``).
        input_dim: Number of input features. When None, the width is read
            from the module-level ``X_train`` (``X_train.shape[1]``), which
            is what this class always did before the parameter existed.
    """

    def __init__(self, layers=(64, 32, 16), activation=nn.ReLU,
                 use_sigmoid=True, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback: infer the width from the
            # training matrix defined at module level.
            input_dim = X_train.shape[1]

        modules = []
        in_features = input_dim
        for width in layers:
            modules.append(nn.Linear(in_features, width))
            modules.append(activation())
            in_features = width
        # Single output unit for the binary target.
        modules.append(nn.Linear(in_features, 1))
        if use_sigmoid:
            modules.append(nn.Sigmoid())
        self.network = nn.Sequential(*modules)

    def forward(self, X):
        """Run ``X`` through the stack; the last dimension of the result is 1."""
        return self.network(X)

# ======================================
# Scaler definition
# ======================================
class ColumnScaler(BaseEstimator, TransformerMixin):
    """Pipeline step that delegates scaling to a swappable scaler object.

    Exposing the scaler as the constructor parameter ``scaler`` lets a
    parameter search replace it through ``<step>__scaler``.

    Args:
        scaler: Object providing ``fit(X)`` and ``transform(X)``. When None,
            a fresh ``StandardScaler`` is created for this instance.
    """

    def __init__(self, scaler=None):
        # A default written as ``scaler=StandardScaler()`` is evaluated once,
        # when the class is defined, so every ColumnScaler() would share (and
        # refit) the same scaler object. Build one per instance instead.
        self.scaler = StandardScaler() if scaler is None else scaler

    def fit(self, X, y=None):
        """Fit the wrapped scaler on ``X``; ``y`` is accepted and ignored."""
        self.scaler.fit(X)
        return self

    def transform(self, X):
        """Return ``X`` transformed by the wrapped scaler."""
        return self.scaler.transform(X)

# ======================================
# skorch wrapper
# ======================================
# ChurnModel has a single output unit, so the binary-specific skorch wrapper
# is used instead of NeuralNetClassifier:
#   * NeuralNetClassifier.predict takes an argmax across output columns,
#     which is always 0 for a one-column output;
#   * the BCE losses require float targets with the same shape as the model
#     output, so int64 targets of shape (n,) against an (n, 1) output raise
#     a ValueError on the first batch.
from skorch import NeuralNetBinaryClassifier

net = NeuralNetBinaryClassifier(
    module=ChurnModel,
    max_epochs=10,
    lr=0.001,
    batch_size=32,
    optimizer=optim.Adam,
    criterion=nn.BCEWithLogitsLoss,  # consumes raw logits
    module__use_sigmoid=False,       # the loss applies the sigmoid itself
    iterator_train__shuffle=True,
    device=device,
    verbose=1,
)

# ======================================
# Pipeline
# ======================================
pipeline = Pipeline([
    ('scaler', ColumnScaler()),
    ('net', net),
])

# ======================================
# GridSearchCV parameters
# ======================================
param_grid = {
    # This cell sits under the "narrow, then widen" heading, but its layer
    # candidates were identical to the pyramid cell. Bottleneck shapes are
    # searched here instead.
    # NOTE(review): widths mirror the pyramid candidates; adjust as needed.
    'net__module__layers': [[128, 64, 128], [64, 32, 64], [32, 16, 32]],
    'net__module__activation': [nn.ReLU, nn.Tanh],
    'net__optimizer': [optim.Adam, optim.SGD],
    'net__lr': [0.001, 0.01, 0.1],
    'net__optimizer__weight_decay': [0.0, 0.0001, 0.001],
    'net__batch_size': [32, 64, 128],
    'net__max_epochs': [10, 20, 30],
    # Loss and output layer must agree, so they are pinned as one matched
    # pair instead of being crossed independently (BCELoss on raw logits and
    # BCEWithLogitsLoss on sigmoid outputs are both invalid pairings).
    # Sigmoid followed by BCELoss is the same objective as BCEWithLogitsLoss,
    # so no distinct model is dropped from the search.
    'net__criterion': [nn.BCEWithLogitsLoss],
    'net__module__use_sigmoid': [False],
    'scaler__scaler': [StandardScaler(), MinMaxScaler(), RobustScaler()],
}

# ======================================
# Run GridSearchCV
# ======================================
# NOTE(review): n_jobs=-1 runs the candidate fits in parallel workers that
# all use `device`; with a single GPU, n_jobs=1 may be the safer choice.
grid = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=2)
# BCEWithLogitsLoss needs floating-point targets; the 0/1 labels are cast
# here so the int64 `y_train` used elsewhere stays untouched.
grid_result = grid.fit(X_train, y_train.astype(np.float32))

# ======================================
# Predictions on the held-out test data
# ======================================
y_pred = grid_result.predict(X_test)

# ======================================
# Classification metrics
# ======================================
# Each output template is paired with its scorer so the four headline
# metrics are computed and printed in the same order through one call site.
for _template, _scorer in (
    ("Accuracy: {:.4f}", accuracy_score),
    ("Precision: {:.4f}", precision_score),
    ("Recall: {:.4f}", recall_score),
    ("F1 Score: {:.4f}", f1_score),
):
    print(_template.format(_scorer(y_test, y_pred)))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))

# ======================================
# Best hyperparameters
# ======================================
# Header and parameter dict are emitted on separate lines by one print call.
print("Best Parameters Found:", grid_result.best_params_, sep="\n")