In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# 1. Load and prepare the data (carried over from the earlier code).
# NOTE(review): pd, np, TargetEncoder and StandardScaler are not imported in the
# cells visible here - presumably an earlier cell provides them; confirm before
# relying on Restart & Run All.
df = pd.read_csv('data.csv')

# Drop the extra target columns so that only `target_column` is left as the label.
target_column = 'gb_60_ever'
to_drop = ['gb_90_ever', 'gb_cum_slq90']
df = df.drop(columns=to_drop, errors='ignore')

# Clean-up: remove the identifier and date columns.
columns_to_drop = ['customer_id', 'application_id', 'account_id', 'application_date', 'date_loan_granted', 'date_of_birth']
df = df.drop(columns=columns_to_drop, errors='ignore')

# Impute: numeric gaps get the column median, anything still missing becomes 'unknown'.
# NOTE(review): the medians are computed over all rows, i.e. before the train/test
# split below - a mild form of leakage that is left unchanged here.
df = df.fillna(df.median(numeric_only=True))
df = df.fillna('unknown')

# Remove highly correlated features.
# Only numeric feature columns take part: the frame still holds string columns at
# this point (corr() on those raises in pandas >= 2.0), and the target is excluded
# so that it can never be pruned away itself.
correlation_threshold = 0.8
corr_matrix = (
    df.drop(columns=[target_column])
    .select_dtypes(include=['number'])
    .corr()
    .abs()
)
# Keep only the strict upper triangle so each pair is looked at once.
upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
high_corr_features = [
    column
    for column in upper_triangle.columns
    if (upper_triangle[column] > correlation_threshold).any()
]
df = df.drop(columns=high_corr_features, errors='ignore')

# Separate features and label.
X = df.drop(columns=[target_column])
y = df[target_column]

# Split BEFORE fitting any transformer: the target encoder and the scaler must only
# ever see training rows, otherwise test labels/statistics leak into the features.
# Same test_size / random_state as before, so the same rows land in each part.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test = X_train.copy(), X_test.copy()  # own copies -> safe column assignment

# Target encoding: fitted on the training part, then applied to the test part.
categorical_columns = X.select_dtypes(include=['object']).columns
target_encoder = TargetEncoder(cols=categorical_columns)
if len(categorical_columns) > 0:
    X_train[categorical_columns] = target_encoder.fit_transform(X_train[categorical_columns], y_train)
    X_test[categorical_columns] = target_encoder.transform(X_test[categorical_columns])

# Scaling: same rule - fit on train, transform test.
# The correlation pruning above may already have removed some of these columns,
# so only the ones that are still present are scaled.
columns_to_scale = [
    column
    for column in ['loan_amount', 'interest_rate', 'instalment_or_min_instalment_amount', 'age']
    if column in X_train.columns
]
scaler = StandardScaler()
if columns_to_scale:
    X_train[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])
    X_test[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

# Keep `X` as the fully preprocessed feature matrix, in its original row order,
# for any later cell that still reads it.
X = pd.concat([X_train, X_test]).loc[X.index]
# Unused in the cells visible here; kept because later cells may rely on the name.
numeric_columns = X.select_dtypes(include=['number']).columns

# Convert to tensors; labels become a column vector to match the model's output.
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# DataLoaders for mini-batches (only the training data is shuffled).
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
# Model definition
class SimpleNN(nn.Module):
    """Small fully connected binary classifier.

    Layer stack (stored in ``self.model``):
    Linear(input_size, 64) -> ReLU -> Linear(64, 32) -> ReLU -> Linear(32, 1) -> Sigmoid.

    Args:
        input_size: number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        # Widths of the input and the two hidden layers; each consecutive pair
        # becomes a Linear layer followed by a ReLU.
        widths = (input_size, 64, 32)
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Single output unit squashed by a sigmoid.
        stack.append(nn.Linear(widths[-1], 1))
        stack.append(nn.Sigmoid())
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid output."""
        probabilities = self.model(x)
        return probabilities

# Model initialisation
# Seed PyTorch's global RNG first: layer weights are drawn from it, so without a
# seed every fresh kernel starts from different weights and the reported loss and
# accuracy cannot be reproduced.
torch.manual_seed(42)
input_size = X_train.shape[1]  # one input unit per feature column
model = SimpleNN(input_size)


In [None]:
# Define the optimizer and the loss function
criterion = nn.BCELoss()  # binary cross-entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 20
for epoch in range(epochs):
    model.train()  # switch to training mode at the start of every epoch
    epoch_loss = 0  # running sum of the per-batch loss values for this epoch
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Report the epoch number and the summed batch losses divided by the number of batches.
    print(f"Эпоха {epoch+1}/{epochs}, Потеря: {epoch_loss/len(train_loader):.4f}")


In [None]:
# Model evaluation
model.eval()
with torch.no_grad():  # no gradients are needed for scoring
    y_pred = model(X_test_tensor)
    # Threshold the sigmoid output at 0.5 to obtain hard 0/1 labels.
    y_pred_labels = y_pred.gt(0.5).float()
    # Share of test samples whose predicted label equals the true label (0-dim tensor).
    accuracy = torch.eq(y_pred_labels.view(-1), y_test_tensor.view(-1)).float().mean()
    print(f"Точность на тестовой выборке: {accuracy:.4f}")
