In [48]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, roc_auc_score

In [82]:
all_train_pgs = pd.read_csv("./pgs_results_calculated/all_train_pgs.csv")
validation_pgs = pd.read_csv("./pgs_results_calculated/validation_pgs.csv")

In [83]:
X_train = all_train_pgs.iloc[:, 2:-1].to_numpy()
y_train = (all_train_pgs["y"] - 1).to_numpy()
X_val = validation_pgs.iloc[:, 2:-1].to_numpy()
y_val = (validation_pgs["y"] - 1).to_numpy()

In [115]:
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
X_val_tensor = torch.FloatTensor(X_val)
y_val_tensor = torch.FloatTensor(y_val).reshape(-1, 1)

batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
hiden_size = 2000

class BinaryClassifier(nn.Module):
    def __init__(self, input_size):
        super(BinaryClassifier, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hiden_size),
            nn.ReLU(),
            nn.BatchNorm1d(hiden_size),
            nn.Dropout(0.3),
            
            nn.Linear(hiden_size, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        return self.model(x)

input_size = X_train.shape[1]  # 73 признака
model = BinaryClassifier(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 400
history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    train_predictions = []
    train_targets = []
    
    for inputs, targets in train_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        train_predictions.extend(outputs.detach().numpy())
        train_targets.extend(targets.numpy())
    
    train_loss /= len(train_loader)
    train_preds = np.array(train_predictions) >= 0.5
    train_accuracy = roc_auc_score(train_targets, train_predictions)
    
    model.eval()
    val_loss = 0
    val_predictions = []
    val_targets = []
    
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            val_loss += loss.item()
            val_predictions.extend(outputs.numpy())
            val_targets.extend(targets.numpy())
    
    val_loss /= len(val_loader)
    val_preds = np.array(val_predictions) >= 0.5
    val_accuracy = roc_auc_score(val_targets, val_predictions)
    
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['train_acc'].append(train_accuracy)
    history['val_acc'].append(val_accuracy)
    
    print(f'Эпоха {epoch+1}/{num_epochs}, Потери: {train_loss:.4f}, roc-auc: {train_accuracy:.4f}, '
          f'Вал. потери: {val_loss:.4f}, Вал. roc-auc: {val_accuracy:.4f}')

model.eval()
with torch.no_grad():
    val_outputs = model(X_val_tensor)
    val_preds = (val_outputs >= 0.5).numpy().flatten()
    accuracy = accuracy_score(y_val, val_preds)
    try:
        auc = roc_auc_score(y_val, val_outputs.numpy())
        print(f'Итоговая точность: {accuracy:.4f}, AUC: {auc:.4f}')
    except:
        print(f'Итоговая точность: {accuracy:.4f}')
print(np.mean(history['val_acc'][-10:]))


Эпоха 1/400, Потери: 0.6690, roc-auc: 0.5734, Вал. потери: 0.6381, Вал. roc-auc: 0.4273
Эпоха 2/400, Потери: 0.6503, roc-auc: 0.5870, Вал. потери: 0.6361, Вал. roc-auc: 0.5943
Эпоха 3/400, Потери: 0.6152, roc-auc: 0.6294, Вал. потери: 0.6444, Вал. roc-auc: 0.5133
Эпоха 4/400, Потери: 0.6146, roc-auc: 0.6279, Вал. потери: 0.7113, Вал. roc-auc: 0.5918
Эпоха 5/400, Потери: 0.6181, roc-auc: 0.6400, Вал. потери: 0.6497, Вал. roc-auc: 0.5905
Эпоха 6/400, Потери: 0.6017, roc-auc: 0.6611, Вал. потери: 0.6328, Вал. roc-auc: 0.5978
Эпоха 7/400, Потери: 0.6034, roc-auc: 0.6617, Вал. потери: 0.6448, Вал. roc-auc: 0.5813
Эпоха 8/400, Потери: 0.6058, roc-auc: 0.6573, Вал. потери: 0.6313, Вал. roc-auc: 0.5995
Эпоха 9/400, Потери: 0.6012, roc-auc: 0.6641, Вал. потери: 0.8076, Вал. roc-auc: 0.5595
Эпоха 10/400, Потери: 0.6091, roc-auc: 0.6454, Вал. потери: 0.6435, Вал. roc-auc: 0.6035
Эпоха 11/400, Потери: 0.6048, roc-auc: 0.6581, Вал. потери: 0.6319, Вал. roc-auc: 0.5855
Эпоха 12/400, Потери: 0.6142, 

In [116]:
rocs = []
for i in range(1, 6):
    all_train_pgs = pd.read_csv(f"./pgs_results_calculated/train_{i}_pgs.csv")
    validation_pgs = pd.read_csv(f"./pgs_results_calculated/test_{i}_pgs.csv")
    X_train = all_train_pgs.iloc[:, 2:-1].to_numpy()
    y_train = (all_train_pgs["y"] - 1).to_numpy()
    X_val = validation_pgs.iloc[:, 2:-1].to_numpy()
    y_val = (validation_pgs["y"] - 1).to_numpy()

    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
    X_val_tensor = torch.FloatTensor(X_val)
    y_val_tensor = torch.FloatTensor(y_val).reshape(-1, 1)

    batch_size = 32
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    hiden_size = 2000

    class BinaryClassifier(nn.Module):
        def __init__(self, input_size):
            super(BinaryClassifier, self).__init__()
            self.model = nn.Sequential(
                nn.Linear(input_size, hiden_size),
                nn.ReLU(),
                nn.BatchNorm1d(hiden_size),
                nn.Dropout(0.3),
                
                nn.Linear(hiden_size, 1),
                nn.Sigmoid()
            )
            
        def forward(self, x):
            return self.model(x)

    input_size = X_train.shape[1]
    model = BinaryClassifier(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    num_epochs = 400
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_predictions = []
        train_targets = []
        
        for inputs, targets in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            train_predictions.extend(outputs.detach().numpy())
            train_targets.extend(targets.numpy())
        
        train_loss /= len(train_loader)
        train_preds = np.array(train_predictions) >= 0.5
        train_accuracy = roc_auc_score(train_targets, train_predictions)
        
        model.eval()
        val_loss = 0
        val_predictions = []
        val_targets = []
        
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                
                val_loss += loss.item()
                val_predictions.extend(outputs.numpy())
                val_targets.extend(targets.numpy())
        
        val_loss /= len(val_loader)
        val_preds = np.array(val_predictions) >= 0.5
        val_accuracy = roc_auc_score(val_targets, val_predictions)
        
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_accuracy)
        history['val_acc'].append(val_accuracy)

    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_preds = (val_outputs >= 0.5).numpy().flatten()
        accuracy = accuracy_score(y_val, val_preds)
        try:
            auc = roc_auc_score(y_val, val_outputs.numpy())
            rocs.append(auc)
            print(f'Итоговая точность: {accuracy:.4f}, AUC: {auc:.4f}')
        except:
            print(f'Итоговая точность: {accuracy:.4f}')
    print(np.mean(history['val_acc'][-10:]))
print("Итоговое качество:", np.mean(rocs))


Итоговая точность: 0.6786, AUC: 0.7335
0.7202793539938892
Итоговая точность: 0.7366, AUC: 0.7556
0.7611633109619687
Итоговая точность: 0.6652, AUC: 0.7199
0.7102059330490792
Итоговая точность: 0.6920, AUC: 0.6687
0.6651185682326621
Итоговая точность: 0.6875, AUC: 0.7365
0.7333603603603605
Итоговое качество: 0.7228509117135267


In [106]:
import optuna
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif

def objective(trial):
    k_features = trial.suggest_int('k_features', 9, 23)
    hidden_size = trial.suggest_int('hidden_size', 1000, 2000)
    num_layers = trial.suggest_int('num_layers', 1, 2)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    
    selector = SelectKBest(f_classif, k=k_features)
    X_train_s = selector.fit_transform(X_train, y_train)
    X_val_s = selector.transform(X_val)
    
    X_train_tensor = torch.FloatTensor(X_train_s)
    y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
    X_val_tensor = torch.FloatTensor(X_val_s)
    y_val_tensor = torch.FloatTensor(y_val).reshape(-1, 1)
    
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    class BinaryClassifier(nn.Module):
        def __init__(self, input_size):
            super(BinaryClassifier, self).__init__()
                
            layers = []
            
            layers.append(nn.Linear(input_size, hidden_size))
            layers.append(nn.LeakyReLU())
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(dropout_rate))
            
            for _ in range(num_layers - 1):
                layers.append(nn.Linear(hidden_size, hidden_size))
                layers.append(nn.LeakyReLU())
                layers.append(nn.BatchNorm1d(hidden_size))
                layers.append(nn.Dropout(dropout_rate))
            
            layers.append(nn.Linear(hidden_size, 1))
            layers.append(nn.Sigmoid())
            
            self.model = nn.Sequential(*layers)
            
        def forward(self, x):
            return self.model(x)
    
    input_size = X_train_tensor.shape[1]
    model = BinaryClassifier(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    num_epochs = 200
    history = {'val_acc': []}
    
    for epoch in range(num_epochs):
        model.train()
        for inputs, targets in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_predictions = []
        val_targets = []
        
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                val_predictions.extend(outputs.numpy())
                val_targets.extend(targets.numpy())
        
        val_accuracy = roc_auc_score(val_targets, val_predictions)
        history['val_acc'].append(val_accuracy)
        
        if epoch % 10 == 0:
            trial.report(val_accuracy, epoch)
            
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()
    
    return np.mean(history['val_acc'][-10:])

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

for key, value in study.best_params.items():
    print(f"{key}: {value}")
print(f"Лучшее значение: {study.best_value:.4f}")

try:
    optuna.visualization.plot_param_importances(study)
    optuna.visualization.plot_optimization_history(study)
except:
    pass




[I 2025-04-23 22:54:43,626] A new study created in memory with name: no-name-d393b062-d9b0-4661-ae43-d1d2f3467c81
[I 2025-04-23 22:55:51,131] Trial 0 finished with value: 0.6458873873873874 and parameters: {'k_features': 14, 'hidden_size': 1502, 'num_layers': 2, 'learning_rate': 0.00023570721124239344, 'dropout_rate': 0.48695894156691094, 'batch_size': 64}. Best is trial 0 with value: 0.6458873873873874.
[I 2025-04-23 22:58:01,791] Trial 1 finished with value: 0.6429144144144144 and parameters: {'k_features': 12, 'hidden_size': 1200, 'num_layers': 2, 'learning_rate': 0.00048714398163255735, 'dropout_rate': 0.2389065570879329, 'batch_size': 16}. Best is trial 0 with value: 0.6458873873873874.
[I 2025-04-23 22:58:08,981] Trial 2 finished with value: 0.7672792792792793 and parameters: {'k_features': 21, 'hidden_size': 1754, 'num_layers': 1, 'learning_rate': 0.00038403856426750887, 'dropout_rate': 0.2341018358710066, 'batch_size': 64}. Best is trial 2 with value: 0.7672792792792793.
[I 202

k_features: 18
hidden_size: 1274
num_layers: 1
learning_rate: 0.0024018101480537866
dropout_rate: 0.24942614281780365
batch_size: 16
Лучшее значение: 0.7720


In [109]:
import optuna
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif

def objective(trial):
    k_features = trial.suggest_int('k_features', 11, 23)
    hidden_size = trial.suggest_int('hidden_size', 1100, 2000)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    batch_size = 32
    
    selector = SelectKBest(f_classif, k=k_features)
    X_train_s = selector.fit_transform(X_train, y_train)
    X_val_s = selector.transform(X_val)
    
    X_train_tensor = torch.FloatTensor(X_train_s)
    y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
    X_val_tensor = torch.FloatTensor(X_val_s)
    y_val_tensor = torch.FloatTensor(y_val).reshape(-1, 1)
    
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    class BinaryClassifier(nn.Module):
        def __init__(self, input_size):
            super(BinaryClassifier, self).__init__()
                
            layers = []
            
            layers.append(nn.Linear(input_size, hidden_size))
            layers.append(nn.LeakyReLU())
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(dropout_rate))
            
            layers.append(nn.Linear(hidden_size, 1))
            layers.append(nn.Sigmoid())
            
            self.model = nn.Sequential(*layers)
            
        def forward(self, x):
            return self.model(x)
    
    input_size = X_train_tensor.shape[1]
    model = BinaryClassifier(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    num_epochs = 200
    history = {'val_acc': []}
    
    for epoch in range(num_epochs):
        model.train()
        for inputs, targets in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_predictions = []
        val_targets = []
        
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                val_predictions.extend(outputs.numpy())
                val_targets.extend(targets.numpy())
        
        val_accuracy = roc_auc_score(val_targets, val_predictions)
        history['val_acc'].append(val_accuracy)
        
        if epoch % 10 == 0:
            trial.report(val_accuracy, epoch)
            
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()
    
    return np.mean(history['val_acc'][-10:])

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=40)

for key, value in study.best_params.items():
    print(f"{key}: {value}")
print(f"Лучшее значение: {study.best_value:.4f}")

try:
    optuna.visualization.plot_param_importances(study)
    optuna.visualization.plot_optimization_history(study)
except:
    pass


[I 2025-04-23 23:26:10,855] A new study created in memory with name: no-name-b849a6eb-99a0-43c1-abaf-94db691aeae5
[I 2025-04-23 23:26:22,580] Trial 0 finished with value: 0.7672882882882883 and parameters: {'k_features': 19, 'hidden_size': 1314, 'learning_rate': 0.0003221601053858557, 'dropout_rate': 0.3090234793811377}. Best is trial 0 with value: 0.7672882882882883.
[I 2025-04-23 23:26:34,164] Trial 1 finished with value: 0.7692792792792793 and parameters: {'k_features': 22, 'hidden_size': 1307, 'learning_rate': 0.004826099307910697, 'dropout_rate': 0.36683404558451804}. Best is trial 1 with value: 0.7692792792792793.
[I 2025-04-23 23:26:45,839] Trial 2 finished with value: 0.7627477477477477 and parameters: {'k_features': 17, 'hidden_size': 1455, 'learning_rate': 7.749544176680637e-05, 'dropout_rate': 0.4701738959878607}. Best is trial 1 with value: 0.7692792792792793.
[I 2025-04-23 23:26:58,072] Trial 3 finished with value: 0.7532612612612614 and parameters: {'k_features': 20, 'hid

k_features: 19
hidden_size: 1470
learning_rate: 0.0027892696743691014
dropout_rate: 0.2953557339848568
Лучшее значение: 0.7712


In [117]:
selector = SelectKBest(f_classif, k=19)
X_train_s = selector.fit_transform(X_train, y_train)
X_val_s = selector.transform(X_val)

X_train_tensor = torch.FloatTensor(X_train_s)
y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
X_val_tensor = torch.FloatTensor(X_val_s)
y_val_tensor = torch.FloatTensor(y_val).reshape(-1, 1)

batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
hiden_size = 1470

class BinaryClassifier(nn.Module):
    def __init__(self, input_size):
        super(BinaryClassifier, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hiden_size),
            nn.LeakyReLU(),
            nn.BatchNorm1d(hiden_size),
            nn.Dropout(0.2953557339848568),
            
            nn.Linear(hiden_size, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        res = self.model(x)
        return res

input_size = X_train_tensor.shape[1]  # 73 признака
model = BinaryClassifier(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0027892696743691014)

num_epochs = 390
history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    train_predictions = []
    train_targets = []
    
    for inputs, targets in train_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        train_predictions.extend(outputs.detach().numpy())
        train_targets.extend(targets.numpy())
    
    train_loss /= len(train_loader)
    train_preds = np.array(train_predictions) >= 0.5
    train_accuracy = roc_auc_score(train_targets, train_predictions)
    
    model.eval()
    val_loss = 0
    val_predictions = []
    val_targets = []
    
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            val_loss += loss.item()
            val_predictions.extend(outputs.numpy())
            val_targets.extend(targets.numpy())
    
    val_loss /= len(val_loader)
    val_preds = np.array(val_predictions) >= 0.5
    val_accuracy = roc_auc_score(val_targets, val_predictions)
    
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['train_acc'].append(train_accuracy)
    history['val_acc'].append(val_accuracy)
    
    print(f'Эпоха {epoch+1}/{num_epochs}, Потери: {train_loss:.4f}, roc-auc: {train_accuracy:.4f}, '
          f'Вал. потери: {val_loss:.4f}, Вал. roc-auc: {val_accuracy:.4f}')

model.eval()
with torch.no_grad():
    val_outputs = model(X_val_tensor)
    val_preds = (val_outputs >= 0.5).numpy().flatten()
    accuracy = accuracy_score(y_val, val_preds)
    try:
        auc = roc_auc_score(y_val, val_outputs.numpy())
        print(f'Итоговая точность: {accuracy:.4f}, AUC: {auc:.4f}')
    except:
        print(f'Итоговая точность: {accuracy:.4f}')
print(np.mean(history['val_acc'][-10:]))


Эпоха 1/390, Потери: 0.6400, roc-auc: 0.5636, Вал. потери: 0.6371, Вал. roc-auc: 0.7521
Эпоха 2/390, Потери: 0.6116, roc-auc: 0.6836, Вал. потери: 0.6314, Вал. roc-auc: 0.7523
Эпоха 3/390, Потери: 0.5784, roc-auc: 0.7180, Вал. потери: 0.7753, Вал. roc-auc: 0.7535
Эпоха 4/390, Потери: 0.5665, roc-auc: 0.7273, Вал. потери: 6.6716, Вал. roc-auc: 0.7558
Эпоха 5/390, Потери: 0.5610, roc-auc: 0.7320, Вал. потери: 1.9125, Вал. roc-auc: 0.7577
Эпоха 6/390, Потери: 0.5640, roc-auc: 0.7299, Вал. потери: 0.5946, Вал. roc-auc: 0.7576
Эпоха 7/390, Потери: 0.5582, roc-auc: 0.7366, Вал. потери: 0.6857, Вал. roc-auc: 0.7589
Эпоха 8/390, Потери: 0.5571, roc-auc: 0.7379, Вал. потери: 1.1043, Вал. roc-auc: 0.7592
Эпоха 9/390, Потери: 0.5617, roc-auc: 0.7321, Вал. потери: 1.0248, Вал. roc-auc: 0.7607
Эпоха 10/390, Потери: 0.5548, roc-auc: 0.7420, Вал. потери: 2.5820, Вал. roc-auc: 0.7621
Эпоха 11/390, Потери: 0.5502, roc-auc: 0.7459, Вал. потери: 0.5411, Вал. roc-auc: 0.7622
Эпоха 12/390, Потери: 0.5680, 

In [118]:
rocs = []
for i in range(1, 6):
    all_train_pgs = pd.read_csv(f"./pgs_results_calculated/train_{i}_pgs.csv")
    validation_pgs = pd.read_csv(f"./pgs_results_calculated/test_{i}_pgs.csv")
    X_train = all_train_pgs.iloc[:, 2:-1].to_numpy()
    y_train = (all_train_pgs["y"] - 1).to_numpy()
    X_val = validation_pgs.iloc[:, 2:-1].to_numpy()
    y_val = (validation_pgs["y"] - 1).to_numpy()

    selector = SelectKBest(f_classif, k=19)
    X_train_s = selector.fit_transform(X_train, y_train)
    X_val_s = selector.transform(X_val)

    X_train_tensor = torch.FloatTensor(X_train_s)
    y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
    X_val_tensor = torch.FloatTensor(X_val_s)
    y_val_tensor = torch.FloatTensor(y_val).reshape(-1, 1)

    batch_size = 32
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    hiden_size = 1470

    class BinaryClassifier(nn.Module):
        def __init__(self, input_size):
            super(BinaryClassifier, self).__init__()
            self.model = nn.Sequential(
                nn.Linear(input_size, hiden_size),
                nn.LeakyReLU(),
                nn.BatchNorm1d(hiden_size),
                nn.Dropout(0.2953557339848568),
                
                nn.Linear(hiden_size, 1),
                nn.Sigmoid()
            )
            
        def forward(self, x):
            return self.model(x)

    input_size = X_train_tensor.shape[1]
    model = BinaryClassifier(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0027892696743691014)

    num_epochs = 390
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_predictions = []
        train_targets = []
        
        for inputs, targets in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            train_predictions.extend(outputs.detach().numpy())
            train_targets.extend(targets.numpy())
        
        train_loss /= len(train_loader)
        train_preds = np.array(train_predictions) >= 0.5
        train_accuracy = roc_auc_score(train_targets, train_predictions)
        
        model.eval()
        val_loss = 0
        val_predictions = []
        val_targets = []
        
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                
                val_loss += loss.item()
                val_predictions.extend(outputs.numpy())
                val_targets.extend(targets.numpy())
        
        val_loss /= len(val_loader)
        val_preds = np.array(val_predictions) >= 0.5
        val_accuracy = roc_auc_score(val_targets, val_predictions)
        
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_accuracy)
        history['val_acc'].append(val_accuracy)

    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_preds = (val_outputs >= 0.5).numpy().flatten()
        accuracy = accuracy_score(y_val, val_preds)
        auc = roc_auc_score(y_val, val_outputs.numpy())
        rocs.append(auc)
        print(f'ROC-AUC: {auc:.4f}')
print("Итоговое качество:", np.mean(rocs))


ROC-AUC: 0.7700
ROC-AUC: 0.7766
ROC-AUC: 0.7515
ROC-AUC: 0.7086
ROC-AUC: 0.7712
Итоговое качество: 0.7555879617836014
