In [1]:
!pip install torch==2.7.1 tqdm==4.66.4 scikit-learn==1.4.2 pandas numpy

Defaulting to user installation because normal site-packages is not writeable


In [35]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import (
    StandardScaler,
    MinMaxScaler,
    LabelEncoder,
    OneHotEncoder,
    OrdinalEncoder,
)

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.metrics import accuracy_score, classification_report

In [36]:
df = pd.read_csv('data/other/stroke_data.csv')
df.sample(10)

Unnamed: 0.1,Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
3805,3805,5500,Female,50.0,0.0,1.0,Yes,Govt_job,Urban,68.09,35.5,smokes,0
190,190,24905,Female,65.0,0.0,0.0,Yes,Private,Urban,205.77,46.0,,1
1952,1952,48364,Male,52.0,0.0,0.0,Yes,Govt_job,Urban,223.35,27.3,formerly smoked,0
2173,2173,38143,Female,67.0,1.0,0.0,Yes,Private,Urban,90.01,34.4,smokes,0
4718,4718,22477,Male,41.0,,0.0,Yes,Private,Rural,79.66,25.1,Unknown,0
707,707,72882,Male,47.0,,0.0,Yes,Private,Rural,75.3,25.0,formerly smoked,0
3873,3873,72041,Male,23.0,0.0,0.0,No,Private,Urban,82.53,20.7,smokes,0
4222,4222,27416,Female,34.0,0.0,0.0,Yes,Private,Rural,86.92,22.0,Unknown,0
4216,4216,41263,Female,16.0,0.0,0.0,No,Private,Urban,75.06,23.5,never smoked,0
2651,2651,3746,Female,66.0,0.0,0.0,Yes,Private,Urban,76.83,26.0,never smoked,0


In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5110 entries, 0 to 5109
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         5110 non-null   int64  
 1   id                 5110 non-null   int64  
 2   gender             5110 non-null   object 
 3   age                5110 non-null   float64
 4   hypertension       4380 non-null   float64
 5   heart_disease      4716 non-null   float64
 6   ever_married       5110 non-null   object 
 7   work_type          5110 non-null   object 
 8   Residence_type     5110 non-null   object 
 9   avg_glucose_level  5110 non-null   float64
 10  bmi                4909 non-null   float64
 11  smoking_status     4599 non-null   object 
 12  stroke             5110 non-null   int64  
dtypes: float64(5), int64(3), object(5)
memory usage: 519.1+ KB


In [38]:
df["stroke"].value_counts()

stroke
0    4861
1     249
Name: count, dtype: int64

In [39]:
df["stroke"].value_counts(normalize=True)

stroke
0    0.951272
1    0.048728
Name: proportion, dtype: float64

In [40]:
df.drop(columns=['Unnamed: 0', 'id'], inplace=True)
df.sample()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
481,Female,53.0,0.0,,Yes,Self-employed,Rural,96.88,31.4,Unknown,0


In [41]:
num_cols = df.select_dtypes(include=["int64", "float64"]).columns.tolist()
num_cols

['age', 'hypertension', 'heart_disease', 'avg_glucose_level', 'bmi', 'stroke']

In [42]:
cat_cols = df.select_dtypes(include=["object"]).columns.tolist()
cat_cols

['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']

In [43]:
df.isna().sum()

gender                 0
age                    0
hypertension         730
heart_disease        394
ever_married           0
work_type              0
Residence_type         0
avg_glucose_level      0
bmi                  201
smoking_status       511
stroke                 0
dtype: int64

In [44]:
label_encoder = LabelEncoder() #–û–≤–∞ —Å–æ–∑–¥–∞–≤–∞ –æ–±—ò–µ–∫—Ç –æ–¥ –∫–ª–∞—Å–∞—Ç–∞ LabelEncoder –∫–æ—ò —Å–µ –∫–æ—Ä–∏—Å—Ç–∏ –∑–∞ –ø—Ä–µ—Ç–≤–æ—Ä–∞—ö–µ –Ω–∞ –∫–∞—Ç–µ–≥–æ—Ä–∏—Å–∫–∏ –ø–æ–¥–∞—Ç–æ—Ü–∏ (—Ç–µ–∫—Å—Ç) –≤–æ –±—Ä–æ–µ–≤–∏.


In [45]:
df['gender'] = label_encoder.fit_transform(df['gender'])
df['ever_married'] = label_encoder.fit_transform(df['ever_married'])
df['work_type'] = label_encoder.fit_transform(df['work_type'])
df['Residence_type'] = label_encoder.fit_transform(df['Residence_type'])

In [46]:
df['smoking_status'] = label_encoder.fit_transform(df['smoking_status'])
label_encoder.classes_

array(['Unknown', 'formerly smoked', 'never smoked', 'smokes', nan],
      dtype=object)

In [47]:
df.loc[df['smoking_status'] == 4, 'smoking_status'] = np.nan
#–û–≤–æ—ò –∫–æ–¥ –≥–∏ –∑–∞–º–µ–Ω–∏ —Å–∏—Ç–µ –≤—Ä–µ–¥–Ω–æ—Å—Ç–∏ 4 –≤–æ –∫–æ–ª–æ–Ω–∞—Ç–∞ smoking_status —Å–æ NaN (–Ω–µ–¥–æ—Å—Ç–∞—Å—É–≤–∞ –≤—Ä–µ–¥–Ω–æ—Å—Ç).
#–ù–∞—ò–≤–µ—Ä–æ—ò–∞—Ç–Ω–æ 4 –ø—Ä–µ—Ç—Å—Ç–∞–≤—É–≤–∞ "unknown" –∏–ª–∏ "–Ω–µ–≤–∞–∂–µ—á–∫–∞ –≤—Ä–µ–¥–Ω–æ—Å—Ç" –≤–æ –∫–æ–ª–æ–Ω–∞—Ç–∞ –∑–∞ –ø—É—à–µ—ö–µ.


In [48]:
df.sample(3)

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
2918,0,63.0,0.0,1.0,1,3,0,239.95,32.2,3.0,0
140,1,82.0,,1.0,1,0,0,103.68,25.0,,1
1392,0,2.0,0.0,0.0,0,4,0,109.56,16.4,0.0,0


In [49]:
median_imputer = SimpleImputer(strategy="median")
#–ê–ª–∞—Ç–∫–∞ –æ–¥ scikit-learn –∫–æ—ò–∞ –∞–≤—Ç–æ–º–∞—Ç—Å–∫–∏ –≥–∏ –ø–æ–ø–æ–ª–Ω—É–≤–∞ NaN (–Ω–µ–¥–æ—Å—Ç–∞—Å—É–≤–∞—á–∫–∏) –≤—Ä–µ–¥–Ω–æ—Å—Ç–∏ —Å–æ –∏–∑–±—Ä–∞–Ω–∞ —Å—Ç—Ä–∞—Ç–µ–≥–∏—ò–∞.


In [50]:
df[['bmi']] = median_imputer.fit_transform(df[['bmi']])
#–û–≤–æ—ò –∫–æ–¥ –≥–∏ –ø–æ–ø–æ–ª–Ω—É–≤–∞ –Ω–µ–¥–æ—Å—Ç–∞—Å—É–≤–∞—á–∫–∏—Ç–µ –≤—Ä–µ–¥–Ω–æ—Å—Ç–∏ (NaN) –≤–æ –∫–æ–ª–æ–Ω–∞—Ç–∞ bmi —Å–æ –º–µ–¥–∏—ò–∞–Ω–∞—Ç–∞ –Ω–∞ —Ç–∞–∞ –∫–æ–ª–æ–Ω–∞.
#–ó–æ—à—Ç–æ? BMI –º–æ–∂–µ –¥–∞ –∏–º–∞ outliers (–µ–∫—Å—Ç—Ä–µ–º–Ω–∏ –≤—Ä–µ–¥–Ω–æ—Å—Ç–∏), –ø–∞ –º–µ–¥–∏—ò–∞–Ω–∞—Ç–∞ –µ –ø–æ—Ä–æ–±—É—Å–Ω–∞.

In [51]:
knn_imputer = KNNImputer(n_neighbors=31)
#–û–≤–∞ —Å–æ–∑–¥–∞–≤–∞ KNN Imputer –æ–±—ò–µ–∫—Ç –∫–æ—ò –≥–∏ –ø–æ–ø–æ–ª–Ω—É–≤–∞ –Ω–µ–¥–æ—Å—Ç–∞—Å—É–≤–∞—á–∫–∏—Ç–µ –≤—Ä–µ–¥–Ω–æ—Å—Ç–∏ –∫–æ—Ä–∏—Å—Ç–µ—ò—ú–∏ K-Nearest Neighbors (K-–Ω–∞—ò–±–ª–∏—Å–∫–∏ —Å–æ—Å–µ–¥–∏) –∞–ª–≥–æ—Ä–∏—Ç–∞–º —Å–æ 31 —Å–æ—Å–µ–¥.

In [52]:
df['smoking_status'] = knn_imputer.fit_transform(df[['smoking_status']])
df['heart_disease'] = knn_imputer.fit_transform(df[['heart_disease']])
df['hypertension'] = knn_imputer.fit_transform(df[['hypertension']])

In [53]:
df.sample(3)

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
3500,1,44.0,0.097945,0.0,1,3,0,188.13,44.7,1.381605,0
1438,1,47.0,0.0,0.0,1,2,0,93.55,31.4,2.0,0
973,0,49.0,0.097945,0.0,0,2,0,65.81,32.3,0.0,0


In [54]:
df.isna().sum()

gender               0
age                  0
hypertension         0
heart_disease        0
ever_married         0
work_type            0
Residence_type       0
avg_glucose_level    0
bmi                  0
smoking_status       0
stroke               0
dtype: int64

In [55]:
data_stroke = df.loc[df['stroke'] == 1]    # –ó–µ–º–∞ stroke —Å–ª—É—á–∞i
#–ò–º–∞—à –æ–≥—Ä–æ–º–µ–Ω –¥–∏—Å–±–∞–ª–∞–Ω—Å - –º–Ω–æ–≥—É –ø–æ–≤–µ—ú–µ –ª—É—ì–µ –±–µ–∑ stroke –æ—Ç–∫–æ–ª–∫—É —Å–æ stroke!

data_stroke.info()

<class 'pandas.core.frame.DataFrame'>
Index: 249 entries, 0 to 248
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             249 non-null    int64  
 1   age                249 non-null    float64
 2   hypertension       249 non-null    float64
 3   heart_disease      249 non-null    float64
 4   ever_married       249 non-null    int64  
 5   work_type          249 non-null    int64  
 6   Residence_type     249 non-null    int64  
 7   avg_glucose_level  249 non-null    float64
 8   bmi                249 non-null    float64
 9   smoking_status     249 non-null    float64
 10  stroke             249 non-null    int64  
dtypes: float64(6), int64(5)
memory usage: 23.3 KB


In [56]:
data_no_stroke = df.loc[df['stroke'] == 0]
data_no_stroke.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4861 entries, 249 to 5109
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             4861 non-null   int64  
 1   age                4861 non-null   float64
 2   hypertension       4861 non-null   float64
 3   heart_disease      4861 non-null   float64
 4   ever_married       4861 non-null   int64  
 5   work_type          4861 non-null   int64  
 6   Residence_type     4861 non-null   int64  
 7   avg_glucose_level  4861 non-null   float64
 8   bmi                4861 non-null   float64
 9   smoking_status     4861 non-null   float64
 10  stroke             4861 non-null   int64  
dtypes: float64(6), int64(5)
memory usage: 455.7 KB


In [57]:
data_no_stroke = data_no_stroke.sample(350)
data_no_stroke.info() 

<class 'pandas.core.frame.DataFrame'>
Index: 350 entries, 3533 to 3316
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             350 non-null    int64  
 1   age                350 non-null    float64
 2   hypertension       350 non-null    float64
 3   heart_disease      350 non-null    float64
 4   ever_married       350 non-null    int64  
 5   work_type          350 non-null    int64  
 6   Residence_type     350 non-null    int64  
 7   avg_glucose_level  350 non-null    float64
 8   bmi                350 non-null    float64
 9   smoking_status     350 non-null    float64
 10  stroke             350 non-null    int64  
dtypes: float64(6), int64(5)
memory usage: 32.8 KB


In [58]:
data = pd.concat([data_no_stroke, data_stroke])  #–ì–∏ —Å–ø–æ—ò—É–≤–∞ –¥–≤–∞—Ç–∞ DataFrame-–∞ –≤–µ—Ä—Ç–∏–∫–∞–ª–Ω–æ (–µ–¥–µ–Ω –ø–æ–¥ –¥—Ä—É–≥).
data = data.sample(frac=1)
data

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
249,1,3.0,0.000000,0.000000,0,4,0,95.12,18.0,0.000000,0
216,1,78.0,0.000000,0.000000,1,3,1,116.10,27.1,2.000000,1
224,1,49.0,0.097945,0.000000,0,2,0,104.86,31.9,3.000000,1
110,1,79.0,0.000000,1.000000,1,2,0,129.98,22.6,1.381605,1
17,1,75.0,1.000000,0.000000,1,2,1,221.29,25.8,3.000000,1
...,...,...,...,...,...,...,...,...,...,...,...
1671,0,65.0,0.000000,0.000000,1,2,1,84.75,21.4,0.000000,0
246,0,75.0,0.000000,0.000000,1,3,0,78.80,29.3,1.000000,1
2945,0,61.0,0.000000,0.000000,1,3,1,77.06,27.0,2.000000,0
416,0,3.0,0.000000,0.054283,0,4,1,73.20,16.8,0.000000,0


In [87]:
data.loc[data['stroke']==0]

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
249,1,3.0,0.0,0.000000,0,4,0,95.12,18.0,0.0,0
572,1,54.0,0.0,0.054283,1,2,1,81.26,26.4,1.0,0
3184,0,36.0,0.0,0.000000,1,2,0,74.14,31.2,1.0,0
4595,1,40.0,0.0,0.000000,1,0,0,72.84,26.1,0.0,0
2593,1,5.0,0.0,0.000000,0,4,0,74.79,19.4,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...
3613,0,76.0,1.0,0.000000,1,3,1,78.70,27.6,1.0,0
2508,1,62.0,0.0,0.000000,1,2,0,59.61,32.5,0.0,0
1671,0,65.0,0.0,0.000000,1,2,1,84.75,21.4,0.0,0
2945,0,61.0,0.0,0.000000,1,3,1,77.06,27.0,2.0,0


### Train/Validation Split and Scaling

We now:

1. Separate **X** (features) and **y** (target).
2. Split into training and validation sets using `train_test_split` (80% train, 20% validation). 
3. Use `StandardScaler` to scale features so that each numeric column has zero mean and unit variance.

Why scaling?
- Neural networks train more efficiently when inputs are on a similar scale.
- Gradient descent steps become more stable and less sensitive to the magnitude of features.


In [78]:
X = data.drop(columns=['stroke']).values.astype('float32') 
y = data['stroke'].values.astype('float32') 

X.shape, y.shape

((599, 10), (599,))

In [60]:
X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=0.2)

X_train.shape, X_val.shape

((479, 10), (120, 10))

In [61]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

X_train[:3]


#–ó–æ—à—Ç–æ –æ–≤–∞ –µ –ø–æ—Ç—Ä–µ–±–Ω–æ?–ù–µ–≤—Ä–æ–Ω—Å–∫–∏ –º—Ä–µ–∂–∏, KNN, SVM

array([[-0.925521  ,  0.6023508 , -0.44439068, -0.14673108,  0.6393931 ,
        -0.14227016, -1.0448459 , -0.28776953, -0.16966811, -0.44710907],
       [-0.925521  , -1.3039874 , -0.44439068, -0.33538178, -1.5639831 ,
        -2.1175573 ,  0.9570789 , -0.09621874,  0.9944056 , -0.06457657],
       [-0.925521  , -1.4772909 , -0.15907274, -0.33538178, -1.5639831 ,
        -2.1175573 , -1.0448459 , -0.01837307, -0.82529604,  0.5553223 ]],
      dtype=float32)

### PyTorch Dataset and DataLoader

To train neural networks efficiently, we use **mini-batches** instead of feeding all data at once.

Concepts:

- **Dataset**: an object that knows how to return a single sample (`(X[i], y[i])`).
- **DataLoader**: wraps a Dataset and:
  - creates batches,
  - shuffles the data for training,
  - can use multiple workers to load data in parallel.

We define a custom `StrokeDataset` that converts NumPy arrays into PyTorch tensors.


In [62]:
class StrokeDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


train_ds = StrokeDataset(X_train, y_train)
val_ds = StrokeDataset(X_val, y_val)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)

len(train_ds), len(val_ds)

(479, 120)

In [None]:
"""
‚ö†Ô∏è –ú–ù–û–ì–£ –ß–ï–°–¢–ê –ì–†–ï–®–ö–ê (–≤–∞–∂–Ω–æ –∑–∞ —Ç–µ–±–µ)
–ê–∫–æ –∫–æ—Ä–∏—Å—Ç–∏—à:
criterion = nn.BCEWithLogitsLoss()
—Ç–æ–≥–∞—à:
y –º–æ—Ä–∞ –¥–∞ –±–∏–¥–µ float
shape –æ–±–∏—á–Ω–æ: (batch_size, 1)

–ê–∫–æ –∫–æ—Ä–∏—Å—Ç–∏—à:
nn.CrossEntropyLoss()
—Ç–æ–≥–∞—à:
y –º–æ—Ä–∞ –¥–∞ –±–∏–¥–µ long
shape: (batch_size,)
"""

### Building a Neural Network in PyTorch

A **feed-forward neural network** (Multi-Layer Perceptron) is composed of layers of **neurons**:

- Each neuron computes:  
  \[ z = w \cdot x + b \]  
  where:
  - \( x \) is the input vector,
  - \( w \) is a vector of weights,
  - \( b \) is a bias term.


- Then we apply a non-linear **activation function**, such as **ReLU** (Rectified Linear Unit):  
  \[ \text{ReLU}(z) = \max(0, z) \]

Stacking multiple layers allows the network to learn complex, non-linear relationships between input features and the target.

### Our architecture

- Input layer: size = number of features
- Hidden layer 1: 64 neurons + ReLU
- Hidden layer 2: 32 neurons + ReLU
- Output layer: 1 neuron producing a **logit** (a raw score before applying sigmoid)

We will use loss which applies **sigmoid** internally to convert the logit to a probability in \([0, 1]\) and then computes **binary cross-entropy** loss.


In [63]:
class StrokeNet(nn.Module):
    def __init__(self, in_features: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),  # one logit
        )

    def forward(self, x):
        # Output shape: (batch_size,)
        return self.net(x).squeeze(1)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = StrokeNet(in_features=X_train.shape[1]).to(device)
model

Using device: cpu


StrokeNet(
  (net): Sequential(
    (0): Linear(in_features=10, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

### Loss Function and Optimizer

**Loss function**: measures how far the model's predictions are from the true labels.

We use:
- `nn.BCEWithLogitsLoss(pos_weight=...)`  
  - Combines a **sigmoid** activation and **binary cross-entropy** in a numerically stable way.
  - `pos_weight` tells the loss to **penalize errors on positive examples more**, to counter class imbalance.

**Optimizer**: algorithm that updates the model's weights to minimize the loss.

We use:
- `torch.optim.Adam`  
  - An adaptive gradient-based optimizer (a popular variant of stochastic gradient descent).
  - Takes learning rate `lr` as a key hyperparameter (we use `1e-3`).

In [88]:
pos_weight_value = 250/350      #pos_weight = neg_count / pos_count        --0/1
pos_weight_tensor = torch.tensor([pos_weight_value], dtype=torch.float32).to(device)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
"""
3Ô∏è‚É£ criterion = nn.BCEWithLogitsLoss(pos_weight=...)
üëâ –®—Ç–æ –ø—Ä–∞–≤–∏ –æ–≤–∞ —Ä–µ–∞–ª–Ω–æ:
–∞–∫–æ –º–æ–¥–µ–ª–æ—Ç —É—Ç–Ω–µ stroke (1)
‚Üí –¥–æ–±–∏–≤–∞ –ø–æ–≥–æ–ª–µ–º–∞ –∫–∞–∑–Ω–∞
–∞–∫–æ —É—Ç–Ω–µ 0
‚Üí –ø–æ–º–∞–ª–∞ –∫–∞–∑–Ω–∞

–°–æ –¥—Ä—É–≥–∏ –∑–±–æ—Ä–æ–≤–∏:
‚Äû–ü–æ–¥–æ–±—Ä–æ –µ –¥–∞ —É—Ç–Ω–µ—à –∑–¥—Ä–∞–≤, –æ—Ç–∫–æ–ª–∫—É –¥–∞ —É—Ç–Ω–µ—à stroke –ø–∞—Ü–∏–µ–Ω—Ç‚Äú
"""

### Training loop steps for each epoch:

For each batch:
1. Move data to device (CPU or GPU).
2. Call `model(X_batch)` to get predictions (logits).
3. Compute loss by comparing predictions to true labels.
4. Call `loss.backward()` to compute gradients of loss w.r.t. each parameter.
5. Call `optimizer.step()` to update the parameters in the opposite direction of the gradient.
6. Call `optimizer.zero_grad()` before the next batch to reset gradients.

We repeat this for several **epochs** ‚Äì full passes over the training data.

### Evaluation

To evaluate on the validation set:
1. Set `model.eval()` to switch off training-specific behavior (like dropout, if used).
2. Disable gradient computation with `torch.no_grad()` for efficiency.
3. Apply `torch.sigmoid` to logits to get probabilities.
4. Threshold at 0.5 to get class predictions (0 or 1).
5. Use `accuracy_score` and `classification_report` to summarize performance.


In [89]:
def train_one_epoch(epoch_idx: int):
    model.train()
    losses = []

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch_idx+1}", leave=False)

    for X_batch, y_batch in progress_bar:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()              #–ì–∏ –±—Ä–∏—à–µ—à —Å—Ç–∞—Ä–∏—Ç–µ –≥—Ä–µ—à–∫–∏
        logits = model(X_batch)
        loss = criterion(logits, y_batch)

        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})

    return float(np.mean(losses))


def evaluate():
    model.eval()
    all_probs = []
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            logits = model(X_batch)

            probs = torch.sigmoid(logits).cpu().numpy()
            preds = (probs >= 0.5).astype(int)

            all_probs.append(probs)
            all_preds.append(preds)
            all_targets.append(y_batch.numpy())

    all_probs = np.concatenate(all_probs).reshape(-1)
    all_preds = np.concatenate(all_preds).reshape(-1)
    all_targets = np.concatenate(all_targets).reshape(-1)

    acc = accuracy_score(all_targets, all_preds)
    return acc, all_targets, all_preds

In [None]:
"""
from sklearn.metrics import f1_score

f1 = f1_score(all_targets, all_preds)
print("F1 score:", f1)

"""

### Run Training

We now train the model for a fixed number of epochs (e.g., 20).

During training, we print:

- **Training loss**: should generally go down over time.
- **Validation accuracy**: helps us see how well the model generalizes to unseen data.

Note: Because of class imbalance, accuracy can be misleading; we will inspect precision/recall later.


In [90]:
EPOCHS = 20

for epoch in range(EPOCHS):
    train_loss = train_one_epoch(epoch)
    val_acc, _, _ = evaluate()
    print(
        f"Epoch {epoch+1:02d}/{EPOCHS} | "
        f"train_loss = {train_loss:.4f} | val_acc = {val_acc:.4f}"
    )

print("Training finished!")

                                                                                

Epoch 01/20 | train_loss = 2.2474 | val_acc = 0.4083


                                                                                

Epoch 02/20 | train_loss = 1.8645 | val_acc = 0.4083


                                                                                

Epoch 03/20 | train_loss = 1.5393 | val_acc = 0.4083


                                                                                

Epoch 04/20 | train_loss = 1.2586 | val_acc = 0.4083


                                                                                

Epoch 05/20 | train_loss = 1.0228 | val_acc = 0.4083


                                                                                

Epoch 06/20 | train_loss = 0.8004 | val_acc = 0.4083


                                                                                

Epoch 07/20 | train_loss = 0.6389 | val_acc = 0.5833


                                                                                

Epoch 08/20 | train_loss = 0.5700 | val_acc = 0.6417


                                                                                

Epoch 09/20 | train_loss = 0.5352 | val_acc = 0.5750


                                                                                

Epoch 10/20 | train_loss = 0.5173 | val_acc = 0.6000


                                                                                

Epoch 11/20 | train_loss = 0.4922 | val_acc = 0.6417


                                                                                

Epoch 12/20 | train_loss = 0.4702 | val_acc = 0.7250


                                                                                

Epoch 13/20 | train_loss = 0.4539 | val_acc = 0.7250


                                                                                

Epoch 14/20 | train_loss = 0.4296 | val_acc = 0.7250


                                                                                

Epoch 15/20 | train_loss = 0.4172 | val_acc = 0.7417


                                                                                

Epoch 16/20 | train_loss = 0.4087 | val_acc = 0.7333


                                                                                

Epoch 17/20 | train_loss = 0.3920 | val_acc = 0.7333


                                                                                

Epoch 18/20 | train_loss = 0.3897 | val_acc = 0.7250


                                                                                

Epoch 19/20 | train_loss = 0.3808 | val_acc = 0.7250


                                                                                

Epoch 20/20 | train_loss = 0.3732 | val_acc = 0.7333
Training finished!


In [91]:
val_acc, y_true, y_pred = evaluate()
print("Validation accuracy:", val_acc)
print()
print("Classification report:")
print(classification_report(y_true, y_pred, digits=3))

Validation accuracy: 0.7333333333333333

Classification report:
              precision    recall  f1-score   support

         0.0      0.760     0.803     0.781        71
         1.0      0.689     0.633     0.660        49

    accuracy                          0.733       120
   macro avg      0.724     0.718     0.720       120
weighted avg      0.731     0.733     0.731       120

