## Классификация средствами PyTorch


In [33]:
import pandas as pd

In [34]:
data = pd.read_csv('../data/classification_preprocessed.csv')
data

Unnamed: 0,Airline_1,Airline_2,Airline_3,Airline_4,Flight,Time,Length,Delay
0,0.0,1.0,1.0,0.0,0.081669,0.895731,0.135878,1
1,1.0,0.0,0.0,0.0,0.087558,0.913226,0.464122,0
2,1.0,1.0,0.0,1.0,0.062596,0.528341,0.106870,0
3,1.0,1.0,0.0,1.0,0.079621,0.748775,0.181679,0
4,1.0,0.0,0.0,1.0,0.374296,0.593422,0.305344,0
...,...,...,...,...,...,...,...,...
173777,0.0,1.0,1.0,0.0,0.140553,0.832050,0.206107,0
173778,0.0,1.0,0.0,0.0,0.327701,0.864241,0.099237,1
173779,0.0,1.0,0.0,1.0,0.071301,0.538838,0.235115,0
173780,1.0,1.0,0.0,1.0,0.084229,0.412876,0.595420,1


In [35]:
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.utils.rnn import pad_sequence

In [91]:
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

X = data.drop('Delay', axis=1)
y = data['Delay']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


class_weights = compute_class_weight('balanced', classes= np.array([0, 1]), y=y)
weights = torch.tensor(class_weights, dtype=torch.float).to('cuda:0')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .3, random_state=42)

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32, device='cuda:0')
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32, device='cuda:0')
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long, device='cuda:0')
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long, device='cuda:0')

train_ds = TensorDataset(X_train_tensor, y_train_tensor)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)

test_ds = TensorDataset(X_test_tensor, y_test_tensor)
test_dl = DataLoader(test_ds, batch_size=64, shuffle=False)

In [95]:
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=7, out_features=2),
    torch.nn.Dropout(p=0.5)
)

In [96]:
model.to('cuda:0')

Sequential(
  (0): Linear(in_features=7, out_features=2, bias=True)
  (1): Dropout(p=0.5, inplace=False)
)

In [99]:
criterion = torch.nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.SGD(model.parameters(), lr=0.005)

epochs = 25
for epoch in range(epochs):
    total_loss = 0
    for x, y in train_dl:
        y_pred = model(x)
        loss = criterion(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()

    print(f'Epoch: [{epoch+1}/{epochs}], Loss: {total_loss/len(train_dl)}')


Epoch: [1/25], Loss: 0.6820040695420194
Epoch: [2/25], Loss: 0.6822198803082445
Epoch: [3/25], Loss: 0.6813095647494835
Epoch: [4/25], Loss: 0.6817577913960051
Epoch: [5/25], Loss: 0.6818584642054093
Epoch: [6/25], Loss: 0.6819884159701677
Epoch: [7/25], Loss: 0.6816396380273497
Epoch: [8/25], Loss: 0.6819508130584248
Epoch: [9/25], Loss: 0.6816603400279572
Epoch: [10/25], Loss: 0.6814604272722007
Epoch: [11/25], Loss: 0.6816216077320454
Epoch: [12/25], Loss: 0.6816328329390315
Epoch: [13/25], Loss: 0.6822406398754631
Epoch: [14/25], Loss: 0.6818157302461632
Epoch: [15/25], Loss: 0.6819612802109676
Epoch: [16/25], Loss: 0.6816439197478578
Epoch: [17/25], Loss: 0.6817653640077843
Epoch: [18/25], Loss: 0.6813560522022779
Epoch: [19/25], Loss: 0.6815985583556695
Epoch: [20/25], Loss: 0.681811615832538
Epoch: [21/25], Loss: 0.6819825135112624
Epoch: [22/25], Loss: 0.6817651189420801
Epoch: [23/25], Loss: 0.6816863668460335
Epoch: [24/25], Loss: 0.6816697964412423
Epoch: [25/25], Loss: 0.68