In [120]:
%load_ext pycodestyle_magic

In [121]:
%pycodestyle_on

In [122]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [123]:
# Read the labelled training table and the unlabelled test table from CSV
# files located in the current working directory.
train_data = pd.read_csv(filepath_or_buffer='train.csv')
test_data = pd.read_csv(filepath_or_buffer='test.csv')

In [124]:
# Report the dimensions of both tables.
print(train_data.shape)
print(test_data.shape)
# Keep the test-set ids as a NumPy array; they label the submission rows.
ids = test_data['id'].values
print(type(ids))
# Preview the first training rows.  This has to be the final expression of
# the cell: Jupyter only renders the value of a cell's last expression, so
# the same call placed mid-cell produced no output at all.
train_data.head(5)

In [125]:
# Data preprocessing: pull the raw feature matrices and the target vector
# out of the DataFrames.  'id' is dropped from both tables and 'failure'
# (used as the label below) is dropped from the training features.
Y = train_data['failure'].values
X_pre = train_data.drop(columns=['failure', 'id']).to_numpy()
test_pre = test_data.drop(columns='id').to_numpy()


def preprossing(X_pre, test_pre):
    """Encode, impute, project and scale the train and test features.

    The pipeline is OrdinalEncoder -> median SimpleImputer -> PCA(2) ->
    MinMaxScaler.  Every transformer is fitted on ``X_pre`` only and then
    applied unchanged to ``test_pre`` so that both matrices live in the
    same feature space.

    Parameters
    ----------
    X_pre : array-like of shape (n_train, n_features)
        Raw training features.
    test_pre : array-like of shape (n_test, n_features)
        Raw test features with the same column layout as ``X_pre``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, test)`` with shapes ``(n_train, 2)`` and ``(n_test, 2)``.
    """
    # Fit the category -> integer mapping once, on the training data.
    # Re-fitting on the test set (as the earlier version did) assigns
    # unrelated codes to the test rows, while the imputer, PCA and scaler
    # below are all fitted on the training codes.
    # Categories that only occur in the test set are encoded as NaN so the
    # imputer can fill them instead of transform() raising.
    # NOTE(review): the encoder is applied to every column, numeric ones
    # included, so unseen numeric test values also become NaN and are
    # median-imputed.  Consider encoding only the categorical columns.
    enc = OrdinalEncoder(handle_unknown='use_encoded_value',
                         unknown_value=np.nan)
    X_pre = enc.fit_transform(X_pre)
    test_pre = enc.transform(test_pre)

    # Replace missing values with the per-column training median.
    imp_median = SimpleImputer(missing_values=np.nan,
                               strategy='median')
    X_pre = imp_median.fit_transform(X_pre)
    test_pre = imp_median.transform(test_pre)

    # Project onto the first two principal components of the training set.
    pca = PCA(n_components=2)
    X_pre = pca.fit_transform(X_pre)
    test_pre = pca.transform(test_pre)

    # Rescale with the training minimum / maximum of each component.
    scaler = MinMaxScaler()
    X_pre = scaler.fit_transform(X_pre)
    test_pre = scaler.transform(test_pre)

    return X_pre, test_pre


# split training and validation data
X, test = preprossing(X_pre, test_pre)
# A fixed random_state makes the 90/10 split identical on every run, so
# validation numbers are comparable after a kernel restart.
train_X, val_X, train_y, val_y = train_test_split(X,
                                                  Y,
                                                  test_size=0.1,
                                                  shuffle=True,
                                                  random_state=42)

# Turn the 1-D label vectors into (n, 1) columns so they line up with the
# single-output model predictions when the loss is computed.
val_y = val_y.reshape(-1, 1)
train_y = train_y.reshape(-1, 1)

In [126]:
# Sanity check: show the container type of the preprocessed test features
# returned by preprossing().
print(type(test))

In [127]:
# Eyeball the validation features first, then the training features.
for split_features in (val_X, train_X):
    print(split_features)

In [128]:
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch
from torch import optim
import torch.nn.functional as F

In [129]:
# Mini-batch size used by the DataLoaders.
BATCH_SIZE = 100
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [130]:
# Training set: float32 features paired with the (n, 1) label column.
train_ds = TensorDataset(torch.FloatTensor(train_X),
                         torch.from_numpy(train_y))
# Reshuffle every epoch and drop the ragged last batch so each optimisation
# step sees exactly BATCH_SIZE rows.
train_dl = DataLoader(train_ds, shuffle=True,
                      batch_size=BATCH_SIZE,
                      drop_last=True)

val_ds = TensorDataset(torch.FloatTensor(val_X),
                       torch.from_numpy(val_y))
# Validation must score every held-out row.  With shuffle=True and
# drop_last=True a different random subset was discarded each epoch, which
# made the validation loss used for checkpoint selection noisy.
val_dl = DataLoader(val_ds, shuffle=False,
                    batch_size=BATCH_SIZE,
                    drop_last=False)

In [131]:
# Wrap the preprocessed test features as a single-tensor dataset and iterate
# over it in its original row order (no shuffling).
test_ds = TensorDataset(torch.FloatTensor(test))
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False)
print(test_dl)

In [132]:

class Model(nn.Module):
    """Small 1-D convolutional binary classifier for two-feature rows.

    A ``(batch, 2)`` input is expanded by a linear layer into a 24-step,
    single-channel sequence, run through two convolutional stages, flattened
    to 128 values and reduced by a fully connected head to one sigmoid
    output per row.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order on purpose: changing the order
        # would change which random numbers initialise which weights.

        # Stage 1 - sequence length 24 -> 22 -> 20 -> (pool) 10.
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3),
            nn.BatchNorm1d(num_features=32),
            nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3),
            nn.MaxPool1d(kernel_size=2),
            nn.BatchNorm1d(num_features=32),
            nn.ReLU(),
        )
        # Stage 2 - sequence length 10 -> 8 -> 4 -> (pool) 2.
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=32, out_channels=48, kernel_size=3),
            nn.BatchNorm1d(num_features=48),
            nn.Conv1d(in_channels=48, out_channels=64, kernel_size=5),
            nn.MaxPool1d(kernel_size=2),
            nn.BatchNorm1d(num_features=64),
            nn.ReLU(),
        )

        # Fully connected head; 128 = 64 channels x 2 remaining steps.
        self.layer_1 = nn.Linear(in_features=128, out_features=64)
        self.layer_2 = nn.Linear(in_features=64, out_features=16)
        self.layer_out = nn.Linear(in_features=16, out_features=1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.sigmoid = nn.Sigmoid()
        # Lifts the 2 input features to the 24-step sequence fed to conv1.
        self.input = nn.Linear(in_features=2, out_features=24)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for 2-D ``x``."""
        n_rows, n_feats = x.shape
        # Insert a channel axis: (batch, features) -> (batch, 1, features).
        seq = x.view(n_rows, 1, n_feats)
        seq = self.relu(self.input(seq))

        seq = self.dropout(self.conv1(seq))
        seq = self.dropout(self.conv2(seq))

        # Collapse channels and steps into one feature vector per row.
        flat = seq.view(n_rows, -1)
        hidden = self.dropout(self.relu(self.layer_1(flat)))
        hidden = self.relu(self.layer_2(hidden))

        return self.sigmoid(self.layer_out(hidden))

In [133]:

# Fresh model, optimiser and loss for this training run.
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
# The model ends in a sigmoid, so binary cross-entropy on probabilities is
# the matching loss.
loss_fn = nn.BCELoss()

# Checkpoint location.  Defined before the loop so the name always exists,
# not only after the first save.
PATH = "model.pt"
best_acc = 0
min_loss = 10000000
for epoch in range(10):
    print(f"Epoch [{epoch}]")

    # ---- training pass ----
    model.train()
    for attr, label in train_dl:
        attr = attr.to(device)
        label = label.to(device, dtype=torch.float)

        pred = model(attr)
        loss = loss_fn(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- validation pass ----
    sample_count = 0
    correct_count = 0
    epoch_loss = 0
    model.eval()
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for attr, label in val_dl:
            attr = attr.to(device)
            label = label.to(device, dtype=torch.float)

            pred = model(attr)
            loss = loss_fn(pred, label)
            # Threshold the probabilities at 0.5 to get hard 0/1 labels.
            pred = torch.round(pred)
            sample_count += len(attr)
            # .item() keeps the counter a plain Python int, so the accuracy
            # below is a float rather than a tensor.
            correct_count += torch.eq(pred, label).sum().item()
            epoch_loss += loss.item()

    # Guard against an empty validation loader.
    val_acc = correct_count / sample_count if sample_count else 0.0
    best_acc = max(best_acc, val_acc)
    print("accuracy:", val_acc, " val loss: ", epoch_loss)

    # Keep the checkpoint with the lowest summed validation loss so far.
    if epoch_loss <= min_loss:
        min_loss = epoch_loss
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Store the epoch's validation loss as a float; the earlier
            # version stored only the last batch's loss tensor.
            'loss': epoch_loss,
        }, PATH)

In [134]:
import csv

In [135]:
# load model
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
# Same loss as in training (the model outputs sigmoid probabilities).
loss_fn = nn.BCELoss()
# map_location lets a checkpoint written on a GPU be restored on a
# CPU-only machine, and vice versa.
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']

# Collect one predicted probability per test row, in loader order.
model.eval()
predict = []
# Inference only: disable autograd so no graph is built per batch.
with torch.no_grad():
    for attr in test_dl:
        # A single-tensor TensorDataset yields one-element batches.
        att = attr[0].to(device)

        pred = model(att)
        # Flatten (batch, 1) -> (batch,) and append as Python floats.
        predict.extend(pred.reshape(-1).tolist())

In [136]:
import csv
# zip() would silently stop at the shorter sequence and produce a truncated
# submission, so fail loudly if ids and predictions do not line up.
if len(ids) != len(predict):
    raise ValueError(
        f"got {len(predict)} predictions for {len(ids)} test ids")

# Write one "id,failure" row per test sample below a header line.
with open('submission.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "failure"])
    csv_writer.writerows(zip(ids, predict))