In [1]:
import csv
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [2]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
PATH = "model3.pt" # load model

In [3]:
# data preprocessing
ids = test_data['id'].values
X_pre = train_data.drop(['failure', 'id'],
                        axis=1).to_numpy()
test_pre = test_data.drop('id', axis=1).to_numpy()
Y = train_data['failure'].values


def preprossing(X_pre, test_pre):
    enc = OrdinalEncoder()
    enc.fit(X_pre)
    X_pre = enc.transform(X_pre)
    test_pre = enc.fit_transform(test_pre)

    imp_mean = SimpleImputer(missing_values=np.nan,
                             strategy='median')
    imp_mean.fit(X_pre)
    X_pre = imp_mean.transform(X_pre)
    test_pre = imp_mean.transform(test_pre)

    pca = PCA(n_components=2)
    X_pre = pca.fit_transform(X_pre)
    test_pre = pca.transform(test_pre)
    scaler = MinMaxScaler()
    X_pre = scaler.fit_transform(X_pre)
    test_pre = scaler.transform(test_pre)

    return X_pre, test_pre


# split training and validation data
X, test = preprossing(X_pre, test_pre)
train_X, val_X, train_y, val_y = train_test_split(X,
                                                  Y,
                                                  test_size=0.1,
                                                  shuffle=True)

val_y = val_y.reshape(val_y.shape[0], 1)
train_y = train_y.reshape(train_y.shape[0], 1)

In [4]:
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch
from torch import optim
import torch.nn.functional as F

In [5]:
BATCH_SIZE = 100
device = "cuda" if torch.cuda.is_available() else "cpu"

In [6]:
test_ds = TensorDataset(torch.FloatTensor(test))
test_dl = DataLoader(test_ds,
                     shuffle=False,
                     batch_size=BATCH_SIZE)
print(test_dl)

<torch.utils.data.dataloader.DataLoader object at 0x000002BC7BEC02C8>


In [7]:

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=3),
            nn.BatchNorm1d(32),
            nn.Conv1d(32, 32, kernel_size=3),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(32),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(32, 48, kernel_size=3),
            nn.BatchNorm1d(48),
            nn.Conv1d(48, 64, kernel_size=5),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(64),
            nn.ReLU()
        )

        self.layer_1 = nn.Linear(128, 64)
        self.layer_2 = nn.Linear(64, 16)
        self.layer_out = nn.Linear(16, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.sigmoid = nn.Sigmoid()
        self.input = nn.Linear(2, 24)

    def forward(self, x):
        batch, len = x.shape
        x = x.view(batch, 1, len)
        x = F.relu(self.input(x))
        x = self.conv1(x)
        x = self.dropout(x)

        x = self.conv2(x)
        x = self.dropout(x)

        x = x.view(batch, -1)
        x = F.relu(self.layer_1(x))
        x = self.dropout(x)

        x = F.relu(self.layer_2(x))
        x = self.layer_out(x)
        x = self.sigmoid(x)
        # print(x.size())
        return x

In [8]:
# load model

model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
loss_fn = nn.CrossEntropyLoss()
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
# print(epoch)
model.eval()
predict = []
for attr in test_dl:
    # print(type(attr[0]))
    att = attr[0].to(device)

    pred = model(att)
    # pred = torch.gt(pred,0).int()
    # pred = torch.mul(pred, 0.5)
    # pred = torch.add(pred, 0.5)
    print(pred)
    for i in pred:
        predict.append(i.item())

tensor([[0.1928],
        [0.1805],
        [0.1830],
        [0.1812],
        [0.3127],
        [0.1659],
        [0.1573],
        [0.2444],
        [0.1448],
        [0.1625],
        [0.1520],
        [0.1907],
        [0.1925],
        [0.1802],
        [0.2236],
        [0.1653],
        [0.1741],
        [0.1628],
        [0.2306],
        [0.1755],
        [0.1957],
        [0.2455],
        [0.2334],
        [0.1811],
        [0.1648],
        [0.2587],
        [0.2467],
        [0.1810],
        [0.2856],
        [0.1853],
        [0.1883],
        [0.2396],
        [0.3064],
        [0.2227],
        [0.1608],
        [0.1622],
        [0.1481],
        [0.1866],
        [0.1562],
        [0.2424],
        [0.1689],
        [0.1622],
        [0.1423],
        [0.2167],
        [0.1673],
        [0.1791],
        [0.1617],
        [0.1498],
        [0.3212],
        [0.1734],
        [0.2257],
        [0.1700],
        [0.1839],
        [0.2062],
        [0.1896],
        [0

In [9]:
with open('submission.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "failure"])
    count = 0
    for id, p in zip(ids, predict):
        csv_writer.writerow([id, p])