In [6]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [7]:
# Runtime configuration: data directory (with trailing slash), inference
# device, and the raw test set loaded from <PATH>test.csv.
PATH = 'data/'
device = 'cpu'
test_df = pd.read_csv(f'{PATH}test.csv')

In [8]:
# Flag the rows whose measurement_3 / measurement_5 values are NaN.
# The flags have to be taken while the NaNs are still present in the frame.
for flag_col, source_col in (('miss_m3', 'measurement_3'),
                             ('miss_m5', 'measurement_5')):
    test_df[flag_col] = test_df[source_col].isna()

In [9]:
# Replace each material name in attribute_0 / attribute_1 with its position
# in mat_list. A name that is not in mat_list raises ValueError (list.index).
mat_list = ['material_5', 'material_6', 'material_7', 'material_8']
for material_col in ('attribute_0', 'attribute_1'):
    test_df[material_col] = [mat_list.index(name) for name in test_df[material_col]]

In [10]:
# Mean-impute every column that contains at least one NaN.
# NOTE(review): the imputer is fitted on the test set itself; if training used
# an imputer fitted on the training data, its statistics should presumably be
# reused here instead — confirm against the training notebook.
missing_cols = test_df.columns[test_df.isna().sum(0) > 0]
imputer = SimpleImputer(strategy='mean')
# Guard: with no NaN left (e.g. when this cell is re-run after imputation)
# the selection is empty and fit_transform would raise on a zero-column frame.
if len(missing_cols) > 0:
    test_df[missing_cols] = imputer.fit_transform(test_df[missing_cols])

In [11]:
# 'area' feature: element-wise product of attribute_2 and attribute_3.
test_df['area'] = test_df['attribute_2'].mul(test_df['attribute_3'])

In [12]:
# Row-wise sum and standard deviation over the columns at positions 10..23.
# NOTE(review): the columns are picked by position, not by name — presumably a
# contiguous run of measurement_* columns; verify against the CSV layout.
measure_list = test_df.columns[10:24].tolist()
measurements = test_df[measure_list]
test_df['measurement_sum'] = measurements.sum(axis=1)
test_df['measurement_std'] = measurements.std(axis=1)

In [13]:
# Feature columns fed to the network, in the order the model receives them
# (nine columns in total).
target_list = [
    'loading',
    'attribute_0',
    'attribute_1',
    'area',
    'measurement_17',
    'measurement_sum',
    'measurement_std',
    'miss_m3',
    'miss_m5',
]
X_test = test_df[target_list].to_numpy()

In [14]:
# Standardise every feature column to zero mean / unit variance.
# NOTE(review): the scaler is fitted on the test features themselves; if the
# model was trained on features scaled with training-set statistics, that
# fitted scaler should presumably be reused here — confirm.
sc = StandardScaler()
X_test = sc.fit(X_test).transform(X_test)

In [15]:
class TestData(Dataset):
    """Feature-only dataset: wraps an indexable container of samples, no labels."""

    def __init__(self, X_data):
        """Keep a reference to ``X_data``; it must support ``len()`` and indexing."""
        self.X_data = X_data

    def __len__(self):
        """Number of samples in the wrapped container."""
        return len(self.X_data)

    def __getitem__(self, index):
        """Return the sample stored at ``index``."""
        return self.X_data[index]

In [16]:
# Wrap the feature matrix as float32 tensors and iterate over it one sample
# per batch, in the original row order.
test_ds = TestData(torch.tensor(X_test, dtype=torch.float32))
test_dl = DataLoader(test_ds, batch_size=1, shuffle=False)

In [17]:
class Model(nn.Module):
    """Fully connected binary classifier over 9 input features.

    Two hidden blocks (Linear -> ReLU -> BatchNorm1d -> Dropout(0.25)) of
    width 32, followed by a 2-unit linear layer and a softmax over dim 1, so
    ``forward`` returns per-row class probabilities of shape ``(batch, 2)``.
    """

    def __init__(self):
        super().__init__()
        # The order and the attribute name `layers` determine the state_dict
        # keys (layers.0.weight, ...), so they must match the saved weights.
        stack = [
            nn.Linear(9, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Dropout(p=0.25),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Dropout(p=0.25),
            nn.Linear(32, 2),
            nn.Softmax(dim=1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the class probabilities."""
        return self.layers(x)

In [18]:
# Load model
model = Model().to(device)
# map_location remaps tensors that were saved on another device (e.g. CUDA)
# onto `device`, so the checkpoint also loads on a CPU-only machine.
# NOTE(review): "/model" is an absolute path at the filesystem root — confirm
# that this is the intended checkpoint location.
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
model.load_state_dict(torch.load("/model", map_location=device))

y_pred_list = []
model.eval()  # inference mode: dropout off, BatchNorm uses its running statistics
with torch.no_grad():
    for X_batch in test_dl:
        X_batch = X_batch.to(device)

        pred = model(X_batch)

        # pred[:, 0] is the probability of success, pred[:, 1] the probability
        # of failure; only the failure probability is kept.
        y_pred_list.append(pred[:, 1].cpu().numpy())

# Join the per-batch vectors into one flat float32 array. Concatenating (rather
# than squeezing each batch to a scalar) works for any batch size, including a
# smaller final batch; an empty loader yields an empty array.
if y_pred_list:
    y_pred_list = np.concatenate(y_pred_list).astype(np.float32, copy=False)
else:
    y_pred_list = np.empty(0, dtype=np.float32)
round_y = np.around(y_pred_list, 4)
# NOTE(review): ids are generated as consecutive integers starting at 26570 in
# row order — presumably matching the 'id' column of test.csv; confirm.
result = [[26570+i, y] for i, y in enumerate(round_y)]

# Output to csv
df = pd.DataFrame(result, columns=["id", "failure"])
df.to_csv('0816077.csv', index=False)