In [178]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
# Preprocessing

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import Dataset, TensorDataset

In [179]:
class CustomTensorDataset(Dataset):
    """TensorDataset with support of transforms.
    """
    def __init__(self, tensors, transform=None):
        #assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors)
        self.tensors = tensors
        self.transform = transform

    def __getitem__(self, index):
        x = self.tensors[0][index]

        if self.transform:
            x = self.transform(x)

        y = self.tensors[1][index]

        return x, y

    def __len__(self):
        return self.tensors[0].size(0)

In [180]:
data = pd.read_csv('data_train.csv')
data = data.drop(['filename'],axis=1)
y = data.loc[:, 'label']
scaler = StandardScaler()

X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)
X_train, y_train = X, y
print(len(y_train))
print(len(y_val))
X_train.shape
X_train = torch.from_numpy(np.array(X_train)).float()
y_train = torch.from_numpy(np.array(y_train)).float()
X_val = torch.from_numpy(np.array(X_val)).float()
y_val = torch.from_numpy(np.array(y_val)).float()
X = CustomTensorDataset(tensors=(X_train, y_train))
Valid = CustomTensorDataset(tensors=(X_val, y_val))

220
22


In [181]:
data_test = pd.read_csv('data_test.csv')
data_test = data_test.drop(['filename'],axis=1)
scaler = StandardScaler()

X_test = scaler.fit_transform(np.array(data_test.iloc[:, :], dtype = float))
X_test.shape
X_test = torch.from_numpy(np.array(X_test)).float()
y_test = torch.FloatTensor(X_test.shape[0])
Test = CustomTensorDataset(tensors=(X_test, y_test))


In [182]:
# LENet
# Model structure
class LENet(nn.Module):
    def __init__(self):
        super(LENet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x



In [183]:
class mlpNet(torch.nn.Module):
    def __init__(self, D_in) :
        super(mlpNet, self).__init__()
        self.linear1 = nn.Linear(D_in, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.linear3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.linear4 = nn.Linear(64, 1)
    
    def forward(self, x):
        x = self.relu(self.bn1(self.linear1(x)))
        x = self.relu(self.bn2(self.linear2(x)))
        x = self.relu(self.bn3(self.linear3(x)))
        x = self.linear4(x)
        return x

     

In [184]:
trainLoader = torch.utils.data.DataLoader(X, batch_size=4, shuffle=True, num_workers=2)
valLoader = torch.utils.data.DataLoader(Valid, batch_size=1, shuffle=False, num_workers=2)
testLoader = torch.utils.data.DataLoader(Test, batch_size=1, shuffle=False, num_workers=2)

In [185]:
MLP_model = mlpNet(25)
MLP_model = MLP_model.cuda()
# loss
criterion = nn.MSELoss(reduction='sum')
# optimizer
learning_rate = 1e-3
optimizer = torch.optim.Adam(MLP_model.parameters(), lr = learning_rate)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [186]:
# GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('GPU state:', device)

GPU state: cuda:0


In [187]:
def val(valLoader, model):
    with torch.no_grad():
        model.eval()
        total_loss = 0
        for data in valLoader:
            inputs, label = data
            inputs, label = inputs.to(device), label.to(device)
            MLP_model = model.cuda()
            outputs = model(inputs)
            outputs = outputs[0][0]
            #print('label:', label)
            total_loss += np.abs(np.sum((label - outputs).cpu().numpy()))
        return total_loss
        

In [188]:
def train(model):
    validation_error = []
    patience = 10
    last_loss = 1e9
    trigger_times = 0
    total_epoch = 200
    for epoch in range(total_epoch):
        running_loss = 0.0
        for times, data in enumerate(trainLoader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            #print(inputs.shape)
            # Zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            outputs = outputs.reshape(-1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if times+1 == len(trainLoader):
                print('[%d/%d, %d/%d] loss: %.3f' % (epoch+1, total_epoch, times+1, len(trainLoader), running_loss))
        validation_error.append(val(valLoader, model))
        print('validation_error:', validation_error[-1])
        if validation_error[-1] > last_loss:
            trigger_times += 1
            print('trigger times:', trigger_times)
        else:
            trigger_times = 0
            print('trigger times: 0')
        if trigger_times >= patience:
            print('Early stopping!\nStart to test process.')
            break
        last_loss = validation_error[-1]
    print('Finished Training')
    

    

In [189]:
train(MLP_model)

[1/200, 55/55] loss: 41.302
validation_error: 4.738628715276718
trigger times: 0
[2/200, 55/55] loss: 19.299
validation_error: 2.2750363945961
trigger times: 0
[3/200, 55/55] loss: 11.456
validation_error: 2.075043797492981
trigger times: 0
[4/200, 55/55] loss: 6.289
validation_error: 2.5136982798576355
trigger times: 1
[5/200, 55/55] loss: 2.959
validation_error: 1.459737628698349
trigger times: 0
[6/200, 55/55] loss: 2.138
validation_error: 1.4134275913238525
trigger times: 0
[7/200, 55/55] loss: 1.373
validation_error: 0.9549066424369812
trigger times: 0
[8/200, 55/55] loss: 0.924
validation_error: 0.9613268375396729
trigger times: 1
[9/200, 55/55] loss: 1.385
validation_error: 0.9294813871383667
trigger times: 0
[10/200, 55/55] loss: 2.627
validation_error: 1.3834345638751984
trigger times: 1
[11/200, 55/55] loss: 3.017
validation_error: 1.2279190719127655
trigger times: 0
[12/200, 55/55] loss: 1.570
validation_error: 0.609367847442627
trigger times: 0
[13/200, 55/55] loss: 1.155
v

In [190]:
PATH = './MLP_Net.pth'
torch.save(MLP_model.state_dict(), PATH)

In [191]:
PATH = './MLP_Net.pth'
net = mlpNet(25)
net.load_state_dict(torch.load(PATH))
net = net.cuda()

In [192]:
# inference
predicted_res = []
with torch.no_grad():
    net.eval()
    for data in testLoader:
        input, label = data
        input = input.to(device)
        output = net(input)
        output = output.cpu().numpy()
        predicted_res.append(output[0][0])
submission_df = pd.read_csv('sample_submission.csv')
submission_df['score'] = predicted_res

#submission_df = submission_df.drop(['score'], axis=1)
#submission_df = submission_df.insert(1, "score", predicted_res)
submission_df.to_csv('submission/MLP_submission.csv', index=False)
