# training demo for pytorch models

In [1]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import roc_auc_score
from sklearn import preprocessing

In [2]:
# load training data
Xtr_loadpath = 'Xtr.csv'
Xts_loadpath = 'Xts.csv'
ytr_loadpath = 'ytr.csv'

Xtr = np.loadtxt(Xtr_loadpath, delimiter=",")
Xts = np.loadtxt(Xts_loadpath, delimiter=",")
ytr = np.loadtxt(ytr_loadpath, delimiter=",")

In [75]:
scaler = preprocessing.StandardScaler().fit(Xtr)

# standardize the training data
Xtr_standardized = scaler.transform(Xtr) # revise this line as needed
Xts_standardized = scaler.transform(Xts) # revise this line as needed
ytr_standardized = ytr # revise this line as needed


# save the standardized training data
Xtr_savepath = 'Xtr_pytorch.csv'
Xts_savepath = 'Xts_pytorch.csv'
ytr_savepath = 'ytr_pytorch.csv'
yts_hat_savepath = 'yts_hat_pytorch.csv'

np.savetxt(Xtr_savepath, Xtr_standardized, delimiter=",")
np.savetxt(Xts_savepath, Xts_standardized, delimiter=",")
np.savetxt(ytr_savepath, ytr_standardized, delimiter=",")

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X_train, X_val, y_train, y_val = train_test_split(Xtr_standardized, ytr_standardized, test_size=0.2, shuffle=True, random_state=42)

In [6]:
train_data = []
for i in range(len(X_train)):
   train_data.append([X_train[i], y_train[i]])

In [7]:
all_data = []
for i in range(len(Xtr_standardized)):
   all_data.append([Xtr_standardized[i], ytr_standardized[i]])

# Multilayer Perceptron (MLP)

First, we try MLP with 5 layers and 128 nodes.

In [8]:
# create a model
d_in = Xtr.shape[1]
d_out = 1

class MLP(nn.Module):
    def __init__(self,n_layer,n_hidden_nodes):
        super(MLP, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in,n_hidden_nodes))
        for i in range(n_layer):
            self.layers.append(nn.Linear(n_hidden_nodes,n_hidden_nodes))
        self.layers.append(nn.Linear(n_hidden_nodes,d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

model = MLP(5, 128)   

# Usually, we would train the model at this point. 
# But this is only a demo, so we'll use the randomly initialized weights.

We use 10 epochs, adam, with learning rate = 1e-4.

In [9]:
n_epochs = 10

In [10]:
trainloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=1)

In [11]:
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [12]:
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
      inputs, targets = data
      optimizer.zero_grad()
      outputs = model(inputs.float())[:, 0]
      loss = loss_function(outputs, targets)
      loss.backward()
      optimizer.step()
      current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val)).detach().numpy().ravel()

    auc = roc_auc_score(y_val,predict)
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5054535661407717 0.8259737377393941
Starting epoch 2
0.46425104191647665 0.8189104221613144
Starting epoch 3
0.46367769256881297 0.8217493234041819
Starting epoch 4
0.46401785087935876 0.8213991742945304
Starting epoch 5
0.46331089944267867 0.8190504818051749
Starting epoch 6
0.46380043747354555 0.8205114116288291
Starting epoch 7
0.4640483409225321 0.8213927100032753
Starting epoch 8
0.463547465964918 0.8217353174397959
Starting epoch 9
0.46397043197904736 0.8156599610418713
Starting epoch 10
0.46407858234473975 0.8187369636793023


Results are't very good.

In [13]:
# create a model
d_in = Xtr.shape[1]
d_out = 1

class MLPA(nn.Module):
    def __init__(self,n_layer,n_hidden_nodes):
        super(MLPA, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in,n_hidden_nodes))
        self.layers.append(nn.ReLU())
        for i in range(n_layer):
            self.layers.append(nn.Linear(n_hidden_nodes,n_hidden_nodes))
            self.layers.append(nn.ReLU())
        self.layers.append(nn.Linear(n_hidden_nodes,d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

model = MLPA(5, 128)   

# Usually, we would train the model at this point. 
# But this is only a demo, so we'll use the randomly initialized weights.

We add a ReLU. It's the best in activation.

In [14]:
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [15]:
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
      inputs, targets = data
      optimizer.zero_grad()
      outputs = model(inputs.float())[:, 0]
      loss = loss_function(outputs, targets)
      loss.backward()
      optimizer.step()
      current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val)).detach().numpy().ravel()

    auc = roc_auc_score(y_val,predict)
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5513488252417738 0.8528975108169141
Starting epoch 2
0.4229204962634984 0.8711731395769766
Starting epoch 3
0.4030552740888335 0.8812251124786679
Starting epoch 4
0.3880549971514155 0.8870160400613677
Starting epoch 5
0.37623526695808795 0.8884306424643602
Starting epoch 6
0.3711684930192926 0.8938218613711193
Starting epoch 7
0.3608013035322753 0.8950166778714381
Starting epoch 8
0.3540631747767757 0.8960541966178828
Starting epoch 9
0.34993483654660035 0.8966683042871179
Starting epoch 10
0.34325054836979746 0.8951933684990778


Results are better.

In [16]:
# create a model
d_in = Xtr.shape[1]
d_out = 1

class MLPB(nn.Module):
    def __init__(self,n_layer,n_hidden_nodes):
        super(MLPB, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in,n_hidden_nodes))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(p=0.2))
        for i in range(n_layer):
            self.layers.append(nn.Linear(n_hidden_nodes,n_hidden_nodes))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(p=0.2))
        self.layers.append(nn.Linear(n_hidden_nodes,d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

model = MLPB(5, 128)   

# Usually, we would train the model at this point. 
# But this is only a demo, so we'll use the randomly initialized weights.

We add a dropout.

In [17]:
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [18]:
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
      inputs, targets = data
      optimizer.zero_grad()
      outputs = model(inputs.float())[:, 0]
      loss = loss_function(outputs, targets)
      loss.backward()
      optimizer.step()
      current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val)).detach().numpy().ravel()

    auc = roc_auc_score(y_val,predict)
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.6111168710871134 0.8239579562496768
Starting epoch 2
0.45536657956961746 0.8402221130475255
Starting epoch 3
0.4330727564946297 0.8549434589991554
Starting epoch 4
0.4261213980894089 0.8558118287910913
Starting epoch 5
0.42307524787538775 0.8585386823188706
Starting epoch 6
0.41936601717788125 0.8627480133078208
Starting epoch 7
0.41669071418285547 0.8581745272448329
Starting epoch 8
0.40846976497855747 0.867884970091879
Starting epoch 9
0.40812468569109034 0.857869628173967
Starting epoch 10
0.40208128460603787 0.8672396183482443


Results are bad.

In [19]:
# create a model
d_in = Xtr.shape[1]
d_out = 1

class MLPC(nn.Module):
    def __init__(self,n_layer,n_hidden_nodes):
        super(MLPC, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in,n_hidden_nodes))
        self.layers.append(nn.ReLU())
        for i in range(n_layer):
            self.layers.append(nn.Linear(n_hidden_nodes,n_hidden_nodes))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(p=0.1))
        self.layers.append(nn.Linear(n_hidden_nodes,d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

model = MLPC(5, 128)   

# Usually, we would train the model at this point. 
# But this is only a demo, so we'll use the randomly initialized weights.

Then, we change the coefficients of dropout.

In [20]:
n_epochs = 10

In [21]:
trainloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=1)

In [22]:
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [23]:
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
      inputs, targets = data
      optimizer.zero_grad()
      outputs = model(inputs.float())[:, 0]
      loss = loss_function(outputs, targets)
      loss.backward()
      optimizer.step()
      current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val)).detach().numpy().ravel()

    auc = roc_auc_score(y_val,predict)
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5667247169745242 0.8505261933081658
Starting epoch 2
0.42691413896939956 0.8607106841805865
Starting epoch 3
0.41464166230920324 0.8706613831859475
Starting epoch 4
0.40516830843996243 0.8754072503490716
Starting epoch 5
0.3954928434995254 0.8855971281308718
Starting epoch 6
0.38525390523708547 0.8852372825843376
Starting epoch 7
0.37784176310090684 0.8916186154350036
Starting epoch 8
0.3680772134413367 0.8917220440950855
Starting epoch 9
0.36410339514067797 0.8945609453379533
Starting epoch 10
0.35679744078121073 0.893918825739946


It looks better.

In [24]:
# create a model
d_in = Xtr.shape[1]
d_out = 1

class MLPA(nn.Module):
    def __init__(self,n_layer,n_hidden_nodes):
        super(MLPA, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in,n_hidden_nodes))
        self.layers.append(nn.ReLU())
        for i in range(n_layer):
            self.layers.append(nn.Linear(n_hidden_nodes,n_hidden_nodes))
            self.layers.append(nn.ReLU())
        self.layers.append(nn.Linear(n_hidden_nodes,d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

# Usually, we would train the model at this point. 
# But this is only a demo, so we'll use the randomly initialized weights.

In [25]:
n_epochs = 10

We test different layers and nodes.

In [26]:
n_layer_list = [5, 10, 15]
n_hidden_nodes_list = [32, 64, 128, 256]
for n_layer in n_layer_list:
    for n_hidden_nodes in n_hidden_nodes_list:
        print(n_layer, n_hidden_nodes)
        model = MLPA(n_layer, n_hidden_nodes)   
        loss_function = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        for epoch in range(n_epochs): 
            print(f'Starting epoch {epoch+1}')
            current_loss = 0.0
            for i, data in enumerate(trainloader):
                inputs, targets = data
                optimizer.zero_grad()
                outputs = model(inputs.float())[:, 0]
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()
                current_loss += loss.item()
            with torch.no_grad():
                predict = model(torch.Tensor(X_val)).detach().numpy().ravel()

            auc = roc_auc_score(y_val,predict)
            print(current_loss / len(trainloader), auc)
            current_loss = 0.0

5 32
Starting epoch 1
0.6717895442913286 0.7858035544982849
Starting epoch 2
0.5845040188594721 0.8361334488286705
Starting epoch 3
0.44976805001447795 0.8501555739428729
Starting epoch 4
0.4223825761491985 0.8584234024581545
Starting epoch 5
0.41304674379159884 0.8625249952595199
Starting epoch 6
0.4076234583925347 0.8653919084311595
Starting epoch 7
0.40414094446145576 0.8679399165675474
Starting epoch 8
0.40166006070756327 0.8691799831066521
Starting epoch 9
0.39929758630491624 0.8707583475547742
Starting epoch 10
0.396829183841182 0.8726437658375137
5 64
Starting epoch 1
0.628025432755705 0.8251560048956234
Starting epoch 2
0.4458894456909056 0.8521972125976107
Starting epoch 3
0.4260092889289511 0.8626402751202358
Starting epoch 4
0.41435422991377724 0.8709285738911586
Starting epoch 5
0.4042082157352367 0.878163193187499
Starting epoch 6
0.3961412164425991 0.8818359880022755
Starting epoch 7
0.38940128310064787 0.8862974263501748
Starting epoch 8
0.3839844628679459 0.888885297615

Then, we try the seemingly the best one.

In [27]:
n_epochs = 50
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
model = MLPA(10, 256)   
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val)).detach().numpy().ravel()

    auc = roc_auc_score(y_val,predict)
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5239376040049392 0.8700160314423128
Starting epoch 2
0.4066072223409655 0.8826871196841979
Starting epoch 3
0.388207365955073 0.8898743341780007
Starting epoch 4
0.3755391801627264 0.8937065815104033
Starting epoch 5
0.3591369476954469 0.8969753581217356
Starting epoch 6
0.34837869137144467 0.9009638258261363
Starting epoch 7
0.33903869611942944 0.9002646049887091
Starting epoch 8
0.33266079483046324 0.9020724517763873
Starting epoch 9
0.32659682352171177 0.9025955206771129
Starting epoch 10
0.32023414751703866 0.9027738273775663
Starting epoch 11
0.3187962272938094 0.8984589129647824
Starting epoch 12
0.3126279931137938 0.9040666856285877
Starting epoch 13
0.3055676009959126 0.8978943648618366
Starting epoch 14
0.3106776260108044 0.905454353484684
Starting epoch 15
0.29934854478002904 0.9048316267604419
Starting epoch 16
0.29570852100558376 0.9035991018944682
Starting epoch 17
0.2938262941625306 0.908682189584734
Starting epoch 18
0.2886220541319852 0.90634642567788

In [28]:
n_epochs = 50
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
model = MLPA(5, 128)   
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val)).detach().numpy().ravel()

    auc = roc_auc_score(y_val,predict)
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5523627452963265 0.8511596938511662
Starting epoch 2
0.4199855882064271 0.8737416179690058
Starting epoch 3
0.4014262285303612 0.880897588388409
Starting epoch 4
0.3881188465238383 0.8850735205392082
Starting epoch 5
0.375094325030704 0.8898667925048698
Starting epoch 6
0.36566543429456716 0.8917188119494579
Starting epoch 7
0.359708913596397 0.8933952181482823
Starting epoch 8
0.3532517278635955 0.8958371041698987
Starting epoch 9
0.34749436089611113 0.8952881781041528
Starting epoch 10
0.34520803648646214 0.8968309889503715
Starting epoch 11
0.33914491534175994 0.8986776214855803
Starting epoch 12
0.3360897203246972 0.8955661426281223
Starting epoch 13
0.33570560769202984 0.8954939580424401
Starting epoch 14
0.32898205427114013 0.8982423592077364
Starting epoch 15
0.3257086323650081 0.9004348313250936
Starting epoch 16
0.3228063206130579 0.9003895812863076
Starting epoch 17
0.3180425393358185 0.898361948595956
Starting epoch 18
0.31685393657705563 0.896340780196859

Next, we use GPU for the deep learning and use early stop, otherwise, it's so slow.

In [29]:
device = torch.device('cuda:0')   

In [30]:
n_epochs = 500
model = MLPC(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val).to(device)).to(torch.device('cpu')).detach().numpy().ravel()
    
    auc = roc_auc_score(y_val,predict)
    if auc > auc_max:
        auc_max = auc
        count = 0
    else:
        count += 1
    if count == 50:
        print(epoch - 50)
        break
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5419350834320938 0.8677847735774249
Starting epoch 2
0.41497849388211205 0.8825783041147368
Starting epoch 3
0.38423603255034305 0.8934469324783231
Starting epoch 4
0.36513011513404386 0.8968379919325644
Starting epoch 5
0.3525338358253176 0.8997528485976797
Starting epoch 6
0.34415410942384883 0.9032823516229681
Starting epoch 7
0.34057416508251176 0.9060986278464429
Starting epoch 8
0.3344981500843262 0.9031821551085137
Starting epoch 9
0.3331608005568807 0.9019485528606644
Starting epoch 10
0.324282962247168 0.9027339642481599
Starting epoch 11
0.3215366905157406 0.9060953957008154
Starting epoch 12
0.31485509750490986 0.9028934167657857
Starting epoch 13
0.31253100049366717 0.9037434710658324
Starting epoch 14
0.3098059994666816 0.9032909706779749
Starting epoch 15
0.30339517120371623 0.9043327989519228
Starting epoch 16
0.30551535842660343 0.905431728465291
Starting epoch 17
0.3004792534390948 0.9040268224991812
Starting epoch 18
0.29602817308143076 0.9045030252

In [31]:
n_epochs = 500
model = MLPA(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val).to(device)).to(torch.device('cpu')).detach().numpy().ravel()
    
    auc = roc_auc_score(y_val,predict)
    if auc > auc_max:
        auc_max = auc
        count = 0
    else:
        count += 1
    if count == 50:
        print(epoch - 50)
        break
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5166030426966928 0.8706387581665546
Starting epoch 2
0.4020478748270643 0.8901878523038733
Starting epoch 3
0.3776672711555072 0.8955058092430745
Starting epoch 4
0.3568720729712784 0.8967157090896554
Starting epoch 5
0.3463206197706872 0.9002247418593026
Starting epoch 6
0.3375417239788635 0.8990083777214666
Starting epoch 7
0.3360020741343048 0.9048402458154488
Starting epoch 8
0.3216515325677559 0.9082038320318561
Starting epoch 9
0.31925732628327363 0.9085130406302253
Starting epoch 10
0.31379827061116544 0.9072234145248316
Starting epoch 11
0.3079761597813856 0.9081187188636637
Starting epoch 12
0.30209097041845573 0.9074701349744014
Starting epoch 13
0.301900502622021 0.9061654255227457
Starting epoch 14
0.29413863798227435 0.9109059057764908
Starting epoch 15
0.28215846392108346 0.9093383151471273
Starting epoch 16
0.2813346222706476 0.9063194911309923
Starting epoch 17
0.27254713251328017 0.9005070159107756
Starting epoch 18
0.27013869036249977 0.894765109203

In [32]:
n_epochs = 500
model = MLPA(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val).to(device)).to(torch.device('cpu')).detach().numpy().ravel()
    
    auc = roc_auc_score(y_val,predict)
    if auc > auc_max:
        auc_max = auc
        count = 0
    else:
        count += 1
    if count == 50:
        print(epoch - 50)
        break
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.51760094067879 0.8709447346192964
Starting epoch 2
0.4116401991372807 0.8442191998069332
Starting epoch 3
0.3863207785059928 0.8842525555498095
Starting epoch 4
0.3646212212307906 0.8995330626950061
Starting epoch 5
0.34942906661802237 0.9044071383013566
Starting epoch 6
0.3389652813954341 0.902761976176932
Starting epoch 7
0.3289522474725452 0.8992023064591198
Starting epoch 8
0.32460355557834136 0.905620270293565
Starting epoch 9
0.3222736582058347 0.9067202771888091
Starting epoch 10
0.31701285878589425 0.9095042319560083
Starting epoch 11
0.3062646071229106 0.9070434917515644
Starting epoch 12
0.30268339731397104 0.905714002516764
Starting epoch 13
0.3035432392966547 0.9050147816793367
Starting epoch 14
0.2916056577552947 0.8970798641636932
Starting epoch 15
0.28577531317708793 0.9039901915154023
Starting epoch 16
0.27865626991645787 0.9036863698264123
Starting epoch 17
0.2781891570168067 0.8958872024271259
Starting epoch 18
0.26367881266709825 0.8995233662581235

From the results, auc first goes up and then goes down, so it's overfitting.
We try to add the batchnorm and dropout.

In [33]:
class MLPD(nn.Module):
    def __init__(self,n_layer,n_hidden_nodes):
        super(MLPD, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in,n_hidden_nodes))
        self.layers.append(nn.BatchNorm1d(n_hidden_nodes))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(0.2))
        for i in range(n_layer):
            self.layers.append(nn.Linear(n_hidden_nodes,n_hidden_nodes))
            self.layers.append(nn.BatchNorm1d(n_hidden_nodes))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(0.2))
        self.layers.append(nn.Linear(n_hidden_nodes,d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

In [34]:
n_epochs = 500
model = MLPD(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val).to(device)).to(torch.device('cpu')).detach().numpy().ravel()
    
    auc = roc_auc_score(y_val,predict)
    if auc > auc_max:
        auc_max = auc
        count = 0
    else:
        count += 1
    if count == 50:
        print(epoch - 50)
        break
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.6048369246124785 0.6780395097481512
Starting epoch 2
0.537072368294248 0.757888590094982
Starting epoch 3
0.49472445539550974 0.8100144800124113
Starting epoch 4
0.4772448772370117 0.82612026167451
Starting epoch 5
0.46506023217056647 0.8334464584302977
Starting epoch 6
0.4597622203435312 0.8459387012807916
Starting epoch 7
0.44120245334297215 0.8480083518643016
Starting epoch 8
0.4349439652254841 0.8605232197341883
Starting epoch 9
0.42888385949454094 0.875948096050749
Starting epoch 10
0.4169088445046873 0.8798299029494406
Starting epoch 11
0.42330774276305966 0.8811777076761304
Starting epoch 12
0.4097566038505829 0.872178336867146
Starting epoch 13
0.40933619906390234 0.880587302408164
Starting epoch 14
0.4039662671905389 0.880682112013239
Starting epoch 15
0.4030266058590714 0.8807273620520246
Starting epoch 16
0.40551377979225073 0.8859688248780404
Starting epoch 17
0.3965710679598851 0.8849722466428781
Starting epoch 18
0.39638535663219227 0.8922079433210944
S

The overfitting is better, try several more.

In [35]:
n_epochs = 500
model = MLPA(10, 256)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val).to(device)).to(torch.device('cpu')).detach().numpy().ravel()
    
    auc = roc_auc_score(y_val,predict)
    if auc > auc_max:
        auc_max = auc
        count = 0
    else:
        count += 1
    if count == 50:
        print(epoch - 50)
        break
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5286637745684493 0.8628589836410336
Starting epoch 2
0.40995539821115096 0.8783409611970144
Starting epoch 3
0.3852384265125964 0.887738963300064
Starting epoch 4
0.3643564077705871 0.8914483890986193
Starting epoch 5
0.349073428848497 0.8956200117219147
Starting epoch 6
0.340473740694489 0.8983382461946872
Starting epoch 7
0.33331514763071096 0.898759502508145
Starting epoch 8
0.32808206472310825 0.9010855699781076
Starting epoch 9
0.323097426885305 0.9008022185447588
Starting epoch 10
0.3179785347369096 0.9028190774163521
Starting epoch 11
0.31162201698828895 0.9044308407026254
Starting epoch 12
0.3088135482495174 0.9026316129699539
Starting epoch 13
0.30472876120387243 0.9066276223474858
Starting epoch 14
0.3021124049165192 0.9054769785040768
Starting epoch 15
0.29894460924212174 0.9026908689731259
Starting epoch 16
0.29164100479292776 0.9030464049921567
Starting epoch 17
0.28892645988933136 0.9029828394614815
Starting epoch 18
0.2871040163521984 0.903668054334522

We choose the coefficients from the highest auc score to run and save a model.

In [36]:
Allloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=1)

In [37]:
n_epochs = 128
model = MLPD(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    for i, data in enumerate(Allloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

Starting epoch 1
Starting epoch 2
Starting epoch 3
Starting epoch 4
Starting epoch 5
Starting epoch 6
Starting epoch 7
Starting epoch 8
Starting epoch 9
Starting epoch 10
Starting epoch 11
Starting epoch 12
Starting epoch 13
Starting epoch 14
Starting epoch 15
Starting epoch 16
Starting epoch 17
Starting epoch 18
Starting epoch 19
Starting epoch 20
Starting epoch 21
Starting epoch 22
Starting epoch 23
Starting epoch 24
Starting epoch 25
Starting epoch 26
Starting epoch 27
Starting epoch 28
Starting epoch 29
Starting epoch 30
Starting epoch 31
Starting epoch 32
Starting epoch 33
Starting epoch 34
Starting epoch 35
Starting epoch 36
Starting epoch 37
Starting epoch 38
Starting epoch 39
Starting epoch 40
Starting epoch 41
Starting epoch 42
Starting epoch 43
Starting epoch 44
Starting epoch 45
Starting epoch 46
Starting epoch 47
Starting epoch 48
Starting epoch 49
Starting epoch 50
Starting epoch 51
Starting epoch 52
Starting epoch 53
Starting epoch 54
Starting epoch 55
Starting epoch 56
S

In [38]:
model.eval() 

MLPD(
  (layers): Sequential(
    (0): Linear(in_features=8, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=512, out_features=512, bias=True)
    (13): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
    (15): Dropout(p=0.2, inplace=False)
    (16): Linear(in_features=512, out_features=512, bias=True)
    (17): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_runn

In [39]:
model.to(torch.device('cpu'))
# save the model: you must use the .pth format for pytorch models!
model_savepath = 'MLP1.pth'

# To save a PyTorch model, we first pass an input through the model, 
# and then save the "trace". 
# For this purpose, we can use any input. 
# We will create a random input with the proper dimension.
x = torch.randn(d_in) # random input
x = x[None,:] # add singleton batch index
with torch.no_grad():
    traced_cell = torch.jit.trace(model, (x))

# Now we save the trace
torch.jit.save(traced_cell, model_savepath)

In [40]:
yts_hat_savepath = 'MLP1.csv'

In [41]:
# generate kaggle submission file using the validation script
!python {"validation.py " + model_savepath + " --Xts_path " + Xts_savepath + " --Xtr_path " + Xtr_savepath + " --yts_hat_path " + yts_hat_savepath } 

training auc =  0.9401277162947026
test label confidences saved in MLP1.csv


In [42]:
n_epochs = 13
model = MLPA(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    for i, data in enumerate(Allloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

Starting epoch 1
Starting epoch 2
Starting epoch 3
Starting epoch 4
Starting epoch 5
Starting epoch 6
Starting epoch 7
Starting epoch 8
Starting epoch 9
Starting epoch 10
Starting epoch 11
Starting epoch 12
Starting epoch 13


In [43]:
model.eval() 

MLPA(
  (layers): Sequential(
    (0): Linear(in_features=8, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): ReLU()
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): ReLU()
    (10): Linear(in_features=512, out_features=512, bias=True)
    (11): ReLU()
    (12): Linear(in_features=512, out_features=512, bias=True)
    (13): ReLU()
    (14): Linear(in_features=512, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=512, bias=True)
    (17): ReLU()
    (18): Linear(in_features=512, out_features=512, bias=True)
    (19): ReLU()
    (20): Linear(in_features=512, out_features=512, bias=True)
    (21): ReLU()
    (22): Linear(in_features=512, out_features=512, bias=True)
    (23): ReLU()
    (24): Linear(in_fe

In [44]:
model.to(torch.device('cpu'))
# save the model: you must use the .pth format for pytorch models!
model_savepath = 'MLP2.pth'

# To save a PyTorch model, we first pass an input through the model, 
# and then save the "trace". 
# For this purpose, we can use any input. 
# We will create a random input with the proper dimension.
x = torch.randn(d_in) # random input
x = x[None,:] # add singleton batch index
with torch.no_grad():
    traced_cell = torch.jit.trace(model, (x))

# Now we save the trace
torch.jit.save(traced_cell, model_savepath)

In [45]:
yts_hat_savepath = 'MLP2.csv'

In [46]:
# generate kaggle submission file using the validation script
!python {"validation.py " + model_savepath + " --Xts_path " + Xts_savepath + " --Xtr_path " + Xtr_savepath + " --yts_hat_path " + yts_hat_savepath } 

training auc =  0.9335692906049922
test label confidences saved in MLP2.csv


The result is worse than expect, the score is low.

# CNN

In [47]:
# create a model
d_in = Xtr.shape[1]
d_out = 1

class CNN(nn.Module):
    def __init__(self, n_layer, n_hidden_nodes, n_channels, kernel_size):
        super(CNN, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in, n_hidden_nodes))
        self.layers.append(nn.Conv1d(1, n_channels, kernel_size))
        self.layers.append(nn.ReLU())
        for i in range(n_layer):
            self.layers.append(nn.Conv1d(n_channels, n_channels, kernel_size))
            self.layers.append(nn.ReLU())
        self.layers.append(nn.Flatten())
        self.layers.append(nn.Linear(n_channels * (n_hidden_nodes - (kernel_size - 1) * (n_layer + 1)), n_hidden_nodes))
        self.layers.append(nn.Linear(n_hidden_nodes, d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

# Usually, we would train the model at this point. 
# But this is only a demo, so we'll use the randomly initialized weights.

In [48]:
model = CNN(5, 128, 64, 3)

In [49]:
n_epochs = 50
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        optimizer.zero_grad()
        outputs = model(inputs[:,None,:].float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val)[:,None,:]).detach().numpy().ravel()

    auc = roc_auc_score(y_val,predict)
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5368286321326912 0.8468447794383824
Starting epoch 2
0.42595150380100005 0.8716590388029857
Starting epoch 3
0.40394386922120745 0.8781200979124648
Starting epoch 4
0.3960970289185548 0.8787557532192172
Starting epoch 5
0.38404036774585393 0.8800949388909001
Starting epoch 6
0.37915191499410444 0.8865710813466412
Starting epoch 7
0.3766768257320468 0.8869276947475478
Starting epoch 8
0.36972685559748036 0.8898958818155178
Starting epoch 9
0.3649458913305592 0.8914677819723845
Starting epoch 10
0.3618112587723362 0.8937701470410785
Starting epoch 11
0.3591139923625429 0.8914710141180122
Starting epoch 12
0.3552314503991561 0.8919881574184207
Starting epoch 13
0.35211421315605795 0.8906866801123925
Starting epoch 14
0.34932493515977714 0.8920398717484614
Starting epoch 15
0.34498181184530974 0.8929211701229077
Starting epoch 16
0.3446531001006319 0.8932659323231802
Starting epoch 17
0.3406558523206521 0.8923652410749687
Starting epoch 18
0.3376515789573928 0.8956404819

The result is okay, but we don't think CNN is suitable for this kind of problem.

# feature selection

We borrow the indices from feature selection in tree-based.

In [74]:
scaler = preprocessing.StandardScaler().fit(Xtr)

# standardize the training data
Xtr_standardized = scaler.transform(Xtr)[:, [2,3,5,6,7]] # revise this line as needed
Xts_standardized = scaler.transform(Xts)[:, [2,3,5,6,7]] # revise this line as needed
ytr_standardized = ytr # revise this line as needed


# save the standardized training data
Xtr_savepath = 'Xtr_pytorch2.csv'
Xts_savepath = 'Xts_pytorch2.csv'
ytr_savepath = 'ytr_pytorch2.csv'
yts_hat_savepath = 'yts_hat_pytorch2.csv'

np.savetxt(Xtr_savepath, Xtr_standardized, delimiter=",")
np.savetxt(Xts_savepath, Xts_standardized, delimiter=",")
np.savetxt(ytr_savepath, ytr_standardized, delimiter=",")

In [51]:
from sklearn.model_selection import train_test_split

In [52]:
X_train, X_val, y_train, y_val = train_test_split(Xtr_standardized, ytr_standardized, test_size=0.2, shuffle=True, random_state=42)

In [53]:
train_data = []
for i in range(len(X_train)):
   train_data.append([X_train[i], y_train[i]])

In [54]:
all_data = []
for i in range(len(Xtr_standardized)):
   all_data.append([Xtr_standardized[i], ytr_standardized[i]])

In [55]:
d_in = Xtr_standardized.shape[1]
d_out = 1

In [56]:
device = torch.device('cuda:0')

In [57]:
class MLPD(nn.Module):
    def __init__(self,n_layer,n_hidden_nodes):
        super(MLPD, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in,n_hidden_nodes))
        self.layers.append(nn.BatchNorm1d(n_hidden_nodes))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(0.2))
        for i in range(n_layer):
            self.layers.append(nn.Linear(n_hidden_nodes,n_hidden_nodes))
            self.layers.append(nn.BatchNorm1d(n_hidden_nodes))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(0.2))
        self.layers.append(nn.Linear(n_hidden_nodes,d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

In [58]:
trainloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=1)

In [59]:
Allloader = torch.utils.data.DataLoader(all_data, batch_size=32, shuffle=True, num_workers=1)

First, we try some deep MLP.

In [60]:
n_epochs = 500
model = MLPD(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val).to(device)).to(torch.device('cpu')).detach().numpy().ravel()
    
    auc = roc_auc_score(y_val,predict)
    if auc > auc_max:
        auc_max = auc
        count = 0
    else:
        count += 1
    if count == 50:
        print(epoch - 50)
        break
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.5780865664373851 0.7231678043819275
Starting epoch 2
0.5038453072265838 0.8215618589577839
Starting epoch 3
0.4550855139081614 0.8426634603782039
Starting epoch 4
0.4422248202261253 0.8488325489993276
Starting epoch 5
0.4276325636049005 0.8563752995121616
Starting epoch 6
0.4140186107094305 0.8670069038630605
Starting epoch 7
0.4141069663598391 0.8808932788609056
Starting epoch 8
0.4036070927361798 0.8809579217734567
Starting epoch 9
0.39998756833635163 0.8824215450518006
Starting epoch 10
0.39322589032112226 0.8859946820430608
Starting epoch 11
0.3914490696573048 0.888793720156522
Starting epoch 12
0.3946975353832895 0.8934350812776887
Starting epoch 13
0.3920657977093906 0.8956943510713486
Starting epoch 14
0.3824196977902529 0.8920818896416197
Starting epoch 15
0.3862215701235218 0.8913525021116682
Starting epoch 16
0.3761429365703607 0.9016964455017151
Starting epoch 17
0.3825812993380823 0.8964970005688577
Starting epoch 18
0.37378357194250567 0.8941418437882471

In [61]:
n_epochs = 108
model = MLPD(15, 512)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    for i, data in enumerate(Allloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

Starting epoch 1
Starting epoch 2
Starting epoch 3
Starting epoch 4
Starting epoch 5
Starting epoch 6
Starting epoch 7
Starting epoch 8
Starting epoch 9
Starting epoch 10
Starting epoch 11
Starting epoch 12
Starting epoch 13
Starting epoch 14
Starting epoch 15
Starting epoch 16
Starting epoch 17
Starting epoch 18
Starting epoch 19
Starting epoch 20
Starting epoch 21
Starting epoch 22
Starting epoch 23
Starting epoch 24
Starting epoch 25
Starting epoch 26
Starting epoch 27
Starting epoch 28
Starting epoch 29
Starting epoch 30
Starting epoch 31
Starting epoch 32
Starting epoch 33
Starting epoch 34
Starting epoch 35
Starting epoch 36
Starting epoch 37
Starting epoch 38
Starting epoch 39
Starting epoch 40
Starting epoch 41
Starting epoch 42
Starting epoch 43
Starting epoch 44
Starting epoch 45
Starting epoch 46
Starting epoch 47
Starting epoch 48
Starting epoch 49
Starting epoch 50
Starting epoch 51
Starting epoch 52
Starting epoch 53
Starting epoch 54
Starting epoch 55
Starting epoch 56
S

In [62]:
model.eval() 

MLPD(
  (layers): Sequential(
    (0): Linear(in_features=5, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=512, out_features=512, bias=True)
    (13): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
    (15): Dropout(p=0.2, inplace=False)
    (16): Linear(in_features=512, out_features=512, bias=True)
    (17): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_runn

In [63]:
model.to(torch.device('cpu'))
# save the model: you must use the .pth format for pytorch models!
model_savepath = 'MLP3.pth'

# To save a PyTorch model, we first pass an input through the model, 
# and then save the "trace". 
# For this purpose, we can use any input. 
# We will create a random input with the proper dimension.
x = torch.randn(d_in) # random input
x = x[None,:] # add singleton batch index
with torch.no_grad():
    traced_cell = torch.jit.trace(model, (x))

# Now we save the trace
torch.jit.save(traced_cell, model_savepath)

In [64]:
yts_hat_savepath = 'MLP3.csv'

In [65]:
# generate kaggle submission file using the validation script
!python {"validation.py " + model_savepath + " --Xts_path " + Xts_savepath + " --Xtr_path " + Xtr_savepath + " --yts_hat_path " + yts_hat_savepath } 

training auc =  0.9304228300782624
test label confidences saved in MLP3.csv


It looks good, we try CNN with it.

In [66]:
class CNNA(nn.Module):
    def __init__(self, n_layer, n_hidden_nodes, n_channels, kernel_size):
        super(CNNA, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in, n_hidden_nodes))
        self.layers.append(nn.Conv1d(1, n_channels, kernel_size))
        self.layers.append(nn.BatchNorm1d(n_channels))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(0.2))
        for i in range(n_layer):
            self.layers.append(nn.Conv1d(n_channels, n_channels, kernel_size))
            self.layers.append(nn.BatchNorm1d(n_channels))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(0.2))
        self.layers.append(nn.Flatten())
        self.layers.append(nn.Linear(n_channels * (n_hidden_nodes - (kernel_size - 1) * (n_layer + 1)), n_hidden_nodes))
        self.layers.append(nn.Linear(n_hidden_nodes, d_out))
    def forward(self,x):
        out = self.layers(x)
        return out

In [67]:
n_epochs = 500
model = CNNA(15, 512, 64, 5)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    current_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs[:,None,:].float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    with torch.no_grad():
        predict = model(torch.Tensor(X_val[:,None,:]).to(device)).to(torch.device('cpu')).detach().numpy().ravel()
    
    auc = roc_auc_score(y_val,predict)
    if auc > auc_max:
        auc_max = auc
        count = 0
    else:
        count += 1
    if count == 50:
        print(epoch - 50)
        break
    print(current_loss / len(trainloader), auc)
    current_loss = 0.0

Starting epoch 1
0.6092122762159274 0.8143283170433193
Starting epoch 2
0.5099515029262208 0.8456758201030838
Starting epoch 3
0.4713648650897441 0.8475019823826516
Starting epoch 4
0.443837797770616 0.8550479650411129
Starting epoch 5
0.4178286838517422 0.874970910689352
Starting epoch 6
0.40903361242839076 0.877467204495699
Starting epoch 7
0.39983566650853275 0.8794840633672923
Starting epoch 8
0.3873676279312793 0.8802877902466774
Starting epoch 9
0.38504950340242694 0.8790035510506629
Starting epoch 10
0.37346288955375156 0.8860442416093499
Starting epoch 11
0.3792730956779335 0.8887226129527158
Starting epoch 12
0.36996710834620006 0.8886579700401647
Starting epoch 13
0.36965415245250066 0.8900025426212269
Starting epoch 14
0.36363060749366116 0.8906662098567514
Starting epoch 15
0.3638016209380183 0.8914117581148403
Starting epoch 16
0.36019916809863894 0.8916056868524935
Starting epoch 17
0.3609921459781564 0.8942915998689904
Starting epoch 18
0.35476768808760434 0.894715010946

In [68]:
class CNNA(nn.Module):
    def __init__(self, n_layer, n_hidden_nodes, n_channels, kernel_size):
        super(CNNA, self).__init__()
        self.layers = nn.Sequential()
        self.layers.append(nn.Linear(d_in, n_hidden_nodes))
        self.layers.append(nn.Conv1d(1, n_channels, kernel_size))
        self.layers.append(nn.BatchNorm1d(n_channels))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(0.2))
        for i in range(n_layer):
            self.layers.append(nn.Conv1d(n_channels, n_channels, kernel_size))
            self.layers.append(nn.BatchNorm1d(n_channels))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(0.2))
        self.layers.append(nn.Flatten())
        self.layers.append(nn.Linear(n_channels * (n_hidden_nodes - (kernel_size - 1) * (n_layer + 1)), n_hidden_nodes))
        self.layers.append(nn.Linear(n_hidden_nodes, d_out))
    def forward(self,x):
        out = self.layers(torch.unsqueeze(x, 1))
        return out

In [69]:
n_epochs = 65
model = CNNA(15, 512, 64, 5)
device = torch.device('cuda:0')
model.to(device)
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
count = 0
auc_max = 0
for epoch in range(n_epochs): 
    print(f'Starting epoch {epoch+1}')
    for i, data in enumerate(Allloader):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())[:, 0]
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

Starting epoch 1
Starting epoch 2
Starting epoch 3
Starting epoch 4
Starting epoch 5
Starting epoch 6
Starting epoch 7
Starting epoch 8
Starting epoch 9
Starting epoch 10
Starting epoch 11
Starting epoch 12
Starting epoch 13
Starting epoch 14
Starting epoch 15
Starting epoch 16
Starting epoch 17
Starting epoch 18
Starting epoch 19
Starting epoch 20
Starting epoch 21
Starting epoch 22
Starting epoch 23
Starting epoch 24
Starting epoch 25
Starting epoch 26
Starting epoch 27
Starting epoch 28
Starting epoch 29
Starting epoch 30
Starting epoch 31
Starting epoch 32
Starting epoch 33
Starting epoch 34
Starting epoch 35
Starting epoch 36
Starting epoch 37
Starting epoch 38
Starting epoch 39
Starting epoch 40
Starting epoch 41
Starting epoch 42
Starting epoch 43
Starting epoch 44
Starting epoch 45
Starting epoch 46
Starting epoch 47
Starting epoch 48
Starting epoch 49
Starting epoch 50
Starting epoch 51
Starting epoch 52
Starting epoch 53
Starting epoch 54
Starting epoch 55
Starting epoch 56
S

In [70]:
model.eval() 

CNNA(
  (layers): Sequential(
    (0): Linear(in_features=5, out_features=512, bias=True)
    (1): Conv1d(1, 64, kernel_size=(5,), stride=(1,))
    (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Dropout(p=0.2, inplace=False)
    (5): Conv1d(64, 64, kernel_size=(5,), stride=(1,))
    (6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Conv1d(64, 64, kernel_size=(5,), stride=(1,))
    (10): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Dropout(p=0.2, inplace=False)
    (13): Conv1d(64, 64, kernel_size=(5,), stride=(1,))
    (14): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): ReLU()
    (16): Dropout(p=0.2, inplace=False)
    (17): Conv1d(64, 64, kernel_size=(5,), stride=(1,))
    (18): BatchNorm1d(64, eps=1e-05, momentum=0.1, a

In [71]:
model.to(torch.device('cpu'))
# save the model: you must use the .pth format for pytorch models!
model_savepath = 'MLP4.pth'

# To save a PyTorch model, we first pass an input through the model, 
# and then save the "trace". 
# For this purpose, we can use any input. 
# We will create a random input with the proper dimension.
x = torch.randn(2, d_in) # random input
x = x # add singleton batch index
print(x.shape)
with torch.no_grad():
    traced_cell = torch.jit.trace(model, (x))

# Now we save the trace
torch.jit.save(traced_cell, model_savepath)

torch.Size([2, 5])


In [72]:
yts_hat_savepath = 'MLP4.csv'

In [73]:
# generate kaggle submission file using the validation script
!python {"validation.py " + model_savepath + " --Xts_path " + Xts_savepath + " --Xtr_path " + Xtr_savepath + " --yts_hat_path " + yts_hat_savepath } 

training auc =  0.9351776030566457
test label confidences saved in MLP4.csv
