In [132]:
# 가상환경 py37에서 실행

In [133]:
!pip install rdkit



In [134]:
import random
import os

import numpy as np
import pandas as pd

import umap
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from rdkit import DataStructs
from rdkit.Chem import PandasTools, AllChem

In [135]:
print(torch.cuda.is_available())

True


In [136]:
# GPU 이름 체크(cuda:0에 연결된 그래픽 카드 기준)
print(torch.cuda.get_device_name()) # 'N
# 사용 가능 GPU 개수 체크
print(torch.cuda.device_count()) # 

NVIDIA GeForce RTX 3090
2


In [137]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [138]:
print('Device:', device)
print('Current cuda device:', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

Device: cuda
Current cuda device: 0
Count of using GPUs: 2


In [139]:
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)

seed_everything(42) # Seed 고정

In [140]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [141]:
PandasTools.AddMoleculeColumnToFrame(train,'SMILES','Molecule')
PandasTools.AddMoleculeColumnToFrame(test,'SMILES','Molecule')

In [142]:
def mol2fp(mol):
    fp = AllChem.GetHashedMorganFingerprint(mol, 6, nBits=4096)
    ar = np.zeros((1,), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(fp, ar)
    return ar

In [143]:
# FPs column 추가
train["FPs"] = train.Molecule.apply(mol2fp)
test["FPs"] = test.Molecule.apply(mol2fp)

In [144]:
# 사용할 column만 추출
train = train[['FPs','MLM', 'HLM']]
test = test[['FPs']]

In [145]:
class CustomDataset(Dataset):
    def __init__(self, df, target, transform, is_test=False):
        self.df = df
        self.target = target # HLM or MLM
        self.is_test = is_test # train,valid / test

        self.feature_select = transform
        if not self.is_test: 
            self.fp = self.feature_select.fit_transform(np.stack(df['FPs']))
        else: # valid or test
            self.fp = self.feature_select.transform(np.stack(df['FPs']))

    def __getitem__(self, index):
        fp = self.fp[index]
        if not self.is_test: # test가 아닌 경우(label 존재)
            label = self.df[self.target][index]
            return torch.tensor(fp).to('cuda').float(), torch.tensor(label).to('cuda').float().unsqueeze(dim=-1) # feature, label

        else: # test인 경우
            return torch.tensor(fp).to('cuda').float() # feature
        
    def __len__(self):
        return len(self.df)

In [146]:
import umap
transform = umap.UMAP(n_components = 256)

train_MLM = CustomDataset(df=train, target='MLM', transform=transform, is_test=False)
train_HLM = CustomDataset(df=train, target='HLM', transform=transform, is_test=False)

input_size = train_MLM.fp.shape[1]
input_size

251

In [147]:
# Hyperparameter
CFG = {'BATCH_SIZE': 256,
       'EPOCHS': 30000,
       'INPUT_SIZE': input_size,
       'HIDDEN_SIZE': 1024,
       'OUTPUT_SIZE': 1,
       'DROPOUT_RATE': 0.5,
       'LEARNING_RATE': 0.001}

In [148]:
# train,valid split
train_MLM_dataset, valid_MLM_dataset = train_test_split(train_MLM, test_size=0.2, random_state=42)
train_HLM_dataset, valid_HLM_dataset = train_test_split(train_HLM, test_size=0.2, random_state=42)

In [149]:
train_MLM_loader = DataLoader(dataset=train_MLM_dataset,
                              batch_size=CFG['BATCH_SIZE'],
                              shuffle=True)

valid_MLM_loader = DataLoader(dataset=valid_MLM_dataset,
                              batch_size=CFG['BATCH_SIZE'],
                              shuffle=False)


train_HLM_loader = DataLoader(dataset=train_HLM_dataset,
                              batch_size=CFG['BATCH_SIZE'],
                              shuffle=True)

valid_HLM_loader = DataLoader(dataset=valid_HLM_dataset,
                              batch_size=CFG['BATCH_SIZE'],
                              shuffle=False)

In [150]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, dropout_rate, out_size):
        super(Net, self).__init__()
        
        # fc 레이어 3개와 출력 레이어
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc_out = nn.Linear(hidden_size, out_size)
        
        # 정규화
        self.ln1 = nn.LayerNorm(hidden_size)
        self.ln2 = nn.LayerNorm(hidden_size)
        self.ln3 = nn.LayerNorm(hidden_size)        
        
        # 활성화 함수
        self.activation = nn.LeakyReLU()
        
        # Dropout
        self.dropout = nn.Dropout(dropout_rate)
     
    def forward(self, x):
        out = self.fc1(x).to(device)
        out = self.ln1(out)
        out = self.activation(out)
        out = self.dropout(out)
        
        out = self.fc2(out)
        out = self.ln2(out)
        out = self.activation(out)
        out = self.dropout(out)
        
        out = self.fc3(out)
        out = self.ln3(out)
        out = self.activation(out)
        out = self.dropout(out)

        out = self.fc_out(out)
        return out

In [151]:
model_MLM = Net(CFG['INPUT_SIZE'],CFG['HIDDEN_SIZE'],CFG['DROPOUT_RATE'],CFG['OUTPUT_SIZE']).to(device)
model_HLM = Net(CFG['INPUT_SIZE'],CFG['HIDDEN_SIZE'],CFG['DROPOUT_RATE'],CFG['OUTPUT_SIZE']).to(device)

In [152]:
criterion = nn.MSELoss()
optimizer_MLM = torch.optim.Adam(model_MLM.parameters(), lr=CFG['LEARNING_RATE'])
optimizer_HLM = torch.optim.Adam(model_HLM.parameters(), lr=CFG['LEARNING_RATE'])

In [153]:
from tqdm import tqdm

In [154]:
def train(train_loader, valid_loader, model, criterion, optimizer, epochs):
    model.train()
    
    for epoch in tqdm(range(epochs)):
        running_loss = 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            
            output = model(inputs)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        if epoch % 100 == 0:
            valid_loss = 0
            with torch.no_grad():
                for inputs, targets in valid_loader:
                    output = model(inputs)
                    loss = criterion(output, targets)
                    valid_loss += loss.item()
                    
            print(f'Epoch: {epoch}/{epochs}, Train Loss: {running_loss/len(train_loader)}, Valid Loss: {valid_loss/len(valid_HLM_loader)}')
            
            model.train()
    
    return model

In [155]:
print("Training Start: MLM")
model_MLM = train(train_MLM_loader, valid_MLM_loader, model_MLM, criterion, optimizer_MLM, epochs=CFG['EPOCHS'])

print("Training Start: HLM")
model_HLM = train(train_HLM_loader, valid_HLM_loader, model_HLM, criterion, optimizer_HLM, epochs=CFG['EPOCHS'])  

Training Start: MLM


  0%|                                                                              | 1/30000 [00:00<1:12:18,  6.92it/s]

Epoch: 0/30000, Train Loss: 2008.3892156427557, Valid Loss: 1851.6894124348958


  0%|▎                                                                             | 103/30000 [00:06<33:55, 14.68it/s]

Epoch: 100/30000, Train Loss: 77.88386535644531, Valid Loss: 1458.7698567708333


  1%|▌                                                                             | 202/30000 [00:14<42:05, 11.80it/s]

Epoch: 200/30000, Train Loss: 41.328147714788265, Valid Loss: 1446.737060546875


  1%|▊                                                                             | 304/30000 [00:20<27:34, 17.95it/s]

Epoch: 300/30000, Train Loss: 33.534136685458094, Valid Loss: 1456.4073079427083


  1%|█                                                                             | 403/30000 [00:26<30:44, 16.05it/s]

Epoch: 400/30000, Train Loss: 39.06482211026278, Valid Loss: 1577.4278564453125


  2%|█▎                                                                            | 503/30000 [00:33<27:12, 18.06it/s]

Epoch: 500/30000, Train Loss: 22.225637262517754, Valid Loss: 1599.4338785807292


  2%|█▌                                                                            | 602/30000 [00:39<43:06, 11.37it/s]

Epoch: 600/30000, Train Loss: 34.145423195578836, Valid Loss: 1571.1864827473958


  2%|█▊                                                                            | 703/30000 [00:48<45:07, 10.82it/s]

Epoch: 700/30000, Train Loss: 22.59498370777477, Valid Loss: 1561.1478678385417


  3%|██                                                                            | 803/30000 [00:57<34:52, 13.95it/s]

Epoch: 800/30000, Train Loss: 21.60078057375821, Valid Loss: 1567.1563720703125


  3%|██▎                                                                           | 903/30000 [01:05<33:07, 14.64it/s]

Epoch: 900/30000, Train Loss: 16.728891372680664, Valid Loss: 1538.7019449869792


  3%|██▌                                                                          | 1003/30000 [01:11<34:03, 14.19it/s]

Epoch: 1000/30000, Train Loss: 25.623600873080168, Valid Loss: 1660.308349609375


  4%|██▊                                                                          | 1104/30000 [01:18<27:44, 17.36it/s]

Epoch: 1100/30000, Train Loss: 17.130517266013406, Valid Loss: 1604.7333984375


  4%|███                                                                          | 1203/30000 [01:24<36:03, 13.31it/s]

Epoch: 1200/30000, Train Loss: 15.336972323330967, Valid Loss: 1634.0383707682292


  4%|███▎                                                                         | 1302/30000 [01:32<38:57, 12.28it/s]

Epoch: 1300/30000, Train Loss: 16.962816758589312, Valid Loss: 1601.6568196614583


  5%|███▌                                                                         | 1402/30000 [01:39<32:38, 14.60it/s]

Epoch: 1400/30000, Train Loss: 14.409580924294211, Valid Loss: 1713.3385009765625


  5%|███▊                                                                         | 1505/30000 [01:46<31:29, 15.08it/s]

Epoch: 1500/30000, Train Loss: 13.86344506523826, Valid Loss: 1647.306884765625


  5%|████                                                                         | 1602/30000 [01:52<38:12, 12.39it/s]

Epoch: 1600/30000, Train Loss: 13.281712401996959, Valid Loss: 1663.2221272786458


  6%|████▎                                                                        | 1704/30000 [01:59<24:53, 18.95it/s]

Epoch: 1700/30000, Train Loss: 15.661531881852584, Valid Loss: 1722.9652506510417


  6%|████▋                                                                        | 1802/30000 [02:08<44:33, 10.55it/s]

Epoch: 1800/30000, Train Loss: 13.689496820623225, Valid Loss: 1572.5479736328125


  6%|████▉                                                                        | 1902/30000 [02:16<40:31, 11.56it/s]

Epoch: 1900/30000, Train Loss: 16.876429904590953, Valid Loss: 1617.5333251953125


  7%|█████▏                                                                       | 2004/30000 [02:25<37:41, 12.38it/s]

Epoch: 2000/30000, Train Loss: 14.71933564272794, Valid Loss: 1713.1858723958333


  7%|█████▍                                                                       | 2103/30000 [02:33<31:07, 14.94it/s]

Epoch: 2100/30000, Train Loss: 12.13081355528398, Valid Loss: 1703.3609619140625


  7%|█████▋                                                                       | 2203/30000 [02:42<43:27, 10.66it/s]

Epoch: 2200/30000, Train Loss: 15.776798768477006, Valid Loss: 1734.6671956380208


  8%|█████▉                                                                       | 2303/30000 [02:50<31:01, 14.88it/s]

Epoch: 2300/30000, Train Loss: 11.82923785122958, Valid Loss: 1759.5040283203125


  8%|██████▏                                                                      | 2403/30000 [02:56<30:15, 15.20it/s]

Epoch: 2400/30000, Train Loss: 11.723573164506393, Valid Loss: 1766.0818277994792


  8%|██████▍                                                                      | 2503/30000 [03:02<34:15, 13.38it/s]

Epoch: 2500/30000, Train Loss: 15.5945706800981, Valid Loss: 1601.88623046875


  9%|██████▋                                                                      | 2604/30000 [03:08<24:57, 18.29it/s]

Epoch: 2600/30000, Train Loss: 12.6618145162409, Valid Loss: 1743.7118733723958


  9%|██████▉                                                                      | 2705/30000 [03:14<23:32, 19.33it/s]

Epoch: 2700/30000, Train Loss: 13.577526265924627, Valid Loss: 1684.4997965494792


  9%|███████▏                                                                     | 2803/30000 [03:20<26:26, 17.14it/s]

Epoch: 2800/30000, Train Loss: 10.821149652654475, Valid Loss: 1591.3170166015625


 10%|███████▍                                                                     | 2903/30000 [03:26<36:06, 12.51it/s]

Epoch: 2900/30000, Train Loss: 11.289246082305908, Valid Loss: 1707.7146402994792


 10%|███████▋                                                                     | 3004/30000 [03:34<34:39, 12.98it/s]

Epoch: 3000/30000, Train Loss: 12.995333584872158, Valid Loss: 1693.4762776692708


 10%|███████▉                                                                     | 3102/30000 [03:41<26:30, 16.91it/s]

Epoch: 3100/30000, Train Loss: 11.077226508747447, Valid Loss: 1710.7118733723958


 11%|████████▏                                                                    | 3204/30000 [03:48<25:00, 17.86it/s]

Epoch: 3200/30000, Train Loss: 11.439267765391957, Valid Loss: 1722.3769124348958


 11%|████████▍                                                                    | 3304/30000 [03:55<35:01, 12.70it/s]

Epoch: 3300/30000, Train Loss: 11.345851334658535, Valid Loss: 1616.4115804036458


 11%|████████▋                                                                    | 3402/30000 [04:04<41:38, 10.65it/s]

Epoch: 3400/30000, Train Loss: 13.98029340397228, Valid Loss: 1745.6945393880208


 12%|████████▉                                                                    | 3502/30000 [04:12<40:39, 10.86it/s]

Epoch: 3500/30000, Train Loss: 11.2243258302862, Valid Loss: 1744.6866455078125


 12%|█████████▏                                                                   | 3602/30000 [04:19<25:34, 17.21it/s]

Epoch: 3600/30000, Train Loss: 10.746116508137096, Valid Loss: 1679.8490397135417


 12%|█████████▌                                                                   | 3703/30000 [04:26<26:02, 16.83it/s]

Epoch: 3700/30000, Train Loss: 12.178452448411422, Valid Loss: 1653.1875


 13%|█████████▊                                                                   | 3802/30000 [04:32<22:44, 19.20it/s]

Epoch: 3800/30000, Train Loss: 11.18814503062855, Valid Loss: 1708.8419189453125


 13%|██████████                                                                   | 3904/30000 [04:38<21:13, 20.49it/s]

Epoch: 3900/30000, Train Loss: 11.271885915236039, Valid Loss: 1794.2352701822917


 13%|██████████▎                                                                  | 4003/30000 [04:45<29:27, 14.71it/s]

Epoch: 4000/30000, Train Loss: 10.479997504841197, Valid Loss: 1731.6306966145833


 14%|██████████▌                                                                  | 4103/30000 [04:51<28:49, 14.97it/s]

Epoch: 4100/30000, Train Loss: 11.137507265264338, Valid Loss: 1720.8555094401042


 14%|██████████▊                                                                  | 4203/30000 [04:58<39:09, 10.98it/s]

Epoch: 4200/30000, Train Loss: 10.72835659980774, Valid Loss: 1726.8076171875


 14%|███████████                                                                  | 4303/30000 [05:07<39:27, 10.85it/s]

Epoch: 4300/30000, Train Loss: 11.994914098219438, Valid Loss: 1603.5342203776042


 15%|███████████▎                                                                 | 4404/30000 [05:16<24:45, 17.23it/s]

Epoch: 4400/30000, Train Loss: 10.132131121375345, Valid Loss: 1816.7868245442708


 15%|███████████▌                                                                 | 4502/30000 [05:24<38:44, 10.97it/s]

Epoch: 4500/30000, Train Loss: 10.533564827658914, Valid Loss: 1691.9072672526042


 15%|███████████▊                                                                 | 4604/30000 [05:33<32:04, 13.19it/s]

Epoch: 4600/30000, Train Loss: 10.607751846313477, Valid Loss: 1766.57177734375


 16%|████████████                                                                 | 4702/30000 [05:39<28:06, 15.00it/s]

Epoch: 4700/30000, Train Loss: 10.488414894450795, Valid Loss: 1685.4012044270833


 16%|████████████▎                                                                | 4802/30000 [05:46<31:31, 13.32it/s]

Epoch: 4800/30000, Train Loss: 9.953016888011586, Valid Loss: 1675.2276204427083


 16%|████████████▌                                                                | 4904/30000 [05:53<21:58, 19.04it/s]

Epoch: 4900/30000, Train Loss: 10.644741708582098, Valid Loss: 1789.2151692708333


 17%|████████████▊                                                                | 5003/30000 [05:59<26:36, 15.65it/s]

Epoch: 5000/30000, Train Loss: 10.235571254383434, Valid Loss: 1704.873291015625


 17%|█████████████                                                                | 5105/30000 [06:05<21:44, 19.09it/s]

Epoch: 5100/30000, Train Loss: 9.740760499780828, Valid Loss: 1730.7915852864583


 17%|█████████████▎                                                               | 5203/30000 [06:11<27:17, 15.15it/s]

Epoch: 5200/30000, Train Loss: 9.98381688378074, Valid Loss: 1647.1615397135417


 18%|█████████████▌                                                               | 5303/30000 [06:17<23:35, 17.45it/s]

Epoch: 5300/30000, Train Loss: 13.302120598879727, Valid Loss: 1693.0078531901042


 18%|█████████████▊                                                               | 5403/30000 [06:24<26:44, 15.33it/s]

Epoch: 5400/30000, Train Loss: 9.99129405888644, Valid Loss: 1699.6485188802083


 18%|██████████████                                                               | 5502/30000 [06:31<27:26, 14.88it/s]

Epoch: 5500/30000, Train Loss: 9.910194787112149, Valid Loss: 1749.2057291666667


 19%|██████████████▍                                                              | 5604/30000 [06:37<23:44, 17.13it/s]

Epoch: 5600/30000, Train Loss: 10.161821343682028, Valid Loss: 1717.343017578125


 19%|██████████████▋                                                              | 5704/30000 [06:43<27:05, 14.95it/s]

Epoch: 5700/30000, Train Loss: 9.725348255851053, Valid Loss: 1723.2351888020833


 19%|██████████████▉                                                              | 5802/30000 [06:49<26:05, 15.46it/s]

Epoch: 5800/30000, Train Loss: 9.7683884230527, Valid Loss: 1760.22705078125


 20%|███████████████▏                                                             | 5904/30000 [06:55<21:23, 18.78it/s]

Epoch: 5900/30000, Train Loss: 9.69906811280684, Valid Loss: 1723.9529622395833


 20%|███████████████▍                                                             | 6003/30000 [07:01<20:59, 19.05it/s]

Epoch: 6000/30000, Train Loss: 9.876824834130026, Valid Loss: 1708.1233317057292


 20%|███████████████▋                                                             | 6103/30000 [07:09<37:06, 10.73it/s]

Epoch: 6100/30000, Train Loss: 11.716057083823465, Valid Loss: 1689.2744140625


 21%|███████████████▉                                                             | 6203/30000 [07:17<27:13, 14.57it/s]

Epoch: 6200/30000, Train Loss: 9.351237232034856, Valid Loss: 1747.0522867838542


 21%|████████████████▏                                                            | 6303/30000 [07:25<34:51, 11.33it/s]

Epoch: 6300/30000, Train Loss: 9.229978561401367, Valid Loss: 1680.0625


 21%|████████████████▍                                                            | 6405/30000 [07:33<20:01, 19.64it/s]

Epoch: 6400/30000, Train Loss: 9.413267027248036, Valid Loss: 1732.8352457682292


 22%|████████████████▋                                                            | 6502/30000 [07:41<30:21, 12.90it/s]

Epoch: 6500/30000, Train Loss: 9.580617861314254, Valid Loss: 1822.349609375


 22%|████████████████▉                                                            | 6602/30000 [07:49<35:55, 10.86it/s]

Epoch: 6600/30000, Train Loss: 9.861732786351984, Valid Loss: 1781.2181803385417


 22%|█████████████████▏                                                           | 6704/30000 [07:58<30:45, 12.62it/s]

Epoch: 6700/30000, Train Loss: 9.53225816379894, Valid Loss: 1723.119384765625


 23%|█████████████████▍                                                           | 6802/30000 [08:05<34:11, 11.31it/s]

Epoch: 6800/30000, Train Loss: 9.58575480634516, Valid Loss: 1773.0796712239583


 23%|█████████████████▋                                                           | 6904/30000 [08:14<24:39, 15.62it/s]

Epoch: 6900/30000, Train Loss: 9.83678867600181, Valid Loss: 1699.3037923177083


 23%|█████████████████▉                                                           | 7004/30000 [08:20<25:45, 14.88it/s]

Epoch: 7000/30000, Train Loss: 10.157345706766302, Valid Loss: 1752.781005859375


 24%|██████████████████▏                                                          | 7102/30000 [08:27<29:18, 13.02it/s]

Epoch: 7100/30000, Train Loss: 9.77359828081998, Valid Loss: 1692.0667724609375


 24%|██████████████████▍                                                          | 7202/30000 [08:34<26:40, 14.25it/s]

Epoch: 7200/30000, Train Loss: 9.564090772108598, Valid Loss: 1733.4933268229167


 24%|██████████████████▋                                                          | 7304/30000 [08:41<26:15, 14.40it/s]

Epoch: 7300/30000, Train Loss: 9.135817246003585, Valid Loss: 1676.7347819010417


 25%|███████████████████                                                          | 7405/30000 [08:48<22:27, 16.77it/s]

Epoch: 7400/30000, Train Loss: 9.338930108330466, Valid Loss: 1737.7356363932292


 25%|███████████████████▎                                                         | 7505/30000 [08:54<18:45, 19.99it/s]

Epoch: 7500/30000, Train Loss: 10.932522426952016, Valid Loss: 1697.8915608723958


 25%|███████████████████▌                                                         | 7604/30000 [08:59<20:47, 17.95it/s]

Epoch: 7600/30000, Train Loss: 10.313469149849631, Valid Loss: 1713.6304524739583


 26%|███████████████████▊                                                         | 7704/30000 [09:06<29:25, 12.63it/s]

Epoch: 7700/30000, Train Loss: 9.432011517611416, Valid Loss: 1717.3477376302083


 26%|████████████████████                                                         | 7804/30000 [09:14<21:39, 17.07it/s]

Epoch: 7800/30000, Train Loss: 9.340781060132114, Valid Loss: 1740.0554606119792


 26%|████████████████████▎                                                        | 7902/30000 [09:20<29:05, 12.66it/s]

Epoch: 7900/30000, Train Loss: 8.882227897644043, Valid Loss: 1763.4795328776042


 27%|████████████████████▌                                                        | 8002/30000 [09:29<35:07, 10.44it/s]

Epoch: 8000/30000, Train Loss: 9.342759349129416, Valid Loss: 1758.8849690755208


 27%|████████████████████▊                                                        | 8103/30000 [09:37<28:44, 12.69it/s]

Epoch: 8100/30000, Train Loss: 9.344640493392944, Valid Loss: 1701.703125


 27%|█████████████████████                                                        | 8204/30000 [09:44<24:17, 14.96it/s]

Epoch: 8200/30000, Train Loss: 9.485169237310236, Valid Loss: 1655.6664225260417


 28%|█████████████████████▎                                                       | 8302/30000 [09:52<27:13, 13.29it/s]

Epoch: 8300/30000, Train Loss: 10.68921336260709, Valid Loss: 1813.3055419921875


 28%|█████████████████████▌                                                       | 8402/30000 [10:00<31:26, 11.45it/s]

Epoch: 8400/30000, Train Loss: 9.561930829828436, Valid Loss: 1762.610107421875


 28%|█████████████████████▊                                                       | 8502/30000 [10:07<29:04, 12.32it/s]

Epoch: 8500/30000, Train Loss: 9.84436897798018, Valid Loss: 1786.881591796875


 29%|██████████████████████                                                       | 8604/30000 [10:13<18:44, 19.02it/s]

Epoch: 8600/30000, Train Loss: 9.431123516776346, Valid Loss: 1667.5669352213542


 29%|██████████████████████▎                                                      | 8704/30000 [10:18<16:44, 21.19it/s]

Epoch: 8700/30000, Train Loss: 9.34674223986539, Valid Loss: 1569.6073404947917


 29%|██████████████████████▌                                                      | 8804/30000 [10:26<28:35, 12.36it/s]

Epoch: 8800/30000, Train Loss: 9.526078072461216, Valid Loss: 1752.4021809895833


 30%|██████████████████████▊                                                      | 8903/30000 [10:33<27:04, 12.98it/s]

Epoch: 8900/30000, Train Loss: 8.82237627289512, Valid Loss: 1726.3175862630208


 30%|███████████████████████                                                      | 9002/30000 [10:41<33:11, 10.54it/s]

Epoch: 9000/30000, Train Loss: 9.620231476697056, Valid Loss: 1754.8900553385417


 30%|███████████████████████▎                                                     | 9102/30000 [10:51<30:15, 11.51it/s]

Epoch: 9100/30000, Train Loss: 9.909560051831333, Valid Loss: 1797.8578694661458


 31%|███████████████████████▌                                                     | 9202/30000 [11:00<32:56, 10.52it/s]

Epoch: 9200/30000, Train Loss: 9.373188257217407, Valid Loss: 1741.7738037109375


 31%|███████████████████████▉                                                     | 9302/30000 [11:07<26:24, 13.07it/s]

Epoch: 9300/30000, Train Loss: 10.6627544706518, Valid Loss: 1741.4108072916667


 31%|████████████████████████▏                                                    | 9404/30000 [11:14<20:12, 16.99it/s]

Epoch: 9400/30000, Train Loss: 9.16193500432101, Valid Loss: 1831.7163899739583


 31%|████████████████████████▏                                                    | 9427/30000 [11:16<24:36, 13.94it/s]


KeyboardInterrupt: 

In [None]:
test_MLM = CustomDataset(df=test, target=None, transform=transform, is_test=True)
test_HLM = CustomDataset(df=test, target=None, transform=transform, is_test=True)

test_MLM_loader = DataLoader(dataset=test_MLM,
                             batch_size=CFG['BATCH_SIZE'],
                             shuffle=False)

test_HLM_loader = DataLoader(dataset=test_HLM,
                             batch_size=CFG['BATCH_SIZE'],
                             shuffle=False)

In [None]:
def inference(test_loader, model):
    model.eval()
    preds = []
    
    with torch.no_grad():
        for inputs in test_loader:
            output = model(inputs)
            preds.extend(output.cpu().numpy().flatten().tolist())
    
    return preds

In [None]:
predictions_MLM = inference(test_MLM_loader, model_MLM)
predictions_HLM = inference(test_HLM_loader, model_HLM)

In [None]:
submission = pd.read_csv('data/sample_submission.csv')
submission

In [None]:
submission['MLM'] = predictions_MLM
submission['HLM'] = predictions_HLM
submission

In [None]:
submission.to_csv('data/baseline_submission.csv', index=False)

In [None]:
!pip install xgboost

In [None]:
# xgboost를 활용해보쟈
import xgboost

In [None]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [None]:
# 사용할 column만 추출
train_X = train[[ 'AlogP',
 'Molecular_Weight',
 'Num_H_Acceptors',
 'Num_H_Donors',
 'Num_RotatableBonds',
 'LogD',
 'Molecular_PolarSurfaceArea']]

train_MLM = train['MLM']

train_HLM = train['HLM']

In [None]:
test_X = test[[ 'AlogP',
 'Molecular_Weight',
 'Num_H_Acceptors',
 'Num_H_Donors',
 'Num_RotatableBonds',
 'LogD',
 'Molecular_PolarSurfaceArea']]

In [None]:
xgb_model = xgboost.XGBRegressor()

In [None]:
xgb_model.fit(train_X,train_MLM)
predictionsMLM = xgb_model.predict(test_X)