In [1]:
from typing import Optional, OrderedDict, Tuple
import math
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from sklearn import model_selection
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
writer = SummaryWriter()

# Preprocessing

## Pretraining

In [4]:
pretrain_features = pd.read_csv('pretrain_features.csv')
pretrain_labels = pd.read_csv('pretrain_labels.csv')

In [5]:
pretrain_features_t = torch.tensor(pretrain_features.iloc[:,2:].values).float()
pretrain_labels_t = torch.tensor(pretrain_labels['lumo_energy'].values).float()

In [6]:
pretrain_range = pretrain_labels_t.max() - pretrain_labels_t.min()
pretrain_range

tensor(2.6694)

## Transfer Learning

In [None]:
train_features = pd.read_csv('train_features.csv')
train_labels = pd.read_csv('train_labels.csv')

In [None]:
train_features_t = torch.tensor(train_features.iloc[:,2:].values).float()
train_labels_t = torch.tensor(train_labels['homo_lumo_gap'].values).float()

In [None]:
train_range = train_labels_t.max() - train_labels_t.min()
train_range

tensor(1.9678)

# Architecture

In [7]:
FEATURE_SIZE = 10

In [8]:
class Model(nn.Module):
  def __init__(self):
    super().__init__()
    self.encoder = nn.Sequential(
      nn.Linear(1000, 256),
      nn.Dropout(),
      nn.ReLU(),
      nn.Linear(256, 64),
      nn.Dropout(),
      nn.ReLU(),
      nn.Linear(64, FEATURE_SIZE),
    )
    self.decoder = nn.Sequential(
      nn.Dropout(),
      nn.ReLU(),
      nn.Linear(FEATURE_SIZE, 1),
    )

  def forward(self, x):
    x = self.encoder(x)
    x = self.decoder(x)
    return x

In [9]:
model = Model().to(device)

In [10]:
criterion = nn.MSELoss()

## Utilities

In [11]:
def validate(net: nn.Module, loader: DataLoader) -> float:
  net.eval()
  running_loss = 0.0
  with torch.no_grad():
    for data in loader:
      X, y = data[0].to(device), data[1].to(device)

      y_pred = net(X).squeeze()
      loss = torch.sqrt(criterion(y_pred, y))
      running_loss += loss.item()
  return running_loss / len(test_loader)

In [13]:
BATCH_SIZE = 32

def prepare_loaders(X_train, X_test, y_train, y_test):
  train_set = TensorDataset(X_train, y_train)
  test_set = TensorDataset(X_test, y_test)

  train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
  test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)
  return train_loader, test_loader

In [15]:
def train(net: nn.Module, optimizer: optim.Optimizer, train_loader: DataLoader, val_loader: DataLoader, test_loader: Optional[DataLoader] = None, epochs=100, offset=0, outer_offset=0) -> Tuple[Tuple[float, float, float], OrderedDict[str, torch.Tensor]]:
  state = net.state_dict()
  opt_val_loss = math.inf
  opt_test_loss = math.inf
  opt_train_loss = math.inf
  
  for epoch in tqdm(range(epochs)):
    it = offset * epochs + epoch + outer_offset

    net.train()
    running_loss = 0.0
    for data in train_loader:
      X, y = data[0].to(device), data[1].to(device)

      optimizer.zero_grad()
      y_pred = net(X).squeeze()
      loss = torch.sqrt(criterion(y_pred, y))
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
    
    train_loss = running_loss / len(train_loader)
    writer.add_scalar('Loss/train', train_loss, it)
    val_loss = validate(net, val_loader)
    writer.add_scalar('Loss/val', val_loss, it)
    if test_loader is not None:
      test_loss = validate(net, test_loader)
      writer.add_scalar('Loss/test', test_loss, it)
    else:
      test_loss = math.inf

    if val_loss < opt_val_loss:
      state = net.state_dict()
      opt_val_loss = val_loss
      opt_test_loss = test_loss
      opt_train_loss = train_loss

  return (opt_val_loss, opt_test_loss, opt_train_loss), state

In [32]:
def compute_loss(net: nn.Module, X: torch.Tensor, y: torch.Tensor) -> float:
  with torch.no_grad():
    y_pred = net(X.to(device)).cpu()
    loss = mean_squared_error(y, y_pred)
    return np.sqrt(loss)

# Pretraining

In [12]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(pretrain_features_t, pretrain_labels_t, test_size=0.2, random_state=0)

In [16]:
pretraining_optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [25]:
train_loader, test_loader = prepare_loaders(X_train, X_test, y_train, y_test)
loss, state = train(model, pretraining_optimizer, train_loader, test_loader, epochs=50)
loss

100%|██████████| 50/50 [19:58<00:00, 23.96s/it]


(0.08659417209581445, inf, 0.17908804420232774)

In [27]:
torch.save(state, 'data/pretrained_model.pt')

Testing the performance of the pretrained model:

In [33]:
compute_loss(model, X_train, y_train) / pretrain_range, compute_loss(model, X_test, y_test) / pretrain_range

(tensor(0.0353), tensor(0.0373))

# Transfer Learning

In [34]:
writer = SummaryWriter()

In [17]:
model = Model().to(device)
model.load_state_dict(torch.load('data/pretrained_model.pt'))

<All keys matched successfully>

## Training

Freeze feature representation

In [18]:
model.encoder.requires_grad_(False)

Sequential(
  (0): Linear(in_features=1000, out_features=256, bias=True)
  (1): Dropout(p=0.5, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=64, bias=True)
  (4): Dropout(p=0.5, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=10, bias=True)
)

Reset parameters of feature-output mapping

In [19]:
for layer in model.decoder.children():
  if hasattr(layer, 'reset_parameters'):
    layer.reset_parameters()

In [23]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(train_features_t, train_labels_t, test_size=0.2)

In [40]:
training_optimizer = optim.Adam(model.parameters(), lr=1e-2)

In [41]:
val_set = TensorDataset(X_test, y_test)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)
  
kf = model_selection.KFold()
for i, (train_index, test_index) in enumerate(kf.split(X_train)):
  train_loader, test_loader = prepare_loaders(X_train[train_index], X_train[test_index], y_train[train_index], y_train[test_index])
  loss, state = train(model, training_optimizer, train_loader, val_loader, test_loader, epochs=100, offset=i)
  model.load_state_dict(state)
  print(loss, validate(model, val_loader))

100%|██████████| 20/20 [00:31<00:00,  1.59s/it]


(0.16272294521331787, 0.1777612864971161, 0.18130498379468918) 0.16272294521331787


100%|██████████| 20/20 [00:30<00:00,  1.53s/it]


(0.1440454125404358, 0.12354667484760284, 0.1747995987534523) 0.14663028717041016


100%|██████████| 20/20 [00:29<00:00,  1.50s/it]


(0.13783419132232666, 0.1048850566148758, 0.1678953692317009) 0.13830260932445526


100%|██████████| 20/20 [00:31<00:00,  1.58s/it]


(0.13298127055168152, 0.09606873244047165, 0.19752303510904312) 0.14245416224002838


100%|██████████| 20/20 [00:28<00:00,  1.42s/it]


(0.14111925661563873, 0.06880681216716766, 0.18619517236948013) 0.15237370133399963


In [None]:
torch.save(state, 'data/trained_model.pt')

## Tuning

Unfreezing feature representation

In [39]:
model.encoder.requires_grad_(True)

Sequential(
  (0): Linear(in_features=1000, out_features=256, bias=True)
  (1): Dropout(p=0.5, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=64, bias=True)
  (4): Dropout(p=0.5, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=10, bias=True)
)

In [None]:
tuning_optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
OFFSET = 500

val_set = TensorDataset(X_test, y_test)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)
  
kf = model_selection.KFold()
for i, (train_index, test_index) in enumerate(kf.split(X_train)):
  train_loader, test_loader = prepare_loaders(X_train[train_index], X_train[test_index], y_train[train_index], y_train[test_index])
  loss, state = train(model, tuning_optimizer, train_loader, val_loader, test_loader, epochs=20, offset=i, outer_offset=OFFSET)
  model.load_state_dict(state)
  print(loss, validate(model, val_loader))

100%|██████████| 20/20 [00:31<00:00,  1.59s/it]


(0.16272294521331787, 0.1777612864971161, 0.18130498379468918) 0.16272294521331787


100%|██████████| 20/20 [00:30<00:00,  1.53s/it]


(0.1440454125404358, 0.12354667484760284, 0.1747995987534523) 0.14663028717041016


100%|██████████| 20/20 [00:29<00:00,  1.50s/it]


(0.13783419132232666, 0.1048850566148758, 0.1678953692317009) 0.13830260932445526


100%|██████████| 20/20 [00:31<00:00,  1.58s/it]


(0.13298127055168152, 0.09606873244047165, 0.19752303510904312) 0.14245416224002838


100%|██████████| 20/20 [00:28<00:00,  1.42s/it]


(0.14111925661563873, 0.06880681216716766, 0.18619517236948013) 0.15237370133399963


In [44]:
torch.save(state, 'data/tuned_model.pt')

Testing performance of tuned model:

In [46]:
model.load_state_dict(torch.load('data/tuned_model.pt'))
model.eval()

Model(
  (encoder): Sequential(
    (0): Linear(in_features=1000, out_features=256, bias=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=64, bias=True)
    (4): Dropout(p=0.5, inplace=False)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=10, bias=True)
  )
  (decoder): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=1, bias=True)
  )
)

In [49]:
compute_loss(model, X_test, y_test), compute_loss(model, train_features_t, train_labels_t)

(0.15237369, 0.11391682)

# Preparing Submission

In [50]:
test_features = pd.read_csv('test_features.csv')

In [51]:
test_features_t = torch.tensor(test_features.iloc[:,2:].values).float()

In [54]:
with torch.no_grad():
  test_labels_pred = model(test_features_t.to(device)).cpu()

In [55]:
submission = pd.DataFrame()
submission['Id'] = test_features['Id']
submission['y'] = test_labels_pred
submission

Unnamed: 0,Id,y
0,50100,1.899644
1,50101,2.150548
2,50102,1.763442
3,50103,2.263024
4,50104,1.884360
...,...,...
9995,60095,2.167699
9996,60096,1.791535
9997,60097,1.650751
9998,60098,1.781834


In [56]:
submission.to_csv('submission.csv', index=False)