In [1]:
import argparse
import json
import os
import sys
import time

import numpy as np
import torch
from sklearn.linear_model import LinearRegression
from sklearn.cross_decomposition import PLSRegression
from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import torch_geometric as tg
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader

sys.path.append('..')
from dataset import SCHetDataset
from model import SCHetNet, schetnet_train, schetnet_valid

# Statistical Models

In [5]:
train_x = np.loadtxt('../data/raw_dir/hydro/input.txt')
train_y = np.loadtxt('../data/raw_dir/hydro/output.txt')
valid_x = np.loadtxt('../data/raw_dir/hydro/input_test.txt')
valid_y = np.loadtxt('../data/raw_dir/hydro/output_test.txt')

In [7]:
# PLS
pls = PLSRegression(n_components=3)
pls.fit(train_x, train_y)
pred_y = pls.predict(valid_x)
print(((pred_y - valid_y)**2).mean()**0.5)
print(np.abs(pred_y - valid_y).mean())
print(np.abs(pred_y - valid_y).max())
print(r2_score(pred_y, valid_y))
print('---')

print(((pred_y - valid_y)[:, 1]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 1].mean())
print(np.abs(pred_y - valid_y)[:, 1].max())
print(r2_score(pred_y[:, 1], valid_y[:, 1]))
print('---')

print(((pred_y - valid_y)[:, 2]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 2].mean())
print(np.abs(pred_y - valid_y)[:, 2].max())
print(r2_score(pred_y[:, 2], valid_y[:, 2]))
print('---')

print(((pred_y - valid_y)[:, 4]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 4].mean())
print(np.abs(pred_y - valid_y)[:, 4].max())
print(r2_score(pred_y[:, 4], valid_y[:, 4]))
print('---')

print(((pred_y - valid_y)[:, 6]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 6].mean())
print(np.abs(pred_y - valid_y)[:, 6].max())
print(r2_score(pred_y[:, 6], valid_y[:, 6]))

start = time.time()
pls.predict(valid_x[0:1])
end = time.time()
print(end - start)

0.379263997896553
0.2620975878636084
1.8156373057109434
0.8883259448351079
---
0.23271747190406597
0.1839631386175293
0.8679721555887774
0.9362097328661668
---
0.6070222372435085
0.506037318185462
1.8156373057109434
0.9128602212754866
---
0.1870050709241606
0.14979272930748821
0.6226879465462645
0.9831132680940617
---
0.24388947081761167
0.1999635734695374
1.0205266208373374
0.9142564669050371
0.00031876564025878906


In [10]:
# SVR
svr = SVR(kernel='rbf', C=0.2)
scaler = StandardScaler()
scaler.fit(train_x)
pred_y = []
for i in range(0, 7):
    svr.fit(scaler.transform(train_x), train_y[:, i])
    pred_y.append(svr.predict(scaler.transform(valid_x)))
pred_y = np.array(pred_y).T
print(((pred_y - valid_y)**2).mean()**0.5)
print(np.abs(pred_y - valid_y).mean())
print(np.abs(pred_y - valid_y).max())
print(r2_score(pred_y, valid_y))
print('---')

print(((pred_y - valid_y)[:, 1]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 1].mean())
print(np.abs(pred_y - valid_y)[:, 1].max())
print(r2_score(pred_y[:, 1], valid_y[:, 1]))
print('---')

print(((pred_y - valid_y)[:, 2]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 2].mean())
print(np.abs(pred_y - valid_y)[:, 2].max())
print(r2_score(pred_y[:, 2], valid_y[:, 2]))
print('---')

print(((pred_y - valid_y)[:, 4]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 4].mean())
print(np.abs(pred_y - valid_y)[:, 4].max())
print(r2_score(pred_y[:, 4], valid_y[:, 4]))
print('---')

print(((pred_y - valid_y)[:, 6]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 6].mean())
print(np.abs(pred_y - valid_y)[:, 6].max())
print(r2_score(pred_y[:, 6], valid_y[:, 6]))

start = time.time()
svr.predict(scaler.transform(valid_x[0:1]))
end = time.time()
print(end - start)

0.30122689162249583
0.1696010924440068
3.0020720117053017
0.8496933859877692
---
0.18603556666542376
0.12447605903024789
0.9787711670881443
0.9537520871796565
---
0.5476662193733921
0.3539031135203258
3.0020720117053017
0.9176088094025241
---
0.3320556599496977
0.21032636322523643
1.9277255914890965
0.9313130054500893
---
0.16785697054758733
0.11336547705962967
0.7557575478949765
0.9555478810402476
0.0004076957702636719


## Fully Connected Layers

In [14]:
from model import FC
from dataset import SCHetDataset

valid_dataset = SCHetDataset('../data', 'hydro', mode='test')
valid_loader = tg.data.DataLoader(valid_dataset, 500, shuffle=False)

fc = FC()
state_dict = torch.load('../ckpt/fc.ckpt')
fc.load_state_dict(state_dict)

batch = next(iter(valid_loader))
s_x = batch.species_x
r_x = batch.reactions_x
valid_y = batch.y.view(-1, 7).detach().numpy() / 100
pred_y = torch.softmax(fc(s_x, r_x), dim=1)
pred_y = pred_y.flatten()[batch.y_index].detach().numpy().reshape(-1, 7)
print(((pred_y - valid_y)**2).mean()**0.5)
print(np.abs(pred_y - valid_y).mean())
print(np.abs(pred_y - valid_y).max())
print(r2_score(pred_y.reshape((-1, )), valid_y.reshape((-1, ))))
print('---')

pred_y = pred_y.reshape((-1, 7))
valid_y = valid_y.reshape((-1, 7))
print(((pred_y - valid_y)[:, 1]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 1].mean())
print(np.abs(pred_y - valid_y)[:, 1].max())
print(r2_score(pred_y[:, 1], valid_y[:, 1]))
print('---')

print(((pred_y - valid_y)[:, 2]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 2].mean())
print(np.abs(pred_y - valid_y)[:, 2].max())
print(r2_score(pred_y[:, 2], valid_y[:, 2]))
print('---')

print(((pred_y - valid_y)[:, 4]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 4].mean())
print(np.abs(pred_y - valid_y)[:, 4].max())
print(r2_score(pred_y[:, 4], valid_y[:, 4]))
print('---')

print(((pred_y - valid_y)[:, 6]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 6].mean())
print(np.abs(pred_y - valid_y)[:, 6].max())
print(r2_score(pred_y[:, 6], valid_y[:, 6]))

start = time.time()
s_x = valid_dataset[0].species_x
r_x = valid_dataset[0].reactions_x
fc(s_x, r_x)
end = time.time()
print(end - start)

0.0028173797493670884
0.0019328526
0.016753629
0.9993555464315448
---
0.0019850187388119394
0.0015472533
0.006037742
0.9556551603095532
---
0.0022537021112977893
0.0017588305
0.008861899
0.9891727168287895
---
0.001239563437621359
0.0009309198
0.0056108832
0.9931122499334183
---
0.0026741787971761334
0.00215415
0.0107239485
0.8819055012697263
0.0020918846130371094


In [20]:
from model import CNN
from dataset import SCHetDataset

valid_dataset = SCHetDataset('../data', 'hydro', mode='test')
valid_loader = tg.data.DataLoader(valid_dataset, 500, shuffle=False)

cnn = CNN()
state_dict = torch.load('../ckpt/cnn.ckpt')
cnn.load_state_dict(state_dict)

batch = next(iter(valid_loader))
s_x = batch.species_x
r_x = batch.reactions_x
edge_index = batch.edge_index
valid_y = batch.y.detach().numpy() / 100
pred_y = cnn(s_x, r_x, edge_index)
pred_y = pred_y.flatten()[batch.y_index].detach().numpy()
print(((pred_y - valid_y)**2).mean()**0.5)
print(np.abs(pred_y - valid_y).mean())
print(np.abs(pred_y - valid_y).max())
print(r2_score(pred_y, valid_y))
print('---')

pred_y = pred_y.reshape((-1, 7))
valid_y = valid_y.reshape((-1, 7))
print(((pred_y - valid_y)[:, 1]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 1].mean())
print(np.abs(pred_y - valid_y)[:, 1].max())
print(r2_score(pred_y[:, 1], valid_y[:, 1]))
print('---')

print(((pred_y - valid_y)[:, 2]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 2].mean())
print(np.abs(pred_y - valid_y)[:, 2].max())
print(r2_score(pred_y[:, 2], valid_y[:, 2]))
print('---')

print(((pred_y - valid_y)[:, 4]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 4].mean())
print(np.abs(pred_y - valid_y)[:, 4].max())
print(r2_score(pred_y[:, 4], valid_y[:, 4]))
print('---')

print(((pred_y - valid_y)[:, 6]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 6].mean())
print(np.abs(pred_y - valid_y)[:, 6].max())
print(r2_score(pred_y[:, 6], valid_y[:, 6]))

start = time.time()
valid_loader = tg.data.DataLoader(valid_dataset, 2, shuffle=False)
batch = next(iter(valid_loader))
s_x = batch.species_x
r_x = batch.reactions_x
edge_index = batch.edge_index
cnn(s_x, r_x, edge_index)
end = time.time()
print(end - start)

0.002321203418756066
0.0017411935
0.009966075
0.9995616460392348
---
0.002465913007783222
0.0019478033
0.00918524
0.9321878312068067
---
0.0028240913275022228
0.0022217913
0.009966075
0.9832697617566281
---
0.002192066834003735
0.0016725493
0.008410513
0.9777418942247899
---
0.00256456119079571
0.0020688572
0.008746751
0.893831182174895
0.031928062438964844


In [38]:
from model import SCHetNet
from dataset import SCHetDataset

valid_dataset = SCHetDataset('../data', 'hydro', mode='test')
valid_loader = tg.data.DataLoader(valid_dataset, 500, shuffle=False)

schetnet = SCHetNet(num_layers=4, h_channels=24)
state_dict = torch.load('../ckpt/schetnet_4_24.ckpt')
schetnet.load_state_dict(state_dict)

batch = next(iter(valid_loader))
s_x = batch.species_x
r_x = batch.reactions_x
edge_index = batch.edge_index
valid_y = batch.y.detach().numpy() / 100
pred_y = schetnet(s_x, r_x, edge_index)
pred_y = pred_y.flatten().detach().numpy()
print(((pred_y - valid_y)**2).mean()**0.5)
print(np.abs(pred_y - valid_y).mean())
print(np.abs(pred_y - valid_y).max())
print(r2_score(pred_y, valid_y))
print('---')

pred_y = pred_y.reshape((-1, 7))
valid_y = valid_y.reshape((-1, 7))
print(((pred_y - valid_y)[:, 1]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 1].mean())
print(np.abs(pred_y - valid_y)[:, 1].max())
print(r2_score(pred_y[:, 1], valid_y[:, 1]))
print('---')

print(((pred_y - valid_y)[:, 2]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 2].mean())
print(np.abs(pred_y - valid_y)[:, 2].max())
print(r2_score(pred_y[:, 2], valid_y[:, 2]))
print('---')

print(((pred_y - valid_y)[:, 4]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 4].mean())
print(np.abs(pred_y - valid_y)[:, 4].max())
print(r2_score(pred_y[:, 4], valid_y[:, 4]))
print('---')

print(((pred_y - valid_y)[:, 6]**2).mean()**0.5)
print(np.abs(pred_y - valid_y)[:, 6].mean())
print(np.abs(pred_y - valid_y)[:, 6].max())
print(r2_score(pred_y[:, 6], valid_y[:, 6]))

start = time.time()
valid_loader = tg.data.DataLoader(valid_dataset, 2, shuffle=False)
batch = next(iter(valid_loader))
s_x = batch.species_x
r_x = batch.reactions_x
edge_index = batch.edge_index
schetnet(s_x, r_x, edge_index)
end = time.time()
print(end - start)

0.0011587790973422722
0.00082927104
0.012415804
0.999891297978231
---
0.0009548485798308203
0.0007798379
0.0034226328
0.9895592270354107
---
0.0014306644516847117
0.0010957428
0.008072555
0.9957771207319888
---
0.0009057707582573585
0.00065689645
0.0045681
0.9960562503524643
---
0.0007917352611090791
0.00061335054
0.0035221726
0.9913577903278569
0.029288768768310547


In [9]:
state_dict['lin_r.weight'].shape

torch.Size([16, 3])