From 2b974679ef15be2c5cdebb473df512e6549a5a2a Mon Sep 17 00:00:00 2001 From: Naruki yoshikawa Date: Tue, 13 Mar 2018 17:50:34 +0900 Subject: [PATCH 1/5] Add example of using own dataset --- examples/own_dataset/README.md | 13 ++ examples/own_dataset/dataset.csv | 101 +++++++++++++++ examples/own_dataset/train.py | 209 +++++++++++++++++++++++++++++++ 3 files changed, 323 insertions(+) create mode 100644 examples/own_dataset/README.md create mode 100644 examples/own_dataset/dataset.csv create mode 100644 examples/own_dataset/train.py diff --git a/examples/own_dataset/README.md b/examples/own_dataset/README.md new file mode 100644 index 00000000..5f3f2390 --- /dev/null +++ b/examples/own_dataset/README.md @@ -0,0 +1,13 @@ +# Example of using your own dataset +## Usage +``` +python train.py dataset.csv --label value1 value2 +``` + +## How to use your own dataset +1. Prepare a CSV file which contains the list of SMILES and the values you want to train. +The first line of the CSV file should be label names. +See `dataset.csv` as an example. + +2. Use `CSVFileParser` of Cheiner Chemistry to feed data to model. +See `train.csv` as an example. diff --git a/examples/own_dataset/dataset.csv b/examples/own_dataset/dataset.csv new file mode 100644 index 00000000..9bcc86fd --- /dev/null +++ b/examples/own_dataset/dataset.csv @@ -0,0 +1,101 @@ +SMILES,value1,value2 +CC1=CC2CC(CC1)O2,-0.227400004863739,0.010400000028312206 +O=Cc1nccn1C=O,-0.2678000032901764,-0.09380000084638596 +CCC(C)(C)C(O)C=O,-0.2685000002384186,-0.038100000470876694 +C#CCC(C)(CO)OC,-0.2535000145435333,0.044599998742341995 +Nc1coc(=O)nc1N,-0.2303999960422516,-0.04170000180602074 +CC12C=CC(CCC1)C2,-0.2312999963760376,0.02239999920129776 +CC12CCC1C2OC=O,-0.2605000138282776,0.005400000140070915 +CC1C2CC3(COC3)N12,-0.23430000245571136,0.0697999969124794 +O=C1NC=NC12CC2,-0.24070000648498535,-0.017000000923871994 +C1=CC2CN2CC2NC12,-0.22169999778270721,0.007699999958276749 +CC1C2COCC12O,-0.2467000037431717,0.07410000264644623 +CC(=O)C1OCOC1=O,-0.2590000033378601,-0.042500000447034836 +CC1N2C3CC1(C)C32,-0.2295999974012375,0.0835999995470047 +CC1=CC2OC2(C#N)C1,-0.25999999046325684,-0.019899999722838402 +OC1CCC1,-0.25600001215934753,0.08009999990463257 +C#CC1(O)COC1C#N,-0.2849000096321106,-0.01769999973475933 +CC1(C#N)CC12CCC2,-0.2685000002384186,0.03460000082850456 +CCCC(N)(C#N)CO,-0.25760000944137573,0.028999999165534973 +NC1=NC2(CC2)CC1=O,-0.22470000386238098,-0.053700000047683716 +C#CC12C3CC1(C)OC32,-0.2273000031709671,0.026900000870227814 +CC(C)C#CCC=O,-0.24539999663829803,-0.02669999934732914 +CC#CC(C=O)CC,-0.24169999361038208,-0.02539999969303608 +CC1OC2C1=CC1OC12,-0.2485000044107437,-0.01769999973475933 +CNC(=N)C(C#N)OC,-0.23420000076293945,-0.0013000000035390258 +C#CC(C#C)OCC=O,-0.26100000739097595,-0.031599998474121094 +CN1CC(O)C12CC2,-0.20479999482631683,0.08730000257492065 +OC1C2C3OC4C1C2C34,-0.24469999969005585,0.04230000078678131 +OCC1C(O)C2CC12O,-0.24169999361038208,0.05739999935030937 +O=C([O-])C12[NH2+]CC1C2O,-0.2508000135421753,-0.0003000000142492354 +Cn1cc(O)c(CO)n1,-0.2045000046491623,0.01850000023841858 +O=C1COC2C3OC2C13,-0.2498999983072281,-0.03700000047683716 +C1#CCCOC=NCC1,-0.24279999732971191,0.012600000016391277 +O=c1ocncc1CO,-0.2563000023365021,-0.06289999932050705 +CC1NC1C(O)C(N)=O,-0.2547999918460846,0.023800000548362732 +CC1OC(=N)CC2CC21,-0.2498999983072281,0.032499998807907104 +OC12CCC3CN3C1C2,-0.21709999442100525,0.07280000299215317 +C#CC(CCO)OC,-0.2581999897956848,0.033900000154972076 +CCC1COC(CO)=N1,-0.2540999948978424,0.019200000911951065 +ON=C1C=CC2C(O)C12,-0.2184000015258789,-0.04349999874830246 +CN=c1cconn1,-0.23919999599456787,-0.037700001150369644 +CC1(C)CC2CC2C1O,-0.2540999948978424,0.066600002348423 +CCC1CCC(=N)O1,-0.2526000142097473,0.032600000500679016 +O=C1C2CCC1C1NC21,-0.2282000035047531,-0.00279999990016222 +CCOc1ccc(C)o1,-0.19059999287128448,0.033799998462200165 +O=C1C2CC3C4C2C1N34,-0.23479999601840973,-0.026100000366568565 +O=C1C=CCC=CC1=O,-0.24130000174045563,-0.08780000358819962 +Cc1cc(F)c[nH]c1=O,-0.2117999941110611,-0.042100001126527786 +CC1=CCc2nocc21,-0.22419999539852142,-0.019200000911951065 +N#CC1(O)CN=COC1,-0.26980000734329224,-0.002400000113993883 +Nc1n[nH]cc1N1CC1,-0.18649999797344208,0.03739999979734421 +CN1C2CC3(O)C1C23C,-0.19619999825954437,0.07779999822378159 +N=c1nccco1,-0.23680000007152557,-0.0689999982714653 +COC12COC1(C)C2C,-0.22339999675750732,0.07020000368356705 +CCOC1COC(=N)O1,-0.2547000050544739,0.0560000017285347 +COC1(C(N)=O)CC1,-0.23800000548362732,0.0284000001847744 +C#CCC#CC1NC1C,-0.23970000445842743,0.03180000185966492 +C1NC1CN1C2CCC21,-0.2379000037908554,0.06539999693632126 +CC(O)c1cc(N)[nH]n1,-0.21449999511241913,0.029899999499320984 +CC1(O)C(O)C1C=O,-0.24230000376701355,-0.022099999710917473 +C#CC1(C)C2C3OC3C21,-0.23819999396800995,0.025800000876188278 +c1c[nH]c2cccc-2c1,-0.17229999601840973,-0.037300001829862595 +CCC1(O)C(C)C1C=O,-0.24089999496936798,-0.01810000091791153 +C1=C2C(CC1)CC1NC21,-0.2231999933719635,0.01940000057220459 +C#CC1C2C(O)C1C2O,-0.24420000612735748,0.041999999433755875 +CC1(C)CN2CC(C2)O1,-0.2093999981880188,0.07599999755620956 +CC1OC1C1C2CN1C2,-0.22990000247955322,0.08429999649524689 +CC(=O)C12CC(=O)C1C2,-0.25049999356269836,-0.04270000010728836 +CC12C3=NCC1CC2O3,-0.23119999468326569,-0.016599999740719795 +c1cc2onnc2[nH]1,-0.23520000278949738,-0.042399998754262924 +O=CCCC1OC2CC12,-0.24369999766349792,-0.01850000023841858 +OCCC1C2C3CC3N12,-0.2175000011920929,0.06040000170469284 +OCC#CC1CC1,-0.23720000684261322,0.03359999880194664 +OC1C2CC3C1N1C2C31,-0.22709999978542328,0.0640999972820282 +CC1(C=O)C=CC(=O)N1,-0.25369998812675476,-0.05649999901652336 +CC1CC23CC12CCO3,-0.20999999344348907,0.08139999955892563 +CC(O)(C(N)=O)C1CO1,-0.24469999969005585,0.02889999933540821 +CC1=NC2(CC2)C(=N)N1,-0.2134999930858612,0.0024999999441206455 +N#CCCC(=O)C(N)=O,-0.25949999690055847,-0.08160000294446945 +CC(O)(C#N)COC=N,-0.27379998564720154,0.00570000009611249 +CC12C=CC(C)(N1)C2O,-0.22859999537467957,-0.0012000000569969416 +CC12COC1CCO2,-0.2468000054359436,0.07940000295639038 +c1noc2c1CCOC2,-0.24819999933242798,-0.010700000450015068 +C#CC1CCCCOC1,-0.2467000037431717,0.053599998354911804 +CN1C2C3OC2(C=O)C31,-0.23469999432563782,-0.04619999974966049 +CCn1cc(O)nn1,-0.22519999742507935,0.0013000000035390258 +CCOC(=NC)C(C)=O,-0.23420000076293945,-0.05640000104904175 +CC12CC1(C#N)C1CC12,-0.26750001311302185,0.02070000022649765 +CC(=O)C1OC1CC=O,-0.251800000667572,-0.04360000044107437 +Nc1cc(=O)cno1,-0.23770000040531158,-0.053700000047683716 +O=C1CC=CCC1O,-0.25519999861717224,-0.027300000190734863 +CC1CC1CN1CC1C,-0.2190999984741211,0.08590000122785568 +C#CCC(=N)OC=O,-0.2750999927520752,-0.032999999821186066 +Cc1cnc(C=O)n1C,-0.23080000281333923,-0.053700000047683716 +N=COCC(C=O)CO,-0.26260000467300415,-0.043699998408555984 +CC1=C2CC3C(C1)C23C,-0.19580000638961792,-0.022700000554323196 +CC1C=CCC(C)C1O,-0.2443999946117401,0.019099999219179153 +C1c2n[nH]nc2C2CN12,-0.23350000381469727,-0.011800000444054604 +COC1(C#N)CCC1C,-0.27480000257492065,0.02250000089406967 +N=CNC(=O)C1CCO1,-0.25049999356269836,-0.020800000056624413 +O=CC1(O)COC1=O,-0.27869999408721924,-0.06939999759197235 diff --git a/examples/own_dataset/train.py b/examples/own_dataset/train.py new file mode 100644 index 00000000..405d1783 --- /dev/null +++ b/examples/own_dataset/train.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python + +from __future__ import print_function +import argparse +import sys + +from sklearn.preprocessing import StandardScaler + +try: + import matplotlib + matplotlib.use('Agg') +except ImportError: + pass + + +import chainer +from chainer import functions as F, cuda, Variable +from chainer import iterators as I +from chainer import links as L +from chainer import optimizers as O +from chainer.datasets import split_dataset_random +from chainer import training +from chainer.training import extensions as E +import numpy + +from chainer_chemistry.models import MLP, NFP, GGNN, SchNet, WeaveNet, RSGCN +from chainer_chemistry.dataset.converters import concat_mols +from chainer_chemistry.dataset.parsers import CSVFileParser +from chainer_chemistry.dataset.preprocessors import preprocess_method_dict +from chainer_chemistry.datasets import NumpyTupleDataset + +from rdkit import Chem + + +class GraphConvPredictor(chainer.Chain): + + def __init__(self, graph_conv, mlp=None): + """Initialize GraphConvPredictor + + Args: + graph_conv: graph convolution network to obtain molecule feature + representation + mlp: multi layer perceptron, used as final connected layer. + It can be `None` if no operation is necessary after + `graph_conv` calculation. + """ + + super(GraphConvPredictor, self).__init__() + with self.init_scope(): + self.graph_conv = graph_conv + if isinstance(mlp, chainer.Link): + self.mlp = mlp + if not isinstance(mlp, chainer.Link): + self.mlp = mlp + + def __call__(self, atoms, adjs): + x = self.graph_conv(atoms, adjs) + if self.mlp: + x = self.mlp(x) + return x + + +def main(): + # Supported preprocessing/network list + method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn'] + scale_list = ['standardize', 'none'] + + parser = argparse.ArgumentParser( + description='Regression with own dataset.') + parser.add_argument('datafile', type=str) + parser.add_argument('--method', '-m', type=str, choices=method_list, + default='nfp') + parser.add_argument('--label', '-l', nargs='+', + help='target label for regression') + parser.add_argument('--scale', type=str, choices=scale_list, + default='standardize', help='Label scaling method') + parser.add_argument('--conv-layers', '-c', type=int, default=4) + parser.add_argument('--batchsize', '-b', type=int, default=32) + parser.add_argument('--gpu', '-g', type=int, default=-1) + parser.add_argument('--out', '-o', type=str, default='result') + parser.add_argument('--epoch', '-e', type=int, default=20) + parser.add_argument('--unit-num', '-u', type=int, default=16) + parser.add_argument('--seed', '-s', type=int, default=777) + parser.add_argument('--train-data-ratio', '-t', type=float, default=0.7) + args = parser.parse_args() + + seed = args.seed + train_data_ratio = args.train_data_ratio + method = args.method + if args.label: + labels = args.label + class_num = len(labels) if isinstance(labels, list) else 1 + else: + sys.exit("Error: No target label is specified.") + + # Dataset preparation + dataset = None + + # Postprocess is required for regression task + def postprocess_label(label_list): + return numpy.asarray(label_list, dtype=numpy.float32) + + print('preprocessing dataset...') + preprocessor = preprocess_method_dict[method]() + parser = CSVFileParser(preprocessor, + postprocess_label=postprocess_label, + labels=labels, smiles_col='SMILES') + dataset = parser.parse(args.datafile)["dataset"] + + if args.scale == 'standardize': + # Standard Scaler for labels + ss = StandardScaler() + labels = ss.fit_transform(dataset.get_datasets()[-1]) + dataset = NumpyTupleDataset(*dataset.get_datasets()[:-1], labels) + + train_data_size = int(len(dataset) * train_data_ratio) + train, val = split_dataset_random(dataset, train_data_size, seed) + + # Network + n_unit = args.unit_num + conv_layers = args.conv_layers + if method == 'nfp': + print('Train NFP model...') + model = GraphConvPredictor(NFP(out_dim=n_unit, hidden_dim=n_unit, + n_layers=conv_layers), + MLP(out_dim=class_num, hidden_dim=n_unit)) + elif method == 'ggnn': + print('Train GGNN model...') + model = GraphConvPredictor(GGNN(out_dim=n_unit, hidden_dim=n_unit, + n_layers=conv_layers), + MLP(out_dim=class_num, hidden_dim=n_unit)) + elif method == 'schnet': + print('Train SchNet model...') + model = GraphConvPredictor( + SchNet(out_dim=class_num, hidden_dim=n_unit, n_layers=conv_layers), + None) + elif method == 'weavenet': + print('Train WeaveNet model...') + n_atom = 20 + n_sub_layer = 1 + weave_channels = [50] * conv_layers + model = GraphConvPredictor( + WeaveNet(weave_channels=weave_channels, hidden_dim=n_unit, + n_sub_layer=n_sub_layer, n_atom=n_atom), + MLP(out_dim=class_num, hidden_dim=n_unit)) + elif method == 'rsgcn': + print('Train RSGCN model...') + model = GraphConvPredictor( + RSGCN(out_dim=n_unit, hidden_dim=n_unit, n_layers=conv_layers), + MLP(out_dim=class_num, hidden_dim=n_unit)) + else: + raise ValueError('[ERROR] Invalid method {}'.format(method)) + + train_iter = I.SerialIterator(train, args.batchsize) + val_iter = I.SerialIterator(val, args.batchsize, + repeat=False, shuffle=False) + + def scaled_abs_error(x0, x1): + if isinstance(x0, Variable): + x0 = cuda.to_cpu(x0.data) + if isinstance(x1, Variable): + x1 = cuda.to_cpu(x1.data) + if args.scale == 'standardize': + scaled_x0 = ss.inverse_transform(cuda.to_cpu(x0)) + scaled_x1 = ss.inverse_transform(cuda.to_cpu(x1)) + diff = scaled_x0 - scaled_x1 + elif args.scale == 'none': + diff = cuda.to_cpu(x0) - cuda.to_cpu(x1) + return numpy.mean(numpy.absolute(diff), axis=0)[0] + + classifier = L.Classifier(model, lossfun=F.mean_squared_error, + accfun=scaled_abs_error) + + if args.gpu >= 0: + chainer.cuda.get_device_from_id(args.gpu).use() + classifier.to_gpu() + + optimizer = O.Adam() + optimizer.setup(classifier) + + updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, + converter=concat_mols) + trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) + trainer.extend(E.Evaluator(val_iter, classifier, device=args.gpu, + converter=concat_mols)) + trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch')) + trainer.extend(E.LogReport()) + trainer.extend(E.PrintReport(['epoch', 'main/loss', 'main/accuracy', + 'validation/main/loss', + 'validation/main/accuracy', + 'elapsed_time'])) + trainer.extend(E.ProgressBar()) + trainer.run() + + # Example of prediction using trained model + smiles = 'c1ccccc1' + mol = Chem.MolFromSmiles(smiles) + preprocessor = preprocess_method_dict[method]() + standardized_smiles, mol = preprocessor.prepare_smiles_and_mol(mol) + input_features = preprocessor.get_input_features(mol) + atoms, adjs = concat_mols([input_features]) + prediction = model(atoms, adjs).data[0] + print('Prediction for {}:'.format(smiles)) + for i, label in enumerate(args.label): + print('{}: {}'.format(label, prediction[i])) + + +if __name__ == '__main__': + main() From ddfc9cc1afb5ba02fcffaeb3695d370b301ff949 Mon Sep 17 00:00:00 2001 From: Naruki yoshikawa Date: Mon, 19 Mar 2018 11:57:09 +0900 Subject: [PATCH 2/5] Change following comments --- docs/source/tutorial.rst | 5 +++++ examples/own_dataset/README.md | 9 +++++++-- examples/own_dataset/train.py | 6 ++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 573f7ef7..51d2cdc7 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -190,6 +190,11 @@ You can also train other type models like GGNN, SchNet or WeaveNet, and other ta See output of ``python train_qm9.py --help``. +Using your own dataset +======================== +You can use your own dataset in Chainer Chemistry. +`example/own_dataset `_ shows an example code. + Reference ======================== [1] L. Ruddigkeit, R. van Deursen, L. C. Blum, J.-L. Reymond, Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17, J. Chem. Inf. Model. 52, 2864–2875, 2012. diff --git a/examples/own_dataset/README.md b/examples/own_dataset/README.md index 5f3f2390..447b0982 100644 --- a/examples/own_dataset/README.md +++ b/examples/own_dataset/README.md @@ -4,10 +4,15 @@ python train.py dataset.csv --label value1 value2 ``` -## How to use your own dataset +The `--label` option specifies which row in `dataset.csv` is trained. +Type `python train.py --help` to see complete options. + +## Procedure 1. Prepare a CSV file which contains the list of SMILES and the values you want to train. The first line of the CSV file should be label names. See `dataset.csv` as an example. +`dataset.csv` is made by sampling from the QM9 dataset. +`value1` is homo and `value2` is lumo. -2. Use `CSVFileParser` of Cheiner Chemistry to feed data to model. +2. Use [CSVFileParser](http://chainer-chemistry.readthedocs.io/en/stable/generated/chainer_chemistry.dataset.parsers.CSVFileParser.html) of Cheiner Chemistry to feed data to model. See `train.csv` as an example. diff --git a/examples/own_dataset/train.py b/examples/own_dataset/train.py index 405d1783..d26b7f02 100644 --- a/examples/own_dataset/train.py +++ b/examples/own_dataset/train.py @@ -22,13 +22,11 @@ from chainer import training from chainer.training import extensions as E import numpy - from chainer_chemistry.models import MLP, NFP, GGNN, SchNet, WeaveNet, RSGCN from chainer_chemistry.dataset.converters import concat_mols from chainer_chemistry.dataset.parsers import CSVFileParser from chainer_chemistry.dataset.preprocessors import preprocess_method_dict from chainer_chemistry.datasets import NumpyTupleDataset - from rdkit import Chem @@ -94,13 +92,12 @@ def main(): sys.exit("Error: No target label is specified.") # Dataset preparation - dataset = None # Postprocess is required for regression task def postprocess_label(label_list): return numpy.asarray(label_list, dtype=numpy.float32) - print('preprocessing dataset...') + print('Preprocessing dataset...') preprocessor = preprocess_method_dict[method]() parser = CSVFileParser(preprocessor, postprocess_label=postprocess_label, @@ -185,6 +182,7 @@ def scaled_abs_error(x0, x1): converter=concat_mols)) trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch')) trainer.extend(E.LogReport()) + # Note that scaled errors are reported as (validation/)main/accuracy trainer.extend(E.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', From 77da969fc17910c85db009cd6607fcdc9ab07cab Mon Sep 17 00:00:00 2001 From: Naruki yoshikawa Date: Tue, 20 Mar 2018 15:39:28 +0900 Subject: [PATCH 3/5] Add test to run example --- docs/source/tutorial.rst | 2 +- examples/example_test_cpu.sh | 5 +++++ examples/example_test_gpu.sh | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 51d2cdc7..a49b3499 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -193,7 +193,7 @@ See output of ``python train_qm9.py --help``. Using your own dataset ======================== You can use your own dataset in Chainer Chemistry. -`example/own_dataset `_ shows an example code. +`example/own_dataset `_ shows an example. Reference ======================== diff --git a/examples/example_test_cpu.sh b/examples/example_test_cpu.sh index 5e9a5591..b366960b 100644 --- a/examples/example_test_cpu.sh +++ b/examples/example_test_cpu.sh @@ -34,6 +34,11 @@ do python train_qm9.py --method ${method} --label A --conv-layers 1 --gpu ${gpu} --epoch 1 --unit-num 10 --batchsize 32 python train_qm9.py --method ${method} --conv-layers 1 --gpu ${gpu} --epoch 1 --unit-num 10 --batchsize 32 cd ../ + + # Own dataset + cd own_dataset + python train.py dataset.csv --method ${method} --label value1 --conv-layers 1 --gpu ${gpu} --epoch 1 --unit-num 10 --batchsize 32 + cd ../ done cd tox21 diff --git a/examples/example_test_gpu.sh b/examples/example_test_gpu.sh index eb64314d..7ae35866 100644 --- a/examples/example_test_gpu.sh +++ b/examples/example_test_gpu.sh @@ -34,6 +34,11 @@ do python train_qm9.py --method ${method} --label A --conv-layers 1 --gpu ${gpu} --epoch 1 --unit-num 10 python train_qm9.py --method ${method} --conv-layers 1 --gpu ${gpu} --epoch 1 --unit-num 10 cd ../ + + # Own dataset + cd own_dataset + python train.py dataset.csv --method ${method} --label value1 --conv-layers 1 --gpu ${gpu} --epoch 1 --unit-num 10 + cd ../ done cd tox21 From 49b7a33bb3f728dbeffc1d1e5e2f503e030393ef Mon Sep 17 00:00:00 2001 From: Naruki yoshikawa Date: Tue, 27 Mar 2018 17:55:21 +0900 Subject: [PATCH 4/5] Support GPU --- examples/own_dataset/README.md | 2 +- examples/own_dataset/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/own_dataset/README.md b/examples/own_dataset/README.md index 447b0982..87b78fbd 100644 --- a/examples/own_dataset/README.md +++ b/examples/own_dataset/README.md @@ -4,7 +4,7 @@ python train.py dataset.csv --label value1 value2 ``` -The `--label` option specifies which row in `dataset.csv` is trained. +The `--label` option specifies which columns in `dataset.csv` are trained. Type `python train.py --help` to see complete options. ## Procedure diff --git a/examples/own_dataset/train.py b/examples/own_dataset/train.py index d26b7f02..c9d64022 100644 --- a/examples/own_dataset/train.py +++ b/examples/own_dataset/train.py @@ -196,7 +196,7 @@ def scaled_abs_error(x0, x1): preprocessor = preprocess_method_dict[method]() standardized_smiles, mol = preprocessor.prepare_smiles_and_mol(mol) input_features = preprocessor.get_input_features(mol) - atoms, adjs = concat_mols([input_features]) + atoms, adjs = concat_mols([input_features], device=args.gpu) prediction = model(atoms, adjs).data[0] print('Prediction for {}:'.format(smiles)) for i, label in enumerate(args.label): From 5fba8028ecddb933ae17053687ea65910f619338 Mon Sep 17 00:00:00 2001 From: Naruki yoshikawa Date: Wed, 28 Mar 2018 19:31:10 +0900 Subject: [PATCH 5/5] Change import order --- examples/own_dataset/README.md | 4 ++-- examples/own_dataset/train.py | 22 +++++++++------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/examples/own_dataset/README.md b/examples/own_dataset/README.md index 87b78fbd..0610965d 100644 --- a/examples/own_dataset/README.md +++ b/examples/own_dataset/README.md @@ -8,11 +8,11 @@ The `--label` option specifies which columns in `dataset.csv` are trained. Type `python train.py --help` to see complete options. ## Procedure -1. Prepare a CSV file which contains the list of SMILES and the values you want to train. +1. Prepare a CSV file which contains a list of SMILES and values you want to train. The first line of the CSV file should be label names. See `dataset.csv` as an example. `dataset.csv` is made by sampling from the QM9 dataset. -`value1` is homo and `value2` is lumo. +`value1` is HOMO and `value2` is LUMO. 2. Use [CSVFileParser](http://chainer-chemistry.readthedocs.io/en/stable/generated/chainer_chemistry.dataset.parsers.CSVFileParser.html) of Cheiner Chemistry to feed data to model. See `train.csv` as an example. diff --git a/examples/own_dataset/train.py b/examples/own_dataset/train.py index c9d64022..69ff9c4d 100644 --- a/examples/own_dataset/train.py +++ b/examples/own_dataset/train.py @@ -4,30 +4,27 @@ import argparse import sys -from sklearn.preprocessing import StandardScaler - -try: - import matplotlib - matplotlib.use('Agg') -except ImportError: - pass - - import chainer +from chainer.datasets import split_dataset_random from chainer import functions as F, cuda, Variable from chainer import iterators as I from chainer import links as L from chainer import optimizers as O -from chainer.datasets import split_dataset_random from chainer import training from chainer.training import extensions as E -import numpy -from chainer_chemistry.models import MLP, NFP, GGNN, SchNet, WeaveNet, RSGCN from chainer_chemistry.dataset.converters import concat_mols from chainer_chemistry.dataset.parsers import CSVFileParser from chainer_chemistry.dataset.preprocessors import preprocess_method_dict from chainer_chemistry.datasets import NumpyTupleDataset +from chainer_chemistry.models import MLP, NFP, GGNN, SchNet, WeaveNet, RSGCN +try: + import matplotlib + matplotlib.use('Agg') +except ImportError: + pass +import numpy from rdkit import Chem +from sklearn.preprocessing import StandardScaler class GraphConvPredictor(chainer.Chain): @@ -92,7 +89,6 @@ def main(): sys.exit("Error: No target label is specified.") # Dataset preparation - # Postprocess is required for regression task def postprocess_label(label_list): return numpy.asarray(label_list, dtype=numpy.float32)