### SageMaker self-supervised prediction

In [1]:
import sagemaker
import boto3

# SageMaker SDK session for this notebook.
sagemaker_session = sagemaker.Session()

# The account id comes from STS and the region from a default boto3 session,
# so neither value is written into the notebook.
account_id = (
    boto3.client('sts')
    .get_caller_identity()
    .get('Account')
)
region = boto3.session.Session().region_name

# The role ARN is assembled from the account id and a fixed role name
# instead of being resolved through sagemaker.get_execution_role().
#role = sagemaker.get_execution_role()
role = "arn:aws:iam::{}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449".format(
    account_id
)


In [2]:
# Image name and tag of the custom container.
version_tag = "202209180127"
pytorch_custom_image_name = f"large-scale-ptm-ppi:gpu-{version_tag}"

# Instance type and number of instances.
instance_type, instance_count = "ml.g4dn.2xlarge", 3



In [3]:
# Image URI in the form <account>.dkr.ecr.<region>.amazonaws.com/<name:tag>.
docker_repo = "{}.dkr.ecr.{}.amazonaws.com/{}".format(
    account_id,
    region,
    pytorch_custom_image_name,
)

In [4]:
# S3 bucket from which the input data and vocabulary S3 URIs in this notebook are built.
bucket = "aegovan-data"

In [5]:

# Self-supervised train / test / validation files.
abstract_trainfile = "s3://{}/self-supervised/train.json".format(bucket)
abstract_testfile = "s3://{}/self-supervised/test.json".format(bucket)
abstract_valfile = "s3://{}/self-supervised/val.json".format(bucket)

# The large-scale input prefix is also the data the prediction run is pointed at.
eval_file = abstract_largescale = "s3://{}/chemprotlargescale/input/data_2022080620".format(bucket)

In [6]:
import datetime
# Timestamp string (year, month, day, hour) taken from the local clock.
date_fmt = datetime.datetime.now().strftime("%Y%m%d%H")

In [7]:
# Name of the training job; it is used to locate the model artifact and to label the prediction output prefix.
training_job="selfsupervised-bert-f1-2022-09-17-23-42-42-470"

In [8]:
# Model artifact of the selected training job. The bucket comes from `bucket`
# rather than a repeated literal, so changing `bucket` also moves this path.
s3_model_path = f"s3://{bucket}/selfsupervised_results/{training_job}/output/model.tar.gz"


### Run prediction

In [9]:
# Output prefix keyed by the training job name and the run timestamp. The bucket
# comes from `bucket` rather than a repeated literal, so changing `bucket` also
# moves this destination.
s3_output_predictions = f"s3://{bucket}/selfsupervised_chemprot/predictions_{training_job}/{date_fmt}"

# Input data location and how SageMaker should interpret the S3 URI.
s3_input_data = eval_file
s3_data_type = "S3Prefix"

# Values forwarded to the inference script as --filter / --filterstdthreshold.
usefilter = 0
filter_threshold_std = 1.0

# Model artifact and tokeniser/vocabulary locations.
s3_input_models = s3_model_path
s3_input_vocab = "s3://{}/embeddings/bert/".format(bucket)

In [10]:
# Display the resolved input location and S3 data type before launching the job.
(s3_input_data, s3_data_type)

('s3://aegovan-data/chemprotlargescale/input/data_2022080620', 'S3Prefix')

In [11]:
from sagemaker.network import NetworkConfig
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.processing import ScriptProcessor

# Container-local paths: three input mount points and one output directory.
sm_local_input_data = "/opt/ml/processing/input/data/jsondata"
sm_local_input_models = "/opt/ml/processing/input/data/models"
sm_local_input_vocab = "/opt/ml/processing/input/data/vocab"
sm_local_output = "/opt/ml/processing/output"

# Last path component of the input S3 URI.
input_file_name = s3_input_data.rsplit("/", 1)[-1]

# Processor that runs a Python script inside the custom image.
script_processor = ScriptProcessor(
    image_uri=docker_repo,
    command=["python"],
    env={'mode': 'python', 'PYTHONPATH': '/opt/ml/code'},
    role=role,
    instance_type=instance_type,
    instance_count=instance_count,
    max_runtime_in_seconds=5 * 24 * 60 * 60,  # 5 days, expressed in seconds
    volume_size_in_gb=200,
    network_config=NetworkConfig(enable_network_isolation=False),
    base_job_name="selfsupervised-inference",
)

# Launch the batch prediction job.
script_processor.run(
    code='../src/inference/chemprot_selfsupervised_batch_predict.py',
    arguments=[
        # Positional arguments: data dir, model dir, output dir.
        sm_local_input_data,
        sm_local_input_models,
        sm_local_output,
        "--ensemble", "0",
        "--tokenisor_data_dir", sm_local_input_vocab,
        "--filter", str(usefilter),
        "--batch", "32",
        "--filterstdthreshold", str(filter_threshold_std),
    ],
    inputs=[
        # The data input is sharded by S3 key; the model and vocabulary inputs
        # below are fully replicated.
        ProcessingInput(
            source=s3_input_data,
            s3_data_type=s3_data_type,
            destination=sm_local_input_data,
            s3_data_distribution_type="ShardedByS3Key",
        ),
        ProcessingInput(
            source=s3_input_models,
            destination=sm_local_input_models,
            s3_data_distribution_type="FullyReplicated",
        ),
        ProcessingInput(
            source=s3_input_vocab,
            destination=sm_local_input_vocab,
            s3_data_distribution_type="FullyReplicated",
        ),
    ],
    outputs=[
        ProcessingOutput(
            source=sm_local_output,
            destination=s3_output_predictions,
            output_name='predictions',
        ),
    ],
)





Job Name:  selfsupervised-inference-2022-09-18-02-08-50-050
Inputs:  [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://aegovan-data/chemprotlargescale/input/data_2022080620', 'LocalPath': '/opt/ml/processing/input/data/jsondata', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'ShardedByS3Key', 'S3CompressionType': 'None'}}, {'InputName': 'input-2', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://aegovan-data/selfsupervised_results/selfsupervised-bert-f1-2022-09-17-23-42-42-470/output/model.tar.gz', 'LocalPath': '/opt/ml/processing/input/data/models', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'input-3', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://aegovan-data/embeddings/bert/', 'LocalPath': '/opt/ml/processing/input/data/vocab', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3Compression

[35m2022-09-18 02:14:35,405 - models.bert_model_factory - INFO - Retrieving key model_fine_tune with default 0, found 0[0m
[35m2022-09-18 02:14:35,405 - models.bert_model_factory - INFO - Retrieving model complete[0m
[35m2022-09-18 02:14:35,405 - dataset_builder - INFO - Retrieving Tokeniser[0m
[35m2022-09-18 02:14:35,405 - models.bert_model_factory - INFO - Retrieving Tokeniser[0m
[35m2022-09-18 02:14:35,405 - models.bert_model_factory - INFO - Retrieving key tokenisor_max_seq_len with default 512, found 512[0m
[35m2022-09-18 02:14:35,405 - models.bert_model_factory - INFO - Retrieving key tokenisor_lower_case with default 0, found 0[0m
[35m2022-09-18 02:14:35,405 - models.bert_model_factory - INFO - Retrieving key pretrained_model with default bert-base-cased, found /opt/ml/input/data/PRETRAINED_MODEL[0m
[35m2022-09-18 02:14:35,405 - models.bert_model_factory - INFO - Retrieving key tokenisor_data_dir with default /opt/ml/input/data/PRETRAINED_MODEL, found /opt/ml/proc

[32m2022-09-18 03:05:52,172 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[32m2022-09-18 03:05:52,283 - inference.predictor - INFO - Using device cuda:0[0m
[35m2022-09-18 03:05:52,657 - inference.batch_predict - INFO - Records to write: 91621[0m
[35m2022-09-18 03:05:52,657 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0649.json.rel.json.json[0m
[35m2022-09-18 03:05:55,052 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0649.json.rel.json[0m
[35m2022-09-18 03:05:55,151 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0616.json.rel.json with output in /opt/ml/processing/output/pubmed19n0616.json.rel.json.json[0m
[35m2022-09-18 03:05:55,152 - inference.batch_predict - INFO - Processing data file /opt/ml/processing/input/data/jsondata/pubmed19n0616.json.rel.json[0m
[35m2022-09-18 03:05:55,155 - inference.b

[35m2022-09-18 03:59:26,604 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[35m2022-09-18 03:59:26,715 - inference.predictor - INFO - Using device cuda:0[0m
[34m2022-09-18 04:00:20,014 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 04:00:20,017 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 04:00:20,048 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 04:00:20,265 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 04:00:29,632 - inference.batch_predict - INFO - Records to write: 98385[0m
[34m2022-09-18 04:00:29,632 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0667.json.rel.json.json[0m
[34m2022-09-18 04:00:32,307 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0667.json.rel.json[0m
[34m2022-09-18 04:00:32,394 - inference.b

[35m2022-09-18 04:50:35,924 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 04:50:35,927 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 04:50:35,954 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 04:50:36,164 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 04:50:45,041 - inference.batch_predict - INFO - Records to write: 93664[0m
[35m2022-09-18 04:50:45,041 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0662.json.rel.json.json[0m
[35m2022-09-18 04:50:47,510 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0662.json.rel.json[0m
[35m2022-09-18 04:50:47,581 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0625.json.rel.json with output in /opt/ml/processing/output/pubmed19n0625.json.rel.jso

[32m2022-09-18 05:27:29,999 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[32m2022-09-18 05:27:30,109 - inference.predictor - INFO - Using device cuda:0[0m
[35m2022-09-18 05:33:05,066 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 05:33:05,069 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 05:33:05,092 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 05:33:05,265 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 05:33:12,515 - inference.batch_predict - INFO - Records to write: 78899[0m
[35m2022-09-18 05:33:12,515 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0625.json.rel.json.json[0m
[35m2022-09-18 05:33:14,578 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0625.json.rel.json[0m
[35m2022-09-18 05:33:14,638 - inference.b

[35m2022-09-18 06:26:55,491 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 06:26:55,494 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 06:26:55,526 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 06:26:55,738 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 06:27:04,769 - inference.batch_predict - INFO - Records to write: 95860[0m
[35m2022-09-18 06:27:04,770 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0631.json.rel.json.json[0m
[35m2022-09-18 06:27:07,277 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0631.json.rel.json[0m
[35m2022-09-18 06:27:07,345 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0665.json.rel.json with output in /opt/ml/processing/output/pubmed19n0665.json.rel.jso

[34m2022-09-18 07:57:59,692 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 07:57:59,695 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 07:57:59,734 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 07:58:00,008 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 07:58:12,110 - inference.batch_predict - INFO - Records to write: 122681[0m
[34m2022-09-18 07:58:12,110 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0615.json.rel.json.json[0m
[34m2022-09-18 07:58:15,379 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0615.json.rel.json[0m
[34m2022-09-18 07:58:15,453 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0636.json.rel.json with output in /opt/ml/processing/output/pubmed19n0636.json.rel.js

[35m2022-09-18 08:11:23,288 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[35m2022-09-18 08:11:23,397 - inference.predictor - INFO - Using device cuda:0[0m
[34m2022-09-18 08:54:16,168 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 08:54:16,171 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 08:54:16,203 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 08:54:16,429 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 08:54:25,720 - inference.batch_predict - INFO - Records to write: 100757[0m
[34m2022-09-18 08:54:25,720 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0636.json.rel.json.json[0m
[34m2022-09-18 08:54:28,367 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0636.json.rel.json[0m
[34m2022-09-18 08:54:28,434 - inference.

[34m2022-09-18 09:03:39,992 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[34m2022-09-18 09:03:40,102 - inference.predictor - INFO - Using device cuda:0[0m
[32m2022-09-18 09:09:31,255 - inference.predictor - INFO - Completed inference cuda:0[0m
[32m2022-09-18 09:09:31,258 - inference.ensemble_predictor - INFO - Computing average [0m
[32m2022-09-18 09:09:31,293 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[32m2022-09-18 09:09:31,531 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[32m2022-09-18 09:09:42,032 - inference.batch_predict - INFO - Records to write: 107956[0m
[32m2022-09-18 09:09:42,033 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0605.json.rel.json.json[0m
[32m2022-09-18 09:09:44,834 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0605.json.rel.json[0m
[32m2022-09-18 09:09:44,905 - inference.

[34m2022-09-18 09:23:42,310 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[34m2022-09-18 09:23:42,419 - inference.predictor - INFO - Using device cuda:0[0m
[35m2022-09-18 09:50:40,416 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 09:50:40,419 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 09:50:40,449 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 09:50:40,663 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 09:50:49,299 - inference.batch_predict - INFO - Records to write: 90416[0m
[35m2022-09-18 09:50:49,299 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0659.json.rel.json.json[0m
[35m2022-09-18 09:50:51,681 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0659.json.rel.json[0m
[35m2022-09-18 09:50:51,745 - inference.b

[32m2022-09-18 10:22:36,659 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[32m2022-09-18 10:22:36,770 - inference.predictor - INFO - Using device cuda:0[0m
[35m2022-09-18 10:39:35,566 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 10:39:35,570 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 10:39:35,595 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 10:39:35,799 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 10:39:44,698 - inference.batch_predict - INFO - Records to write: 92170[0m
[35m2022-09-18 10:39:44,698 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0687.json.rel.json.json[0m
[35m2022-09-18 10:39:47,211 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0687.json.rel.json[0m
[35m2022-09-18 10:39:47,296 - inference.b

[35m2022-09-18 11:28:30,331 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 11:28:30,334 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 11:28:30,363 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 11:28:30,562 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 11:28:38,805 - inference.batch_predict - INFO - Records to write: 87657[0m
[35m2022-09-18 11:28:38,805 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0678.json.rel.json.json[0m
[35m2022-09-18 11:28:41,209 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0678.json.rel.json[0m
[35m2022-09-18 11:28:41,298 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0607.json.rel.json with output in /opt/ml/processing/output/pubmed19n0607.json.rel.jso

[35m2022-09-18 12:05:41,177 - models.bert_model_factory - INFO - Retrieving key model_fine_tune with default 0, found 0[0m
[35m2022-09-18 12:05:41,177 - models.bert_model_factory - INFO - Retrieving model complete[0m
[35m2022-09-18 12:05:41,177 - dataset_builder - INFO - Retrieving Tokeniser[0m
[35m2022-09-18 12:05:41,177 - models.bert_model_factory - INFO - Retrieving Tokeniser[0m
[35m2022-09-18 12:05:41,178 - models.bert_model_factory - INFO - Retrieving key tokenisor_max_seq_len with default 512, found 512[0m
[35m2022-09-18 12:05:41,178 - models.bert_model_factory - INFO - Retrieving key tokenisor_lower_case with default 0, found 0[0m
[35m2022-09-18 12:05:41,178 - models.bert_model_factory - INFO - Retrieving key pretrained_model with default bert-base-cased, found /opt/ml/input/data/PRETRAINED_MODEL[0m
[35m2022-09-18 12:05:41,178 - models.bert_model_factory - INFO - Retrieving key tokenisor_data_dir with default /opt/ml/input/data/PRETRAINED_MODEL, found /opt/ml/proc

[32m2022-09-18 12:50:43,219 - inference.predictor - INFO - Completed inference cuda:0[0m
[32m2022-09-18 12:50:43,221 - inference.ensemble_predictor - INFO - Computing average [0m
[32m2022-09-18 12:50:43,244 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[32m2022-09-18 12:50:43,409 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[32m2022-09-18 12:50:50,379 - inference.batch_predict - INFO - Records to write: 75468[0m
[32m2022-09-18 12:50:50,380 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0694.json.rel.json.json[0m
[32m2022-09-18 12:50:52,418 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0694.json.rel.json[0m
[32m2022-09-18 12:50:52,493 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0685.json.rel.json with output in /opt/ml/processing/output/pubmed19n0685.json.rel.jso

[34m2022-09-18 13:31:38,307 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0639.json.rel.json[0m
[34m2022-09-18 13:31:38,371 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0686.json.rel.json with output in /opt/ml/processing/output/pubmed19n0686.json.rel.json.json[0m
[34m2022-09-18 13:31:38,373 - inference.batch_predict - INFO - Processing data file /opt/ml/processing/input/data/jsondata/pubmed19n0686.json.rel.json[0m
[34m2022-09-18 13:31:38,378 - inference.batch_predict - INFO - Using args :{'datasetfactory': 'datasets.chemprot_selfsupervised_dataset_factory.ChemprotSelfsupervisedDatasetFactory', 'traindir': '/opt/ml/input/data/train', 'valdir': '/opt/ml/input/data/val', 'testdir': None, 'modelfactory': 'models.bert_model_factory.BertModelFactory', 'pretrained_model_dir': '/opt/ml/input/data/PRETRAINED_MODEL', 'kfoldtrainprefix': None, 'outdir': '/opt/ml/output/data'

[35m2022-09-18 13:48:02,471 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[35m2022-09-18 13:48:02,580 - inference.predictor - INFO - Using device cuda:0[0m
[34m2022-09-18 14:22:42,915 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 14:22:42,918 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 14:22:42,944 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 14:22:43,149 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 14:22:51,889 - inference.batch_predict - INFO - Records to write: 91131[0m
[34m2022-09-18 14:22:51,889 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0686.json.rel.json.json[0m
[34m2022-09-18 14:22:54,419 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0686.json.rel.json[0m
[34m2022-09-18 14:22:54,505 - inference.b

[32m2022-09-18 14:39:49,725 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0650.json.rel.json[0m
[32m2022-09-18 14:39:49,796 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0623.json.rel.json with output in /opt/ml/processing/output/pubmed19n0623.json.rel.json.json[0m
[32m2022-09-18 14:39:49,798 - inference.batch_predict - INFO - Processing data file /opt/ml/processing/input/data/jsondata/pubmed19n0623.json.rel.json[0m
[32m2022-09-18 14:39:49,801 - inference.batch_predict - INFO - Using args :{'datasetfactory': 'datasets.chemprot_selfsupervised_dataset_factory.ChemprotSelfsupervisedDatasetFactory', 'traindir': '/opt/ml/input/data/train', 'valdir': '/opt/ml/input/data/val', 'testdir': None, 'modelfactory': 'models.bert_model_factory.BertModelFactory', 'pretrained_model_dir': '/opt/ml/input/data/PRETRAINED_MODEL', 'kfoldtrainprefix': None, 'outdir': '/opt/ml/output/data'

[35m2022-09-18 15:34:20,482 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 15:34:20,486 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 15:34:20,516 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 15:34:20,744 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 15:34:30,549 - inference.batch_predict - INFO - Records to write: 103521[0m
[35m2022-09-18 15:34:30,549 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0604.json.rel.json.json[0m
[35m2022-09-18 15:34:33,263 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0604.json.rel.json[0m
[35m2022-09-18 15:34:33,331 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0656.json.rel.json with output in /opt/ml/processing/output/pubmed19n0656.json.rel.js

[32m2022-09-18 16:01:55,347 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[32m2022-09-18 16:01:55,456 - inference.predictor - INFO - Using device cuda:0[0m
[34m2022-09-18 16:11:48,190 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 16:11:48,193 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 16:11:48,223 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 16:11:48,437 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 16:11:57,755 - inference.batch_predict - INFO - Records to write: 94490[0m
[34m2022-09-18 16:11:57,756 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0651.json.rel.json.json[0m
[34m2022-09-18 16:12:00,312 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0651.json.rel.json[0m
[34m2022-09-18 16:12:00,379 - inference.b

[34m2022-09-18 17:06:39,320 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 17:06:39,322 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 17:06:39,354 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 17:06:39,573 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 17:06:48,702 - inference.batch_predict - INFO - Records to write: 97370[0m
[34m2022-09-18 17:06:48,702 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0664.json.rel.json.json[0m
[34m2022-09-18 17:06:51,300 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0664.json.rel.json[0m
[34m2022-09-18 17:06:51,371 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0600.json.rel.json with output in /opt/ml/processing/output/pubmed19n0600.json.rel.jso

[35m2022-09-18 17:58:15,165 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[35m2022-09-18 17:58:15,275 - inference.predictor - INFO - Using device cuda:0[0m
[34m2022-09-18 18:04:05,143 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 18:04:05,147 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 18:04:05,174 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 18:04:05,402 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 18:04:15,232 - inference.batch_predict - INFO - Records to write: 102010[0m
[34m2022-09-18 18:04:15,232 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0600.json.rel.json.json[0m
[34m2022-09-18 18:04:17,965 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0600.json.rel.json[0m
[34m2022-09-18 18:04:18,031 - inference.

[35m2022-09-18 18:58:19,941 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[35m2022-09-18 18:58:20,051 - inference.predictor - INFO - Using device cuda:0[0m
[34m2022-09-18 19:02:39,631 - inference.predictor - INFO - Completed inference cuda:0[0m
[34m2022-09-18 19:02:39,634 - inference.ensemble_predictor - INFO - Computing average [0m
[34m2022-09-18 19:02:39,663 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[34m2022-09-18 19:02:39,898 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[34m2022-09-18 19:02:49,758 - inference.batch_predict - INFO - Records to write: 102971[0m
[34m2022-09-18 19:02:49,758 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0658.json.rel.json.json[0m
[34m2022-09-18 19:02:52,518 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0658.json.rel.json[0m
[34m2022-09-18 19:02:52,587 - inference.

[35m2022-09-18 20:00:22,177 - inference.predictor - INFO - Completed inference cuda:0[0m
[35m2022-09-18 20:00:22,180 - inference.ensemble_predictor - INFO - Computing average [0m
[35m2022-09-18 20:00:22,213 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[35m2022-09-18 20:00:22,461 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[35m2022-09-18 20:00:33,251 - inference.batch_predict - INFO - Records to write: 111266[0m
[35m2022-09-18 20:00:33,251 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0628.json.rel.json.json[0m
[35m2022-09-18 20:00:36,179 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0628.json.rel.json[0m
[35m2022-09-18 20:00:36,249 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0672.json.rel.json with output in /opt/ml/processing/output/pubmed19n0672.json.rel.js

[32m2022-09-18 20:30:53,982 - inference.predictor - INFO - Completed inference cuda:0[0m
[32m2022-09-18 20:30:53,983 - inference.ensemble_predictor - INFO - Computing average [0m
[32m2022-09-18 20:30:53,988 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[32m2022-09-18 20:30:54,030 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[32m2022-09-18 20:30:55,650 - inference.batch_predict - INFO - Records to write: 18178[0m
[32m2022-09-18 20:30:55,650 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0669.json.rel.json.json[0m
[32m2022-09-18 20:30:56,154 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0669.json.rel.json[0m
[32m2022-09-18 20:30:56,182 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0663.json.rel.json with output in /opt/ml/processing/output/pubmed19n0663.json.rel.jso

[34m2022-09-18 21:05:45,699 - inference.ensemble_predictor - INFO - Using devices ['cuda:0'][0m
[34m2022-09-18 21:05:45,808 - inference.predictor - INFO - Using device cuda:0[0m
[32m2022-09-18 21:24:59,629 - inference.predictor - INFO - Completed inference cuda:0[0m
[32m2022-09-18 21:24:59,632 - inference.ensemble_predictor - INFO - Computing average [0m
[32m2022-09-18 21:24:59,660 - inference.ensemble_predictor - INFO - Computing ensemble prediction [0m
[32m2022-09-18 21:24:59,877 - inference.ensemble_predictor - INFO - Completed ensemble prediction [0m
[32m2022-09-18 21:25:09,307 - inference.batch_predict - INFO - Records to write: 96957[0m
[32m2022-09-18 21:25:09,308 - inference.batch_predict - INFO - Writing to file /opt/ml/processing/output/pubmed19n0663.json.rel.json.json[0m
[32m2022-09-18 21:25:11,894 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0663.json.rel.json[0m
[32m2022-09-18 21:25:11,964 - inference.b

[34m2022-09-18 21:56:18,889 - inference.batch_predict - INFO - Completed file /opt/ml/processing/input/data/jsondata/pubmed19n0630.json.rel.json[0m
[34m2022-09-18 21:56:18,950 - inference.batch_predict - INFO - Running inference on file /opt/ml/processing/input/data/jsondata/pubmed19n0621.json.rel.json with output in /opt/ml/processing/output/pubmed19n0621.json.rel.json.json[0m
[34m2022-09-18 21:56:18,952 - inference.batch_predict - INFO - Processing data file /opt/ml/processing/input/data/jsondata/pubmed19n0621.json.rel.json[0m
[34m2022-09-18 21:56:18,957 - inference.batch_predict - INFO - Using args :{'datasetfactory': 'datasets.chemprot_selfsupervised_dataset_factory.ChemprotSelfsupervisedDatasetFactory', 'traindir': '/opt/ml/input/data/train', 'valdir': '/opt/ml/input/data/val', 'testdir': None, 'modelfactory': 'models.bert_model_factory.BertModelFactory', 'pretrained_model_dir': '/opt/ml/input/data/PRETRAINED_MODEL', 'kfoldtrainprefix': None, 'outdir': '/opt/ml/output/data'

KeyboardInterrupt: 