In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import pickle
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from spacy.lang.en import English

2022-01-13 19:21:57.644306: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [4]:
# Word-level tokenizer: vocabulary capped at 68000 tokens, every sentence padded
# or truncated to 55 tokens (95% of sentences contain 55 words, per the data analysis).
vectorizer = layers.experimental.preprocessing.TextVectorization(
    max_tokens=68000,
    output_sequence_length=55,
)

# Character-level tokenizer: at most 60 distinct tokens, every sentence padded
# or truncated to 300 tokens (95% of the sentences have ~300 chars).
vectorizer_char = layers.experimental.preprocessing.TextVectorization(
    max_tokens=60,
    output_sequence_length=300,
    name='Character_vectorizer',
)

In [5]:
# Word embedding table: 64843 rows of 300-d vectors, frozen (trainable=False).
# No vectors are supplied here, so the values must come from weights loaded later.
# NOTE(review): input_dim (64843) is smaller than the word vectorizer's
# max_tokens (68000) -- presumably the real vocabulary size; confirm.
embedding_layer = layers.Embedding(
    input_dim=64843,
    output_dim=300,
    trainable=False,
    name="Pre_trained",
)

# Character embedding table: 28 rows of 30-d vectors, trainable.
# NOTE(review): input_dim (28) is smaller than the character vectorizer's
# max_tokens (60); token ids >= 28 would fall outside the table -- confirm the
# character vocabulary never exceeds 28 entries.
char_layer = layers.Embedding(
    input_dim=28,
    output_dim=30,
    name="char_layer",
)

In [6]:
# Custom Attention layer
class attention(layers.Layer):
    """Attention layer that re-weights each timestep of a sequence.

    ``build`` creates a projection weight ``W`` of shape (features, 1) and a
    bias ``b`` of shape (steps, 1), taken from ``input_shape[-1]`` and
    ``input_shape[1]`` respectively, so the sequence length must be known at
    build time. ``call`` scores every timestep with ``tanh(x . W + b)``,
    normalises the scores with a softmax over the last axis of the squeezed
    scores, and multiplies the input by those weights. The output has the same
    shape as the input (timesteps are scaled, not summed).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable projection weight and per-timestep bias."""
        feature_dim = input_shape[-1]
        num_steps = input_shape[1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(num_steps, 1),
            initializer="zeros",
        )
        super().build(input_shape)

    def call(self, intput_emb):
        """Return ``intput_emb`` scaled by its softmax-normalised attention weights."""
        K = tf.keras.backend
        # One unnormalised score per timestep; drop the trailing size-1 axis.
        scores = K.squeeze(K.tanh(K.dot(intput_emb, self.W) + self.b), axis=-1)
        # Normalise across timesteps, then restore the axis so it broadcasts
        # against the feature dimension of the input.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        return intput_emb * weights
    
# THE MODEL:
# Three input branches (sentence words, sentence characters, sentence position)
# are merged and classified into 5 classes.
# NOTE(review): weights are restored from a checkpoint after this cell, so the
# layer creation / wiring order here should presumably be kept exactly as it
# was at training time -- confirm before reordering anything.

# Word Embeddings Model
# Scalar string input -> word ids (55 per sentence) -> frozen 300-d embeddings
# -> BiLSTM returning the full sequence -> attention re-weighting (same shape).
sent_inputs = layers.Input(shape=[], dtype=tf.string)
sent_vec = vectorizer(sent_inputs)
word_embeddings = embedding_layer(sent_vec)
word_layer_2= layers.Bidirectional(layers.LSTM(128, return_sequences = True))(word_embeddings)
attention_layer=attention()(word_layer_2)
# Sub-model wrapper; below it is only used to fetch `.input`.
word_model = tf.keras.Model(inputs=sent_inputs,
                            outputs=attention_layer)

# Character Embeddings Model
# Scalar string input -> character ids (300 per sentence) -> 30-d embeddings
# -> BiLSTM returning the full sequence.
char_inputs = layers.Input(shape=[], dtype=tf.string)
char_vectorizer = vectorizer_char(char_inputs)
char_embeddings = char_layer(char_vectorizer)
char_layer_1= layers.Bidirectional(layers.LSTM(128, return_sequences=True))(char_embeddings) 
# Sub-model wrapper; below it is only used to fetch `.input`.
char_model = tf.keras.Model(inputs=char_inputs,
                          outputs=char_layer_1)

# Position model
# 460-wide integer vector -> Dense(64, relu).
# NOTE(review): 460 is presumably the width of the one-hot position encoding
# produced by the fitted encoder -- confirm against the encoder.
position_inputs = layers.Input(shape=(460,), dtype = tf.int64)
pos_dense = layers.Dense(64, activation = 'relu')(position_inputs)
pos_model = tf.keras.Model(position_inputs, pos_dense)

# Join the word and character sequences along the time axis (axis 1), so the
# next LSTM reads the word timesteps followed by the character timesteps.
word_char_layer = layers.Concatenate(axis =1)([attention_layer,
                                        char_layer_1])

# Collapse the joined sequence to a single vector, then regularise with dropout.
word_char_lstm = layers.Bidirectional(layers.LSTM(128))(word_char_layer)
word_char_dropout = layers.Dropout(0.5)(word_char_lstm)

# Append the position features to the text features.
hybrid_layer = layers.Concatenate(name="word_char_pos")([word_char_dropout,
                                                        pos_model.output])

# 5-way softmax classifier over the merged features.
output = layers.Dense(5, activation = 'softmax')(hybrid_layer)
# Full model; inputs are ordered (sentence text, character text, position).
model = tf.keras.Model(inputs = [word_model.input,
                                 char_model.input,
                                 pos_model.input],
                       outputs =  output)

# Categorical cross-entropy with label smoothing 0.3, Adam at lr 0.001,
# accuracy as the reported metric.
model.compile(loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing= 0.3),
                optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
                metrics = ['accuracy'])

In [7]:
# Restore the trained weights from the checkpoint prefix "Model" (path is
# relative to the working directory). The call returns a load-status object,
# which is not inspected here.
# NOTE(review): consider checking the returned status (e.g. assert_consumed /
# expect_partial) so a silently partial restore is noticed -- confirm intent.
model.load_weights("Model")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f53a01b6a90>

In [2]:
from model import transformer

In [3]:
# Build the classifier through the project-local `transformer` factory.
# NOTE(review): this rebinds `model`, replacing any model constructed earlier
# in the notebook; presumably the factory returns the same architecture with
# trained weights already loaded -- confirm in model.py.
model = transformer()

2022-01-13 19:22:21.109493: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-01-13 19:22:21.110287: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-01-13 19:22:21.214805: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-13 19:22:21.215401: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1050 computeCapability: 6.1
coreClock: 1.493GHz coreCount: 5 deviceMemorySize: 3.95GiB deviceMemoryBandwidth: 104.43GiB/s
2022-01-13 19:22:21.215458: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-01-13 19:22:21.220541: I tensorflow/stream_executor/platform/def

In [4]:
import joblib
# Load the fitted position encoder used by `classify` (read as a global there).
# NOTE(review): joblib files are pickle-based; only load 'one_hot.joblib' from
# a trusted source.
one_hot = joblib.load('one_hot.joblib')

In [5]:
from nltk import sent_tokenize

In [6]:
import pickle
#one_hot = OneHotEncoder()
def classify(data, model):
    """Split an abstract into sentences and print a predicted section label for each.

    Args:
        data: Raw abstract text. ``None``, empty or whitespace-only input
            prints nothing and returns immediately.
        model: Object whose ``predict`` accepts
            ``x=(sentences, spaced_characters, one_hot_positions)`` and is
            expected to return one row of 5 class scores per sentence.

    Returns:
        None. For every sentence the label is printed as ``<LABEL> :`` followed
        by the sentence text and a blank line.

    Notes:
        Relies on the module-level ``one_hot`` encoder and on ``sent_tokenize``
        being in scope. The sentence text is passed to the model unchanged
        (no lower-casing or stripping is applied here).
    """
    classes = ["BACKGROUND", "CONCLUSIONS", "METHODS", "OBJECTIVE", "RESULTS"]

    # Nothing to classify: bail out before touching the tokenizer or the model.
    # Without this guard an empty abstract yields an empty DataFrame that has
    # no `text` column and the function crashes with AttributeError.
    if not data or not data.strip():
        return

    sentences = sent_tokenize(data)
    if not sentences:
        return

    total = len(sentences)
    # One record per sentence; `position` is encoded as "<index>_of_<total>"
    # (1-based), which is the category string handed to the one-hot encoder.
    abstract = pd.DataFrame(
        [
            {"position": str(line_no + 1) + "_of_" + str(total), "text": sentence}
            for line_no, sentence in enumerate(sentences)
        ]
    )

    abs_sent = abstract.text
    # Character branch input: the sentence with a space between every character.
    abs_char = abstract.text.apply(lambda text: ' '.join(text))
    abs_pos = one_hot.transform(np.expand_dims(abstract.position, axis=1)).toarray()

    abs_pred_probs = model.predict(x=(abs_sent,
                                      abs_char,
                                      abs_pos))

    # Index of the highest-scoring class for each sentence.
    abs_preds = np.argmax(abs_pred_probs, axis=1)
    abs_pred_classes = [classes[i] for i in abs_preds]

    for label, sentence in zip(abs_pred_classes, sentences):
        print(label, ": ")
        print(sentence, "\n")

In [7]:
# Sample abstract (vitamin D supplementation in COVID-19) used as demo input
# for `classify`; section boundaries are only marked by double spaces.
abstract_1 = " We aimed to establish an acute treatment protocol to increase serum vitamin D, evaluate the effectiveness of vitamin D3 supplementation, and reveal the potential mechanisms in COVID-19.  We retrospectively analyzed the data of 867 COVID-19 cases. Then, a prospective study was conducted, including 23 healthy individuals and 210 cases. A total of 163 cases had vitamin D supplementation, and 95 were followed for 14 days. Clinical outcomes, routine blood biomarkers, serum levels of vitamin D metabolism, and action mechanism-related parameters were evaluated.  Our treatment protocol increased the serum 25OHD levels significantly to above 30 ng/mL within two weeks. COVID-19 cases (no comorbidities, no vitamin D treatment, 25OHD <30 ng/mL) had 1.9-fold increased risk of having hospitalization longer than 8 days compared with the cases with comorbidities and vitamin D treatment. Having vitamin D treatment decreased the mortality rate by 2.14 times. The correlation analysis of specific serum biomarkers with 25OHD indicated that the vitamin D action in COVID-19 might involve regulation of INOS1, IL1B, IFNg, cathelicidin-LL37, and ICAM1.  Vitamin D treatment shortened hospital stay and decreased mortality in COVID-19 cases, even in the existence of comorbidities. Vitamin D supplementation is effective on various target parameters; therefore, it is essential for COVID-19 treatment. "


In [8]:
# Print the predicted section label for every sentence of the sample abstract.
classify(abstract_1, model)

2022-01-13 19:22:30.426745: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-01-13 19:22:30.453947: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2499950000 Hz
2022-01-13 19:22:31.907973: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-01-13 19:22:32.066997: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


BACKGROUND : 
 We aimed to establish an acute treatment protocol to increase serum vitamin D, evaluate the effectiveness of vitamin D3 supplementation, and reveal the potential mechanisms in COVID-19. 

METHODS : 
We retrospectively analyzed the data of 867 COVID-19 cases. 

METHODS : 
Then, a prospective study was conducted, including 23 healthy individuals and 210 cases. 

RESULTS : 
A total of 163 cases had vitamin D supplementation, and 95 were followed for 14 days. 

METHODS : 
Clinical outcomes, routine blood biomarkers, serum levels of vitamin D metabolism, and action mechanism-related parameters were evaluated. 

RESULTS : 
Our treatment protocol increased the serum 25OHD levels significantly to above 30 ng/mL within two weeks. 

RESULTS : 
COVID-19 cases (no comorbidities, no vitamin D treatment, 25OHD <30 ng/mL) had 1.9-fold increased risk of having hospitalization longer than 8 days compared with the cases with comorbidities and vitamin D treatment. 

RESULTS : 
Having vitam