In [16]:
from keras.layers import Input, Embedding, Conv1D, LSTM, Dense, Bidirectional, GRU
from keras.models import Model, Sequential
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import json
import numpy as np
import nltk
import copy
nltk.download('maxent_ne_chunker')
nltk.download('words')
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.chunk import conlltags2tree, tree2conlltags
from sklearn.metrics import classification_report

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\GIGABYTE\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\GIGABYTE\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


In [4]:
# Load JSON

def load_data_from_json(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = [json.loads(line) for line in file]

    words = [entry["words"] for entry in data]
    tags = [entry["tags"] for entry in data]

    return words, tags


train_words, train_tags = load_data_from_json('PhoNER_COVID19-main-BIO/PhoNER_COVID19-main/data/syllable/train_syllable.json')
dev_words, dev_tags = load_data_from_json('PhoNER_COVID19-main-BIO/PhoNER_COVID19-main/data/syllable/dev_syllable.json')
test_words, test_tags = load_data_from_json('PhoNER_COVID19-main-BIO/PhoNER_COVID19-main/data/syllable/test_syllable.json')

def merge_data(words1, tags1, words2, tags2):
    merged_words = words1 + words2
    merged_tags = tags1 + tags2
    return merged_words, merged_tags

traindev_words, traindev_tags = merge_data(train_words, train_tags, dev_words, dev_tags)


In [5]:
# Tokenize words
tokenizer = Tokenizer()
tokenizer.fit_on_texts(traindev_words)

# Đổi chữ thành ID
traindev_sequences = tokenizer.texts_to_sequences(traindev_words)
train_sequences = tokenizer.texts_to_sequences(train_words)
dev_sequences = tokenizer.texts_to_sequences(dev_words)
test_sequences = tokenizer.texts_to_sequences(test_words)


# Đổi tag thành ID
tag_to_index = {'O': 0, 'B-AGE': 1, 'B-DATE': 2, 'B-GENDER': 3, 'B-JOB': 4, 'B-LOCATION': 5, 'B-NAME': 6, 'B-ORGANIZATION': 7, 'B-PATIENT_ID': 8, 'B-SYMPTOM_AND_DISEASE': 9, 'B-TRANSPORTATION': 10, 'I-AGE': 11, 'I-DATE': 12, 'I-GENDER': 13, 'I-JOB': 14, 'I-LOCATION': 15, 'I-NAME': 16, 'I-ORGANIZATION': 17, 'I-PATIENT_ID': 18, 'I-SYMPTOM_AND_DISEASE': 19, 'I-TRANSPORTATION': 20}

traindev_tags = [[tag_to_index.get(tag) for tag in seq] for seq in traindev_tags]
train_tags = [[tag_to_index.get(tag) for tag in seq] for seq in train_tags]
dev_tags = [[tag_to_index.get(tag) for tag in seq] for seq in dev_tags]
test_tags = [[tag_to_index.get(tag) for tag in seq] for seq in test_tags]

In [6]:
# Get the word-to-index mapping
word_index = tokenizer.word_index

# Print the ID of a specific token
token_to_check = '.'
token_id = word_index.get(token_to_check)

if token_id is not None:
    print(f"The ID of '{token_to_check}' is: {token_id}")
else:
    print(f"{token_to_check} is not in the vocabulary.")
    

The ID of '.' is: 4


In [7]:
def remove_tokens(data, tags, tokens_to_remove):
    new_data = []
    new_tags = []
    for seq, tag_seq in zip(data, tags):
        new_seq = [word for word in seq if word not in tokens_to_remove]
        new_tag_seq = [tag for word, tag in zip(seq, tag_seq) if word not in tokens_to_remove]
        new_data.append(new_seq)
        new_tags.append(new_tag_seq)
    return new_data, new_tags

tokens_to_remove = [1, 4, 9, 32, 33, 151, 769]

# Remove instances of the specified tokens
# traindev_sequences, traindev_tags = remove_tokens(traindev_sequences, traindev_tags, tokens_to_remove)
# train_sequences, train_tags = remove_tokens(train_sequences, train_tags, tokens_to_remove)
# dev_sequences, dev_tags = remove_tokens(dev_sequences, dev_tags, tokens_to_remove)
# test_sequences, test_tags = remove_tokens(test_sequences, test_tags, tokens_to_remove)


In [8]:
# Print the modified data
print(train_sequences[0])

[165, 144, 1, 2, 7, 32, 195, 312, 54, 48, 693, 342, 156, 253, 69, 2, 40, 5, 30, 76, 522, 455, 47, 80, 41, 42, 4]


In [9]:
# Pad chuỗi entry theo câu
max_seq_len = max_seq_len = 200
traindev_data = pad_sequences(traindev_sequences, maxlen=max_seq_len, padding='post', truncating='post')
train_data = pad_sequences(dev_sequences, maxlen=max_seq_len, padding='post', truncating='post')
dev_data = pad_sequences(dev_sequences, maxlen=max_seq_len, padding='post', truncating='post')
test_data = pad_sequences(test_sequences, maxlen=max_seq_len, padding='post', truncating='post')

# Pad chuỗi tag theo câu
traindev_tags = pad_sequences(traindev_tags, maxlen=max_seq_len, padding='post')
train_tags = pad_sequences(train_tags, maxlen=max_seq_len, padding='post')
dev_tags = pad_sequences(dev_tags, maxlen=max_seq_len, padding='post')
test_tags = pad_sequences(test_tags, maxlen=max_seq_len, padding='post')

In [10]:
print(train_data[0])

[ 155  174  231   26  526    1  226  276   26   93  253  559    1    2
    7  190  196    1   44  119    2    3   12   70   16    2    7    6
  969    1  343  407 2812  559  454   28   37   99 1197 3511    4    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0 

In [11]:
# Module load file w2v
def load_embeddings(file_path):
    embeddings = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split()
            word = parts[0]
            vector = list(map(float, parts[-300:])) #  dims vector embedding
            embeddings[word] = vector
    return embeddings

def create_embedding_matrix(tokenizer, embeddings, embedding_dim):
    vocab_size = len(tokenizer.word_index) + 1
    embedding_matrix = np.zeros((vocab_size, embedding_dim))

    for word, i in tokenizer.word_index.items():
        embedding_vector = embeddings.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    return embedding_matrix


embedding_file_path = 'word2vec_vi_syllables_300dims.txt' # Input path file embedding (.txt)
word_embeddings = load_embeddings(embedding_file_path)
embedding_dim = 300  # dims vector embedding


# Load pre-train embedding
embeddings = load_embeddings(embedding_file_path)
# Embedding matrix
embedding_matrix = create_embedding_matrix(tokenizer, embeddings, embedding_dim)

In [12]:
print(dev_data[42])

[ 100    2    3   18  126  161 1005  482   11   32   56  112   17    2
    3   88  354  188    2    3  809   23   66    1   39   29   24    1
  847   23   34    1  949   29   24  179  723   23   34    1  370   29
   24    1  684   23   66    1  706   29    1   15 2777  170   47    2
    3   88  354   24    4    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0 

In [13]:
print(dev_tags[42])

[ 0  0  0  0  5 15 15 15  0  0  0  0  0  0  0  0  8  0  0  0  8  0  3  0
  1  0  0  0  8  0  3  0  1  0  0  0  8  0  3  0  1  0  0  0  8  0  3  0
  1  0  0  0  0  0  0  0  0  0  8  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0]


In [21]:
def prep_data_non_BIO(tags):
    tags_copy = copy.deepcopy(tags)
    
    for seq in range(tags_copy.shape[0]):
        for lb in range(tags_copy.shape[1]):
            if tags_copy[seq][lb] > 10:
                tags_copy[seq][lb] -= 10
    
    return tags_copy

traindev_tags_non_BIO = prep_data_non_BIO(traindev_tags)

In [27]:
lstm_units = 64
num_classes = 21

model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=embedding_dim, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)) # trainable = False
model.add(Bidirectional(LSTM(units=lstm_units, return_sequences=True)))  # Bidirectional LSTM
model.add(Bidirectional(GRU(units=lstm_units, return_sequences=True)))

model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 200, 300)          1219200   
                                                                 
 bidirectional_6 (Bidirecti  (None, 200, 128)          186880    
 onal)                                                           
                                                                 
 bidirectional_7 (Bidirecti  (None, 200, 128)          74496     
 onal)                                                           
                                                                 
 dense_3 (Dense)             (None, 200, 21)           2709      
                                                                 
Total params: 1483285 (5.66 MB)
Trainable params: 1483285 (5.66 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
def post_processing(pred, data):
    for seq in range(pred.shape[0]):
        for label in range(pred.shape[1]):
            if label > 0 and label < 199:
                if data[seq][label] in tokens_to_remove:
                    pred[seq][label] = 0
                
                if pred[seq][label] == pred[seq][label+1] and pred[seq][label] < 11 and pred[seq][label] > 0:
                    pred[seq][label+1] += 10
                if pred[seq][label-1] == 0 and pred[seq][label] < 21 and pred[seq][label] > 10:
                    pred[seq][label] -= 10
                if pred[seq][label+1] == pred[seq][label] - 10 and pred[seq][label] < 21 and pred[seq][label] > 10:
                    pred[seq][label+1] = pred[seq][label]
                

In [29]:
model.fit(traindev_data, traindev_tags, epochs=10, batch_size=32)


_, accuracy = model.evaluate(test_data, test_tags)
print(f'Accuracy: {accuracy * 100:.2f}%')

Epoch 1/10

KeyboardInterrupt: 

In [15]:
predictions = model.predict(test_data)
pred_tags = np.argmax(predictions, axis=-1)
# trainable true

print(classification_report(test_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))



  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score   support

                    O       0.99      1.00      0.99    569419
                B-AGE       0.71      0.66      0.68       582
               B-DATE       0.88      0.87      0.88      1654
             B-GENDER       0.75      0.70      0.72       462
                B-JOB       0.70      0.47      0.56       173
           B-LOCATION       0.76      0.74      0.75      4441
               B-NAME       0.83      0.55      0.66       318
       B-ORGANIZATION       0.81      0.81      0.81       771
         B-PATIENT_ID       0.88      0.80      0.84      2005
B-SYMPTOM_AND_DISEASE       0.83      0.77      0.80      1136
     B-TRANSPORTATION       0.80      0.54      0.65       193
                I-AGE       0.00      0.00      0.00         6
               I-DATE       0.91      0.93      0.92      1752
             I-GENDER       0.00      0.00      0.00         1
                I-JOB       0.64      0.38      0.48  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
predictions = model.predict(test_data)
pred_tags = np.argmax(predictions, axis=-1)
# trainable false

print(classification_report(test_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score   support

                    O       0.99      1.00      0.99    569419
                B-AGE       0.68      0.70      0.69       582
               B-DATE       0.89      0.87      0.88      1654
             B-GENDER       0.70      0.73      0.72       462
                B-JOB       0.61      0.49      0.54       173
           B-LOCATION       0.76      0.71      0.73      4441
               B-NAME       0.75      0.40      0.52       318
       B-ORGANIZATION       0.84      0.77      0.80       771
         B-PATIENT_ID       0.89      0.80      0.84      2005
B-SYMPTOM_AND_DISEASE       0.87      0.68      0.77      1136
     B-TRANSPORTATION       0.75      0.48      0.59       193
                I-AGE       0.00      0.00      0.00         6
               I-DATE       0.92      0.92      0.92      1752
             I-GENDER       0.00      0.00      0.00         1
                I-JOB       0.69      0.46      0.55  

  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
predictions = model.predict(traindev_data)
pred_tags = np.argmax(predictions, axis=-1)
# trainable true

print(classification_report(traindev_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score   support

                    O       1.00      1.00      1.00   1347627
                B-AGE       0.99      0.95      0.97      1043
               B-DATE       0.99      1.00      0.99      3652
             B-GENDER       0.99      0.90      0.94       819
                B-JOB       0.97      0.89      0.93       337
           B-LOCATION       0.99      0.99      0.99      8135
               B-NAME       0.98      0.97      0.98       537
       B-ORGANIZATION       0.97      0.98      0.97      1688
         B-PATIENT_ID       0.99      1.00      0.99      4516
B-SYMPTOM_AND_DISEASE       0.98      0.98      0.98      2205
     B-TRANSPORTATION       1.00      0.99      0.99       313
                I-AGE       0.00      0.00      0.00         2
               I-DATE       0.99      1.00      1.00      3618
             I-GENDER       0.00      0.00      0.00        16
                I-JOB       0.96      0.92      0.94  

  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
predictions = model.predict(traindev_data)
pred_tags = np.argmax(predictions, axis=-1)
# trainable false

print(classification_report(traindev_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score   support

                    O       1.00      1.00      1.00   1347627
                B-AGE       0.92      0.97      0.95      1043
               B-DATE       0.99      0.99      0.99      3652
             B-GENDER       0.89      0.95      0.92       819
                B-JOB       0.86      0.82      0.84       337
           B-LOCATION       0.97      0.93      0.95      8135
               B-NAME       0.86      0.73      0.79       537
       B-ORGANIZATION       0.94      0.92      0.93      1688
         B-PATIENT_ID       0.98      0.99      0.99      4516
B-SYMPTOM_AND_DISEASE       0.95      0.86      0.90      2205
     B-TRANSPORTATION       0.95      0.89      0.92       313
                I-AGE       0.00      0.00      0.00         2
               I-DATE       0.99      0.99      0.99      3618
             I-GENDER       1.00      0.19      0.32        16
                I-JOB       0.85      0.79      0.82  

  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
predictions = model.predict(test_data)
pred_tags = np.argmax(predictions, axis=-1)
post_processing(pred_tags, test_data)
# trainable true

print(classification_report(test_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score   support

                    O       0.99      1.00      0.99    569419
                B-AGE       0.71      0.66      0.68       582
               B-DATE       0.88      0.87      0.88      1654
             B-GENDER       0.75      0.70      0.72       462
                B-JOB       0.57      0.45      0.50       173
           B-LOCATION       0.74      0.74      0.74      4441
               B-NAME       0.83      0.55      0.66       318
       B-ORGANIZATION       0.72      0.81      0.77       771
         B-PATIENT_ID       0.88      0.80      0.84      2005
B-SYMPTOM_AND_DISEASE       0.78      0.78      0.78      1136
     B-TRANSPORTATION       0.81      0.54      0.65       193
                I-AGE       1.00      0.17      0.29         6
               I-DATE       0.91      0.93      0.92      1752
             I-GENDER       0.00      0.00      0.00         1
                I-JOB       0.67      0.35      0.46  

  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
predictions = model.predict(test_data)
pred_tags = np.argmax(predictions, axis=-1)
post_processing(pred_tags, test_data)
# trainable true non BIO

print(classification_report(test_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))

                       precision    recall  f1-score   support

                    O       0.99      1.00      0.99    569419
                B-AGE       0.69      0.69      0.69       582
               B-DATE       0.88      0.87      0.88      1654
             B-GENDER       0.72      0.71      0.71       462
                B-JOB       0.44      0.42      0.43       173
           B-LOCATION       0.72      0.74      0.73      4441
               B-NAME       0.80      0.52      0.63       318
       B-ORGANIZATION       0.65      0.79      0.71       771
         B-PATIENT_ID       0.89      0.80      0.84      2005
B-SYMPTOM_AND_DISEASE       0.77      0.78      0.77      1136
     B-TRANSPORTATION       0.80      0.54      0.65       193
                I-AGE       0.67      0.33      0.44         6
               I-DATE       0.91      0.93      0.92      1752
             I-GENDER       0.00      0.00      0.00         1
                I-JOB       0.70      0.36      0.48  

In [None]:
predictions = model.predict(test_data)
pred_tags = np.argmax(predictions, axis=-1)
post_processing(pred_tags, test_data)
# trainable false

print(classification_report(test_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))



In [15]:
predictions = model.predict(test_data)
pred_tags = np.argmax(predictions, axis=-1)

print(classification_report(test_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))

                       precision    recall  f1-score   support

                    O       0.98      0.99      0.99    271361
                B-AGE       0.62      0.66      0.64       516
               B-DATE       0.87      0.88      0.87      1593
             B-GENDER       0.67      0.72      0.69       392
                B-JOB       0.58      0.45      0.50       170
           B-LOCATION       0.73      0.66      0.69      4293
               B-NAME       0.84      0.64      0.73       252
       B-ORGANIZATION       0.83      0.74      0.78       768
         B-PATIENT_ID       0.86      0.86      0.86      1676
B-SYMPTOM_AND_DISEASE       0.81      0.72      0.76      1112
     B-TRANSPORTATION       0.79      0.59      0.67       158
                I-AGE       0.00      0.00      0.00         6
               I-DATE       0.90      0.92      0.91      1685
             I-GENDER       0.00      0.00      0.00         1
                I-JOB       0.53      0.39      0.45  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
predictions = model.predict(test_data)
pred_tags = np.argmax(predictions, axis=-1)

print(classification_report(test_tags.flatten(), pred_tags.flatten(), target_names=list(tag_to_index.keys())))



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score   support

                    O       0.99      0.99      0.99    421675
                B-AGE       0.73      0.67      0.70       517
               B-DATE       0.87      0.89      0.88      1577
             B-GENDER       0.79      0.70      0.75       396
                B-JOB       0.71      0.37      0.49       171
           B-LOCATION       0.71      0.71      0.71      4260
               B-NAME       0.79      0.62      0.69       252
       B-ORGANIZATION       0.85      0.74      0.79       768
         B-PATIENT_ID       0.88      0.88      0.88      1657
B-SYMPTOM_AND_DISEASE       0.80      0.77      0.78      1109
     B-TRANSPORTATION       0.75      0.60      0.67       157
                I-AGE       0.00      0.00      0.00         6
               I-DATE       0.90      0.95      0.92      1665
             I-GENDER       0.00      0.00      0.00         1
                I-JOB       0.68      0.37      0.47  

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# JOB luôn là tag perform tệ nhất ở các lần thử, các model khác nhau, do ít dữ liệu train?

In [26]:
def find_dist(label):
    label_counts = {}
    
    for seq in range(label.shape[0]):
        for tag in range(label.shape[1]):
            current_label = label[seq][tag]
            
            if current_label not in label_counts:
                label_counts[current_label] = 1
            else:
                label_counts[current_label] += 1
    sorted_label_counts = sorted(label_counts.items(), key=lambda x: x[0])
    for label_type, count in sorted_label_counts:
        print(f"Label {label_type}: {count}")


find_dist(traindev_tags)
find_dist(test_tags)


Label 0: 1347627
Label 1: 1043
Label 2: 3652
Label 3: 819
Label 4: 337
Label 5: 8135
Label 6: 537
Label 7: 1688
Label 8: 4516
Label 9: 2205
Label 10: 313
Label 11: 2
Label 12: 3618
Label 13: 16
Label 14: 527
Label 15: 18935
Label 16: 132
Label 17: 7448
Label 18: 22
Label 19: 3734
Label 20: 94
Label 0: 569419
Label 1: 582
Label 2: 1654
Label 3: 462
Label 4: 173
Label 5: 4441
Label 6: 318
Label 7: 771
Label 8: 2005
Label 9: 1136
Label 10: 193
Label 11: 6
Label 12: 1752
Label 13: 1
Label 14: 347
Label 15: 10729
Label 16: 84
Label 17: 3672
Label 18: 27
Label 19: 2156
Label 20: 72
