In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, TimeDistributed, Dense, Bidirectional
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

In [2]:
def load_data(file_path):
    data = pd.read_excel(file_path)
    return data

In [3]:
def preprocess_data(data, max_len=5000):
    data['X'] = data['auth_name'].apply(lambda x: x.lower())
    data['y'] = data['fn'].apply(lambda x: x.lower())
    input_tokenizer = Tokenizer(char_level=True)
    input_tokenizer.fit_on_texts(data['X'].values)
    print(input_tokenizer.index_word)
    input_sequences = input_tokenizer.texts_to_sequences(data['X'].values)
#     print(input_sequences)
    input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='post')
    output_tokenizer = Tokenizer(char_level=True)
    output_tokenizer.fit_on_texts(data['y'].values)
    output_sequences = input_tokenizer.texts_to_sequences(data['y'].values)
    print(output_sequences)
    output_sequences = pad_sequences(output_sequences, maxlen=max_len, padding='post')
    output_sequences = tf.keras.utils.to_categorical(output_sequences, num_classes=len(input_tokenizer.word_index)+1)
    return input_sequences, output_sequences, input_tokenizer, input_tokenizer

In [4]:
def build_model(input_vocab_size, output_vocab_size, max_len=5000, embedding_dim=64, lstm_units=128):
    inputs = Input(shape=(max_len,))
    x = Embedding(input_dim=input_vocab_size, output_dim=embedding_dim, input_length=max_len)(inputs)
    x = Bidirectional(LSTM(lstm_units, return_sequences=True))(x)
    outputs = TimeDistributed(Dense(output_vocab_size, activation='softmax'))(x)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [20]:
def train_model(model, X_train, y_train, X_val, y_val, batch_size=8, epochs=10):
    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val))
    return history

In [21]:
def predict(model, input_text, input_tokenizer, output_tokenizer, max_len=5000):
    input_sequence = input_tokenizer.texts_to_sequences([input_text])
    input_sequence = pad_sequences(input_sequence, maxlen=max_len, padding='post')
    output_sequence = model.predict(input_sequence)
    output_sequence_indices = np.argmax(output_sequence, axis=-1)
    print(output_sequence_indices)
    predicted_text = output_tokenizer.sequences_to_texts(np.argmax(output_sequence, axis=-1))
    return predicted_text[0]

In [22]:
file_path = "data.xlsx"
data = load_data(file_path)

In [23]:
input_sequences, output_sequences, input_tokenizer, output_tokenizer = preprocess_data(data)

{1: ' ', 2: 'e', 3: 'i', 4: 'a', 5: 'n', 6: 'o', 7: 'r', 8: 's', 9: 't', 10: 'd', 11: 'c', 12: 'l', 13: 'h', 14: 'g', 15: 'm', 16: 'u', 17: 'f', 18: 'p', 19: 'y', 20: 'v', 21: '.', 22: ',', 23: 'b', 24: 'w', 25: '\n', 26: 'k', 27: 'x', 28: ':', 29: 'z', 30: '(', 31: ')', 32: 'j', 33: "'", 34: '-', 35: 'q', 36: '"', 37: '1', 38: '5', 39: '9', 40: '7', 41: '2', 42: '0', 43: '6', 44: '8', 45: '/', 46: '4', 47: 'é', 48: '&', 49: 'ä', 50: 'ô', 51: 'ş', 52: 'ü', 53: 'ó', 54: 'á'}
[[19, 16, 2, 1, 11, 4, 6], [23, 2, 5, 8, 13, 2, 5, 14, 1, 35, 3, 16], [7, 6, 23, 2, 7, 9, 1, 17, 12, 2, 11, 26, 1, 32], [13, 6, 12, 10, 2, 5, 1, 24, 16], [24, 3, 12, 12, 3, 4, 15, 1, 13, 19, 8, 12, 6, 18], [17, 12, 2, 15, 15, 3, 5, 14, 1, 17, 6, 7, 8, 23, 2, 7, 14], [32, 4, 15, 2, 8, 1, 11, 4, 7, 7], [4, 5, 10, 7, 2, 32, 1, 12, 19, 8, 13, 11, 13, 3, 26], [23, 6, 23, 23, 19, 1, 26, 4, 12, 23], [32, 6, 13, 5, 1, 26, 16, 7, 13, 4, 5, 2, 24, 3, 11, 29], [4, 5, 10, 2, 7, 4, 5, 3, 26, 1, 9, 6, 15, 4, 8, 3, 4, 5], [5, 1, 1

In [24]:
X_train, X_val, y_train, y_val = train_test_split(input_sequences, output_sequences, test_size=0.2, random_state=42)

In [25]:
input_vocab_size = len(input_tokenizer.word_index) + 1
output_vocab_size = len(output_tokenizer.word_index) + 1
print(input_tokenizer.word_index)
print(output_tokenizer.word_index)
model = build_model(input_vocab_size, output_vocab_size)

{' ': 1, 'e': 2, 'i': 3, 'a': 4, 'n': 5, 'o': 6, 'r': 7, 's': 8, 't': 9, 'd': 10, 'c': 11, 'l': 12, 'h': 13, 'g': 14, 'm': 15, 'u': 16, 'f': 17, 'p': 18, 'y': 19, 'v': 20, '.': 21, ',': 22, 'b': 23, 'w': 24, '\n': 25, 'k': 26, 'x': 27, ':': 28, 'z': 29, '(': 30, ')': 31, 'j': 32, "'": 33, '-': 34, 'q': 35, '"': 36, '1': 37, '5': 38, '9': 39, '7': 40, '2': 41, '0': 42, '6': 43, '8': 44, '/': 45, '4': 46, 'é': 47, '&': 48, 'ä': 49, 'ô': 50, 'ş': 51, 'ü': 52, 'ó': 53, 'á': 54}
{' ': 1, 'e': 2, 'i': 3, 'a': 4, 'n': 5, 'o': 6, 'r': 7, 's': 8, 't': 9, 'd': 10, 'c': 11, 'l': 12, 'h': 13, 'g': 14, 'm': 15, 'u': 16, 'f': 17, 'p': 18, 'y': 19, 'v': 20, '.': 21, ',': 22, 'b': 23, 'w': 24, '\n': 25, 'k': 26, 'x': 27, ':': 28, 'z': 29, '(': 30, ')': 31, 'j': 32, "'": 33, '-': 34, 'q': 35, '"': 36, '1': 37, '5': 38, '9': 39, '7': 40, '2': 41, '0': 42, '6': 43, '8': 44, '/': 45, '4': 46, 'é': 47, '&': 48, 'ä': 49, 'ô': 50, 'ş': 51, 'ü': 52, 'ó': 53, 'á': 54}


In [None]:
history = train_model(model, X_train, y_train, X_val, y_val)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

In [None]:
input_text = "Dr. Bobby Kalb is a renowned radiologist who has made significant contributions to the field of medical imaging. With an illustrious career spanning over three decades, Dr. Kalb has earned a reputation as a leading expert in the field, known for his exceptional skills, extensive knowledge, and innovative research. Education History: Dr. Kalb's passion for medicine was ignited at an early age, leading him to pursue an illustrious educational journey. He graduated magna cum laude from a prestigious medical school, earning his Doctor of Medicine (M.D.) degree. He then completed his residency in diagnostic radiology at a renowned academic medical center, where he honed his skills in various imaging modalities and gained expertise in interpreting complex medical images. Professional History: Dr. Kalb's professional career has been marked by his dedication to advancing the field of radiology. He has held numerous leadership positions in academic and clinical settings, including serving as the Chief of Radiology at several prominent hospitals. He has also been a sought-after consultant for radiology departments across the country, providing expert guidance on complex cases and contributing to the development of cutting-edge imaging protocols. Medical Research and Special Interests: Throughout his career, Dr. Kalb has been actively involved in medical research, with a particular focus on advancing the field of abdominal imaging. His pioneering work has resulted in numerous publications in prestigious medical journals, and he has been invited to present his research findings at national and international conferences. Dr. Kalb's special interests lie in utilizing advanced imaging techniques, such as magnetic resonance imaging (MRI), computed tomography (CT), and ultrasound, to diagnose and manage complex abdominal conditions, including liver diseases, pancreatic disorders, and gastrointestinal malignancies. Books: Dr. Kalb has also authored several authoritative books on radiology, which have become widely recognized as essential references for practitioners in the field. His books cover a wide range of topics, including abdominal imaging, radiologic anatomy, and advanced imaging techniques. His work has been lauded for its comprehensive and practical approach, providing valuable insights and guidance to radiologists at all stages of their careers. Awards Procured in Medical Space: Dr. Kalb's contributions to the field of radiology have been widely recognized, and he has received numerous awards for his excellence in research, teaching, and clinical practice. His accolades include the prestigious ""Radiologist of the Year"" award from a leading radiology society and the ""Outstanding Educator"" award from a renowned academic institution. He has also been recognized for his philanthropic efforts, including his contributions to improving radiology education in underserved areas. Memberships: Dr. Kalb is an active member of several esteemed medical societies, including the American College of Radiology, the Radiological Society of North America, and the Society of Abdominal Radiology. He has also served on the board of directors for several radiology organizations, where he has played an instrumental role in shaping the future of the field. In conclusion, Dr. Bobby Kalb's illustrious career as a radiologist has been marked by his unwavering commitment to advancing the field through his exceptional skills, innovative research, and dedication to teaching. His contributions to abdominal imaging have been widely recognized, and he continues to be a leading authority in the field, making a lasting impact on the practice of radiology."
predicted_expert_name = predict(model, input_text, input_tokenizer, output_tokenizer)
print("Predicted expert name:", predicted_expert_name)