### In this code, we used all the previously trained models, and passed a new image through these models.
1. First, an image is sent to the encoder (autoencoder_block4_conv4.h5) to get image embeddings of shape 28x28x512.
2. The image embeddings for that image is passed through the ImgEmb-To-TextEmb model to get text embeddings.
3. The text embedding is passed through another model (model4_unique3.h5) which finally generates the text sentence.

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import load_img, img_to_array

2023-12-09 06:14:25.542451: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-09 06:14:25.590952: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
autoencoder = load_model('autoencoder_block4_conv4.h5')
autoencoder.summary()

2023-12-09 06:14:29.769575: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:4b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-09 06:14:29.797343: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:4b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-09 06:14:29.797391: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:4b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-09 06:14:29.805599: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:4b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-09 06:14:29.805652: I tensorflow/compile

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 model (Functional)          (None, 28, 28, 512)       10585152  
                                                                 
 model_1 (Functional)        (None, 224, 224, 3)       1550467   
                                                                 
Total params: 12135619 (46.29 MB)
Trainable params: 12135619 (46.29 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [3]:
from tensorflow.keras.models import Model

# Assuming 'autoencoder' is your existing autoencoder model
# And 'autoencoder.get_layer('model')' is the encoder
encoder = autoencoder.get_layer('model')

# Create a new model using the input and output of the encoder
encoder_model = Model(inputs=encoder.input, outputs=encoder.output)

# Summary of the encoder model
encoder_model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [4]:
img = 'aug_0_0.png'
image_dim = (224, 224, 3)
# Load and preprocess the images as NumPy arrays
def load_and_preprocess_image(filename):
    img = load_img(filename, target_size=image_dim[:2])
    img = img_to_array(img)
    
    img /= 255.0  # Rescale pixel values to [0, 1]
    return img

In [5]:
img_emb = load_and_preprocess_image(img)
img_emb.shape

(224, 224, 3)

In [6]:
img = np.expand_dims(img_emb, axis=0)
image_emb = encoder_model.predict(img)

2023-12-09 06:14:45.546034: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8905
2023-12-09 06:14:47.287451: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory




In [7]:
# Counting non-zero elements
non_zero_count = np.count_nonzero(image_emb)

print("Number of non-zero elements:", non_zero_count)

Number of non-zero elements: 24747


In [8]:
has_nan = np.isnan(image_emb).any()

if has_nan:
    print("NaN values found in the embedding. Replacing with 0.")
    # Replace NaN values with 0
    embedding = np.nan_to_num(image_emb)
else:
    print("No NaN values found in the embedding.")

No NaN values found in the embedding.


In [9]:
model2 = load_model('ImgEmb-To-TextEmb.h5')
model2.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 512)]     0         
                                                                 
 conv2d (Conv2D)             (None, 28, 28, 512)       2359808   
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 512)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 256)       1179904   
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 256)         0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 7, 128)         295040

In [10]:
text_embedding = model2.predict(image_emb)
text_embedding



2023-12-09 06:14:56.627632: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


array([[ 1.61573678e-01, -2.61288077e-01,  6.66614547e-02,
         2.95694917e-01,  1.85754657e-01, -1.05258316e-01,
        -1.18384445e-02,  2.47880816e-04,  1.05237849e-01,
         4.67540249e-02, -1.55207589e-02,  2.28024065e-01,
        -9.15555209e-02,  3.82096767e-02, -6.14957035e-01,
        -9.24022347e-02,  3.13957155e-01, -2.99370810e-02,
         1.79346278e-01, -1.76533192e-01, -2.37248152e-01,
         1.39791399e-01, -3.56571823e-01, -1.09637097e-01,
         2.96602268e-02, -2.80378610e-01,  1.56205714e-01,
         3.66470516e-01, -4.58159298e-03,  2.53580213e-01,
         1.79607362e-01,  2.96160989e-02,  2.03085780e-01,
         6.49701208e-02,  1.67321742e-01, -8.88357162e-02,
         7.78879821e-02,  2.99046397e-01, -4.15513851e-02,
         6.11527376e-02,  1.84384018e-01, -1.31585255e-01,
         9.10161585e-02,  2.02044621e-02,  3.23419482e-01,
        -5.56980297e-02, -5.88123314e-03,  5.34872860e-02,
        -6.43694401e-01, -4.20472175e-02,  1.18721724e-0

In [11]:
text_embedding.shape

(1, 768)

In [12]:
new_text_embedding = np.reshape(text_embedding, (1, 768, 1))  # Reshape for single prediction
new_text_embedding

array([[[ 1.61573678e-01],
        [-2.61288077e-01],
        [ 6.66614547e-02],
        [ 2.95694917e-01],
        [ 1.85754657e-01],
        [-1.05258316e-01],
        [-1.18384445e-02],
        [ 2.47880816e-04],
        [ 1.05237849e-01],
        [ 4.67540249e-02],
        [-1.55207589e-02],
        [ 2.28024065e-01],
        [-9.15555209e-02],
        [ 3.82096767e-02],
        [-6.14957035e-01],
        [-9.24022347e-02],
        [ 3.13957155e-01],
        [-2.99370810e-02],
        [ 1.79346278e-01],
        [-1.76533192e-01],
        [-2.37248152e-01],
        [ 1.39791399e-01],
        [-3.56571823e-01],
        [-1.09637097e-01],
        [ 2.96602268e-02],
        [-2.80378610e-01],
        [ 1.56205714e-01],
        [ 3.66470516e-01],
        [-4.58159298e-03],
        [ 2.53580213e-01],
        [ 1.79607362e-01],
        [ 2.96160989e-02],
        [ 2.03085780e-01],
        [ 6.49701208e-02],
        [ 1.67321742e-01],
        [-8.88357162e-02],
        [ 7.78879821e-02],
 

In [13]:
text_embedding.shape

(1, 768)

In [14]:
model4 = load_model('model4_unique3.h5')
model4.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 768)]                0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 reshape_1 (Reshape)         (None, 768, 1)               0         ['input_3[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, None, 256)            553216    ['input_4[0][0]']             
                                                                                            

In [15]:
import pickle

# Save the tokenizer
with open('tokenizer3.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

In [16]:
def generate_text(model, input_embedding, tokenizer, max_length=768, num_words_to_print=10):
    out_text = ''
    in_text = '<start>'
    sequence = tokenizer.texts_to_sequences([in_text])[0]

    for i in range(max_length):
        padded_sequence = pad_sequences([sequence], maxlen=max_length, padding='post')

        # Predict the next word
        yhat = model.predict([input_embedding, padded_sequence], verbose=0)
        next_word_probs = yhat[0, len(sequence)-1]

        # Get indices of top 10 probable words
        next_word_indices = np.argsort(next_word_probs)[-num_words_to_print:]

        # Print the top 10 words
        print(f"Step {i+1}, Top {num_words_to_print} predictions:")
        for idx in reversed(next_word_indices):
            word = tokenizer.index_word.get(idx, 'N/A')
            prob = next_word_probs[idx]
            print(f"{word}: {prob:.4f}")

        # Choose the highest probability word
        next_word = np.argmax(next_word_probs)

        if next_word == 0:
            break

        sequence.append(next_word)
        out_text = ' '.join([tokenizer.index_word.get(i, '') for i in sequence if i > 0])

    return out_text

generated_text = generate_text(model4, new_text_embedding, tokenizer)
print("Generated text:", generated_text)


NameError: name 'pad_sequences' is not defined

In [17]:
import numpy as np

def sample_with_temperature(probabilities, temperature=1.0):
    if temperature <= 0:
        return np.argmax(probabilities)
    else:
        probabilities = np.exp(np.log(probabilities + 1e-7) / temperature)
        probabilities /= np.sum(probabilities)
        return np.random.choice(len(probabilities), p=probabilities)


In [22]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model

def generate_text(model, input_embedding, tokenizer, max_length=768, temperature=1.0):
    out_text = ''
    in_text = '<start>'
    sequence = tokenizer.texts_to_sequences([in_text])[0]
    used_words = set(sequence)

    for _ in range(max_length):
        padded_sequence = pad_sequences([sequence], maxlen=max_length, padding='post')
        yhat = model.predict([input_embedding, padded_sequence], verbose=0)
        next_word_probs = yhat[0, len(sequence)-1]

        # Set the probabilities of used words to zero
        next_word_probs[list(used_words)] = 0

        # Apply temperature scaling and select the word with the highest probability
        next_word = sample_with_temperature(next_word_probs, temperature)

        if next_word == 0:  # End of sequence
            break
        '''
        # If the word is already used, select the next highest probability word
        if next_word in used_words:
            next_word_probs[next_word] = 0  # Set the probability of the used word to zero
            next_word = np.argmax(next_word_probs)  # Select the next word
        '''
        if next_word == 0 or next_word in used_words:  # Double check for end-of-sequence or repetition
            break

        sequence.append(next_word)
        used_words.add(next_word)
        out_text += tokenizer.index_word.get(next_word, '') + ' '

    return out_text.strip()



# Generate text using the model
generated_text = generate_text(model4, new_text_embedding, tokenizer)
print(generated_text)


redemonstrated right left  tubes silhouette head suprahilar diameter costophrenic decrease moderately


In [19]:
generated_text

'engorgement worrisome'