In [1]:
import nltk
from nltk.tokenize import word_tokenize
import gensim
from tensorflow.keras import Model
import re
from io import BytesIO 
from PIL import Image

2023-05-31 20:22:38.887851: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-05-31 20:22:39.059335: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-05-31 20:22:39.061732: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import plotly.express as ex
import numpy as np
from dash import Dash,html,dcc,Input, Output,State
from jupyter_dash import JupyterDash
import pandas as pd
import dash_bootstrap_components as dbc
import dash_daq as daq
from dash.exceptions import PreventUpdate
import matplotlib.pyplot as plt
import plotly.io as pio
import base64
import io
import tensorflow as tf

# Load and prepare the data and models

In [3]:
# Lookup table relating answer labels (keys) to model output indices (values)
import json

# Parse the label mapping stored as JSON on disk
with open('ModelandFilesUsed/label_dict.json', 'r') as label_file:
    data = json.load(label_file)

# Keep the parsed content as a plain dictionary for later reverse lookups
label_dict = dict(data)

In [4]:
# Read the placeholder "upload" picture and keep it as base64 text,
# ready to be embedded in an <img> data URL inside the layout
img = "uploadImage.png"
with open(img, mode='rb') as image_file:
    img_data = image_file.read()
img_card_base64 = str(base64.b64encode(img_data), 'utf-8')

In [5]:
# Word2vec vectors in binary format, used to embed question words.
# Loading this file happens once, at notebook start-up.
model_w2v = gensim.models.KeyedVectors.load_word2vec_format(
    "ModelandFilesUsed/GoogleNews-vectors-negative300.bin.gz",
    binary=True,
)

In [6]:
# Frozen VGG16 convolutional base (ImageNet weights, no classifier head)
# used to extract a feature map from a 448x448 RGB image
image_feature_extraction = tf.keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(448, 448, 3),
)
# Output of the final layer; for this input size it is a (14,14,512) feature map
last_layer = image_feature_extraction.layers[-1].output
image_feature_extraction = Model(image_feature_extraction.input, last_layer)
image_feature_extraction.trainable = False

# Projection of the (14,14,512) feature map to (11,300), i.e. 11 rows with the
# same width as a word embedding:
#   1x1 convolution : 512 channels -> 300 channels
#   reshape         : (14,14,300)  -> (196,300)
#   permute + dense : the Dense layer acts on the last axis, so the axes are
#                     swapped to reduce 196 positions to 11, then swapped back
# NOTE(review): no trained weights are loaded into dimen_red anywhere in this
# notebook, so it runs with freshly initialised parameters - confirm intended.
dimen_red = tf.keras.Sequential([
    tf.keras.layers.Conv2D(300, kernel_size=(1, 1), input_shape=(14, 14, 512)),
    tf.keras.layers.Reshape((196, 300)),
    tf.keras.layers.Permute((2, 1)),
    tf.keras.layers.Dense(11),
    tf.keras.layers.Permute((2, 1)),
])

# Helper functions

In [7]:
def tokenize(sentence):
    ''' Split a sentence into word and punctuation tokens.

    The sentence is cut at every punctuation mark, space and newline; the
    punctuation marks themselves are kept as tokens, while empty strings,
    single spaces and newlines are discarded.
    '''
    pieces = re.split(r"([-.\"',:? !$#@~()*&\^%;/\\+<>\n=])", sentence)
    tokens = []
    for piece in pieces:
        # Drop the blanks produced by the split
        if piece in ('', ' ', '\n'):
            continue
        tokens.append(piece)
    return tokens

In [8]:
# Reverse lookup in label_dict: from a stored value back to its key
def get_key(val):
    '''Return the first key of label_dict whose value equals val.

    Returns None when no entry has that value.
    '''
    return next((key for key, value in label_dict.items() if val == value), None)

In [9]:
def get_text_embeding_matrix(text):
  '''Embed a question as a fixed-size (11, 300) matrix of word vectors.

  A trailing '?' is stripped, the text is tokenized with tokenize(), and
  row i of the result receives the model_w2v vector of token i.

  Tokens beyond the first 11 are ignored, and tokens missing from
  model_w2v keep an all-zero row.

  Returns:
    numpy array of shape (11, 300).
  '''
  max_tokens, embedding_dim = 11, 300
  embedding_matrix = np.zeros((max_tokens, embedding_dim))
  text = re.sub(r'\?$', '', text)
  # Truncate explicitly instead of relying on a swallowed IndexError
  tokenized_text = tokenize(text)[:max_tokens]
  for i, word in enumerate(tokenized_text):
    try:
      embedding_matrix[i] = model_w2v[word]
    except KeyError:
      # Out-of-vocabulary word: leave its row as zeros. Any other error
      # (e.g. the vectors not being loaded) is allowed to surface.
      continue
  return embedding_matrix

In [10]:
def read_images_as_array():
  '''Load the uploaded image from 'data.txt' as a model-ready array.

  'data.txt' holds the upload as a data URL ("<header>,<base64 payload>");
  the payload is decoded, opened as an image, forced to RGB, resized to
  448x448, scaled, and given a leading batch axis.

  Returns:
    numpy array of shape (1, 448, 448, 3).

  Raises:
    FileNotFoundError: if 'data.txt' does not exist.
    IndexError: if the file content contains no ',' separator.
  '''
  with open('data.txt', encoding='UTF8') as f:
    contents = f.read()
  # Everything after the first comma is the base64 payload
  decoded_data = base64.b64decode(contents.split(',', 1)[1])
  # Force three channels: RGBA, palette or grayscale uploads would otherwise
  # produce an array that does not match the (448,448,3) extractor input
  image = Image.open(BytesIO(decoded_data)).convert('RGB')
  image = image.resize((448, 448))
  # NOTE(review): the divisor is 225, not 255 - possibly a typo. Left unchanged
  # because it must match the preprocessing used when the model was trained.
  image = np.array(image) / 225
  image = np.expand_dims(image, axis=0)
  return image

In [11]:
def get_image_embedding_matrix():
  '''Return the reduced embedding of the uploaded image as a 2-D array.

  The image from read_images_as_array() is passed through
  image_feature_extraction and then dimen_red; the result is flattened so
  that only the last axis of the reduced output is preserved. The shape of
  the intermediate feature map is printed.
  '''
  vgg_features = image_feature_extraction(read_images_as_array())
  print(vgg_features.shape)
  reduced = dimen_red(vgg_features)
  last_axis = reduced.shape[-1]
  return reduced.numpy().reshape(-1, last_axis)

In [12]:
# Loaded answer models keyed by file path, so each .h5 file is read from disk
# only once instead of on every prediction
_ANSWER_MODEL_CACHE = {}


def _load_answer_model(path='ModelandFilesUsed/moreData_model.h5'):
    '''Return the Keras model stored at path, loading it on first use only.'''
    if path not in _ANSWER_MODEL_CACHE:
        _ANSWER_MODEL_CACHE[path] = tf.keras.models.load_model(path)
    return _ANSWER_MODEL_CACHE[path]


def predict_answer(question):
    '''Predict the answer to a question about the currently uploaded image.

    The image is read with read_images_as_array(), passed through
    image_feature_extraction and dimen_red and reshaped to (-1, 11, 300);
    the question is embedded with get_text_embeding_matrix(). Both are fed
    to the main model and the index of the highest score is translated back
    to its label with get_key().

    Args:
        question: the question text.

    Returns:
        The label found by get_key() for the predicted index, or None when
        label_dict has no entry with that index.
    '''
    # Main model (cached after the first call)
    model = _load_answer_model()

    image = read_images_as_array()
    question_vec = get_text_embeding_matrix(question)

    image_embedding = dimen_red(image_feature_extraction(image)).numpy().reshape(-1, 11, 300)
    # Add a batch axis so the question matches the image embedding's layout
    question_embedding = np.array([question_vec])

    prediction = model.predict([image_embedding, question_embedding])

    predicted_index = np.argmax(prediction)
    predicted_answer = get_key(predicted_index)
    return predicted_answer

# Main Dashboard

In [13]:
# Dash application hosted from the notebook, styled with the Bootstrap LUMEN theme
app = JupyterDash(
    external_stylesheets=[dbc.themes.LUMEN],
    suppress_callback_exceptions=True,
)

In [14]:
# Left-hand card: heading, drag-and-drop upload control and a preview area
card_1 = dbc.Card(
    children=dbc.CardBody(
        children=[
            html.H3(
                html.B("upload Image"),
                style={
                    'color': '#068DA9',
                    'fontSize': 25,
                    'textAlign': 'center',
                    'font-family': 'Courier New, monospace',
                },
            ),
            dcc.Upload(
                id='upload-image',
                children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
                style={
                    'color': '#068DA9',
                    'fontSize': 25,
                    'font-family': 'Courier New, monospace',
                    'width': '90%',
                    'height': '50px',
                    'lineHeight': '60px',
                    'borderWidth': '1px',
                    'borderStyle': 'dashed',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'margin': '7px',
                },
            ),
            # Placeholder that the upload callback fills with the image preview
            html.Div(id="uploaded_img"),
        ],
    ),
    style={
        "width": "100%",
        "background-color": "rgba(255, 255, 255, 0.2)",
        'box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)',
    },
)

In [15]:
# Right-hand card: question input, answer display and the "Predict" button
card_2 = dbc.Card(
    children=dbc.CardBody(
        children=[
            # Free-text question about the uploaded image
            dcc.Input(
                id='input-text',
                type='text',
                placeholder='Enter question here...',
                style={
                    'color': '#116D6E',
                    'fontSize': 25,
                    'font-family': 'Courier New, monospace',
                    'width': '90%',
                    'height': '50px',
                    'lineHeight': '60px',
                    'borderWidth': '1px',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'margin': '15px',
                },
            ),
            # Its children are replaced by the prediction callback
            html.Label(
                'Predicted answer.',
                id="show_answer",
                style={
                    'color': '#068DA9',
                    'borderWidth': '1px',
                    'borderStyle': 'dashed',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'fontSize': 25,
                    'width': '90%',
                    'font-family': 'Courier New, monospace',
                    'margin': '15px',
                },
            ),
            # Clicking this button triggers the prediction callback
            html.Button(
                'Predict',
                id='submit-button',
                n_clicks=0,
                style={
                    'color': '#ffffff',
                    'backgroundColor': '#068DA9',
                    'fontSize': 25,
                    'font-family': 'Courier New, monospace',
                    'width': '250px',
                    'height': '50px',
                    'lineHeight': '60px',
                    'borderWidth': '1px',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'margin': '15px',
                },
            ),
        ],
    ),
    style={
        "width": "100%",
        "background-color": "rgba(255, 255, 255, 0.2)",
        'box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)',
        'justify-content': 'center',
        'align-items': 'center',
    },
)

In [16]:
# Page layout: a title card on top, then a two-column row with the upload card
# on the left and the question card plus the placeholder picture on the right
app.layout = html.Div(
    children=[
        html.Div(
            children=[
                # Title card
                dbc.Card(
                    dbc.CardBody(
                        [
                            html.H1(
                                html.B("Visual Question Answering"),
                                style={
                                    'color': '#068DA9',
                                    'fontSize': 25,
                                    'textAlign': 'center',
                                    'font-family': 'Courier New, monospace',
                                },
                            ),
                        ]
                    ),
                    style={
                        'width': '100%',
                        'height': '10%',
                        "background-color": "rgba(255, 255, 255, 0.2)",
                        'box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)',
                    },
                ),
                # Two equal-width columns: image upload | question and answer
                dbc.Row(
                    [
                        dbc.Col(card_1, width=6),
                        dbc.Col(
                            [
                                card_2,
                                # NOTE(review): "width"/"height" carry no CSS unit here
                                html.Img(
                                    src=f'data:image/png;base64,{img_card_base64}',
                                    style={
                                        "object-fit": "cover",
                                        "max-width": "90%",
                                        "max-height": "90%",
                                        "width": "340",
                                        "height": "160",
                                        "border-radius": "20px",
                                        "margin-left": "20px",
                                        "margin-top": "20px",
                                        "margin-right": "20px",
                                    },
                                ),
                            ],
                            width=6,
                        ),
                    ],
                    style={
                        'width': '100%',
                        'height': '100%',
                        'margin-top': '20px',
                        'margin-left': '5px',
                    },
                ),
            ],
            # Full-page coloured background pinned to the top-left corner
            style={
                'backgroundColor': '#C4DFDF',
                'padding': '20px',
                'width': '100%',
                'height': '100%',
                'position': 'absolute',
                'top': 0,
                'left': 0,
                'overflowY': 'scroll',
            },
        ),
    ]
)

# Callbacks

In [17]:
def parse_contents(contents, filename):
    '''Persist an uploaded image and build its preview component.

    The upload string is written unchanged to 'data.txt' (read back later by
    read_images_as_array) and is also used directly as the <img> source,
    since HTML images accept the same base64 data-URL format that the upload
    component supplies.

    Args:
        contents: the upload as a data-URL string.
        filename: name of the uploaded file (not used here).

    Returns:
        An html.Div wrapping the preview image.
    '''
    with open('data.txt', 'w') as upload_file:
        upload_file.write(contents)

    preview_style = {
        "object-fit": "cover",
        "max-width": "90%",
        "max-height": "90%",
        "width": "340",
        "height": "160",
        "border-radius": "20px",
        "margin-left": "20px",
        "margin-top": "20px",
        "margin-right": "20px",
    }
    preview = html.Img(src=contents, style=preview_style)
    return html.Div([preview])

In [18]:
@app.callback(
    Output('uploaded_img', 'children'),
    Input('upload-image', 'contents'),
    State('upload-image', 'filename'),
)
def update_output_image(list_of_contents, list_of_names):
    '''Show a preview whenever the upload component receives content.

    Returns an empty string while nothing has been uploaded; otherwise the
    component built by parse_contents().
    '''
    if list_of_contents is None:
        return ""
    return parse_contents(list_of_contents, list_of_names)

In [19]:
# NOTE(review): this function shares its name with the upload callback defined
# earlier. Both are registered through the decorator, but a distinct name
# would avoid the shadowing.
@app.callback(
              Output("show_answer",'children'),
              Input("submit-button","n_clicks"),
              State("input-text","value"))
def update_output_image(n_clicks,value):
    '''Fill the answer label when the "Predict" button is clicked.

    Args:
        n_clicks: click count of the button; only used as the trigger.
        value: current text of the question input.

    Returns:
        " " when no question has been entered, a hint when no image has been
        uploaded yet, otherwise the result of predict_answer().
    '''
    if not value:
        return " "
    try:
        return predict_answer(str(value))
    except FileNotFoundError:
        # The uploaded image is read from 'data.txt', which only exists once
        # an upload has been processed.
        return "Please upload an image first."

In [20]:
# Start the Dash server only when this code is executed as the main module
if __name__ == '__main__':
    # Run with debug mode switched on
    app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash app running on http://127.0.0.1:8050/
