In [1]:
!pip install streamlit

Collecting streamlit
  Obtaining dependency information for streamlit from https://files.pythonhosted.org/packages/e9/07/63a6e890c9b998a6318b46c2a34377fd1a3e01a94c427d82bfb2472b7c16/streamlit-1.30.0-py2.py3-none-any.whl.metadata
  Downloading streamlit-1.30.0-py2.py3-none-any.whl.metadata (8.2 kB)
Collecting validators<1,>=0.2 (from streamlit)
  Obtaining dependency information for validators<1,>=0.2 from https://files.pythonhosted.org/packages/3a/0c/785d317eea99c3739821718f118c70537639aa43f96bfa1d83a71f68eaf6/validators-0.22.0-py3-none-any.whl.metadata
  Downloading validators-0.22.0-py3-none-any.whl.metadata (4.7 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.8.1b0-py2.py3-none-any.whl (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K     [90m━

In [2]:
import streamlit as st
import cv2
import numpy as np
from keras import __version__ as keras_version

# Report the library versions this notebook was executed with, one per line.
print(
    f"streamlit version: {st.__version__}",
    f"cv2 version: {cv2.__version__}",
    f"numpy version: {np.__version__}",
    f"keras version: {keras_version}",
    sep="\n",
)

streamlit version: 1.30.0
cv2 version: 4.9.0
numpy version: 1.24.3
keras version: 2.13.1


In [3]:
%%writefile app.py
# This version of the app accepts a single uploaded image.

import os
import streamlit as st
import cv2
import numpy as np
from keras.models import load_model, Model
import keras.backend as K


# Function to preprocess the image
def preprocess_image(img):
    """
    Convert a grayscale image into the (32, 128, 1) float32 array fed to the model.

    The image is scaled to a height of 32 px keeping its aspect ratio, then
    padded on the right with white (255) up to 128 px wide, or squeezed down
    to 128 px when it is wider. Finally it is inverted (255 - pixel) and
    divided by 255.

    Args:
        img: 2-D numpy array (rows, cols), e.g. the result of
            ``cv2.imread(path, cv2.IMREAD_GRAYSCALE)``.

    Returns:
        numpy array of shape (32, 128, 1) and dtype float32.

    Raises:
        ValueError: if ``img`` is None, is not 2-D, or has an empty dimension.
    """
    target_h, target_w = 32, 128

    if img is None or img.ndim != 2 or 0 in img.shape:
        raise ValueError("preprocess_image expects a non-empty 2-D grayscale image")

    rows, cols = img.shape
    # Very tall, narrow inputs would otherwise scale to a width of 0, which
    # cv2.resize rejects; keep at least one column.
    scaled_cols = max(1, int(cols * (target_h / rows)))
    # cv2.resize takes its target size as (width, height)
    img = cv2.resize(img, (scaled_cols, target_h)).astype('float32')

    if scaled_cols < target_w:
        # float32 padding keeps the result float32 (integer padding would
        # silently upcast the concatenated array to float64)
        padding = np.full((target_h, target_w - scaled_cols), 255, dtype='float32')
        img = np.concatenate((img, padding), axis=1)
    elif scaled_cols > target_w:
        img = cv2.resize(img, (target_w, target_h))

    # Invert: white background becomes 0, dark ink becomes high values
    img = cv2.subtract(255, img)

    # Add the trailing channel axis
    img = np.expand_dims(img, axis=2)

    return img / 255

# Alphabet used when decoding: a predicted class index i is rendered as char_set[i].
char_set = "!\"#&'()*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 

def ctc_lambda_func(args):
    """
    Compute the CTC batch cost for a packed 4-tuple of tensors.

    Args:
        args: sequence ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value of ``K.ctc_batch_cost(labels, y_pred, input_length, label_length)``.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    ctc_cost = K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)
    return ctc_cost

# function to perform OCR prediction
def _load_prediction_model():
    """Load the saved network and return a model that outputs its 'dense' layer."""
    loaded_model = load_model('/kaggle/input/model/ocr_word_model_1.h5', custom_objects={'<lambda>': ctc_lambda_func})
    dense_layer = loaded_model.get_layer('dense')
    return Model(inputs=loaded_model.input, outputs=dense_layer.output)


def perform_ocr(img_path):
    """
    Recognise the text shown in the image stored at ``img_path``.

    Args:
        img_path: path of an image file readable by ``cv2.imread``.

    Returns:
        The decoded text, or the string "Error processing image" when the file
        cannot be read or any step of the pipeline fails.
    """
    try:
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            return "Error processing image"
        img = preprocess_image(img)

        # Streamlit re-executes the whole script on every interaction, so the
        # model is fetched through st.cache_resource and only loaded from disk
        # the first time instead of on every call.
        prediction_model = st.cache_resource(_load_prediction_model)()

        # Get predictions
        # NOTE(review): the saved model is fed four inputs; the last three look
        # like placeholder label/length tensors needed only to satisfy its
        # input signature -- confirm against the training notebook.
        test_prediction = prediction_model.predict([np.array([img]), np.zeros((1, 19)), np.ones((1, 1)) * 31, np.ones((1, 1)) * 19])
        test_decoded = K.ctc_decode(test_prediction, input_length=np.ones(test_prediction.shape[0]) * test_prediction.shape[1], greedy=True)[0][0]
        test_out = K.get_value(test_decoded)[0]

        # Decode predictions; entries equal to -1 are skipped
        predicted_text = ''.join([char_set[int(p)] for p in test_out if int(p) != -1])

        return predicted_text
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate; log the cause instead
        # of discarding it.
        import traceback
        traceback.print_exc()
        return "Error processing image"

# actual Streamlit app
def main():
    """
    Render the single-image OCR page.

    Shows a PNG uploader; once a file is uploaded it is displayed, written to
    a private temporary directory, passed to ``perform_ocr`` and the predicted
    text is shown in place of the "Classifying..." status line.
    """
    import tempfile  # only needed here; kept local so the file's import block is untouched

    st.title("OCR Streamlit App")

    uploaded_file = st.file_uploader("Choose an image...", type="png")

    if uploaded_file is None:
        return

    file_name = uploaded_file.name

    st.image(uploaded_file, caption=f"Uploaded Image: {file_name}", use_column_width=True)
    st.write("")

    # Placeholder that is overwritten with the result once OCR has finished
    classification_status = st.empty()
    classification_status.text("Classifying...")

    # A per-call temporary directory avoids two browser sessions overwriting
    # the same fixed file name, and it is removed even if OCR raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_file_path = os.path.join(temp_dir, "temp_image.png")
        with open(temp_file_path, "wb") as f:
            f.write(uploaded_file.getvalue())

        # perform OCR on the uploaded image
        predicted_text = perform_ocr(temp_file_path)

    classification_status.text(f"Predicted Text: {predicted_text}")

# Build the page when this file is executed as a script (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()

Writing app.py


In [14]:
%%writefile app1.py
# This version of the app accepts several uploaded images at once.

import os
import streamlit as st
import cv2
import numpy as np
from keras.models import load_model, Model
import keras.backend as K



def preprocess_image(img):
    """
    Convert a grayscale image into the (32, 128, 1) float32 array fed to the model.

    The image is scaled to a height of 32 px keeping its aspect ratio, then
    padded on the right with white (255) up to 128 px wide, or squeezed down
    to 128 px when it is wider. Finally it is inverted (255 - pixel) and
    divided by 255.

    Args:
        img: 2-D numpy array (rows, cols), e.g. the result of
            ``cv2.imread(path, cv2.IMREAD_GRAYSCALE)``.

    Returns:
        numpy array of shape (32, 128, 1) and dtype float32.

    Raises:
        ValueError: if ``img`` is None, is not 2-D, or has an empty dimension.
    """
    target_h, target_w = 32, 128

    if img is None or img.ndim != 2 or 0 in img.shape:
        raise ValueError("preprocess_image expects a non-empty 2-D grayscale image")

    rows, cols = img.shape
    # Very tall, narrow inputs would otherwise scale to a width of 0, which
    # cv2.resize rejects; keep at least one column.
    scaled_cols = max(1, int(cols * (target_h / rows)))
    # cv2.resize takes its target size as (width, height)
    img = cv2.resize(img, (scaled_cols, target_h)).astype('float32')

    if scaled_cols < target_w:
        # float32 padding keeps the result float32 (integer padding would
        # silently upcast the concatenated array to float64)
        padding = np.full((target_h, target_w - scaled_cols), 255, dtype='float32')
        img = np.concatenate((img, padding), axis=1)
    elif scaled_cols > target_w:
        img = cv2.resize(img, (target_w, target_h))

    # Invert: white background becomes 0, dark ink becomes high values
    img = cv2.subtract(255, img)

    # Add the trailing channel axis
    img = np.expand_dims(img, axis=2)

    return img / 255

# Alphabet used when decoding: a predicted class index i is rendered as char_set[i].
char_set = "!\"#&'()*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"


def ctc_lambda_func(args):
    """
    Compute the CTC batch cost for a packed 4-tuple of tensors.

    Args:
        args: sequence ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value of ``K.ctc_batch_cost(labels, y_pred, input_length, label_length)``.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    ctc_cost = K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)
    return ctc_cost


def _load_prediction_model():
    """Load the saved network and return a model that outputs its 'dense' layer."""
    loaded_model = load_model('/kaggle/input/model/ocr_word_model_1.h5',
                              custom_objects={'<lambda>': ctc_lambda_func})
    dense_layer = loaded_model.get_layer('dense')
    return Model(inputs=loaded_model.input, outputs=dense_layer.output)


def perform_ocr(img_path):
    """
    Recognise the text shown in the image stored at ``img_path``.

    Args:
        img_path: path of an image file readable by ``cv2.imread``.

    Returns:
        Tuple ``(predicted_text, img)`` where ``img`` is the preprocessed
        image array that was fed to the model. On any failure the tuple
        ``("Error processing image", None)`` is returned instead.
    """
    try:
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            return "Error processing image", None
        img = preprocess_image(img)

        # This function runs once per image and Streamlit re-executes the
        # script on every interaction, so the model is fetched through
        # st.cache_resource and only loaded from disk the first time.
        prediction_model = st.cache_resource(_load_prediction_model)()

        # NOTE(review): the saved model is fed four inputs; the last three look
        # like placeholder label/length tensors needed only to satisfy its
        # input signature -- confirm against the training notebook.
        test_prediction = prediction_model.predict(
            [np.array([img]), np.zeros((1, 19)), np.ones((1, 1)) * 31, np.ones((1, 1)) * 19])
        test_decoded = K.ctc_decode(test_prediction,
                                    input_length=np.ones(test_prediction.shape[0]) * test_prediction.shape[1],
                                    greedy=True)[0][0]
        test_out = K.get_value(test_decoded)[0]

        # Entries equal to -1 are skipped when mapping indices to characters
        predicted_text = ''.join([char_set[int(p)] for p in test_out if int(p) != -1])

        return predicted_text, img
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate; log the cause instead
        # of discarding it.
        import traceback
        traceback.print_exc()
        return "Error processing image", None


# function to perform OCR on all images
def perform_ocr_on_folder(folder_path):
    """
    Run ``perform_ocr`` on every PNG file directly inside ``folder_path``.

    Args:
        folder_path: directory to scan (sub-directories are not descended into).

    Returns:
        List of ``(filename, predicted_text, img)`` tuples, one per PNG file,
        ordered by file name.
    """
    results = []
    # os.listdir returns entries in arbitrary order; sort for a stable display order
    for filename in sorted(os.listdir(folder_path)):
        # Case-insensitive match so files named "*.PNG" are not silently skipped
        if not filename.lower().endswith(".png"):
            continue
        img_path = os.path.join(folder_path, filename)
        predicted_text, img = perform_ocr(img_path)
        results.append((filename, predicted_text, img))
    return results


# actual Streamlit app
def main():
    """
    Render the multi-image OCR page.

    Shows a PNG uploader that accepts several files; once files are uploaded
    they are written to a private temporary directory, passed to
    ``perform_ocr_on_folder`` and each prediction is displayed together with
    the preprocessed image.
    """
    import tempfile  # only needed here; kept local so the file's import block is untouched

    st.title("OCR Streamlit App")

    uploaded_files = st.file_uploader("Choose a folder...", type="png", accept_multiple_files=True)

    # With accept_multiple_files=True the uploader yields an empty list (not
    # None) until something is uploaded, so test for emptiness.
    if not uploaded_files:
        return

    st.write("Classifying...")

    # A per-call temporary directory avoids concurrent sessions sharing one
    # fixed folder, and it is removed even if OCR raises.
    with tempfile.TemporaryDirectory() as temp_folder_path:
        # save the uploaded files temporarily
        for uploaded_file in uploaded_files:
            # Keep only the final path component so a crafted upload name
            # cannot escape the temporary directory
            safe_name = os.path.basename(uploaded_file.name)
            if not safe_name:
                continue
            with open(os.path.join(temp_folder_path, safe_name), "wb") as f:
                f.write(uploaded_file.getvalue())

        # perform OCR on all images in the temporary folder
        results = perform_ocr_on_folder(temp_folder_path)

    # here we display results
    for filename, predicted_text, img in results:
        st.success(f"Image: {filename}, Predicted Text: {predicted_text}")
        # perform_ocr returns None for the image when processing failed;
        # st.image cannot render that
        if img is not None:
            st.image(img, caption=f"Image: {filename}", use_column_width=True)


# Build the page when this file is executed as a script (e.g. `streamlit run app1.py`).
if __name__ == "__main__":
    main()


Writing app1.py


To be able to use Streamlit in Kaggle, the following steps were necessary:<br>
(It took me a considerable amount of time to find a solution for running the Streamlit app in Kaggle, and eventually, I came across this solution online.)

Download ngrok: <br>
(Ngrok is a tunneling and reverse proxy service that creates secure tunnels from a public endpoint to a locally running web service. Here it is used to make a locally hosted Streamlit app accessible through a temporary public URL.)

In [4]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip

--2024-01-16 21:35:51--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 54.237.133.81, 54.161.241.46, 52.202.168.65, ...
Connecting to bin.equinox.io (bin.equinox.io)|54.237.133.81|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13921656 (13M) [application/octet-stream]
Saving to: 'ngrok-stable-linux-amd64.zip'


2024-01-16 21:35:52 (35.2 MB/s) - 'ngrok-stable-linux-amd64.zip' saved [13921656/13921656]



Unzip it:

In [5]:
!unzip ngrok-stable-linux-amd64.zip

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   


Set ngrok Authentication Token:

In [16]:
!./ngrok authtoken 2auQKZnO9DU41lxvWc2hYJxEiEk_6bVABk44WKLu3wzstmo8a

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


Start ngrok to expose the Streamlit app:
(This starts ngrok in the background to expose the local Streamlit app on port 8501 to a public URL.)

In [17]:
# Launch ngrok for port 8501; the trailing '&' backgrounds the process so
# this cell returns while the tunnel keeps running.
get_ipython().system_raw('./ngrok http 8501 &')

Here curl is used to retrieve information about the ngrok tunnel and extract the public URL from the response.

In [18]:
# Query ngrok's local API on port 4040 and print the public URL of the first
# tunnel it lists. NOTE(review): presumably fails with an IndexError if ngrok
# has not registered a tunnel yet -- re-run the cell in that case.
!curl -s http://localhost:4040/api/tunnels | python3 -c \
    'import sys, json; print("Execute the next cell and then go to the following URL: " +json.load(sys.stdin)["tunnels"][0]["public_url"])'

Execute the next cell and then go to the following URL: http://a93e-34-136-109-26.ngrok-free.app


**Finally we can run app.py :)**

You can choose between app.py and app1.py.<br>
The difference is that app.py accepts only one image per upload, while app1.py accepts several images at once.

In [19]:
# Serve app.py; this cell keeps running until it is interrupted (stop the cell to shut the server down).
!streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://34.136.109.26:8501[0m
[0m
^C
[34m  Stopping...[0m


In [15]:
# Serve app1.py; this cell keeps running until it is interrupted (stop the cell to shut the server down).
!streamlit run app1.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://34.136.109.26:8501[0m
[0m
^C
[34m  Stopping...[0m
