In [None]:
!pip install streamlit pytesseract pillow speechrecognition pypdf2 transformers torch pydub langdetect googletrans==4.0.0-rc1

Collecting streamlit
  Downloading streamlit-1.44.0-py3-none-any.whl.metadata (8.9 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting speechrecognition
  Downloading speechrecognition-3.14.2-py3-none-any.whl.metadata (30 kB)
Collecting pypdf2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletran

In [None]:
# Install necessary libraries
%%writefile app.py
import streamlit as st
import pytesseract
from PIL import Image
import speech_recognition as sr
from PyPDF2 import PdfReader
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from pydub import AudioSegment
import os
import io
import langdetect
import cv2
import numpy as np
from googletrans import Translator

# Load T5 model and tokenizer
MODEL_NAME = "t5-small"


@st.cache_resource
def _load_summarizer(model_name):
    """Load the tokenizer/model pair for ``model_name`` once per server process.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded objects keeps those reruns from
    re-reading the model weights each time.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = T5Tokenizer.from_pretrained(model_name)
    loaded_model = T5ForConditionalGeneration.from_pretrained(model_name)
    return loaded_tokenizer, loaded_model


# Module-level names are kept because the functions below read them as globals.
tokenizer, model = _load_summarizer(MODEL_NAME)

# Ensure pytesseract can find Tesseract OCR
pytesseract.pytesseract.tesseract_cmd = r"/usr/bin/tesseract"  # Update to your Tesseract path

# Shared googletrans client used by translate_text().
translator = Translator()

# Function to clean and enhance images for better OCR
def preprocess_image(image):
    """Binarize an image so OCR has a cleaner input.

    Args:
        image: A PIL image in any mode.

    Returns:
        A PIL image built from the thresholded grayscale array.
    """
    # Normalize the mode first: grayscale ("L") and palette ("P") images yield
    # 2-D arrays that COLOR_RGB2GRAY rejects, and CMYK would be misread as RGBA.
    rgb_pixels = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    # With THRESH_OTSU the cut-off is derived from the image histogram, so the
    # 150 passed here is effectively a placeholder.
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(thresh)

# Function to detect language
def detect_language(text):
    """Return the language code reported by langdetect for ``text``.

    Args:
        text: The text to classify.

    Returns:
        Whatever ``langdetect.detect`` returns, or the string ``"unknown"``
        when detection raises.
    """
    try:
        return langdetect.detect(text)
    except Exception:
        # Detection is best-effort, so any ordinary failure maps to "unknown".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return "unknown"

# Function to translate text
def translate_text(text, target_lang="en"):
    """Translate ``text`` into ``target_lang`` with the shared translator.

    Args:
        text: The text to translate.
        target_lang: Destination language code passed as ``dest``.

    Returns:
        The translated text, or a ``"Translation Error: ..."`` message
        describing the exception if the translation call fails.
    """
    try:
        result = translator.translate(text, dest=target_lang)
        outcome = result.text
    except Exception as err:
        # Failures are reported as text rather than raised to the caller.
        outcome = f"Translation Error: {err}"
    return outcome

# Function to summarize text using T5 model
def summarize_text(text, max_length=150, min_length=30, num_beams=4):
    """Summarize ``text`` with the module-level T5 model.

    Args:
        text: The text to summarize; ``None`` or whitespace-only input is rejected.
        max_length: Upper bound passed to ``model.generate``.
        min_length: Lower bound passed to ``model.generate``; clamped so it
            never exceeds ``max_length``.
        num_beams: Beam width passed to ``model.generate``.

    Returns:
        The decoded summary, or a fixed message when there is no usable text.
    """
    if not text or not text.strip():
        return "No valid text provided for summarization."

    # The sidebar sliders allow min > max (min goes up to 100, max down to 50);
    # clamp so the generation length constraints stay feasible.
    min_length = min(min_length, max_length)

    input_text = "summarize: " + text
    # Input is truncated to 512 tokens; anything beyond that is not summarized.
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)

    with st.spinner("Generating summary..."):
        summary_ids = model.generate(
            inputs,
            max_length=max_length,
            min_length=min_length,
            length_penalty=2.0,
            num_beams=num_beams,
            early_stopping=True,
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary

# Function to extract text from an image
def extract_text_from_image(image_file):
    """Run OCR on an uploaded image and return the recognized text.

    Args:
        image_file: A path or file-like object that ``Image.open`` accepts.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the
        preprocessed image.
    """
    # Open and clean the image before the spinner is shown.
    prepared = preprocess_image(Image.open(image_file))
    with st.spinner("Extracting text from image..."):
        return pytesseract.image_to_string(prepared)

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file):
    """Concatenate the text of every page in a PDF.

    Args:
        pdf_file: A path or file-like object that ``PdfReader`` accepts.

    Returns:
        The combined page text. If no page yields non-whitespace text, a
        "No readable text" message is returned; if reading raises, an
        "Error processing PDF" message containing the exception is returned.
    """
    try:
        with st.spinner("Extracting text from PDF..."):
            # Pages with no extractable text contribute an empty string.
            page_texts = [page.extract_text() or "" for page in PdfReader(pdf_file).pages]
            combined = "".join(page_texts)
        if combined.strip():
            return combined
        return "No readable text found in the PDF."
    except Exception as e:
        return f"Error processing PDF: {e}"

# Function to extract text from audio
def extract_text_from_audio(audio_file):
    """Transcribe an uploaded audio file to text.

    The upload is decoded with pydub, re-exported as WAV to a temporary file,
    and passed to ``Recognizer.recognize_google``.

    Args:
        audio_file: A file-like object exposing ``read()``.

    Returns:
        The recognized text, or an ``"Error processing audio file: ..."``
        message describing the exception if any step fails.
    """
    import tempfile

    recognizer = sr.Recognizer()
    wav_path = None
    try:
        with st.spinner("Processing audio file..."):
            audio = AudioSegment.from_file(io.BytesIO(audio_file.read()))

            # Use a unique temp path rather than a fixed name in the working
            # directory, so simultaneous sessions cannot overwrite each other.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                wav_path = tmp.name
            audio.export(wav_path, format="wav")

            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)

        return text
    except Exception as e:
        return f"Error processing audio file: {e}"
    finally:
        # Clean up on success and on failure alike; a removal error must not
        # replace the value being returned.
        if wav_path and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
            except OSError:
                pass

# Streamlit Web Interface
def _summarize_and_show(source_text, target_lang, max_length, min_length, num_beams, show_extracted=False):
    """Detect the language of ``source_text``, translate it, summarize it, and render the result."""
    if not source_text or not source_text.strip():
        st.warning("No text could be extracted to summarize.")
        return

    language = detect_language(source_text)
    st.write(f"Detected Language: {language}")
    translated_text = translate_text(source_text, target_lang)

    if show_extracted:
        with st.expander("Extracted Text:"):
            st.write(source_text)

    summary = summarize_text(translated_text, max_length, min_length, num_beams)
    with st.expander("Summary:"):
        st.write(summary)


def main():
    """Render the Streamlit UI and dispatch on the selected input type.

    The sidebar holds the input-type selector, the generation settings, and the
    target language code, so every input type shares the same settings. Each
    input type extracts text (directly, via OCR, from a PDF, or by speech
    recognition) and hands it to the shared translate-and-summarize step.
    """
    st.title("Adaptive Summarization Tool")
    st.write("Summarize Text, Images, PDFs, or Audio Files using the T5 Model.")

    # Sidebar settings
    st.sidebar.header("Choose Input Type")
    input_type = st.sidebar.selectbox("Select input type:", ["Text Input", "Image File", "PDF File", "Audio File"])

    st.sidebar.header("Model Settings")
    max_length = st.sidebar.slider("Max Summary Length", 50, 300, 150)
    min_length = st.sidebar.slider("Min Summary Length", 10, 100, 30)
    num_beams = st.sidebar.slider("Beam Search (Higher = Better, Slower)", 2, 10, 4)
    # Defined once here so it exists for every branch; previously only the text
    # branch created it and the image branch failed with a NameError.
    target_lang = st.sidebar.text_input("Enter target language code (e.g., 'en' for English):", "en")
    target_lang = (target_lang or "").strip() or "en"

    settings = (target_lang, max_length, min_length, num_beams)

    if input_type == "Text Input":
        st.header("Text Summarization")
        user_input = st.text_area("Enter the text you want to summarize:", "")
        if st.button("Summarize"):
            if user_input and user_input.strip():
                _summarize_and_show(user_input, *settings)
            else:
                st.warning("Please enter some text to summarize.")
        return

    # File-based inputs share one flow: upload -> extract -> summarize.
    upload_modes = {
        "Image File": ("Image Summarization", "Upload an Image", ["jpg", "png", "jpeg"], extract_text_from_image),
        "PDF File": ("PDF Summarization", "Upload a PDF", ["pdf"], extract_text_from_pdf),
        "Audio File": (
            "Audio Summarization",
            "Upload an Audio File",
            ["wav", "mp3", "m4a", "ogg", "flac"],
            extract_text_from_audio,
        ),
    }
    if input_type not in upload_modes:
        return

    header, uploader_label, extensions, extractor = upload_modes[input_type]
    st.header(header)
    uploaded_file = st.file_uploader(uploader_label, type=extensions)
    if not uploaded_file:
        return

    if input_type == "Image File":
        st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    if st.button("Extract & Summarize Text"):
        extracted_text = extractor(uploaded_file)
        _summarize_and_show(extracted_text, *settings, show_extracted=True)

# Build the UI only when this file is executed as a script (the notebook
# launches it with `streamlit run app.py`), not when it is imported.
if __name__ == "__main__":
    main()


Writing app.py


In [None]:
!pip install streamlit pytesseract pillow speechrecognition pypdf2 transformers torch pydub
!apt-get install -y tesseract-ocr ffmpeg


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 29 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 1s (9,232 kB/s)
Selecting previously unselected package tesseract-ocr-eng.
(Reading database ... 126210 files and directo

In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [None]:
# Never store the ngrok authtoken in the notebook: prompt for it at run time so
# it is not saved with the cell source.
# NOTE(review): the token that was hardcoded in this cell has been exposed and
# should be revoked in the ngrok dashboard.
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
!streamlit run app.py &> logs.txt &

In [None]:
from pyngrok import ngrok

# Now, open the tunnel on port 8501
# (the port the Streamlit server launched above is listening on).
public_url = ngrok.connect(addr="8501")
# NOTE: despite its name, `public_url` holds the NgrokTunnel object returned by
# connect(), so this prints its repr (public URL -> local address).
print("Streamlit App URL:", public_url)


Streamlit App URL: NgrokTunnel: "https://c3d4-34-58-56-87.ngrok-free.app" -> "http://localhost:8501"
