In [1]:
!pip install streamlit
!pip install pyngrok
!pip install praw
!pip install easyocr
!pip install deep-translator
!pip install pillow

Collecting streamlit
  Downloading streamlit-1.40.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.40.0-py2.py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [3

In [2]:
%%writefile app_combined3.py

import os
from collections import Counter
from io import BytesIO

import easyocr
import google.generativeai as genai
import joblib
import pandas as pd
import praw
import requests
import streamlit as st
from deep_translator import GoogleTranslator
from PIL import Image

# ---------------------------------------------------------------------------
# Module-level setup: classifier, OCR readers, Reddit client and Gemini model.
#
# Streamlit re-executes this script from the top on every user interaction, so
# the expensive objects (pickled model, EasyOCR readers) are created inside
# `st.cache_resource` functions and reused across reruns.
# ---------------------------------------------------------------------------


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing secret is
            reported by name instead of failing later inside an API client.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; export it before starting the app."
        )
    return value


@st.cache_resource
def _load_classifier():
    """Load the saved logistic regression model and its vectorizer (once per process)."""
    # NOTE: joblib.load unpickles arbitrary Python objects; only load trusted files.
    return joblib.load('LRmodel.pkl'), joblib.load('LRvectorizer.pkl')


@st.cache_resource
def _load_ocr_readers():
    """Create the EasyOCR readers (once per process).

    Returns a tuple: (Hindi+English, Bengali+English, Telugu+English,
    English/Spanish/French/German).
    """
    # Hindi, Bengali and Telugu each get their own reader paired with English;
    # presumably EasyOCR cannot combine those scripts in one reader - TODO confirm.
    return (
        easyocr.Reader(['en', 'hi']),
        easyocr.Reader(['en', 'bn']),
        easyocr.Reader(['en', 'te']),
        easyocr.Reader(['en', 'es', 'fr', 'de']),
    )


model, vectorizer = _load_classifier()
reader_en_hi, reader_en_bn, reader_en_te, reader = _load_ocr_readers()

# Supported OCR languages:
#   en: English
#   hi: Hindi
#   bn: Bengali
#   te: Telugu (with English)
#   es: Spanish
#   fr: French
#   de: German
# (Kept as comments on purpose: a bare string literal in the middle of a
# Streamlit script is not a docstring and would be rendered on the page by
# Streamlit's "magic" display of stand-alone expressions.)

# Credentials are read from the environment rather than being committed to the
# notebook. Any key that was previously hardcoded here must be treated as
# leaked and rotated.
reddit = praw.Reddit(client_id=_require_env('REDDIT_CLIENT_ID'),
                     client_secret=_require_env('REDDIT_CLIENT_SECRET'),
                     user_agent='Mental Health')

# Configure the Gemini API for wellbeing mapping
genai.configure(api_key=_require_env('GEMINI_API_KEY'))
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}
gemini_model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)

def fetch_user_text_posts(username):
    """Return the title and body text of a Reddit user's newest submissions.

    Up to 20 submissions are read through the module-level `reddit` client;
    each entry is the submission title, a space, and its selftext.

    Any exception is reported on the Streamlit page and an empty list is
    returned instead.
    """
    try:
        redditor = reddit.redditor(username)
        collected = []
        for submission in redditor.submissions.new(limit=20):
            collected.append(submission.title + " " + submission.selftext)
        return collected
    except Exception as err:
        st.write(f"Error fetching text posts: {err}")
        return []

# Image file extensions (lower-case) that are downloaded and passed to OCR.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tiff')
# Seconds to wait on an image host before giving up on that single image.
_IMAGE_REQUEST_TIMEOUT = 10


def fetch_user_images_and_extract_text(username):
    """Download a Reddit user's recent image posts, OCR them and translate to English.

    Looks at the user's 20 newest submissions and keeps those whose URL ends in
    a known image extension (compared case-insensitively). Each image is shown
    on the page, run through `extract_text_from_image`, and any non-blank
    result is translated to English and displayed.

    Returns:
        list[str]: translated text, one entry per image that yielded text.
        A failure on one image is reported and skipped; a failure while listing
        the submissions is reported and yields an empty list.
    """
    try:
        user = reddit.redditor(username)
        images = [
            post.url
            for post in user.submissions.new(limit=20)
            if post.url.lower().endswith(_IMAGE_EXTENSIONS)
        ]
    except Exception as e:
        st.write(f"Error fetching images: {e}")
        return []

    extracted_texts = []
    for image_url in images:
        try:
            # A timeout keeps one unresponsive host from hanging the whole app,
            # and raise_for_status reports HTTP errors as such instead of
            # feeding an error page to the image decoder.
            response = requests.get(image_url, timeout=_IMAGE_REQUEST_TIMEOUT)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            st.image(image, caption="Fetched Image", use_column_width=True)

            # Extract text from image ('en' selects the default multi-language reader)
            extracted_text = "\n".join(extract_text_from_image(image, 'en'))

            # Translate to English if needed
            if extracted_text.strip():
                translated_text = GoogleTranslator(source='auto', target='en').translate(extracted_text)
                extracted_texts.append(translated_text)
                st.write("Extracted and Translated Text from Image:")
                st.text(translated_text)
        except Exception as e:
            st.write(f"Error processing image {image_url}: {e}")

    return extracted_texts

def classify_text(text):
    """Classify `text`, show the top predicted concern, then request Gemini advice.

    The text is vectorized with the module-level `vectorizer`, scored with
    `model.predict_proba`, and the label with the highest probability is
    written to the page before being handed to `get_wellbeing_insight`.
    """
    features = vectorizer.transform([text])

    # One probability per class label, indexed by the label itself.
    probabilities = pd.Series(
        model.predict_proba(features)[0],
        index=model.classes_,
        name='Probability',
    )
    top_issue = probabilities.idxmax()
    top_probability = probabilities.max()

    st.write(f"The most likely mental health concern is: {top_issue} with a probability of {top_probability:.2%}")

    # Follow up with well-being insights for the predicted concern.
    get_wellbeing_insight(text, top_issue)

def get_wellbeing_insight(text, top_issue):
    """Ask the Gemini model for supportive advice about `top_issue` and display it.

    A new chat session is started on the module-level `gemini_model`, a prompt
    containing `text` and `top_issue` is sent, and the reply is written under a
    "Wellbeing Insight" heading. Any exception is reported on the page rather
    than raised.
    """
    try:
        session = gemini_model.start_chat(history=[])
        # Adjacent f-string literals concatenate into one single-line prompt.
        request = (
            f"Analyze the following text for mental wellbeing insights related to {top_issue}: {text}. "
            f"Based on this, provide practical advice or actions the user can take to reduce or improve {top_issue}. "
            f"Be supportive and provide actionable suggestions."
        )
        reply = session.send_message(request)

        st.write("### Wellbeing Insight:")
        st.write(reply.text)
    except Exception as err:
        st.write(f"Error retrieving wellbeing insights: {err}")

def extract_text_from_image(image, language):
    """Run OCR on `image` with the reader matching `language`.

    Args:
        image: a PIL image, or anything EasyOCR's `readtext` accepts directly
            (file path / URL string, encoded image bytes, numpy array).
        language: 'te', 'hi' or 'bn' select the reader that pairs that language
            with English; any other value uses the default
            English/Spanish/French/German reader.

    Returns:
        list[str]: the recognized text fragments (`detail=0` output).
    """
    readers_by_language = {
        "te": reader_en_te,  # Telugu + English
        "hi": reader_en_hi,  # Hindi + English
        "bn": reader_en_bn,  # Bengali + English
    }
    reader_to_use = readers_by_language.get(language, reader)

    # EasyOCR documents file paths, numpy arrays and byte streams as inputs, not
    # arbitrary PIL image objects (PNG/WebP/BMP/TIFF uploads are rejected as an
    # invalid input type). Re-encode PIL images losslessly as PNG bytes so every
    # format the app accepts reaches the reader in a supported form.
    if isinstance(image, Image.Image):
        buffer = BytesIO()
        # RGB conversion covers palette/CMYK and other modes before encoding.
        image.convert("RGB").save(buffer, format="PNG")
        image = buffer.getvalue()

    # Perform OCR
    return reader_to_use.readtext(image, detail=0)

def _translate_to_english(text):
    """Translate `text` to English; report failures on the page and return None."""
    try:
        translated = GoogleTranslator(source='auto', target='en').translate(text)
    except Exception as e:
        # Translator errors (network failure, over-long input, ...) should not
        # surface as a raw traceback in the app.
        st.write(f"Error translating text: {e}")
        return None
    return translated or ""


def _render_text_input():
    """Mode 1: classify text typed by the user."""
    st.subheader("Enter Text to Classify Mental Health Issue")
    input_text = st.text_area("Enter your text here:")

    if not st.button("Classify Text"):
        return
    if input_text.strip() == "":
        st.write("Please enter some text to classify.")
        return

    # Translate if not in English
    translated_text = _translate_to_english(input_text)
    if translated_text is None:
        return
    st.write("Translated Text (to English):")
    st.write(translated_text)

    # Classify and display result
    classify_text(translated_text)


def _render_image_upload():
    """Mode 2: OCR an uploaded image, translate the text and classify it on demand."""
    st.subheader("Upload an Image to Extract and Classify Text")
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png", "webp", "bmp", "tiff"])
    if uploaded_image is None:
        return

    try:
        image = Image.open(uploaded_image)
        # NOTE(review): recent Streamlit releases deprecate `use_column_width`
        # in favor of a newer sizing parameter - confirm against the deployed version.
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Extract text from image ('en' selects the default multi-language reader)
        extracted_text = "\n".join(extract_text_from_image(image, 'en'))
    except Exception as e:
        st.write(f"Error processing uploaded image: {e}")
        return

    st.subheader("Extracted Text")
    st.text(extracted_text)

    # Nothing recognized: do not translate or offer to classify an empty string.
    if not extracted_text.strip():
        st.write("No text could be extracted from the image.")
        return

    # Translate text to English if needed
    translated_text = _translate_to_english(extracted_text)
    if translated_text is None:
        return
    st.subheader("Translated Text (to English)")
    st.text(translated_text)

    if st.button("Classify Extracted Text"):
        classify_text(translated_text)


def _render_reddit_analysis():
    """Mode 3: classify a Reddit user's recent text and image posts in aggregate."""
    st.subheader("Enter Reddit Username for Analysis")
    username = st.text_input("Enter Reddit username:")

    if not st.button("Analyze"):
        return
    if username.strip() == "":
        st.write("Please enter a Reddit username.")
        return

    # Fetch and display text posts
    text_posts = fetch_user_text_posts(username)
    if text_posts:
        st.write("Recent Text Posts:")
        st.write(text_posts[:3])  # Display a few posts for review

    # Fetch and display image-based posts with extracted text
    image_texts = fetch_user_images_and_extract_text(username)

    # Combine text from both text posts and image text
    all_text = text_posts + image_texts
    if not all_text:
        st.write("No valid text found for analysis.")
        return

    # Vectorize and classify every post in one batch
    predictions = list(model.predict(vectorizer.transform(all_text)))

    # Count the most common mental health issue
    issue_counts = Counter(predictions)
    top_issue, top_count = issue_counts.most_common(1)[0]
    top_percentage = (top_count / len(predictions)) * 100

    st.write(f"The most frequently detected mental health concern is: {top_issue} appearing in {top_percentage:.2f}% of analyzed text.")
    issue_distribution = pd.DataFrame(issue_counts.items(), columns=['Mental Health Issue', 'Count'])
    st.write("Mental health issue distribution across posts:")
    st.write(issue_distribution)

    # Call the Gemini model to get well-being insights
    get_wellbeing_insight(" ".join(all_text), top_issue)


# Define the Streamlit app
def run_app():
    """Render the app: a title, a sidebar mode selector, and the selected mode's UI."""
    st.title("Mental Health Classifier App")

    # Option to choose functionality
    option = st.sidebar.selectbox(
        "Choose an option",
        ["Text Input", "Image Upload", "Reddit Username Analysis"]
    )

    if option == "Text Input":
        _render_text_input()
    elif option == "Image Upload":
        _render_image_upload()
    elif option == "Reddit Username Analysis":
        _render_reddit_analysis()

# Run the app
if __name__ == '__main__':
    run_app()


Writing app_combined3.py


In [5]:
# Import ngrok
from pyngrok import ngrok

# Set your authtoken
ngrok.set_auth_token("2ohUKqk37HcGbvwN0s8Y1E2WNxE_39z1gVF3bYq9vFSEm7Wzq") # Replace YOUR_AUTHTOKEN with your actual authtoken

# Kill any existing ngrok processes
ngrok.kill()

# Start Streamlit with nohup
!nohup streamlit run app_combined3.py &

# Create a public URL with ngrok to access the app
public_url = ngrok.connect(addr='8501')
print(f"Public URL: {public_url}")

nohup: appending output to 'nohup.out'
Public URL: NgrokTunnel: "https://852b-34-80-184-7.ngrok-free.app" -> "http://localhost:8501"


In [6]:
# Shut down the ngrok process started above, which closes the public tunnel.
ngrok.kill()