### Simple UI to interact with the LSTM model

In [1]:
# Install required packages
!pip install streamlit pyngrok nltk

# Download NLTK data if needed
import nltk
nltk.download('punkt', quiet=True)

Collecting streamlit
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m93.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m122.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.3.0 streamlit-1.49.1


True

In [3]:
# Create the Streamlit app file
%%writefile app.py
import streamlit as st
import torch
import torch.nn.functional as F
import re
import pickle
import os
from collections import Counter
from torch.amp import autocast

# Custom dataset class for loading
class StreamlitBookDataset:
    """Vocabulary tables plus tokenizer, rebuilt from a saved vocabulary mapping.

    `vocab_data` must provide the keys 'vocab', 'token_to_idx' and
    'idx_to_token'. The optional key 'seq_length' defaults to 100 when absent.
    """

    def __init__(self, vocab_data):
        self.vocab = vocab_data['vocab']
        self.token_to_idx = vocab_data['token_to_idx']
        self.idx_to_token = vocab_data['idx_to_token']
        # Fall back to 100 when the saved data carries no sequence length.
        self.seq_length = vocab_data['seq_length'] if 'seq_length' in vocab_data else 100

    def tokenize_text(self, text):
        """Split `text` into lower-cased word tokens and single punctuation marks."""
        # NOTE(review): presumably this must mirror the tokenizer used at
        # training time - confirm against the training code.
        lowered = []
        for piece in re.findall(r'\b\w+\b|[^\w\s]', text):
            if piece.strip():
                lowered.append(piece.lower())
        return lowered

# Enhanced model class for loading
class StreamlitTextGenerator(torch.nn.Module):
    """Embedding -> stacked LSTM -> LayerNorm -> two-layer MLP over the vocabulary.

    Attribute names (`embedding`, `lstm`, `layer_norm`, `fc1`, `fc2`) determine
    the state-dict keys, so they must stay in sync with any saved checkpoint.
    """

    def __init__(self, vocab_size, embedding_dim=256, hidden_dim=512, num_layers=3, dropout=0.3):
        super().__init__()
        nn = torch.nn

        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.vocab_size = vocab_size

        # Dropout between LSTM layers is only requested for a multi-layer stack.
        inter_layer_dropout = dropout if num_layers > 1 else 0

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
            bidirectional=False,
        )
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(hidden_dim)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc2 = nn.Linear(hidden_dim // 2, vocab_size)
        self.relu = nn.ReLU()

    def forward(self, x, hidden=None):
        """Return `(logits, hidden)` for a batch-first tensor of token indices.

        `hidden` is passed straight to the LSTM; `None` starts from its default
        initial state.
        """
        features, hidden = self.lstm(self.embedding(x), hidden)
        features = self.dropout(self.layer_norm(features))
        features = self.dropout(self.relu(self.fc1(features)))
        return self.fc2(features), hidden

def load_model_and_dataset(model_path, dataset_path):
    """Load the trained model and its vocabulary/dataset info from disk.

    Args:
        model_path: Path to a torch checkpoint. Either a dict holding the
            weights under 'model_state_dict', or a bare state dict.
        dataset_path: Path to a pickled mapping accepted by
            StreamlitBookDataset.

    Returns:
        (model, dataset, device) on success. On any failure the error is shown
        via st.error and (None, None, None) is returned.
    """
    try:
        # SECURITY: pickle.load executes arbitrary code embedded in the file,
        # and torch.load may also unpickle depending on the installed torch
        # version's `weights_only` default. Only load files you trust.
        with open(dataset_path, 'rb') as f:
            dataset_info = pickle.load(f)

        dataset = StreamlitBookDataset(dataset_info)

        # Load model
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        checkpoint = torch.load(model_path, map_location=device)

        # Accept both a wrapped checkpoint and a bare state dict.
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint

        # Infer the architecture from the saved tensors instead of assuming it,
        # so checkpoints trained with other sizes load too. The previous
        # hard-coded values remain the fallback.
        embedding_dim, hidden_dim, num_layers = 256, 512, 3
        if 'embedding.weight' in state_dict:
            embedding_dim = state_dict['embedding.weight'].shape[1]
        if 'lstm.weight_hh_l0' in state_dict:
            # weight_hh_l0 has shape (4 * hidden_dim, hidden_dim)
            hidden_dim = state_dict['lstm.weight_hh_l0'].shape[1]
        lstm_layer_count = sum(1 for key in state_dict if key.startswith('lstm.weight_hh_l'))
        if lstm_layer_count:
            num_layers = lstm_layer_count

        # vocab_size deliberately comes from the dataset: a mismatch with the
        # checkpoint then surfaces as a load_state_dict error instead of
        # silently pairing the model with the wrong vocabulary.
        model = StreamlitTextGenerator(
            vocab_size=len(dataset.vocab),
            embedding_dim=embedding_dim,
            hidden_dim=hidden_dim,
            num_layers=num_layers
        )
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()

        return model, dataset, device
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None, None, None

def intelligent_generate_text(model, dataset, seed_text, max_length=150, temperature=0.8, top_k=50):
    """Generate text from `seed_text` with top-k sampling.

    Args:
        model: Module whose call `model(input_seq, hidden)` returns
            `(logits, hidden)`; logits are indexed as `[batch, position, vocab]`.
        dataset: Object providing `tokenize_text`, `token_to_idx` and
            `idx_to_token`.
        seed_text: Prompt to continue. An empty prompt falls back to 'the'.
        max_length: Number of sampling steps to run.
        temperature: Divides the logits before sampling; non-positive values
            are clamped to a tiny positive number (effectively greedy).
        top_k: Number of highest-scoring tokens to sample from (at least 1).

    Returns:
        The seed tokens followed by the generated tokens, joined by spaces.
        On failure a human-readable error string is returned instead.
    """
    if model is None or dataset is None:
        return "Model not loaded properly"

    device = next(model.parameters()).device
    model.eval()

    try:
        # Tokenize seed text
        seed_tokens = dataset.tokenize_text(seed_text)
        if not seed_tokens:
            seed_tokens = ['the']

        unk_idx = dataset.token_to_idx.get('<UNK>', 0)
        seed_indices = [dataset.token_to_idx.get(token, unk_idx) for token in seed_tokens]

        # Convert to tensor
        input_seq = torch.tensor([seed_indices], dtype=torch.long).to(device)
        generated_tokens = seed_tokens.copy()

        # Avoid division by zero / negative temperatures.
        temperature = max(float(temperature), 1e-6)
        autocast_device = 'cuda' if device.type == 'cuda' else 'cpu'
        vocab_limit = len(dataset.idx_to_token)

        hidden = None
        with torch.no_grad():
            for _ in range(max_length):
                # Get prediction
                with autocast(autocast_device):
                    output, hidden = model(input_seq, hidden)

                # Logits for the last position; sample in float32 so reduced
                # autocast precision does not distort the distribution.
                logits = output[0, -1].float() / temperature

                # Top-k sampling
                k = max(1, min(int(top_k), logits.size(-1)))
                top_logits, top_indices = torch.topk(logits, k)
                probabilities = F.softmax(top_logits, dim=-1)

                # Sample from top-k
                try:
                    choice = torch.multinomial(probabilities, 1).item()
                    next_token_idx = top_indices[choice].item()
                except RuntimeError:
                    # multinomial rejects NaN/inf probabilities: fall back to argmax
                    next_token_idx = torch.argmax(logits).item()

                # Convert back to token
                if next_token_idx < vocab_limit:
                    next_token = dataset.idx_to_token[next_token_idx]
                    if next_token not in ['<PAD>', '<UNK>']:
                        generated_tokens.append(next_token)
                else:
                    generated_tokens.append('<UNK>')

                # `hidden` already summarises everything fed so far, so only
                # the new token goes in next. Re-feeding the whole sequence
                # together with the carried state (as before) counted the
                # context twice and made each step cost O(sequence length).
                input_seq = torch.tensor([[next_token_idx]], dtype=torch.long, device=device)

        return ' '.join(generated_tokens)
    except Exception as e:
        return f"Error in generation: {str(e)}"

def format_answer(text, question):
    """Trim generated `text` to its first three '.'-delimited segments.

    Text with three or fewer segments is returned unchanged. `question` is
    accepted for interface compatibility but not used.
    """
    segments = text.split('.')
    if len(segments) <= 3:
        return text
    # Re-join the leading segments and restore the final full stop.
    return '. '.join(segments[:3]) + '.'

# Streamlit UI
def main():
    """Render the Streamlit UI: model upload/loading sidebar plus Q&A panel.

    The loaded model, dataset and device are kept in `st.session_state` so they
    survive Streamlit's script re-runs within one browser session.
    """
    # Local import keeps this function self-contained.
    import tempfile

    st.set_page_config(
        page_title="Book Knowledge Assistant",
        page_icon="📚",
        layout="wide"
    )

    st.title("📚 Book Knowledge Assistant")
    st.markdown("Ask questions about your trained book content!")

    # Initialize session state
    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False
        st.session_state.model = None
        st.session_state.dataset = None
        st.session_state.device = None

    # Sidebar for model loading
    with st.sidebar:
        st.header("Model Configuration")

        model_file = st.file_uploader("Upload Model File (.pth)", type=['pth'])
        dataset_file = st.file_uploader("Upload Dataset File (.pkl)", type=['pkl'])

        load_clicked = st.button("Load Model")
        if load_clicked and not (model_file and dataset_file):
            # Previously a click without both files did nothing at all.
            st.warning("Please upload both a model (.pth) and a dataset (.pkl) file before loading.")
        elif load_clicked:
            with st.spinner("Loading model..."):
                try:
                    # A private temporary directory per load avoids concurrent
                    # sessions overwriting each other's uploads (fixed /tmp
                    # paths did) and removes the files once loading is done.
                    with tempfile.TemporaryDirectory() as tmp_dir:
                        model_path = os.path.join(tmp_dir, "model.pth")
                        dataset_path = os.path.join(tmp_dir, "dataset.pkl")
                        with open(model_path, "wb") as f:
                            f.write(model_file.getvalue())
                        with open(dataset_path, "wb") as f:
                            f.write(dataset_file.getvalue())

                        # Load model and dataset
                        model, dataset, device = load_model_and_dataset(model_path, dataset_path)

                    if model is not None:
                        st.session_state.model_loaded = True
                        st.session_state.model = model
                        st.session_state.dataset = dataset
                        st.session_state.device = device
                        st.success("Model loaded successfully!")
                    else:
                        st.error("Failed to load model")
                except Exception as e:
                    st.error(f"Error: {str(e)}")

        if st.session_state.model_loaded:
            st.success("✅ Model Ready")
            st.info(f"Device: {st.session_state.device}")
            st.info(f"Vocabulary: {len(st.session_state.dataset.vocab)} tokens")

    # Main interface
    if not st.session_state.model_loaded:
        st.info("👈 Please upload and load your model files in the sidebar")
        st.markdown("""
        ### How to use:
        1. Upload your trained model file (.pth)
        2. Upload your dataset file (.pkl)
        3. Click 'Load Model'
        4. Ask questions in the main panel
        """)
        return

    # Question interface
    st.header("Ask Your Question")

    # Predefined questions
    predefined_questions = [
        "What are the main concepts discussed?",
        "Explain the key features",
        "How does this work?",
        "What are the benefits?",
        "Tell me more about this topic"
    ]

    selected_question = st.selectbox(
        "Choose a predefined question or type your own:",
        [""] + predefined_questions,
        key="predefined_q"
    )

    user_question = st.text_input(
        "Your Question:",
        value=selected_question if selected_question else "",
        placeholder="Enter your question about the book content..."
    )

    # Generation parameters
    col1, col2, col3 = st.columns(3)
    with col1:
        max_length = st.slider("Response Length", 50, 300, 150)
    with col2:
        temperature = st.slider("Creativity", 0.1, 1.5, 0.8)
    with col3:
        top_k = st.slider("Diversity (Top-K)", 10, 100, 50)

    generate_clicked = st.button("📝 Generate Answer", type="primary")
    if generate_clicked and not user_question:
        # Previously a click with an empty question did nothing at all.
        st.warning("Please enter a question first.")
    elif generate_clicked:
        with st.spinner("Generating answer..."):
            try:
                # Generate response
                prompt = f"Question: {user_question} Answer:"
                generated_text = intelligent_generate_text(
                    st.session_state.model,
                    st.session_state.dataset,
                    prompt,
                    max_length=max_length,
                    temperature=temperature,
                    top_k=top_k
                )

                # Format and display answer
                formatted_answer = format_answer(generated_text, user_question)

                st.subheader("🤖 Generated Answer:")
                st.markdown(f"**{formatted_answer}**")

                # Show raw generation for debugging
                with st.expander("🔍 See raw generation"):
                    st.text(generated_text)

            except Exception as e:
                st.error(f"Error generating answer: {str(e)}")

    # Example usage
    st.markdown("---")
    st.markdown("### 💡 Tips:")
    st.markdown("""
    - Be specific with your questions
    - Try different creativity levels
    - Longer responses may take more time
    - The model works best with questions related to your book's content
    """)

# Entry point: build the UI only when this file is executed as a script
# (e.g. via `streamlit run app.py`), not when it is imported as a module.
if __name__ == "__main__":
    main()

Overwriting app.py


### Add your ngrok auth token

In [4]:
!ngrok config add-authtoken 31CCfPaNlvTntxLilA1mz9MSP38_6YQUypMVfgWAe2SspGUz8

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [5]:
# Run the Streamlit app with Ngrok tunnel
from pyngrok import ngrok
import streamlit as st
import subprocess
import threading
import time
import os

# Kill any existing ngrok processes
!pkill ngrok

# Set up Ngrok
ngrok.set_auth_token("31CCfPaNlvTntxLilA1mz9MSP38_6YQUypMVfgWAe2SspGUz8")  # Add your Ngrok auth token here if you have one

# Function to run Streamlit
def run_streamlit():
    """Run the Streamlit server for app.py on port 8501; blocks until it exits.

    An argument list is used instead of a shell command string so no shell is
    involved in launching the process.
    """
    subprocess.run(
        [
            "streamlit", "run", "app.py",
            "--server.port", "8501",
            "--server.address", "0.0.0.0",
        ],
        check=False,
    )

# Start Streamlit in a background daemon thread so this cell can continue
# and the server does not keep the process alive on shutdown.
streamlit_thread = threading.Thread(target=run_streamlit, daemon=True)
streamlit_thread.start()

# Wait a moment for Streamlit to start
time.sleep(5)

# Create Ngrok tunnel
try:
    public_url = ngrok.connect(8501)
    print("✅ Streamlit app is running!")
    print(f"🔗 Public URL: {public_url}")
    print("📝 To use the app:")
    print("   1. Click the URL above")
    print("   2. Upload your model (.pth) and dataset (.pkl) files")
    print("   3. Ask questions about your book content")
    print("\n⚠️  Keep this cell running to maintain the web interface!")

    # Display QR code for easy access (optional)
    try:
        import qrcode
        from PIL import Image
        qr = qrcode.QRCode(version=1, box_size=10, border=5)
        qr.add_data(str(public_url))
        qr.make(fit=True)
        img = qr.make_image(fill_color="black", back_color="white")
        # Saved relative to the working directory so this also works outside
        # Colab, and so the path matches the message printed below.
        img.save("qr_code.png")
        print("📱 QR code saved as 'qr_code.png'")
    except ImportError:
        # qrcode / Pillow are optional extras; skip the QR code quietly.
        pass
    except Exception as qr_error:
        # Still best-effort, but report the reason instead of hiding it.
        print(f"⚠️  Could not create QR code: {qr_error}")

except Exception as e:
    print(f"❌ Error creating Ngrok tunnel: {e}")
    print("🔧 Trying alternative method...")

    # Alternative: Display local URL
    print("🌐 Access the app locally at: http://localhost:8501")
    print("💡 If you want public access, get a free Ngrok account at https://ngrok.com")

ERROR:pyngrok.process.ngrok:t=2025-08-30T05:28:09+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: The account 'Erehn' has been suspended.\nThis is usually the result of violating the ngrok Terms of Service.\nEmail support@ngrok.com if you think your suspension is an error.\r\n\r\nERR_NGROK_103\r\n"


❌ Error creating Ngrok tunnel: The ngrok process errored on start: authentication failed: The account 'Erehn' has been suspended.\nThis is usually the result of violating the ngrok Terms of Service.\nEmail support@ngrok.com if you think your suspension is an error.\r\n\r\nERR_NGROK_103\r\n.
🔧 Trying alternative method...
🌐 Access the app locally at: http://localhost:8501
💡 If you want public access, get a free Ngrok account at https://ngrok.com
