<a href="https://colab.research.google.com/github/VidyaGanes/Hackathon/blob/main/Streamlit_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install streamlit transformers python-docx



In [5]:
from docx import Document
import streamlit as st
from transformers import pipeline, set_seed
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Function to extract text from a Word document
def extract_text_from_docx(docx_path):
    doc = Document(docx_path)
    text = []
    for paragraph in doc.paragraphs:
        text.append(paragraph.text)
    return '\n\n'.join(text)

# Function to split text into chunks
def chunk_text(text, chunk_size=500):
    chunks = []
    words = text.split()
    current_chunk = []
    current_length = 0

    for word in words:
        if current_length + len(word) < chunk_size:
            current_chunk.append(word)
            current_length += len(word) + 1  # +1 for the space
        else:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)

    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

# Initialize the Streamlit app
def main():
    st.title('Lost and Found: GenAI Application')
    st.markdown("""
    This web app generates responses based on prompts using the Gemini model and relevant context from a document.
    """)

    # Extract text from the Word document
    input_text = extract_text_from_docx('hackathon_input.docx')
    text_chunks = chunk_text(input_text)

    # Initialize the Gemini model
    set_seed(42)
    genai_model = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")

    # Function to find relevant chunk based on prompt
    def find_relevant_chunk(prompt):
        prompt_vector = vectorizer.transform([prompt])
        chunk_scores = cosine_similarity(prompt_vector, chunk_vectors).flatten()
        max_score_index = np.argmax(chunk_scores)
        return text_chunks[max_score_index]

    # Function to generate response based on relevant chunk
    def generate_response(prompt, relevant_chunk, max_length=150):
        combined_prompt = f"{prompt}\n\nContext: {relevant_chunk}"
        return genai_model(combined_prompt, max_length=max_length, do_sample=True)[0]['generated_text']

    # Vectorize chunks for cosine similarity
    vectorizer = TfidfVectorizer(stop_words='english')
    chunk_vectors = vectorizer.fit_transform(text_chunks)

    # Input prompt
    prompt = st.text_area('Enter your prompt:', '')

    if st.button('Generate Response'):
        if prompt:
            # Find the most relevant chunk based on the prompt
            relevant_chunk = find_relevant_chunk(prompt)

            # Generate response based on the prompt and relevant chunk
            response = generate_response(prompt, relevant_chunk)

            # Display response
            st.markdown(f'**Response:** \n{response}')

if __name__ == '__main__':
    main()

2024-07-15 11:03:49.366 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/10.7G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

ValueError: empty vocabulary; perhaps the documents only contain stop words

In [7]:
!transformers-cli login

2024-07-15 11:24:58.224439: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-15 11:24:58.224565: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-15 11:24:58.375471: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[1m[31mERROR! `huggingface-cli login` uses an outdated login mechanism that is not compatible with the Hugging Face Hub backend anymore. Please use `huggingface-cli login instead.[0m


In [8]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineGrained).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in yo

In [12]:
pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.6


In [13]:
from pyngrok import ngrok

# Start a Streamlit server on Colab
public_url = ngrok.connect(port='8501')

# Open the link to the Streamlit app
public_url



ERROR:pyngrok.process.ngrok:t=2024-07-15T11:30:03+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2024-07-15T11:30:03+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2024-07-15T11:30:03+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [14]:
from pyngrok import ngrok

# Replace 'your_ngrok_authtoken' with your actual ngrok authtoken
ngrok.set_auth_token('2jHU1xGXEWcMQIZequRH2uxKl4i_5371A9CbCpCczs11W1WA1')

In [16]:
!pip install -q pyngrok==5.1.0

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/745.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.6/745.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m532.5/745.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m745.3/745.3 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone


In [17]:
from pyngrok import ngrok

# Start a Streamlit server on Colab
public_url = ngrok.connect(addr='8501', proto='http')

# Open the link to the Streamlit app
public_url

<NgrokTunnel: "https://c2c7-34-105-86-38.ngrok-free.app" -> "http://localhost:8501">

In [19]:
!streamlit run Streamlit_test.ipynb &

# Connect ngrok to the Streamlit port
public_url = ngrok.connect(addr='8501', proto='http')

# Display the public URL
public_url

Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Streamlit requires raw Python (.py) files, not .ipynb.
For more information, please see https://docs.streamlit.io


<NgrokTunnel: "https://f7a3-34-105-86-38.ngrok-free.app" -> "http://localhost:8501">