In [6]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.docstore.document import Document
import warnings
import getpass
import PyPDF2
import ollama
warnings.filterwarnings('ignore')

In [20]:
from IPython.display import Markdown, display
import os

# Force the pure-Python protobuf implementation.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" # setting protobuf env vars up

# SECURITY: the LangSmith API key must never be hardcoded in the notebook.
# A key that was previously committed here should be treated as leaked and
# revoked in the LangSmith settings. The key is now taken from the
# environment, and the user is only prompted (without echo) when it is unset.
# `getpass` is imported in the notebook's top-level import cell.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("LangSmith API key: ")
os.environ["LANGSMITH_TRACING"] = "true"

In [3]:
# Load the research paper PDF with UnstructuredPDFLoader.
# `data` is reused by the chunking cell further down.
file_path = "research_paper_final.pdf"
loader = UnstructuredPDFLoader(file_path)
data = loader.load()
# Preview the first loaded Document as the cell output.
data[0]

Document(metadata={'source': 'research_paper_final.pdf'}, page_content="From Signs to Speech: An End-to-End Conversational Platform for Deaf and Mute Individuals Using GRU and LLM Integration\n\nAryan Chauhan Department of Electrical and Electronic Engineering MIT World Peace University Pune, India chauhanaryan381@gmail.com\n\nAbdulqadir Kayamkhani Department of Electrical and Electronic Engineering MIT World Peace University Pune, India abdulqadirk2153@gmail.com\n\nAtharva Gujar Department of Electrical and Electronic Engineering MIT World Peace University Pune, India atharva.gujar7488@gmail.com\n\nManisha Kumawat Department of Electrical and Electronic Engineering MIT World Peace University Pune, India manisha.kumawat@mitwpu.edu.in\n\nMandar Gade\n\nDepartment of Electrical and Electronic Engineering MIT World Peace University Pune, India mandargade724@gmail.com\n\nindividuals are often disadvantaged in professional interview settings due to limited verbal relevant qualifications. Th

In [4]:
# Split the loaded document(s) into overlapping chunks for retrieval.
# chunk_size / chunk_overlap are in the splitter's length units
# (presumably characters, the splitter's default — confirm if a custom
# length function is ever passed).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(data)
print(f"The paper is split into {len(chunks)} chunks.")

The paper is split into 41 chunks.


In [7]:
# Step 3: Create lightweight TF-IDF embeddings
texts = [doc.page_content for doc in chunks]
# `fit` returns the fitted vectorizer itself, so no throwaway `_` assignment
# is needed (assigning to `_` would also shadow IPython's last-output variable).
vectorizer = TfidfVectorizer().fit(texts)


# Define lightweight embedding class
class TFIDFEmbeddings(Embeddings):
    """LangChain `Embeddings` adapter backed by the fitted module-level `vectorizer`.

    Each text is mapped to a dense TF-IDF vector whose length equals the
    vocabulary learned from `texts`. Results are converted to plain Python
    lists because the `Embeddings` interface is specified in terms of
    lists of floats rather than NumPy arrays.
    """

    def embed_documents(self, docs):
        """Return one dense TF-IDF vector (list of floats) per string in `docs`."""
        return vectorizer.transform(docs).toarray().tolist()

    def embed_query(self, query):
        """Return the dense TF-IDF vector (list of floats) for a single query string."""
        return vectorizer.transform([query]).toarray()[0].tolist()


# Step 4: Create VectorStore using FAISS (or Chroma if needed)
embedding_model = TFIDFEmbeddings()
vectorstore = FAISS.from_documents(chunks, embedding_model)
retriever = vectorstore.as_retriever()

print("✅ Lightweight vector store created.")

✅ Lightweight vector store created.


In [8]:
# Chat model served through Ollama; `local_model` is the model name passed
# to ChatOllama (assumes that model is available to the local Ollama
# installation — TODO confirm it has been pulled).
local_model = "llama3.2"
llm = ChatOllama(model=local_model)

In [10]:
# Prompt that asks the LLM to rephrase the user's question into alternative
# versions, one per line, to broaden what the vector search can match.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate 2 different versions of the given user questions to retrieve relevant documents
    from the vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. 
    provide these new alternative questions on a new line.
    Original question: {question}"""
)

# Rebind `retriever` to a MultiQueryRetriever built from the vector-store
# retriever, the local LLM and QUERY_PROMPT. This replaces the plain
# `vectorstore.as_retriever()` assigned to the same name earlier.
retriever = MultiQueryRetriever.from_llm(
    vectorstore.as_retriever(),
    llm,
    prompt=QUERY_PROMPT
)

In [12]:
# rag prompt template
template = """Answer the questions based ONLY on the following context: 
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [13]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one plain-text context block.

    Without this step the retriever's raw list of Document objects is
    interpolated into the prompt as its repr (metadata, escaped newlines),
    which adds noise to the context the model has to read.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: retrieve context for the question, fill the prompt, call the
# LLM and return the reply as a plain string.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [21]:
def chat(question):
    """Run `question` through the RAG `chain` and render the reply as Markdown.

    Args:
        question: The user question passed to `chain.invoke`.

    Returns:
        Whatever `display` returns for the rendered Markdown object (the
        recorded runs show no `Out` value for calls to this function).
    """
    answer_text = chain.invoke(question)
    rendered = Markdown(answer_text)
    return display(rendered)

In [22]:
# Ask the RAG chain who wrote the paper; the reply is rendered as Markdown.
chat("Who is the author of the paper")

Unfortunately, the provided context does not mention the author of the paper. It appears to be a research paper discussing a system for real-time communication between deaf-mute individuals and interviewers, but the identity of the authors is not mentioned.

In [19]:
# Ask for a summary of the paper. NOTE: the chain answers from the chunks the
# retriever returns for this question, not from the full PDF text.
chat("Give me a detailed summary of the entire research paper")

The research paper proposes a real-time sign language recognition system that translates sign gestures into both text and speech, enabling effective interaction between interviewers and deaf-mute individuals in professional interviews.

The system architecture consists of several components:

1. Computer Vision: The user activates their webcam stream, which is processed using OpenCV and MediaPipe for landmark detection. Specifically, the system focuses on the hands and face regions to extract key points for gesture recognition.
2. Deep Learning Model: The extracted landmarks are passed into a trained Gated Recurrent Unit (GRU) model, optimized through experimentation for temporal classification of custom technical signs. The GRU model achieves a training accuracy of 97% and a test accuracy of 94%.
3. Language Model: The output of the GRU model is translated into a preliminary sentence, which is then refined by a locally hosted Ollama LLM. The language model improves the sentence structure, adds missing functional words, and outputs a coherent sentence.
4. Text-to-Speech Engine: The final message is delivered back to the interviewer interface, where it is converted into audio using the pyttsx3 text-to-speech engine.

The system architecture is designed to enable smooth, real-time communication between deaf-mute individuals and interviewers through a modular, interactive web platform. The proposed framework supports both roles with role-specific processing pipelines that integrate computer vision, deep learning, natural language processing, and cloud-based communication.

To enhance grammatical fluency and coherence, the system uses a conversion dictionary that maps numeric class indices to textual words. The converted text is then passed into the language processing module for correction and refinement.

The system successfully delivers a robust and fully functional end-to-end system that bridges the communication gap between deaf-mute individuals and interviewers through real-time sign language recognition and speech synthesis.

Future improvements are suggested, including:

* Expanding the vocabulary beyond 10 technical signs
* Enabling continuous sign sentence recognition
* Incorporating training with language datasets and dynamic sign Transformer-based architectures
* Multilingual support for text-to-speech and sign-to-text translation
* Integration with mobile platforms and edge devices
* User-specific adaptation

The research paper also presents experimental results, including real-time testing on various devices and lighting conditions, which confirms the suitability of the GRU model for conversational use.

Overall, the research paper proposes a novel approach to bridge the communication gap between deaf-mute individuals and interviewers through real-time sign language recognition and speech synthesis.

In [30]:
class _PdfTfidfEmbeddings(Embeddings):
    """`Embeddings` adapter that serves dense TF-IDF vectors from a fitted vectorizer."""

    def __init__(self, fitted_vectorizer):
        # Holding the vectorizer on the instance avoids redefining the class
        # (and closing over a local variable) on every `pdf` call.
        self._vectorizer = fitted_vectorizer

    def embed_documents(self, docs):
        """Return one TF-IDF vector (list of floats) per string in `docs`."""
        return self._vectorizer.transform(docs).toarray().tolist()

    def embed_query(self, query):
        """Return the TF-IDF vector (list of floats) for a single query string."""
        return self._vectorizer.transform([query]).toarray()[0].tolist()


def pdf(question, file_path, local_model="llama3.2", chunk_size=1000, chunk_overlap=200):
    """Answer `question` about the PDF at `file_path` with a self-contained RAG pipeline.

    Steps: load the PDF, split it into chunks, index the chunks in FAISS using
    TF-IDF vectors, build a multi-query retriever driven by a local Ollama
    chat model, then answer from the retrieved context and render the reply
    as Markdown.

    Args:
        question: The question to ask about the document.
        file_path: Path to the PDF file to load.
        local_model: Ollama model name passed to ChatOllama.
        chunk_size: `chunk_size` for RecursiveCharacterTextSplitter.
        chunk_overlap: `chunk_overlap` for RecursiveCharacterTextSplitter.

    Returns:
        Whatever `display` returns for the rendered Markdown answer.

    Raises:
        ValueError: If no text chunks could be produced from the PDF.
    """
    loader = UnstructuredPDFLoader(file_path)
    data = loader.load()
    print("Data has been loaded.")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_documents(data)
    print(f"The paper is split into {len(chunks)} chunks.")
    if not chunks:
        # Fail with a clear message instead of an opaque error raised while
        # fitting the vectorizer on an empty corpus.
        raise ValueError(f"No text could be extracted from {file_path!r}; nothing to index.")

    # Step 3: Create lightweight TF-IDF embeddings
    texts = [doc.page_content for doc in chunks]
    vectorizer = TfidfVectorizer().fit(texts)

    # Step 4: Create VectorStore using FAISS (or Chroma if needed)
    embedding_model = _PdfTfidfEmbeddings(vectorizer)
    vectorstore = FAISS.from_documents(chunks, embedding_model)
    print("✅ Lightweight vector store created.")

    llm = ChatOllama(model=local_model)
    print("Model has been loaded")

    QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate 2 different versions of the given user questions to retrieve relevant documents
    from the vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. 
    provide these new alternative questions on a new line.
    Original question: {question}"""
    )

    retriever = MultiQueryRetriever.from_llm(
        vectorstore.as_retriever(),
        llm,
        prompt=QUERY_PROMPT
    )
    # Reported only after the retriever actually exists.
    print("Retriever has been loaded")

    template = """Answer the questions based ONLY on the following context: 
    {context}
    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)

    def format_docs(docs):
        # Feed the model the chunks' text rather than the repr of Document objects.
        return "\n\n".join(doc.page_content for doc in docs)

    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    print("Model thinking...\n\n\n")
    answer = chain.invoke(question)
    return display(Markdown(answer))

# Example run of the self-contained `pdf` pipeline on the research paper.
question = "Tell me in detail in 10 words what is in this pdf?"
file_path = "research_paper_final.pdf"
pdf(question, file_path)

Data has been loaded.
The paper is split into 41 chunks.
✅ Lightweight vector store created.
Model has been loaded
Retriever has been loaded
Model thinking...





Sign language recognition system with speech synthesis and Firebase integration.