In [None]:
# Dependencies -- uncomment the lines below to install them on a fresh runtime.
#!pip install pypdf
#!pip install -q langchain
#!pip install langchain_community
#!pip install -U sentence-transformers

# The pipeline stages described to the reader, in execution order.
_PIPELINE_STEPS = (
    "Reads text from a PDF file named 'LCD controller ST7066U.pdf'.",
    "Splits the extracted text into smaller chunks using `RecursiveCharacterTextSplitter`.",
    "Uses the `SentenceTransformerEmbeddings` model 'all-MiniLM-L6-v2' to create embeddings for each text chunk.",
    "Prompts the user to enter a search keyword.",
    "Creates an embedding for the user's keyword.",
    "Calculates the cosine similarity between the keyword embedding and all the document chunk embeddings.",
    "Identifies and prints the top 3 most similar document chunks along with their similarity scores.",
)

# Assemble the numbered overview: a leading blank line, the heading, then one
# "<n>. <step>" line per stage (each terminated by a newline).
_overview = "\nThis script performs the following steps:\n"
_overview += "".join(
    f"{number}. {step}\n" for number, step in enumerate(_PIPELINE_STEPS, start=1)
)
print(_overview)

In [None]:
## Read the PDF and collect the text of every page into `all_text`
from pypdf import PdfReader

# Location of the datasheet to index. The notebook imports `google.colab`
# further down, so this is presumably a file uploaded to the Colab runtime.
PDF_PATH = '/content/LCD controller ST7066U.pdf'

reader = PdfReader(PDF_PATH)
number_of_pages = len(reader.pages)

# Extract each page once and join with a newline. Without a separator the last
# word of one page is glued to the first word of the next, which corrupts the
# chunks produced later. `or ""` keeps the join safe should a page yield no
# text object at all.
page_texts = [(page.extract_text() or "") for page in reader.pages]
all_text = "\n".join(page_texts)

if not all_text.strip():
    # Nothing could be extracted; every later step would operate on an empty
    # string, so tell the reader now.
    print(f"Warning: no text could be extracted from {PDF_PATH!r}.")

print("Text from all pages:")
print(all_text)

In [None]:
# Break down the text into smaller, manageable chunks using LangChain
try:
    # The splitters are published in the standalone `langchain-text-splitters` package.
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except ImportError:
    # Fall back to the import path this notebook was originally written against.
    from langchain.text_splitter import RecursiveCharacterTextSplitter

# Text to split. The variable name is kept for compatibility; it holds the
# full text extracted from the PDF, not a speech.
state_of_the_union = all_text

text_splitter = RecursiveCharacterTextSplitter(
    # Maximum chunk length and the overlap between consecutive chunks, both
    # measured with `length_function` (here: number of characters).
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
)

texts = text_splitter.create_documents([state_of_the_union])

if not texts:
    # Fail with an actionable message instead of a bare IndexError below.
    raise ValueError(
        "The text splitter produced no chunks; check that text was extracted from the PDF."
    )

# Show the first chunk, and display the second one only when it exists.
print(texts[0].page_content)
texts[1].page_content if len(texts) > 1 else None

In [None]:
# Turn every text chunk into a vector using a sentence-transformers model

#!pip install -U sentence-transformers
from langchain_community.embeddings import SentenceTransformerEmbeddings

# Open-source embedding model, accessed through LangChain's wrapper class
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Gather the raw chunk strings first, then embed them in a single call.
# Later cells index `texts` with positions taken from `embeddings`, so the two
# lists are treated as parallel.
chunk_contents = [chunk.page_content for chunk in texts]
embeddings = embedding_function.embed_documents(chunk_contents)

# Report how many vectors were produced and peek at the first five components
# of the first one.
print(f"Found {len(embeddings)} documents")
first_vector_head = embeddings[0][:5]
print(f"Here's a sample of the first document: {first_vector_head}...")

Popular Models (available via sentence-transformers):
*   `all-MiniLM-L6-v2`: Very fast, good quality, small model size. Excellent general-purpose starting point.
*   `all-mpnet-base-v2`: Higher quality than MiniLM, slightly larger and slower. Often a top performer on leaderboards.
*   `multi-qa-mpnet-base-dot-v1`: Excellent for semantic search/question answering retrieval tasks (use dot-product similarity).
*   `e5-large-v2` (or other `e5` variants like `multilingual-e5-large`): Often state-of-the-art open-source models.
*   `bge-large-en-v1.5` (or other BGE variants): Strong open-source models from BAAI (Beijing Academy of Artificial Intelligence).

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Reuse the embedding model that produced `embeddings` instead of loading a
# second copy of it: this saves the load time and memory, and guarantees the
# query is embedded by exactly the same model as the document chunks.
embed_input = embedding_function

# Surrounding whitespace carries no meaning for the search, and an empty
# keyword would only produce arbitrary matches, so reject it up front.
input_keyword = input("Enter the keyword to search").strip()
if not input_keyword:
    raise ValueError("The search keyword must not be empty.")

# 1. Embed the query
query_embedding = embed_input.embed_query(input_keyword)

# 2. Calculate similarity
# cosine_similarity works on 2D inputs, so wrap the single query vector in a
# list; row 0 of the result holds the score against every document chunk.
query_embedding_2d = [query_embedding]
similarities = cosine_similarity(query_embedding_2d, embeddings)[0]

# 3. Rank and retrieve
# Sort descending by score and keep at most `top_n` indices (fewer when the
# document has fewer chunks than that).
top_n = 3  # You can adjust the number of top results you want
top_indices = similarities.argsort()[::-1][:top_n]

# Report the number of results actually returned rather than the requested maximum.
print(f"\nTop {len(top_indices)} most similar document chunks for the keyword '{input_keyword}':")
for index in top_indices:
    print(f"Similarity: {similarities[index]:.4f}")
    print(texts[index].page_content)
    print("-" * 20)

In [None]:
# Ask Gemini to answer the user's query using only the retrieved chunks.
# Assumes the Gemini client library is already installed.
import os
from getpass import getpass

from google.colab import auth
#auth.authenticate_user()

import google.generativeai as genai

# Do not hardcode the API key in the notebook: take it from the
# GOOGLE_API_KEY environment variable, or prompt for it without echoing.
api_key = os.environ.get("GOOGLE_API_KEY") or getpass("Enter your Gemini API key: ")
genai.configure(api_key=api_key)

# Initialize the Gemini model
#print("Available models:")
#for m in genai.list_models():
#  if 'generateContent' in m.supported_generation_methods:
#    print(m.name)

#models/gemini-2.0-flash
model = genai.GenerativeModel('gemini-2.0-flash')
# Get the text from the top similar document chunks based on the previously calculated top_indices
top_texts = [texts[index].page_content for index in top_indices]

# Combine the top texts into a single context string
context = " ".join(top_texts)

# Construct the prompt for the Gemini model
# This prompt instructs the model to answer the question based *only* on the provided context.
prompt = f"""Based on the following context, answer the question: '{input_keyword}'

Context:
{context}

If the information is not available in the context, please state that you cannot answer based on the provided information.
"""

# Send the prompt to the Gemini model and get the response
response = model.generate_content(prompt)

# `response.text` raises ValueError when the response carries no text part
# (for example when the prompt was blocked), so report that case instead of
# letting the cell fail with a traceback.
try:
    answer = response.text
except ValueError:
    answer = None
    print("\nGemini returned no text for this prompt.")
    print(response.prompt_feedback)

# Print the response from the Gemini model
if answer is not None:
    print("\nGemini Response based on context:")
    print(answer)

Gemini Response based on context:
Based on the provided context, you can control the cursor in the following ways:

*   **Cursor ON/OFF:** Use the "C" bit (8th line). Setting C to "High" turns the cursor on.
*   **Moving direction:** You can set the moving direction of the cursor (and display) using the "Entry Mode Set".
*   **Cursor shift:** You can shift the cursor.

The context also mentions that you can bring the cursor to the left edge without changing anything else. However, it doesn't specify *how* to do this, only that it's possible. The context also references other potential controls, such as cursor blinking, but doesn't explain how to achieve them.
