In [None]:
!pip install -q langchain-groq langchain chromadb sentence-transformers unstructured
!pip install -U langchain-community
!pip install -U langchain


In [None]:
from langchain_groq import ChatGroq

# For security, consider using environment variables instead of hardcoding keys
import os
groq_api_key = os.getenv("GROQ_API_KEY", "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")  # Replace with your actual key

llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)



Mounted at /content/drive


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pypdf
!pip install -U langchain-community

In [35]:
import os
from langchain.document_loaders import PyPDFLoader, UnstructuredFileLoader

# Step 2: Define document loading function
def load_documents_from_directory(directory_path):
    documents = []
    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)
        if filename.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
            documents.extend(loader.load())
        elif filename.endswith('.txt'):
            loader = UnstructuredFileLoader(file_path)
            documents.extend(loader.load())
    return documents

# Step 3: Set your actual folder path inside Google Drive
directory_path = "/content/drive/MyDrive/Culture_and_Quiz"

# Step 4: Load documents
documents = load_documents_from_directory(directory_path)

# Preview the first doc (if loaded)
if documents:
    print(documents[0].page_content[:500])
else:
    print("No documents found in the specified path.")

Performing Arts: Music, Dance and Drama
Notes
Indian Culture and Heritage Secondary Course 168
MODULE - V
Painting,
Performing Arts
and Architecture
12
PERFORMING ARTS: MUSIC,
DANCE AND DRAMA
M
usic, dance, drama, folk theatre or puppetry our country India, had all of it in
abundance. Oh! somebody playing the Dhol (music instrument) and there is
music, we run to see what it is. Well, it is Lohri which is normally held on 13th
January every year in the Northern part of our country. People are sin


In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Chunking
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

# Embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Store in ChromaDB
chroma_dir = "/content/drive/MyDrive/Culture_and_Quiz_db"
vectordb = Chroma.from_documents(docs, embedding=embedding_model, persist_directory=chroma_dir)
vectordb.persist()

print("✅ Chroma DB created and persisted.")

✅ Chroma DB created and persisted.


  vectordb.persist()


In [37]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import chromadb

# 1. First clean up any existing database files
!rm -rf "/content/drive/MyDrive/Culture_and_Quiz_db"

# 2. Recreate with proper configuration
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
persist_directory = "/content/drive/MyDrive/Culture_and_Quiz_db"

# 3. Create Chroma client with explicit settings
client = chromadb.PersistentClient(
    path=persist_directory,
    settings=chromadb.Settings(
        allow_reset=True,
        is_persistent=True,
        anonymized_telemetry=False
    )
)

# 4. Initialize fresh vectorstore
vectorstore = Chroma(
    client=client,
    collection_name="culture_quiz",
    embedding_function=embedding,
    persist_directory=persist_directory
)

# 5. Now recreate your vector database using previous code
# (Re-run your document ingestion code here)
# create_vector_db(...)

# 6. Then retrieve
retriever = vectorstore.as_retriever()



In [38]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# 3. Define the prompt template
template = """You are a an expert on Indian culture and heritage answering questions based on the given context.

Context:
{context}

Question:
{question}

Answer:"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# 4. Set up QA chain with the custom prompt
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
)

# 5. Ask a question
query = "What is kalasha?"
response = qa_chain.run(query)
print(response)


In Indian culture and heritage, a Kalasha (कलаша) refers to a sacred pot or vessel that is often used in Hindu and Buddhist rituals. It is typically made of copper, bronze, or terracotta and is used to hold sacred water, flowers, or other offerings.

In Hindu rituals, the Kalasha is believed to represent the divine feminine energy, Ganga, and is often used in pujas (worship ceremonies) to invoke the blessings of the gods. The Kalasha is typically decorated with flowers, leaves, and other ornaments and is placed on a special pedestal or altar.

In Buddhist traditions, the Kalasha is used as a symbol of enlightenment and is often depicted in art and architecture as a symbol of the Buddha's enlightenment. In Tibetan Buddhism, the Kalasha is used in rituals to purify and consecrate sacred objects and spaces.

Overall, the Kalasha is a significant symbol in Indian culture and heritage, representing the connection between the sacred and the profane, and the importance of ritual and spiritual

In [39]:
pip install gradio




In [40]:
import gradio as gr

def answer_question(user_question):
    try:
        response = qa_chain.run(user_question)
        return response
    except Exception as e:
        return f"⚠️ Error: {str(e)}"

gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=2, placeholder="Ask a cultural question..."),
    outputs="text",
    title="📚 Cultural QA Assistant",
    description="Ask questions related to Indian culture and heritage."
).launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://937d9e20ea71fe2bbc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


