In [None]:
!pip install -U langchain langchain-community langchain-core langchain-google-genai chromadb



In [None]:
# Set up environment and API key
import os
os.environ["GOOGLE_API_KEY"] = "AIzaSyDLYh0y51ovlhfmUB11CRJKTHaquBPCESM"  # 🔐 Replace with your Gemini API Key

In [54]:
# ✅ Import necessary libraries
import json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import JSONLoader
from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [55]:
# 🔹 Load a single JSON file
file_path = "/content/Introduction_to_Ai.json"  #  Replace with your actual file path

with open(file_path, 'r', encoding='utf-8') as f:
    json_data = json.load(f)

# Wrap into a list for compatibility with rest of pipeline
raw_docs = [json_data]
print(" Successfully loaded ")

 Successfully loaded 


In [56]:
# 🔹 Split the document into chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

structured_docs = [Document(page_content=json.dumps(entry)) for entry in raw_docs]
doc_chunks = splitter.split_documents(structured_docs)
print(f"✅ Created {len(doc_chunks)} chunks.")

✅ Created 10 chunks.


In [57]:
# 🔹 Create embeddings using Gemini
embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [58]:
# 🔹 Build vector store
if doc_chunks:
    vector_db = Chroma.from_documents(
        documents=doc_chunks,
        embedding=embedder,
        persist_directory="./chroma_db"
    )
    vector_db.persist()


In [59]:
# 🔹 Set up Gemini LLM + RetrievalQA
if 'vector_db' in locals():
    gemini_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-thinking-exp-01-21", temperature=0.2)
    retriever = vector_db.as_retriever(search_kwargs={"k": 3})

    rag_prompt = """
    You are an expert on the code of Artificial Intelligence and Machine Learning. Use the following context to answer the user's question.
    If you don't know the answer, just say that you don't have enough information. Do not try to make up an answer.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """
    custom_prompt = PromptTemplate(template=rag_prompt, input_variables=["context", "question"])

    qa_chain = RetrievalQA.from_chain_type(
        llm=gemini_llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": custom_prompt},
    )
    print("RAG chain created ")
else:
    print("RAG chain could not be initialized.")

RAG chain created 


In [60]:
if 'qa_chain' in locals():
    def Machine_Learning_Question(question):
        """
        Queries the introduction to AI and prints the answer.
        """
        print(f"\n Question: {question}")
        response = qa_chain.invoke({"query": question})
        print("\nAnswer")
        print(response['result'])

In [63]:
if 'qa_chain' in locals():
    questions = [
        "Q1) What is supervised Learning? "
        "Q2) What is meant by labeled data? "

    ]

    for q in questions:
        Machine_Learning_Question(q)

else:
    print("I don't have enough information to answer this.")


 Question: Q1) What is supervised Learning? Q2) What is meant by labeled data? 

Answer
Q1) Supervised Learning is a type of machine learning where the training data is labeled, and the system tries to learn from a "teacher" (i.e., the labels).

Q2) Labeled data refers to training data that includes the desired output or target for each input example, allowing the system to learn by associating inputs with their correct outputs.
