In [3]:
print("Check")

Check


In [8]:
import os
from dotenv import load_dotenv

import pandas as pd
from langchain.chat_models import init_chat_model # to create the brain
from langchain.tools import tool # weapon, retriever tool in this proj
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_huggingface import HuggingFaceEmbeddings # to download model, to convert csv to vectors

load_dotenv()

True

In [9]:
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [10]:
import os
from langchain.chat_models import init_chat_model
# groq : model_name
model = init_chat_model("groq:llama-3.3-70b-versatile")
response = model.invoke("Why am I learning at DOT, SPPU?")
print(response)

content="You're learning at the Department of Technology (DOT) under Savitribai Phule Pune University (SPPU). That's great!\n\nThe Department of Technology at SPPU is known for its excellent academic programs, research opportunities, and industry collaborations. Here are some possible reasons why you might be learning at DOT, SPPU:\n\n1. **Academic excellence**: SPPU is a reputable university in India, and the Department of Technology is one of its prominent departments. You might be drawn to the university's strong academic reputation, experienced faculty, and well-structured curriculum.\n2. **Specialized programs**: DOT, SPPU offers various undergraduate and postgraduate programs in fields like engineering, technology, and management. You might be interested in one of these specialized programs, which can help you develop skills and knowledge in your chosen field.\n3. **Research opportunities**: The department has a strong research focus, with faculty members and students working on 

Data Loading

In [22]:
class AnimeDataLoader:
    # for diff inputs there are diff outputs
    def __init__(self, original_csv: str, processed_csv: str):
        self.original_csv = original_csv
        self.processed_csv = processed_csv
    
    def load_and_process(self):
        df = pd.read_csv(
            self.original_csv,
            # encodes string to numbers
            encoding='utf-8',
            # ignore problematic lines / rows
            # Aish, 22, Pune
            # Vaish, 22 ---> gone 
            on_bad_lines='skip'
        ).dropna()

        required_cols = {"Name", "Genres", "synopsis"}
        if not required_cols.issubset(df.columns):
            raise ValueError("Missing required columns in CSV")
        
        df["combined_info"] = (
            "Title: " + df["Name"]
            + " Overview: " + df["sypnopsis"]
            + " Genres: " + df["Genres"]
        )

        df[["combined_info"]].to_csv(
            self.processed_csv,
            index=False,
            encoding="utf-8"
        )

        return self.processed_csv


In [None]:
# data transformation
class AnimeDataLoader:
    def __init__(self, original_csv: str, processed_csv: str):
        self.original_csv = original_csv
        self.processed_csv = processed_csv

    def load_and_process(self):
        df = pd.read_csv(
            self.original_csv,
            encoding="utf-8",
            on_bad_lines="skip"
        ).dropna()

        required_cols = {"Name", "Genres", "sypnopsis"}
        if not required_cols.issubset(df.columns):
            raise ValueError("Missing required columns in CSV")

        df["combined_info"] = (
            "Title: " + df["Name"]
            + " Overview: " + df["sypnopsis"]
            + " Genres: " + df["Genres"]
        )

        df[["combined_info"]].to_csv(
            self.processed_csv,
            index=False,
            encoding="utf-8"
        )

        return self.processed_csv

In [12]:
original_csv = "data/anime_with_synopsis.csv"
processed_csv = "data/anime_updated.csv"


hi = AnimeDataLoader(original_csv, processed_csv)
hi.load_and_process()

'data/anime_updated.csv'

Data Splitting

In [13]:
class VectorStoreBuilder:
    # constructor
    # persist_dir: is string because it is path of folder and default name is chroma_db
    # where to be stored and in which folder (chroma_db)
    def __init__(self, csv_path: str, persist_dir: str = "chroma_db"):
        self.csv_path = csv_path
        self.persist_dir = persist_dir
        # not accepted from the user but initialized internally 
        self.embedding = HuggingFaceEmbeddings(
            model_name = "all-MiniLM-L6-v2"
        )
    
    def build_and_save_vectorstore(self):
        # loader --> variable/ object, CSVLoader class
        loader = CSVLoader(
            file_path = self.csv_path,
            encoding = "utf-8",
            metadata_columns=[]
        )

        # loader class gets initialized here
        documents = loader.load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size = 1000,
            chunk_overlap = 20
        )

        # splitting the documents into chunks which was loaded
        chunks = splitter.split_documents(documents)

        # create vector store from these chunks
        # Chroma class from langchain_community.vectorstores
        # to convert documents to vectors we need embeddings
        # to store these vectors we need persist_directory
        db = Chroma.from_documents(
            chunks, # kya
            self.embedding, # kaise
            persist_directory=self.persist_dir # kaha
        )

        # call persist method from db
        db.persist()

    def load_vector_store(self):
        # store and load the data
        return Chroma(
            persist_directory=self.persist_dir,
            embedding_function=self.embedding
        )

In [14]:
print("BUILDING VECTOR STORE.")

original_csv = "data/anime_with_synopsis.csv"
processed_csv = "data/anime_updated.csv"
persist_dir = "chroma_db"

hi = AnimeDataLoader(original_csv, processed_csv)

processed_csv_path = hi.load_and_process()

BUILDING VECTOR STORE.


In [15]:
print("STILL BUILDING VECTOR STORE.")

vector_builder = VectorStoreBuilder(
    processed_csv_path, 
    persist_dir=persist_dir
    )

vector_builder.build_and_save_vectorstore()

print("VECTOR STORE BUILT AND SAVED.")

STILL BUILDING VECTOR STORE.


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


VECTOR STORE BUILT AND SAVED.


  db.persist()


Retriever Tool

In [None]:
vector_builder = VectorStoreBuilder(
    csv_path = "",
    persist_dir="chroma_db"
    )

# coverting vector db into retriever obj
retriever = vector_builder.load_vector_store().as_retriever()

In [20]:
# coverting this into numbers and comparing to vectors stored to show similar results
# from the db
query = "Tell me about naruto"

retriever.invoke(query)

[Document(metadata={'source': 'data/anime_updated.csv', 'row': 10}, page_content="combined_info: Title: Naruto Overview: oments prior to Naruto Uzumaki's birth, a huge demon known as the Kyuubi, the Nine-Tailed Fox, attacked Konohagakure, the Hidden Leaf Village, and wreaked havoc. In order to put an end to the Kyuubi's rampage, the leader of the village, the Fourth Hokage, sacrificed his life and sealed the monstrous beast inside the newborn Naruto. Now, Naruto is a hyperactive and knuckle-headed ninja still living in Konohagakure. Shunned because of the Kyuubi inside him, Naruto struggles to find his place in the village, while his burning desire to become the Hokage of Konohagakure leads him not only to some great new friends, but also some deadly foes. Genres: Action, Adventure, Comedy, Super Power, Martial Arts, Shounen"),
 Document(metadata={'row': 43, 'source': 'data/anime_updated.csv'}, page_content='Kotori, the two sisters Mizukoshi Moe and Mako who prefer eating nabe on the s

In [None]:
# decorator class to create tool
@tool
# -> returns in str
def anime_retriever_tool(query: str) -> str:
    # docstring: to explain what the tool does
    """
    Use this tool to search the anime knowledge base.

    Always call this tool for anime-related questions such as:
    recommendations, similarity search, genres, or plot summaries.

    Input:
    - query: User's anime preference or question.

    Output:
    - Relevant anime information retrieved from the vector database.
    """
    docs = retriever.invoke(query)
    return "\n\n".join(doc.page_content for doc in docs)

Initialize Model
-> Combine tool with model

In [22]:
model_with_tools = model.bind_tools([anime_retriever_tool])

In [None]:
# 3 messages: user, AI, tool 
# Now, AI is added into the picture
messages = [
    {
        "role": "user",
        # what is user asking
        "content": "Can you recommend an anime similar to Naruto and also tell me a joke?"
    }
]

In [24]:
ai_msg = model_with_tools.invoke(messages)

# to remember the conversation
messages.append(ai_msg)

In [25]:
messages

[{'role': 'user',
  'content': 'Can you recommend an anime similar to Naruto and also tell me a joke?'},
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'n4snhccjq', 'function': {'arguments': '{"query":"anime similar to Naruto"}', 'name': 'anime_retriever_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 342, 'total_tokens': 384, 'completion_time': 0.114576776, 'completion_tokens_details': None, 'prompt_time': 0.017710311, 'prompt_tokens_details': None, 'queue_time': 0.161981819, 'total_time': 0.132287087}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_45180df409', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c1354-4285-7280-bb06-1829667bc903-0', tool_calls=[{'name': 'anime_retriever_tool', 'args': {'query': 'anime similar to Naruto'}, 'id': 'n4snhccjq', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'in

In [26]:
ai_msg.tool_calls

[{'name': 'anime_retriever_tool',
  'args': {'query': 'anime similar to Naruto'},
  'id': 'n4snhccjq',
  'type': 'tool_call'}]

In [27]:
messages[0]

{'role': 'user',
 'content': 'Can you recommend an anime similar to Naruto and also tell me a joke?'}

In [28]:
for tool_call in ai_msg.tool_calls:
    tool_result = anime_retriever_tool.invoke(tool_call)
    messages.append(tool_result)

In [29]:
messages

[{'role': 'user',
  'content': 'Can you recommend an anime similar to Naruto and also tell me a joke?'},
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'n4snhccjq', 'function': {'arguments': '{"query":"anime similar to Naruto"}', 'name': 'anime_retriever_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 342, 'total_tokens': 384, 'completion_time': 0.114576776, 'completion_tokens_details': None, 'prompt_time': 0.017710311, 'prompt_tokens_details': None, 'queue_time': 0.161981819, 'total_time': 0.132287087}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_45180df409', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c1354-4285-7280-bb06-1829667bc903-0', tool_calls=[{'name': 'anime_retriever_tool', 'args': {'query': 'anime similar to Naruto'}, 'id': 'n4snhccjq', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'in

In [30]:
final_response = model_with_tools.invoke(messages)
print(final_response.text)

Here's a joke for you: Why did the ninja go to the doctor? Because he was feeling a little "stealthy"! 

As for the anime recommendation, you might enjoy "Ninin ga Shinobuden", "Tsubasa Chronicle", or "Rurouni Kenshin: Meiji Kenkaku Romantan - Ishinshishi e no Chinkonka" as they all have elements of action, adventure, and martial arts similar to Naruto.


In [None]:
# Run:
# uv pip install -e. --> to create a package of the project
# 