%pip install -q torch transformers langchain sentence-transformers tqdm openpyxl openai pandas datasets langchain-community ragatouille

In [1]:
from dotenv import load_dotenv, find_dotenv
import sys
import os, getpass
from openai import OpenAI


# Add the project root directory to Python path
project_root = os.path.dirname(os.path.abspath(''))
if project_root not in sys.path:
    sys.path.append(project_root)

from utils import set_api_key


load_dotenv(find_dotenv())  

QWEN_API_KEY = set_api_key('QWEN_API_KEY')

from tqdm.auto import tqdm
import pandas as pd
from typing import Optional, List, Tuple
import json
import tiktoken

# requirements (example)
# pip install requests beautifulsoup4 transformers sentence-transformers faiss-cpu langchain pillow pytesseract

import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import os
import numpy as np
#import torch

# Hugging Face tools
#from transformers import pipeline, CLIPProcessor, CLIPModel
from sentence_transformers import SentenceTransformer  # for text embeddings

# LangChain
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document as LangchainDocument
import langgraph
from langgraph.prebuilt import ToolNode
from langchain.chat_models import init_chat_model



pd.set_option("display.max_colwidth", None)

API key found in .env file for QWEN_API_KEY
API key set successfully.


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain_community.document_loaders import WebBaseLoader

urls = [
    "https://www.incometax.gov.in/iec/foportal/help/all-topics/tax-payer/individual/how-to-file-tax-returns",
    "https://www.incometax.gov.in/iec/foportal/help/individual/return-applicable-1#taxdeductions",
    "https://www.incometax.gov.in/iec/foportal/help/individual/return-applicable-1#taxableincome",
    "https://www.incometax.gov.in/iec/foportal/help/e-filing-itr1-form-sahaj-faq",
    "https://www.incometax.gov.in/iec/foportal/help/e-filing-itr4-form-sugam-faq"

]
loader = WebBaseLoader(urls)
docs = loader.load()
assert len(docs) == 5

print(f"Total Characters: {sum([len(doc.page_content) for doc in docs])}")


USER_AGENT environment variable not set, consider setting it to identify your requests.


Total Characters: 117671


In [3]:
docs[0]

Document(metadata={'source': 'https://www.incometax.gov.in/iec/foportal/help/all-topics/tax-payer/individual/how-to-file-tax-returns', 'title': 'How to File Tax Returns | Income Tax Department', 'description': 'ITRs', 'language': 'en'}, page_content='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHow to File Tax Returns | Income Tax Department\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n      Skip to main content\n    \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\nCall Us\n\n\n\ne-filing and Centralized Processing Center\ne-Filing of Income Tax Return or Forms and other value added services & Intimation, Rectification, Refund and other Income Tax Processing Related Queries\n\n\n1800 103 0025 (or)\n1800 419 0025\n+91-80-46122000\n+91-80-61464700\n\n\n08:00 hrs - 20:00 hrs\n(Monday to Friday)\n\n\n\n\nTax Information Network - NSDL\nQueries related to PAN & TAN application for Issuance / Update through NSDL\n\n\n+91-20-27218080\n\n\n07:00 hrs - 23:00 hrs\n(All Days)\n\n\n\n\n

In [4]:
# Split the documents
#langchain_docs = [LangchainDocument(page_content=doc["text"], metadata={"source": doc["source"]}) for doc in tqdm(ds)]


text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
    separators=[ r"\n#", ".", r"\t#", " ", ""],
    length_function= lambda text: len(tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)),
    is_separator_regex=True
)

docs_processed = []
for doc in docs:
    docs_processed += text_splitter.split_documents([doc])



In [5]:
len(docs_processed)

141

In [6]:
docs_processed[:3]

[Document(metadata={'source': 'https://www.incometax.gov.in/iec/foportal/help/all-topics/tax-payer/individual/how-to-file-tax-returns', 'title': 'How to File Tax Returns | Income Tax Department', 'description': 'ITRs', 'language': 'en', 'start_index': 34}, page_content='How to File Tax Returns | Income Tax Department\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n      Skip to main content\n    \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\nCall Us\n\n\n\ne-filing and Centralized Processing Center\ne-Filing of Income Tax Return or Forms and other value added services & Intimation, Rectification, Refund and other Income Tax Processing Related Queries\n\n\n1800 103 0025 (or)\n1800 419 0025\n+91-80-46122000\n+91-80-61464700\n\n\n08:00 hrs - 20:00 hrs\n(Monday to Friday)\n\n\n\n\nTax Information Network - NSDL\nQueries related to PAN & TAN application for Issuance / Update through NSDL\n\n\n+91-20-27218080\n\n\n07:00 hrs - 23:00 hrs\n(All Days)\n\n\n\n\nAIS and Reporting Portal\nQueries related to AIS

#### Creating a team of agents for different tasks
- Question Agent
- Critique Agent
- Agent as a Judge
- Answering Agent

In [10]:
MISTRAL_KEY = os.environ.get('MISTRAL_KEY')
QWEN_API_KEY = os.environ.get('QWEN_API_KEY')

In [34]:
llm  = init_chat_model("mistral-large-latest", 
                       model_provider="mistralai", 
                       timeout = 60, 
                       streaming = True,
                       api_key = MISTRAL_KEY
                       )

In [35]:
llm.invoke("Hi dude").pretty_print()


Hey there! 😊 What's up? How can I help you today? (And just a heads-up—I’m an AI, not a dude, but I’m happy to chat about anything!) 🚀

What’s on your mind? 💡


In [22]:
import requests
import json

response = requests.post(
  url="https://openrouter.ai/api/v1/chat/completions",
  headers={
    "Authorization": F"Bearer {QWEN_API_KEY}",
    "Content-Type": "application/json",
  },
  data=json.dumps({
    "model": "qwen/qwen3-4b:free",
    "messages": [
      {
        "role": "user",
        "content": "Hi"
      }
    ],
    
  })
)

In [23]:
response.json()['choices'][0]['message']['content']

'Hello! How can I assist you today?'