In [None]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain

In [None]:
import os
from getpass import getpass

# LangSmith tracing configuration.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Never hardcode the key in the notebook. Reuse a key that is already present
# in the environment and only prompt (without echo) when it is missing, so an
# existing valid key is not overwritten with an empty string.
if not os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_API_KEY'] = getpass('LangSmith API key: ')


In [None]:
from getpass import getpass

# Keep an OPENAI_API_KEY that is already set in the environment; otherwise ask
# for it interactively so the secret is never stored in the notebook.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

In [55]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import json
from langchain.docstore.document import Document

In [56]:
# Function to load data from a .json file
def load_json_data(json_file_path):
    """Read and parse a JSON file.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON value, exactly as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8: without `encoding`, open() falls back to the
    # platform locale, which can mis-decode or reject non-ASCII text.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Build LangChain documents from the entries of a .json file
def process_json_data(json_file_path):
    """Convert every entry of a JSON file into a ``Document``.

    The file is read with ``load_json_data`` and iterated entry by entry; each
    entry must support ``.get`` (i.e. be a JSON object). For every entry the
    citation, name, year, language and document date are stored as metadata
    and also rendered, together with ``unofficial_text``, into the page text.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        A list with one ``Document`` per entry, in file order.
    """
    metadata_keys = ("citation", "name", "year", "language", "document_date")

    def _field(entry, key):
        # Treat an explicit JSON null like a missing key. `.get(key, '')` only
        # covers the missing case, so a null would otherwise be rendered as the
        # literal text "None" and stored as a None metadata value.
        # NOTE(review): some vector stores reject None metadata - confirm for
        # the Chroma version in use.
        value = entry.get(key, '')
        return '' if value is None else value

    docs = []
    for entry in load_json_data(json_file_path):
        # Extract each field once so the text and the metadata cannot drift apart.
        metadata = {key: _field(entry, key) for key in metadata_keys}
        document_text = (
            f"Citation: {metadata['citation']}\n"
            f"Case Name: {metadata['name']}\n"
            f"Year: {metadata['year']}\n"
            f"Language: {metadata['language']}\n"
            f"Document Date: {metadata['document_date']}\n"
            f"Content: {_field(entry, 'unofficial_text')}"
        )
        docs.append(Document(page_content=document_text, metadata=metadata))
    return docs


In [58]:
# Indexing pipeline: load the JSON entries, split them into chunks, embed the
# chunks into a Chroma vector store and expose that store as `retriever`.
# Each print below runs *after* its step has finished, so a message appearing
# means that step completed.
# NOTE(review): the path below is absolute and looks like a Colab location -
# adjust it for other environments.
json_file_path = '/content/data.json'  # Path to the JSON file to index
docs = process_json_data(json_file_path)
print("\nProcessing json data")

# Split documents into overlapping chunks (size 1000, overlap 200;
# presumably measured in characters - confirm against the splitter's docs)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
print("\nSplitting documents")

# Embed the chunks with OpenAIEmbeddings and store them in Chroma.
# NOTE(review): no persist directory or collection name is given; re-running
# this cell in the same kernel may add the same chunks again - confirm.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
print("\nEmbedding documents")

# Create a retriever over the vector store with its default settings
retriever = vectorstore.as_retriever()
print("\nCreated retriever")



Processing json data

Splitting documents

Embedding documents

Creating retriever


In [59]:
#### RETRIEVAL and GENERATION ####

# Prompt template, fetched from the LangChain hub by its identifier
prompt = hub.pull(
    "rlm/rag-prompt",
)

# Chat model used to generate answers (temperature fixed at 0)
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# Post-processing
def format_docs(docs):
    """Return the ``page_content`` of every document, separated by blank lines."""
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# Chain: the incoming question is passed through unchanged under "question"
# and also sent to the retriever, whose documents are joined by format_docs
# under "context"; that mapping feeds the prompt, then the model, and the
# model's reply is parsed into a plain string.
rag_chain = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
} | prompt | llm | StrOutputParser()

# Example questions (uncomment to query the chain directly, without the UI):
# rag_chain.invoke("Give me the summary of Yogeswaran v. Canada (Ministry of Citizenship and Tourism) case")
# rag_chain.invoke("What is the significance of an interlocutory motion in legal proceedings?")


# Streamlit UI
# Requires the `streamlit` package to be installed. The import was previously
# commented out, which made every `st.` call below fail with a NameError.
import streamlit as st

st.title("Legal Document Query System")
st.sidebar.header("Settings")

query_input = st.text_area("Enter your legal query:", "")
run_query = st.button("Run Query")

if run_query and query_input.strip():
    st.info("Running query...")
    try:
        gpt_response = rag_chain.invoke(query_input)
        # Label the answer with the model the chain actually uses instead of
        # a hardcoded (and previously incorrect) "GPT-4".
        st.subheader(f"{llm.model_name} Generated Response")
        st.write(gpt_response)
    except Exception as e:
        # Show the failure in the UI rather than crashing the app.
        st.error(f"Error: {e}")
elif run_query:
    # The button was pressed with an empty or whitespace-only query.
    st.warning("Please enter a query before running.")

'Yogeswaran v. Canada (Minister of Citizenship and Immigration) involved applicants seeking judicial review of a decision by the Convention Refugee Determination Division. The decision dated February 17, 1999, determined that the applicants were not Convention refugees under the Immigration Act. The case was heard in Toronto on August 15, 2000, with the decision reserved for further consideration.'