In [1]:
#warnings generated by code:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
warnings.filterwarnings('ignore')

import torch
from numba import cuda
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import DirectoryLoader

from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes, DecodingMethods
from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
import wget

In [2]:
torch.cuda.is_available()

True

In [3]:
torch.cuda.current_device()

0

In [4]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3050 Laptop GPU'

In [5]:
folder_path = "C:\Projects\Infosys\judgement"

In [6]:
loader=DirectoryLoader(folder_path,glob="*.txt")
documents=loader.load()
txt_split=CharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
txt=txt_split.split_documents(documents)
print(len(txt))

Created a chunk of size 3505, which is longer than the specified 2000


2113


In [7]:
embeddings=HuggingFaceEmbeddings()
docsearch= Chroma.from_documents(txt,embeddings) #used for vector spaces
print("Ingested")

Ingested


In [8]:
model_id='google/flan-ul2'

In [9]:
parameters={
    GenParams.DECODING_METHOD: DecodingMethods.GREEDY,
    GenParams.MIN_NEW_TOKENS:130, #minimum characters in output
    GenParams.MAX_NEW_TOKENS:256, #maximum characters in output
    GenParams.TEMPERATURE:0.5 #creativness or randomness to create varity in output
}

In [10]:
import json

with open("apikey.json",'r') as file:
    data=json.load(file)
apikey=data['apikey']

In [11]:
credentials = {
    "url": "https://us-south.ml.cloud.ibm.com",
    "apikey": apikey
}

project_id = "b6ee87bb-8411-4a01-afd7-9dd74aef532b"

In [12]:
model=Model(model_id=model_id, params=parameters, credentials=credentials, project_id=project_id)

In [13]:
flan_ul2_llm=WatsonxLLM(model=model)

In [14]:
qa=RetrievalQA.from_chain_type(llm=flan_ul2_llm,
                               chain_type="stuff", 
                               retriever= docsearch.as_retriever(), 
                               return_source_documents=False)
query="Summarize the documents"
qa.invoke(query)

{'query': 'Summarize the documents',
 'result': 'The letter read as a whole is an innocuous document and its dominant purpose and intent was to safeguard the interests of accused Patel and to protect him against any underhand or unfair act of his rival contractors. We cannot infer any intent to defraud or any intention to injure Amarnath, though in order to protect himself accused Patel made certain allegations against him. We therefore set aside the conviction of both the appellants under the third charge and acquit them. 1112 The result is that the consolidated appeal is allowed, the judgments of all the three courts below are set aside and the appellants are acquitted. Appellants acquitted.'}

In [15]:
model_id='meta-llama/llama-3-70b-instruct'
parameters={
    GenParams.DECODING_METHOD: DecodingMethods.GREEDY,
    GenParams.MAX_NEW_TOKENS:256,
    GenParams.TEMPERATURE:0.5
}

credentials = {
    "url": "https://us-south.ml.cloud.ibm.com",
    "apikey": apikey
}

project_id = "b6ee87bb-8411-4a01-afd7-9dd74aef532b"
model=Model(
    model_id=model_id,
    params= parameters,
    credentials=credentials,
    project_id=project_id
)
llama_3_llm=WatsonxLLM(model=model)

In [16]:
qa=RetrievalQA.from_chain_type(llm=llama_3_llm,
                               chain_type="stuff",
                               retriever=docsearch.as_retriever(),
                               return_source_documents=False)
query="Can you summarize the documents"
qa.invoke(query)

{'query': 'Can you summarize the documents',
 'result': " Here is a summary of the documents mentioned in the text:\n\n* Exhibit P 3A: A document whose authenticity is disputed. It is not clear what it contains, but it is mentioned as being unusual, peculiar, and strange by the courts below.\n* Exhibit P 4 and P 5: Tenders with appendices marked A, B, and C.\n* Exhibit P 24: A letter dated November 20, 1946, with office notes by A.M. Naidu and the appellant Nargundkar. The notes contain mistakes in the date.\n* Tamliknama: A document executed by Nand Lal, making a gift of a double-storied pucca built shop and a house to Mst. Meria, the widow of Chhedi Lal.\n* Deed of relinquishment: A contemporaneous document executed by Meria, renouncing all her claims to the property left by Mangal Sen.\n\nPlease note that the contents of Exhibit P 3A are not specified in the text, and the other documents are mentioned in the context of the court's discussion of their authenticity and relevance to th

In [6]:
import os
import re
import json
from transformers import pipeline
import pandas as pd

In [2]:
#Loading and Preprocessing
def load_preprocess(file_path):
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path,'r',encoding='utf-8')as file:
        document=file.read()
    chunks=[document[i:i+1000] for i in range(0,len(document),1000)]
    return chunks

In [3]:
#Risk detection using Hugging Face
def risk_detection(chunks):
    model_name = "google/flan-t5-base"
    nlp = pipeline("text2text-generation", model=model_name)

    results = []

    for chunk in chunks:
        # Define a prompt for risk analysis
        prompt = (
            "Analyze the following text for potential risks, hidden obligations, "
            "or dependencies, and provide recommendations:\n\n" + chunk
        )

        # Generate response
        result = nlp(prompt, max_length=208, do_sample=False)
        results.append({"context": chunk, "analysis": result[0]['generated_text'],"recommendation": result[0]['generated_text']})

    return results

In [4]:
def main(file_path):
    print("Loading")
    chunks=load_preprocess(file_path)
    print("Analyzing")
    analysis=risk_detection(chunks)

    output_path="risk_analysis.json"
    with open(output_path,"w",encoding="utf-8") as f:
        json.dump(analysis,f,ensure_ascii=False,indent=4)

    print(f"Analysis Complete. Results saved to {output_path}")

if __name__ == "__main__":
    file_path = "Legal-Example.txt" 
    main(file_path)

Loading
Analyzing



Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Analysis Complete. Results saved to risk_analysis.json


In [7]:
with open("risk_analysis.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Print each entry's context and analysis
for entry in data:
    print("Context:\n", entry["context"])
    print("Analysis:\n", entry["analysis"])
    print("Recommendation:\n",entry["recommendation"])
    print("-" * 80)

Context:
 XXXVII of 1950.
Application under article 32 of the Constitution of India for a writ of certiorari and prohibition.
The facts are set out in the judgment.
B. Banerji for the petitioner.
M.C. Setalvad, Attorney General for India (Gyan Chand, with him) for the opposite party.
522 1950.
May 26.
The following judgments were delivered: KANIA C.J. This is an application for a writ of 'certiorari and prohibition under article 32 of the Constitution of India.
The petitioner who is the President of the All India Hindu Mahasabha since December, 1949, was served with an order of externment dated the gist of March, 1950, that night.
By that order he is directed by the District Magis trate, Delhi, not to remain in the Delhi District, and immediately to remove himself from the Delhi District and not to return to the District.
The order was to continue in force for three months.
By another order of the Madhya Bharat Government he was directed to reside in Nagpur.
That order has been recentl

In [8]:
import json
with open("risk_analysis.json","r",encoding="utf-8") as f:
    data=json.load(f)

df=pd.DataFrame(data)
df

Unnamed: 0,context,analysis,recommendation
0,XXXVII of 1950.\nApplication under article 32 ...,The following judgments were delivered:,The following judgments were delivered:
1,d.\nThe petitioner disputes the validity of th...,The following are the possible risks and depen...,The following are the possible risks and depen...
2,"Case No. 29 of 1950, Brij nandan vs The State ...",No.,No.
3,ition of reasonable restrictions on the exerci...,The Court of First Instance considers the law ...,The Court of First Instance considers the law ...
4,also.\nIt is submitted that in deciding whethe...,'clause (5) must be given its full meaning ing.,'clause (5) must be given its full meaning ing.
5,"urt, on either inter pretation, will be entitl...",the Court,the Court
6,"f the Court, as it has to determine if the exe...",t,t
7,hat under section 4 the power to make the orde...,a period of time.,a period of time.
8,more than three months.\nIt was argued that th...,The period of three months is not unreasonable.,The period of three months is not unreasonable.
9,ilarly 526 the District Magistrate is not perm...,statutory duty to enforce the law,statutory duty to enforce the law


In [9]:
import openpyxl
output_file = "output.xlsx"
df.to_excel(output_file, index=False, engine='openpyxl')

print(f"Excel file created successfully at {output_file}")

Excel file created successfully at output.xlsx


In [10]:
import streamlit as st
import pandas as pd
import json
import requests
from transformers import pipeline

In [11]:
import json
import pandas as pd
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build

SPREADSHEET_ID = "1PZaFBsyUnaUbOwVOLU8XnOug4eGZNlMaGr561Ael6FM"  
RANGE_NAME = "Sheet1!A1"  

SERVICE_ACCOUNT_FILE = "ai-driven-legal-analyzer-a469ea4bc49d.json"  # Path to your credentials JSON file
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]

credentials = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
service = build("sheets", "v4", credentials=credentials)

values = [df.columns.tolist()] + df.values.tolist()

body = {"values": values}

result = (
    service.spreadsheets()
    .values()
    .update(
        spreadsheetId=SPREADSHEET_ID,
        range=RANGE_NAME,
        valueInputOption="RAW",  # or 'USER_ENTERED' for formatted input
        body=body,
    )
    .execute()
)

print(f"{result.get('updatedCells')} cells updated.")

96 cells updated.


In [42]:
import json
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# Set up the SMTP server
smtp_server = "smtp.gmail.com"
smtp_port = 587
sender_email = "aayush.kumar2422@gmail.com"
receiver_email = "that077guy@gmail.com"
with open('pass.json') as config_file:
    config = json.load(config_file)

password = config['email_password']

# Create the email
subject = "Subject: Your Link"
body = "Legal Document Analysis update: https://docs.google.com/spreadsheets/d/1PZaFBsyUnaUbOwVOLU8XnOug4eGZNlMaGr561Ael6FM/edit?gid=0#gid=0"

msg = MIMEMultipart()
msg['From'] = sender_email
msg['To'] = receiver_email
msg['Subject'] = subject
msg.attach(MIMEText(body, 'plain'))

# Send the email
try:
    server = smtplib.SMTP(smtp_server, smtp_port)
    server.starttls()
    server.login(sender_email, password)
    server.sendmail(sender_email, receiver_email, msg.as_string())
    print("Email sent successfully!")
except Exception as e:
    print(f"Error: {e}")
finally:
    server.quit()


Email sent successfully!
