In [1]:
import pandas as pd
import numpy as np
import re
import os
import spacy
from spacy import displacy

In [2]:
import requests
from urllib.parse import urljoin
from datetime import datetime
import pandas as pd

class IndianKanoon:
  """
  Small client for the Indian Kanoon HTTP API.

  Adapted from https://github.com/aishik-pyne/kanoon

  Endpoints wrapped by this class:
    Search query        https://api.indiankanoon.org/search/?formInput=<query>&pagenum=<pagenum>
    Document            https://api.indiankanoon.org/doc/<docid>/
    Document fragments  https://api.indiankanoon.org/docfragment/<docid>/?formInput=<query>
    Document Metainfo   https://api.indiankanoon.org/docmeta/<docid>/

  Every public method sends a POST through one shared ``requests.Session``
  carrying the token header, calls ``raise_for_status()`` on the response and
  returns the decoded JSON body.
  """

  def __init__(self, auth_token=None):
    """
    Build the authenticated session.

    auth_token: optional API token. When omitted, the environment variable
      INDIANKANOON_API_TOKEN is used, and only if that is unset too does the
      legacy built-in token apply (kept so ``IndianKanoon()`` keeps working).
    """
    import os  # local import so this cell does not rely on another cell's imports

    self.base_url = "https://api.indiankanoon.org/"
    # FIXME(security): the literal below is a credential committed to the
    # notebook. Rotate it, export INDIANKANOON_API_TOKEN instead, and delete
    # this fallback once nothing depends on it.
    self.auth_token = (
        auth_token
        or os.environ.get("INDIANKANOON_API_TOKEN")
        or "6c0262cd025351a53ab784b21634260276288d19"
    )

    self.headers = {
        'authorization': "Token {}".format(self.auth_token),
        'cache-control': "no-cache",
    }
    self.api_session = requests.Session()
    # update() layers our headers on top of the session defaults instead of
    # discarding them, which plain assignment would do.
    self.api_session.headers.update(self.headers)

  def _post(self, path, params=None):
    """POST to ``base_url + path``, raise on an HTTP error status, return the JSON body."""
    response = self.api_session.post(
        urljoin(self.base_url, path), params=params)
    response.raise_for_status()
    return response.json()

  def search(self, formInput, pagenum=0,
             fromdate=None, todate=None,
             title=None, author=None,
             cite=None, bench=None, maxpages=50):
    """
    Run a search query and return the JSON result.

    formInput: the query string.
    pagenum / maxpages: paging controls, always sent.
    fromdate / todate: optional ``datetime`` bounds, sent as DD-MM-YYYY.
    title / author / cite / bench: optional filters, sent only when given.
    """
    params = {
        'formInput': formInput,
        'pagenum': pagenum,
        'maxpages': maxpages
    }
    if fromdate:
      assert isinstance(fromdate, datetime), "fromdate must be a datetime"
      # '%Y' (not '&Y'): the typo used to send the literal text "&Y" as the year.
      params['fromdate'] = fromdate.strftime('%d-%m-%Y')

    if todate:
      assert isinstance(todate, datetime), "todate must be a datetime"
      params['todate'] = todate.strftime('%d-%m-%Y')

    # These filters were accepted by the signature but silently dropped before.
    optional_filters = {
        'title': title,
        'author': author,
        'cite': cite,
        'bench': bench,
    }
    for key, value in optional_filters.items():
      if value:
        params[key] = value

    return self._post('search/', params=params)

  def doc(self, docid):
    """Return the JSON payload of the document identified by ``docid``."""
    return self._post('doc/{}/'.format(docid))

  def docfragment(self, docid, formInput):
    """Return the fragments of document ``docid`` for the query ``formInput``."""
    params = {
        'formInput': formInput,
    }
    return self._post('docfragment/{}/'.format(docid), params=params)

  def docmeta(self, docid):
    """Return the meta information of the document identified by ``docid``."""
    return self._post('docmeta/{}/'.format(docid))

In [3]:
# Shared API client; get_text_file() uses it to download documents.
ik = IndianKanoon()

In [29]:
# Results table: one row per reference judgement, holding the NER output for
# that judgement and for each of its three similar judgements.
RESULT_COLUMNS = [
    "Judgement Name",
    "Judgement",
    "Similar Judgement1",
    "Similar Judgement2",
    "Similar Judgement3",
]
df = pd.DataFrame(columns=RESULT_COLUMNS)

In [30]:
# Show the freshly created results frame (columns only, no rows yet).
df

Unnamed: 0,Judgement Name,Judgement,Similar Judgement1,Similar Judgement2,Similar Judgement3


In [6]:
# Lookup tables of similar judgements, one table per reference judgement.
# Key: case title; value: the numeric document id used for the API download
# and for the "<docid>.txt" file name.

# Similar to 'Dhanpat Seth v Nilkamal Plastic.txt'
dhanpat = {
    'F. Hoffmann-La Roche Ltd. And Anr. vs Cipla Limited': 64813,
    'Garware-Wall Ropes Ltd. vs Techfab India And 5 Ors': 1258171,
    'Glaverbel S.A. vs Dave Rose & Ors': 705613,
}

# Similar to 'Amar Nath Sehgal v UOI.txt'
amarnath = {
    'Super Cassette Industries ... vs Bathla Cassette Industries Pvt': 999762,
    'Raj Rewal vs Union Of India & Ors': 186730577,
    'Arun Chadha vs Oca Productions Pvt. Ltd. &Ors': 99260867,
}

# Similar to 'Rameshwari Photocopy.txt'
rameshwari = {
    'Super Cassettes Industries ... vs Mr Chintamani Rao & Ors': 576454,
    'The Chancellor Masters & Scholars ... vs Narendera Publishing House': 138192511,
    'Indian Performing Right Society ... vs Aditya Pandey & Ors': 85777093,
}

# Similar to 'RG Anand v Delux Films & Ors.txt'
delux = {
    'Zee Telefilms Ltd. And Film And ... vs Sundial Communications Pvt. Ltd.': 603848,
    'Chatrapathy Shanmugham vs S. Rangarajan': 530572,
    'King Features Syndicate Inc. & ... vs Sunil Agnihotri & Ors': 1440160,
}

In [11]:
# Inspect the working directory to see which judgement text files are present.
os.listdir()

['.ipynb_checkpoints',
 'Amar Nath Sehgal v UOI.txt',
 'Dhanpat Seth v Nilkamal Plastic.txt',
 'Rameshwari Photocopy.txt',
 'RG Anand v Delux Films & Ors.txt',
 'Untitled.ipynb']

In [12]:
# Keep only the plain-text files found in the working directory.
files = list(filter(lambda name: name.endswith('.txt'), os.listdir()))

In [13]:
# Show the .txt files that were picked up.
files

['Amar Nath Sehgal v UOI.txt',
 'Dhanpat Seth v Nilkamal Plastic.txt',
 'Rameshwari Photocopy.txt',
 'RG Anand v Delux Films & Ors.txt']

In [14]:
# HTML-to-text converter; get_text_file() runs each downloaded document
# through `h.handle(...)` before saving it.
import html2text
h = html2text.HTML2Text()
h.ignore_links = True  # leave hyperlink formatting out of the converted text

In [23]:
def get_text_file(docid):
    """
    Download one document and save it as plain text.

    The document is fetched with ``ik.doc``, its ``"doc"`` field (presumably
    HTML - it is fed to the html2text converter ``h``) is converted to text,
    and the result is written to ``<docid>.txt`` in the current working
    directory, UTF-8 encoded. An existing file of that name is overwritten.

    docid: document id; anything ``str()`` can render (int or str).
    """
    res = ik.doc(docid=str(docid))

    content = res["doc"]
    text = h.handle(content)

    # The context manager closes the handle even if the write raises;
    # plain 'w' suffices because the file is never read back here.
    with open(str(docid) + ".txt", 'w', encoding='utf-8') as f:
        f.write(text)

In [27]:
# Download the similar judgements of every reference case, not just `delux`:
# the NER cells further down open the "<docid>.txt" files of all four tables,
# so fetching a single table only worked when this cell had been edited and
# re-run by hand for each dict.
for similar_cases in (dhanpat, amarnath, rameshwari, delux):
    for docid in similar_cases.values():
        # Skip documents already saved by an earlier run to avoid needless API calls.
        if not os.path.exists(str(docid) + ".txt"):
            get_text_file(docid)

In [28]:
# Print the name of each .txt file now present in the working directory.
txt_names = (name for name in os.listdir() if name.endswith(".txt"))
for file in txt_names:
    print(file)

1258171.txt
138192511.txt
1440160.txt
186730577.txt
530572.txt
576454.txt
603848.txt
64813.txt
705613.txt
85777093.txt
99260867.txt
999762.txt
Amar Nath Sehgal v UOI.txt
Dhanpat Seth v Nilkamal Plastic.txt
Rameshwari Photocopy.txt
RG Anand v Delux Films & Ors.txt


In [32]:
def run_ner(jds):
    """
    Run spaCy named-entity recognition over a group of judgement files.

    Parameters
    ----------
    jds : list of str
        Paths of UTF-8 text files. The first path also serves as the row label.

    Returns
    -------
    list of str
        ``[jds[0], ents_0, ents_1, ...]`` where ``ents_k`` lists the entities
        found in ``jds[k]``, one ``"<text> -- <label>"`` entry per entity, each
        followed by a newline.

    Raises
    ------
    IndexError
        If ``jds`` is empty.
    """
    jd_name = jds[0]

    # Load the pipeline once per call; reloading it for every file (as the
    # loop used to do) repeats expensive, loop-invariant work.
    nlp = spacy.load("en_core_web_sm")

    row = [jd_name]

    for file in jds:
        # Context manager so the handle is closed as soon as the text is read.
        with open(file, "r", encoding="utf-8") as f:
            text = f.read()

        doc = nlp(text)

        # Single join instead of repeated string concatenation.
        res = "".join(
            ent.text + " -- " + ent.label_ + "\n" for ent in doc.ents
        )

        row.append(res)

    return row
        

In [33]:
# Each group is the reference judgement's text file followed by the
# "<docid>.txt" files of its three similar judgements.
row1 = [
    'Dhanpat Seth v Nilkamal Plastic.txt',
    '64813.txt',
    '1258171.txt',
    '705613.txt',
]
row2 = [
    'Amar Nath Sehgal v UOI.txt',
    '999762.txt',
    '186730577.txt',
    '99260867.txt',
]
row3 = ['Rameshwari Photocopy.txt'] + [
    str(docid) + ".txt" for docid in rameshwari.values()
]

In [34]:
# Reference judgement first, then one "<docid>.txt" entry per similar case in `delux`.
row4 = ['RG Anand v Delux Films & Ors.txt'] + [
    str(docid) + ".txt" for docid in delux.values()
]

In [35]:
# Display the four file groups to check each has one reference file plus three doc-id files.
row1,row2,row3,row4

(['Dhanpat Seth v Nilkamal Plastic.txt',
  '64813.txt',
  '1258171.txt',
  '705613.txt'],
 ['Amar Nath Sehgal v UOI.txt', '999762.txt', '186730577.txt', '99260867.txt'],
 ['Rameshwari Photocopy.txt', '576454.txt', '138192511.txt', '85777093.txt'],
 ['RG Anand v Delux Films & Ors.txt',
  '603848.txt',
  '530572.txt',
  '1440160.txt'])

In [37]:
# Check the results frame before the NER pass fills it.
df

Unnamed: 0,Judgement Name,Judgement,Similar Judgement1,Similar Judgement2,Similar Judgement3


In [38]:
# File groups in the order their rows should appear in the results frame.
rows = [row1,row2,row3,row4]

In [39]:
# Run NER on every file group, storing each result as the row at the group's
# position, then write the whole table to disk without the index column.
for position, group in enumerate(rows):
    df.loc[position] = run_ner(group)

df.to_csv("ner_results.csv", index=False)

In [40]:
# Show the populated results table (the same data written to ner_results.csv).
df

Unnamed: 0,Judgement Name,Judgement,Similar Judgement1,Similar Judgement2,Similar Judgement3
0,Dhanpat Seth v Nilkamal Plastic.txt,Dhanpat Seth -- PERSON\nNil Kamal Plastic Crat...,Delhi High Court\n\nF. Hoffmann-La Roche Ltd. ...,Gujarat High Court -- ORG\nTechfab -- GPE\nInd...,"Dave Rose & Ors -- ORG\n27 January, 2010 -- DA..."
1,Amar Nath Sehgal v UOI.txt,Amar Nath Sehgal -- PERSON\nUnion Of India -- ...,Bathla Cassette Industries Pvt -- ORG\n9 -- CA...,"Union Of India & Ors -- ORG\n28 May, 2019\n\n ...",Oca Productions Pvt. -- ORG\n5 July -- DATE\n2...
2,Rameshwari Photocopy.txt,2016 -- CARDINAL\n2016 -- DATE\n235 -- CARDINA...,Chintamani Rao & Ors -- ORG\n11 November -- DA...,Narendera Publishing House -- ORG\n17 Septembe...,Aditya Pandey & Ors -- ORG\n8 May -- DATE\nFeb...
3,RG Anand v Delux Films & Ors.txt,"18 August, 1978 -- DATE\n18 August, 1978 -- DA...",Sundial Communications Pvt. -- ORG\n27 March -...,Madras High Court\n\n -- WORK_OF_ART\nChatrapa...,"Sunil Agnihotri & Ors -- ORG\n11 April, 1997 -..."
