In [None]:
import pandas as pd
import nltk
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Location of the source CSV (one constitution article per row).
csv_path = "/content/Constitution Of India.csv"

# Read the articles into a DataFrame and preview the first rows.
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Articles
0,"1. Name and territory of the Union\n(1) India,..."
1,1. The territories of the States; the Union te...
2,2. Admission or establishment of new States: P...
3,2A. Sikkim to be associated with the Union Rep...
4,3. Formation of new States and alteration of a...


In [None]:
# word_tokenize relies on the Punkt tokenizer data. Older NLTK releases load it
# from the 'punkt' package, newer releases look for 'punkt_tab' instead, so
# fetch both to keep the notebook runnable on a fresh kernel with either.
# An unknown package id is reported by nltk.download but does not raise.
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# One string per article. Missing cells are dropped (and everything is coerced
# to str) because word_tokenize, used on these values later, only accepts text.
texts = df['Articles'].dropna().astype(str).tolist()

# Preview a handful of entries instead of dumping the whole corpus into the
# cell output.
texts[:5]

['1. Name and territory of the Union\n(1) India, that is Bharat, shall be a Union of States\n(2) The States and the territories thereof shall be as specified in the First Schedule\n(3) The territory of India shall comprise',
 '1. The territories of the States; the Union territories specified in the First Schedule; and such other territories as may be acquired',
 '2. Admission or establishment of new States: Parliament may by law admit into the Union, or establish, new States on such terms and conditions, as it thinks fit',
 '2A. Sikkim to be associated with the Union Rep by the Constitution Thirty six Amendment Act, 1975 , Section 5 (w e f 26 04 1975 )',
 '3. Formation of new States and alteration of areas, boundaries or names of existing States: Parliament may by law\n(a) form a new State by separation of territory from any State or by uniting two or more States or parts of States or by uniting any territory to a part of any State;\n(b) increase the area of any State;\n(c) diminish th

In [None]:
# Tokenize every article; this list of token lists is the Word2Vec training
# corpus.
tokenized_texts = [word_tokenize(text) for text in texts]

# Show only the first tokens of the first article: printing the full nested
# list floods the notebook with thousands of output lines.
tokenized_texts[0][:20] if tokenized_texts else []

[['1',
  '.',
  'Name',
  'and',
  'territory',
  'of',
  'the',
  'Union',
  '(',
  '1',
  ')',
  'India',
  ',',
  'that',
  'is',
  'Bharat',
  ',',
  'shall',
  'be',
  'a',
  'Union',
  'of',
  'States',
  '(',
  '2',
  ')',
  'The',
  'States',
  'and',
  'the',
  'territories',
  'thereof',
  'shall',
  'be',
  'as',
  'specified',
  'in',
  'the',
  'First',
  'Schedule',
  '(',
  '3',
  ')',
  'The',
  'territory',
  'of',
  'India',
  'shall',
  'comprise'],
 ['1',
  '.',
  'The',
  'territories',
  'of',
  'the',
  'States',
  ';',
  'the',
  'Union',
  'territories',
  'specified',
  'in',
  'the',
  'First',
  'Schedule',
  ';',
  'and',
  'such',
  'other',
  'territories',
  'as',
  'may',
  'be',
  'acquired'],
 ['2',
  '.',
  'Admission',
  'or',
  'establishment',
  'of',
  'new',
  'States',
  ':',
  'Parliament',
  'may',
  'by',
  'law',
  'admit',
  'into',
  'the',
  'Union',
  ',',
  'or',
  'establish',
  ',',
  'new',
  'States',
  'on',
  'such',
  'terms',
 

In [None]:
# Train 100-dimensional word vectors on the tokenized articles.
# workers=1 and an explicit seed make training repeatable: with several worker
# threads the OS scheduling order changes the resulting vectors from run to
# run, which in turn changes which article is retrieved below.
# seed=1 is gensim's documented default, spelled out here for the reader.
# NOTE(review): gensim's docs also mention PYTHONHASHSEED for full
# determinism across interpreter launches - confirm if that matters here.
model_w2v = Word2Vec(
    sentences=tokenized_texts,
    vector_size=100,
    window=5,
    min_count=1,  # keep every token; the corpus is small
    workers=1,
    seed=1,
)

In [None]:
def get_avg_vector(text, model, tokenizer=None):
    """Return the mean word vector of ``text`` under ``model``.

    The text is split into tokens and each token is looked up in ``model.wv``.
    A token is first looked up exactly as written; if that spelling is not in
    the vocabulary, its lower-case, capitalized and upper-case variants are
    tried in that order. Without this fallback a query such as "india" is
    silently ignored when the vocabulary only contains "India".

    Args:
        text: String to embed.
        model: Object exposing ``wv`` (supports ``in`` and ``[]`` by token)
            and ``vector_size``.
        tokenizer: Optional callable mapping a string to an iterable of
            tokens. Defaults to ``word_tokenize``.

    Returns:
        The element-wise mean of the vectors that were found, or a zero
        vector of length ``model.vector_size`` when no token is known.
    """
    tokenize = word_tokenize if tokenizer is None else tokenizer
    keyed_vectors = model.wv

    vectors = []
    for word in tokenize(text):
        # Exact spelling wins; case variants are only a fallback.
        for candidate in (word, word.lower(), word.capitalize(), word.upper()):
            if candidate in keyed_vectors:
                vectors.append(keyed_vectors[candidate])
                break

    if vectors:
        return np.mean(vectors, axis=0)
    # No known token at all: return a neutral vector so callers still get an
    # array of the expected length.
    return np.zeros(model.vector_size)

In [None]:
# Free-text query describing the situation to match against the articles.
# NOTE(review): "inda" looks like a typo for "india"; it is left as-is because
# it is runtime text that changes the retrieval result.
input_sentence = "he stayed in india without having citizenship of inda"

# Embed the query with the same averaging scheme used for the articles.
input_vector = get_avg_vector(text=input_sentence, model=model_w2v)

In [None]:
# Embed every article once, then score all of them against the query in a
# single cosine_similarity call instead of one call per article.
doc_vectors = [get_avg_vector(text, model_w2v) for text in texts]

# cosine_similarity returns a (1, n_articles) matrix; row 0 holds the scores.
# An empty corpus is kept as an empty list rather than passed to scikit-learn,
# which rejects an empty input.
similarities = (
    cosine_similarity([input_vector], doc_vectors)[0].tolist()
    if doc_vectors
    else []
)

In [None]:
# Position of the highest similarity score, i.e. the best-matching article.
most_similar_index = np.asarray(similarities).argmax()

# Look the winning article up in the original list of texts.
most_similar_text = texts[most_similar_index]

In [None]:
# Report the article retrieved for the query.
message = f'Most similar text: {most_similar_text}'
print(message)

Most similar text: 290. Adjustment in respect of certain expenses and pensions Where under the provisions of this Constitution the expenses of any court or Commission, or the pension payable to or in respect of a person who has served before the commencement in connection with the affairs of the Union or of a State, are charged on the Consolidated Fund of India or the Consolidated Fund of a State, then, if
(a) in the case of a charge on the Consolidated Fund of India, the court or Commission serves any of the separate needs of a State, or the person has served wholly or in part in connection with the affairs of a State; or
(b) in the case of a charge on the Consolidated Fund of a State, the court or Commission serves any of the separate needs of the Union or another State, or the person has served wholly or in part in connection with the affairs of the Union or another State, there shall be charged on and paid out of the Consolidated Fund of the State or, as the case may be, the Consol

In [None]:
# Interactive Hugging Face Hub login: renders a widget in the notebook where
# an access token is entered, so no token is hard-coded in the file.
# NOTE(review): presumably required because the checkpoint loaded in the next
# cell needs an authenticated download - confirm.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Checkpoint used for both the tokenizer and the language model.
model_id = "google/gemma-2-2b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",           # let the library decide where the weights go
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
)

# Baseline: let the model continue the raw query without any retrieved context.
# The inputs are moved to the device the model was actually placed on;
# hard-coding "cuda" fails whenever device_map="auto" puts it elsewhere
# (for example on a CPU-only runtime).
input_ids = tokenizer(input_sentence, return_tensors="pt").to(model.device)
outputs = model.generate(**input_ids, max_new_tokens=32)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
generated_text

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [None]:
# Instruction placed in front of the query and the retrieved article.
prompt_tuning = "Your role is to take the retrived data on the input_sentence and convert that retrived data as input_sentence suggest and answer which law has been violated in detail"

# Keep the three parts on separate, labelled lines. Plain concatenation glued
# the last word of one part onto the first word of the next
# ("...in detailhe stayed..."), so the model could not tell them apart.
# NOTE(review): the "-it" suffix of the checkpoint suggests an
# instruction-tuned model; tokenizer.apply_chat_template may be the better way
# to build this prompt - confirm.
input_text = (
    f"{prompt_tuning}\n\n"
    f"input_sentence: {input_sentence}\n\n"
    f"retrieved data: {most_similar_text}\n\n"
)

# Send the inputs to wherever the model lives instead of assuming "cuda".
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

outputs = model.generate(**input_ids, max_new_tokens=200)

# The generated sequence starts with the prompt tokens. Drop them by token
# count: slicing the decoded string by len(input_text) is only correct when
# decoding reproduces the prompt character for character.
prompt_length = input_ids["input_ids"].shape[-1]
generated_text = tokenizer.decode(
    outputs[0][prompt_length:], skip_special_tokens=True
).strip()
generated_text

'or the Chief Justice of the State concerned.\nThis provision is a key part of the Indian Constitution, as it outlines the mechanism for sharing the costs of courts and commissions between the Union and the States.\n\n**Please provide a detailed analysis of the provision, focusing on the following aspects:**\n\n1. **The purpose of the provision:** What is the underlying rationale behind this provision?\n2. **The scope of the provision:** Who are the beneficiaries of this provision?\n3. **The mechanism of implementation:** How does this provision work in practice?\n4. **The implications of the provision:** What are the potential consequences of this provision for the relationship between the Union and the States?\n5. **The limitations of the provision:** What are the potential limitations of this provision?\n\n**Please provide a comprehensive and insightful analysis of the provision.**\n\n\n## Analysis of the Provision on Sharing Costs of Courts and Commissions\n\nThis provision in the 