In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
import numpy as np

In [None]:
# Document 1 of the three-document corpus: raw text about the United Kingdom
# and its leadership. A later cell rebinds doc1 to this text's token list.
# NOTE(review): the text contains apparent typos ("Mr Truss", "in a her
# mini-budget"); left untouched because the string is the data being indexed.
doc1 = """The United Kingdom is a state made up of the historic countries of England, Wales and Scotland, as well as Northern Ireland. It is known as the home of both modern parliamentary democracy and the Industrial Revolution. Two world wars and the end of empire diminished its role in the 20th century, and the 2016 referendum vote to leave the European Union has raised significant questions about the country's global role. Nonetheless, the United Kingdom remains an economic and military power with great political and cultural influence around the world.
His Majesty King Charles III ascended to the throne in September 2022, on the death of his mother Queen Elizabeth II. The UK's first British Asian prime minister, Mr Sunak took over from the previous prime minister, Liz Truss, just 49 days after she herself had taken over from Boris Johnson. Mr Truss was forced to resign after big tax cuts in a her mini-budget prompted financial turmoil. Mr Sunak became leader of the Conservative party without a ballot of its members when his only remaining rival Penny Mordaunt withdrew."""

# Document 2 of the three-document corpus: raw text about Rishi Sunak's first
# call with Joe Biden and his Cabinet appointments. Note the typographic
# (curly) quotes in this text.
doc2 = """Hours after he took charge as the UK Prime Minister, Rishi Sunak Tuesday spoke with US President Joe Biden, said the White House. During the congratulatory call made by Biden, the leaders agreed on the importance of working together to support Ukraine and hold Russia accountable for its aggression, address the challenges posed by China, and secure sustainable and affordable energy resources, it added. Meanwhile, soon after taking over as the British Prime Minister, Rishi Sunak made key Cabinet appointments and decided to keep the new Chancellor, Jeremy Hunt, in place for economic stability. He brought back Indian-origin Suella Braverman as Home Secretary while James Cleverly will remain Foreign Secretary despite not being a Sunak loyalist. The 42-year-old became UK’s first Indian-origin British Prime Minister after being invited by King Charles III to form a government. He said that he had been appointed in part to fix the mistakes made by his predecessor Liz Truss and warned the country might have to face difficult decisions to overcome a “profound economic crisis”."""

# Document 3 of the three-document corpus: raw text comparing the size of the
# Indian and UK economies.
doc3 = """Just a decade ago, Indian GDP was the eleventh largest in the world. Now, with 7 percent growth forecast for 2022, India's economy has overtaken the United Kingdom's in terms of size, making it the fifth biggest. That's according to the latest figures from the International Monetary Fund. India's growth is accompanied by a period of rapid inflation in the UK, creating a cost of living crisis and the risk of a recession which the Bank of England predicts could last into 2024. This situation, coupled with a turbulent political period and the continued hangover of Brexit, led to Indian output overtaking that of the UK in the final quarter of 2021, with the first of 2022 offering no change in the ranking. Looking ahead, the IMF forecasts this to become the new status quo, with India expected to leap further ahead of the UK up to 2027 - making India the fourth largest economy by that time, too, and leaving the UK behind in sixth."""

In [None]:
# Punctuation removal and tokenization.
# Download NLTK's 'punkt' tokenizer data, which the nltk.word_tokenize call in
# punctuation_removal relies on.
nltk.download('punkt')
# ASCII punctuation stripped before tokenization. Written as a raw string so
# the backslash is a literal character rather than the invalid escape "\,".
_PUNCTUATION = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
# Translation table that deletes every character listed in _PUNCTUATION.
_PUNCTUATION_TABLE = str.maketrans('', '', _PUNCTUATION)


def punctuation_removal(txt):
  """Strip ASCII punctuation from ``txt``, lower-case it and tokenize it.

  Args:
    txt: Raw text as a ``str``.

  Returns:
    The list of tokens produced by ``nltk.word_tokenize`` on the cleaned,
    lower-cased text.

  Notes:
    * Only the characters in ``_PUNCTUATION`` are removed. Typographic quotes
      such as the curly apostrophe are not in that set and are left in place.
    * Hyphens are deleted rather than replaced by a space, so hyphenated
      compounds are joined ("mini-budget" becomes "minibudget").
  """
  # A single translate() pass removes all listed characters at once, instead
  # of re-scanning the whole text with replace() for each punctuation mark.
  cleaned = txt.translate(_PUNCTUATION_TABLE).lower()
  return nltk.word_tokenize(cleaned)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# Replace each raw document string with its token list.
# The isinstance guard keeps this cell idempotent: after the first run
# doc1..doc3 already hold token lists, and passing a list to
# punctuation_removal would fail because lists lack the string methods it uses.
doc1, doc2, doc3 = (
  text if isinstance(text, list) else punctuation_removal(text)
  for text in (doc1, doc2, doc3)
)

In [None]:
# Stop-word filtering over the combined token stream of all three documents.
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

# English stop words as a set for constant-time membership checks.
stop_words = set(stopwords.words('english'))

# Concatenate the per-document token lists, then keep only the tokens whose
# lower-cased form is not a stop word.
word_tokens = doc1 + doc2 + doc3
filtered_sentence = list(
  filter(lambda token: token.lower() not in stop_words, word_tokens)
)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Vocabulary: the distinct non-stop-word tokens across all documents.
unique_words = {token for token in filtered_sentence}

In [None]:
# Sanity check: show the distinct tokens of document 1 (stop words included).
print({*doc1})

{'financial', 'diminished', 'tax', 'it', 'parliamentary', 'british', 'up', 'previous', 'united', 'death', 'days', 'ballot', 'herself', 'state', 'role', 'well', 'majesty', 'known', '2022', 'its', 'made', 'nonetheless', 'historic', 'on', 'his', 'scotland', 'referendum', 'iii', 'she', '49', 'became', 'world', 'when', 'military', 'throne', 'and', 'home', 'sunak', 'ii', 'england', 'leader', 'from', 'countrys', 'cultural', 'two', 'leave', 'a', 'uks', 'northern', 'truss', 'members', 'withdrew', 'questions', 'kingdom', 'has', 'an', 'boris', 'industrial', 'european', 'global', 'mother', 'minibudget', 'both', 'party', 'century', 'cuts', 'had', 'around', 'ascended', 'johnson', 'after', 'with', 'took', 'democracy', 'as', 'influence', 'revolution', 'charles', 'prompted', 'penny', 'taken', 'of', 'her', 'conservative', 'liz', 'economic', 'september', 'to', 'vote', 'raised', 'turmoil', '2016', 'rival', 'just', 'remains', 'is', 'prime', 'king', 'mordaunt', 'only', 'remaining', 'political', 'significant

In [None]:
from collections import defaultdict

# Build each document's token set once. Membership tests against a set are
# constant-time, and rebuilding the sets for every vocabulary word is
# loop-invariant work.
doc_token_sets = [set(doc1), set(doc2), set(doc3)]

# Term -> [in doc1, in doc2, in doc3] incidence vector (1 = present, 0 = absent).
# Kept as a defaultdict(list) so code that relies on that type keeps working.
doc_vectors = defaultdict(list)
for term in unique_words:
  doc_vectors[term] = [int(term in tokens) for tokens in doc_token_sets]

In [None]:
# Display the term -> [in doc1, in doc2, in doc3] incidence vectors.
doc_vectors

defaultdict(list,
            {'economic': [1, 1, 0],
             'chancellor': [0, 1, 0],
             'final': [0, 0, 1],
             'charles': [1, 1, 0],
             'financial': [1, 0, 0],
             'wars': [1, 0, 0],
             'address': [0, 1, 0],
             'industrial': [1, 0, 0],
             'creating': [0, 0, 1],
             'leave': [1, 0, 0],
             'overtaking': [0, 0, 1],
             'remaining': [1, 0, 0],
             'international': [0, 0, 1],
             '2016': [1, 0, 0],
             'posed': [0, 1, 0],
             '2022': [1, 0, 1],
             'stability': [0, 1, 0],
             'leap': [0, 0, 1],
             'crisis': [0, 1, 1],
             'king': [1, 1, 0],
             'last': [0, 0, 1],
             'kingdom': [1, 0, 0],
             'appointed': [0, 1, 0],
             'part': [0, 1, 0],
             'hold': [0, 1, 0],
             'hunt': [0, 1, 0],
             'fourth': [0, 0, 1],
             'jeremy': [0, 1, 0],
             

In [None]:

def _term_vector(term, n_docs):
  """Return the document-incidence vector of ``term`` as an int array.

  Terms that are not in ``doc_vectors`` (stop words, typos, unseen words) map
  to an all-zero vector of length ``n_docs``.
  """
  # .get() bypasses defaultdict's insert-on-miss, so a failed lookup neither
  # adds an empty list to the index nor produces an empty float array that the
  # bitwise operators would reject.
  postings = doc_vectors.get(term)
  if not postings:
    return np.zeros(n_docs, dtype=int)
  return np.array(postings, dtype=int)


def _evaluate_boolean_query(tokens, n_docs):
  """Evaluate lower-cased query ``tokens`` strictly left to right.

  ``and`` / ``or`` are infix operators; ``not`` negates the term that follows
  it. A negated term that directly follows another term is combined with AND,
  so ``a not b`` means ``a AND NOT b``. There is no operator precedence and no
  parenthesis support.

  Raises:
    ValueError: if the query is empty, starts or ends with an operator, has
      two operators in a row, or has two terms with nothing between them.
  """
  result = None      # running result vector
  pending = None     # "and"/"or" still waiting for its right-hand term
  negations = 0      # number of "not" tokens seen since the last term
  for token in tokens:
    if token in ("and", "or"):
      if result is None or pending is not None or negations:
        raise ValueError("Malformed query: unexpected operator '%s'" % token)
      pending = token
    elif token == "not":
      negations += 1
    else:
      vector = _term_vector(token, n_docs)
      if negations % 2:
        vector = 1 - vector
      if result is None:
        result = vector
      elif pending == "or":
        result = result | vector
      elif pending == "and" or negations:
        result = result & vector
      else:
        raise ValueError("Malformed query: missing operator before '%s'" % token)
      pending = None
      negations = 0
  if result is None or pending is not None or negations:
    raise ValueError("Malformed query: expected a term at the end of the query")
  return result


def boolean_query(query=None):
  """Run a boolean retrieval query against ``doc_vectors`` and print the result.

  Args:
    query: Query string such as ``"biden and russia"``. When omitted, the
      query is read interactively with ``input``.

  Prints:
    A 0/1 vector with one entry per document (1 = the document matches).

  Raises:
    ValueError: if the query is empty or malformed.

  Notes:
    Query terms are only tokenized and lower-cased, whereas the indexed terms
    also had ASCII punctuation stripped, so a query term that contains
    punctuation will not match.
  """
  if query is None:
    query = input("Enter the query:")
  tokens = [token.lower() for token in word_tokenize(query)]
  # Number of documents = length of the stored vectors. max() ignores any
  # empty entries an earlier failed lookup may have left in the defaultdict.
  n_docs = max((len(vector) for vector in doc_vectors.values()), default=0)
  query_op = _evaluate_boolean_query(tokens, n_docs)
  print(query_op)


In [None]:
# Example run: the captured session below entered the OR query "truss or support".
boolean_query()

Enter the query:truss or support
[1 1 0]


In [None]:
# Example run: the captured session below entered the AND query "biden and russia".
boolean_query()

Enter the query:biden and russia
[0 1 0]
