# 1. PREPROCESSING THE DATA

## IMPORTING REQUIRED LIBRARIES

In [3]:
%pip install pandas
%pip install numpy
import pandas as pd
import numpy as np


Note: you may need to restart the kernel to use updated packages.


In [5]:
dataframe = pd.read_csv(r"C:\Users\kkmca\AIRIW-PROJECT-2024\zomato_dataset.csv") 
## PUT THE PATH OF zomato.csv 

In [None]:
dataframe.shape

(51717, 17)

## DROPPING THE RATING COLUMN

In [None]:
dataframe = dataframe.drop(['rate'], axis=1)

## TREATING SAME RESTAURANT AT DIFFERENT LOCATIONS AS ONE ENTITY TO REDUCE SIZE OF POSTING LIST IN LATER IMPLEMENTATIONS

In [None]:
dataframe['reviews_list'] = dataframe[['name','reviews_list']].groupby(['name'])['reviews_list'].transform(lambda x: ''.join(str(x)))

In [None]:
dataframe.shape

(51717, 16)

## DELETING ALL DUPLICATE COLUMNS

In [None]:
dataframe = dataframe[['name','reviews_list']].drop_duplicates()

In [None]:
dataframe.shape

(8792, 2)

In [None]:
dataframe

Unnamed: 0,name,reviews_list
0,Jalsa,"0 [('Rated 4.0', 'RATED\n A beautiful ..."
1,Spice Elephant,"1 [('Rated 4.0', 'RATED\n Had been here..."
2,San Churro Cafe,"2 [('Rated 3.0', ""RATED\n Ambience is ..."
3,Addhuri Udupi Bhojana,"3 [('Rated 4.0', ""RATED\n Great food and..."
4,Grand Village,"4 [('Rated 4.0', 'RATED\n Very good res..."
...,...,...
51607,Chime - Sheraton Grand Bengaluru Whitefield Ho...,"51607 [('Rated 4.0', 'RATED\n Nice and fri..."
51610,The Nest - The Den Bengaluru,"51610 [('Rated 5.0', 'RATED\n Great ambien..."
51614,Nawabs Empire,"51614 [('Rated 1.0', 'RATED\n This place i..."
51632,SeeYa Restaurant,"51632 [('Rated 4.0', 'RATED\n Good food, t..."


## MAKING ALL STRINGS LOWER CASE

In [None]:
dataframe['reviews_list'] = dataframe['reviews_list'].str.lower()

## REMOVING ERRONEOUS NUMBERS FROM THE REVIEWS

In [None]:
dataframe.reviews_list = dataframe.reviews_list.str.replace('\d+', '')

## REMOVING STOP WORDS

In [None]:
%pip install nltk
import nltk.corpus
nltk.download('stopwords')
stop = nltk.corpus.stopwords.words('english')

Note: you may need to restart the kernel to use updated packages.


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\kkmca\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
dataframe['reviews_list'] = dataframe['reviews_list'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))

## ONLY KEEPING WORDS PRESENT IN THE ENGLISH DICTIONARY

In [None]:
import nltk
nltk.download('words')
words = set(nltk.corpus.words.words())

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\kkmca\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


In [None]:
dataframe['reviews_list'] = dataframe['reviews_list'].apply(lambda x: ' '.join([word for word in x.split() if word in (words)]))

In [None]:
dataframe

Unnamed: 0,name,reviews_list
0,Jalsa,beautiful place beautiful place beautiful plac...
1,Spice Elephant,spice object
2,San Churro Cafe,ambience ambience ambience ambience ambience a...
3,Addhuri Udupi Bhojana,great food great food object
4,Grand Village,good restaurant good restaurant good restauran...
...,...,...
51607,Chime - Sheraton Grand Bengaluru Whitefield Ho...,nice friendly nice friendly nice friendly chim...
51610,The Nest - The Den Bengaluru,great ambience great ambience nest den object
51614,Nawabs Empire,place object
51632,SeeYa Restaurant,good take bit object


## RESETING INDEX

In [None]:
dataframe.reset_index(drop=True, inplace = True)

In [None]:
import nltk

## LEMMETIZING WITHOUT POS INDEX

In [None]:
# import nltk

# w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
# lemmatizer = nltk.stem.WordNetLemmatizer()

# def lemmatize_text(text):
#     return [lemmatizer.lemmatize(w) for w in w_tokenizer.tokenize(text)]


# dataframe['review'] = dataframe.review.apply(lemmatize_text)

In [None]:
dataframe

Unnamed: 0,name,reviews_list
0,Jalsa,beautiful place beautiful place beautiful plac...
1,Spice Elephant,spice object
2,San Churro Cafe,ambience ambience ambience ambience ambience a...
3,Addhuri Udupi Bhojana,great food great food object
4,Grand Village,good restaurant good restaurant good restauran...
...,...,...
8787,Chime - Sheraton Grand Bengaluru Whitefield Ho...,nice friendly nice friendly nice friendly chim...
8788,The Nest - The Den Bengaluru,great ambience great ambience nest den object
8789,Nawabs Empire,place object
8790,SeeYa Restaurant,good take bit object


## Words like friendly do not get converted so lemmetizing with POS tag

## LEMMETIZING WITH POS TAG

SIZE OF DICTIONARY BEFORE LEMMETIZATION

In [None]:
d = set()
for words in dataframe.reviews_list.str.findall(r"\w+").map(set):
    for word in words:
        d.add(word)
print(len(d))

2398


In [None]:
# WORDNET LEMMATIZER (with appropriate pos tags)

import nltk
from nltk.stem import WordNetLemmatizer

# Download necessary NLTK resources
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')  # Add this line to download the punkt tokenizer models
nltk.download('wordnet')  # Ensure that wordnet is also downloaded

from nltk.corpus import wordnet

lemmatizer = WordNetLemmatizer()

def pos_tagger(nltk_tag):
	if nltk_tag.startswith('J'):
		return wordnet.ADJ
	elif nltk_tag.startswith('V'):
		return wordnet.VERB
	elif nltk_tag.startswith('N'):
		return wordnet.NOUN
	elif nltk_tag.startswith('R'):
		return wordnet.ADV
	else:		
		return None

def pos_tagged(sentence):
    return nltk.pos_tag(nltk.word_tokenize(sentence))

def wordnet_tagged(sentence):
    return list(map(lambda x: (x[0], pos_tagger(x[1])), sentence))

def lem(sentence):
    lemmatized_sentence = []
    for word, tag in sentence:
        if tag is None:
            # if there is no available tag, append the token as is
            lemmatized_sentence.append(word)
        else:	
            # else use the tag to lemmatize the token
            lemmatized_sentence.append(lemmatizer.lemmatize(word, tag))
    return " ".join(lemmatized_sentence)


def final(sentence):
    sentence = pos_tagged(sentence)
    sentence = wordnet_tagged(sentence)
    sentence = lem(sentence)
    return sentence
    
dataframe['reviews_list'] = dataframe['reviews_list'].apply(final)


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kkmca\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\kkmca\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kkmca\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
dataframe

Unnamed: 0,name,reviews_list
0,Jalsa,beautiful place beautiful place beautiful plac...
1,Spice Elephant,spice object
2,San Churro Cafe,ambience ambience ambience ambience ambience a...
3,Addhuri Udupi Bhojana,great food great food object
4,Grand Village,good restaurant good restaurant good restauran...
...,...,...
8787,Chime - Sheraton Grand Bengaluru Whitefield Ho...,nice friendly nice friendly nice friendly chim...
8788,The Nest - The Den Bengaluru,great ambience great ambience nest den object
8789,Nawabs Empire,place object
8790,SeeYa Restaurant,good take bit object


In [None]:
#estimating the size of the dictionary AFTER LEMMETIZATION

d = set()
for words in dataframe.reviews_list.str.findall(r"\w+").map(set):
    for word in words:
        d.add(word)
print(len(d))

2326


In [None]:
#4889 to 4633 is the length of the dictionary after lemmetization

In [None]:
#trying to compress posting list by using docid instead

## 2.  CREATING INVERTED INDEX

In [None]:

new_list = []
for i in range(dataframe.shape[0]):
    for j in dataframe.iloc[i,1].split():
        new_list.append([j,i])
new_list = sorted(new_list)
dict_index = {}
words = []
for i in new_list:
    if i[0] not in words:
        words.append(i[0])
        dict_index[i[0]] = [1,[i[1]]]
    else:  
        if(i[1] not in dict_index[i[0]][1]):    
            dict_index[i[0]][0]+=1
            dict_index[i[0]][1].append(i[1])

In [None]:
dict_index

{'aa': [1, [6184]],
 'absolute': [14,
  [647,
   863,
   1821,
   1931,
   2087,
   2611,
   2633,
   4050,
   4315,
   4806,
   4862,
   6456,
   7427,
   7563]],
 'absolutely': [34,
  [50,
   124,
   222,
   272,
   600,
   749,
   758,
   871,
   1992,
   2436,
   2559,
   2574,
   2668,
   2752,
   2845,
   2892,
   2978,
   3065,
   3391,
   3574,
   3790,
   3886,
   4233,
   4236,
   5425,
   5669,
   6011,
   6068,
   6304,
   6623,
   6807,
   6983,
   7500,
   7860]],
 'accept': [1, [4407]],
 'access': [2, [1307, 8544]],
 'accessible': [1, [4415]],
 'accidently': [1, [2826]],
 'accord': [2, [257, 6627]],
 'across': [4, [1063, 2566, 4087, 7212]],
 'actual': [2, [3598, 4900]],
 'actually': [10, [382, 688, 802, 1539, 1983, 1994, 2836, 3465, 4137, 5911]],
 'add': [4, [1890, 4020, 4260, 5994]],
 'adda': [3, [1599, 2853, 4032]],
 'addict': [2, [599, 3245]],
 'address': [1, [6274]],
 'admit': [1, [4072]],
 'admitted': [1, [4072]],
 'adore': [2, [2531, 3012]],
 'affa': [1, [5335]],
 

## CREATING BIGRAM INVERTED INDEX

In [None]:

bigrams = {}
words = []
for i in range(dataframe.shape[0]):
    for word in dataframe.iloc[i,1].split():
        if word not in words:
            words.append(word)
            new = '$'+word+'$'
            for i in range(len(word)):
                if new[i:i+2] not in bigrams:
                    bigrams[new[i:i+2]] = [word]
                else:
                    bigrams[new[i:i+2]].append(word)

                    

In [None]:
bigrams

{'$b': ['beautiful',
  'best',
  'bar',
  'base',
  'bad',
  'big',
  'bake',
  'buffet',
  'benne',
  'basically',
  'back',
  'breakfast',
  'behave',
  'baby',
  'become',
  'beer',
  'bos',
  'bowl',
  'behind',
  'bear',
  'birthday',
  'bazaar',
  'bucket',
  'beat',
  'blue',
  'bit',
  'buy',
  'butter',
  'busy',
  'bite',
  'bakery',
  'believe',
  'bugle',
  'bath',
  'brown',
  'brew',
  'brilliant',
  'box',
  'belly',
  'bedridden',
  'black',
  'bought',
  'bungalow',
  'boneless',
  'burnt',
  'budget',
  'bawarchi',
  'basic',
  'bet',
  'banana',
  'bong',
  'brand',
  'babu',
  'bond',
  'body',
  'builder',
  'bottle',
  'bib',
  'bean',
  'burrito',
  'bun',
  'baba',
  'bread',
  'brownie',
  'balcony',
  'boa',
  'boy',
  'berry',
  'bright',
  'bay',
  'batch',
  'brewery',
  'bubble',
  'b',
  'beetle',
  'brunch',
  'behavior',
  'bull',
  'business',
  'blanc',
  'brewing',
  'beef',
  'blueberry',
  'buff',
  'bahar',
  'broadway',
  'blow',
  'brimstone',
 

## INVERTED INDEX WITH POSITIONAL INFORMATION

In [None]:
new_list = []
for i in range(dataframe.shape[0]):
    count = 0
    for j in dataframe.iloc[i,1].split():
        new_list.append([j,i,count])
        count+=1
new_list = sorted(new_list)
dict_index = {}
words = []
for i in new_list:
    if i[0] not in words:
        words.append(i[0])
        dict_index[i[0]] = [1,{i[1]:[i[2]]}]
    else:
        if i[1] not in dict_index[i[0]][1]:            
            dict_index[i[0]][0]+=1
            dict_index[i[0]][1][i[1]] = [i[2]]
        else:
            dict_index[i[0]][1][i[1]].append(i[2])
print(dict_index)


{'aa': [1, {6184: [1, 3]}], 'absolute': [14, {647: [0, 6, 8, 18, 20], 863: [2, 23], 1821: [29], 1931: [0], 2087: [6], 2611: [0, 1, 2], 2633: [0, 2, 3, 4, 5], 4050: [26, 28, 30, 32], 4315: [0, 1, 2, 3, 4, 5], 4806: [0, 1, 2, 3], 4862: [4], 6456: [0, 2], 7427: [4], 7563: [3]}], 'absolutely': [34, {50: [8, 19, 48, 54], 124: [0], 222: [33], 272: [20, 28, 30, 65], 600: [6, 14, 15, 60], 749: [0, 2, 4, 6], 758: [8], 871: [0, 2, 4, 6, 8, 10, 12, 14], 1992: [0], 2436: [0, 2, 4, 6, 8, 10, 12, 14, 16], 2559: [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22], 2574: [11], 2668: [17, 19, 21, 23, 25, 27, 29], 2752: [18, 20], 2845: [0, 2, 4, 6, 8, 10, 12, 14, 16], 2892: [14, 16, 20, 22], 2978: [6, 8, 10], 3065: [0, 1, 2, 3, 4], 3391: [6, 8], 3574: [0, 2, 4], 3790: [16], 3886: [18, 19], 4233: [14, 15], 4236: [0, 6, 8, 10], 5425: [0], 5669: [0, 2], 6011: [4, 5, 6, 7], 6068: [0, 2, 4, 6], 6304: [0, 2, 4, 6], 6623: [0], 6807: [0, 1, 2], 6983: [0, 2], 7500: [0, 2, 4], 7860: [0]}], 'accept': [1, {4407: [1, 3, 5, 

In [None]:
#when positional information is included
#dict_index is a dictionary structure
#each word is a key
#The value of each word is a list where first element is document frequency and 
#second element is a dictionary with doc id as key and positional info as a list

In [None]:
%pip install scikit-learn
import pandas as pd
import numpy as np
import re
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_similarity




# FUNCTION TO CALCULATE COSINE SIMILARITY

In [None]:
def preprocess(text):
    text = str(text).lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text


def cosine_similarity(query_vector, doc_vector):
    dot_product = np.dot(query_vector, doc_vector)
    query_norm = np.linalg.norm(query_vector)
    doc_norm = np.linalg.norm(doc_vector)
    if query_norm == 0 or doc_norm == 0:
        return 0
    else:
        return dot_product / (query_norm * doc_norm)




# SEARCH FUNCTION TAKES CARE OF CREATING VECTORS, WILDCARD QUERIES,  SIMILARITY SCORES AND RANKING RESULTS

In [None]:
def search(query, feedback=False):
    query = preprocess(query)
    query_terms = query.split()
    # Generate vocab and query vector
    vocab = list(set(query_terms))
    query_vector = np.zeros(len(vocab))
    for i, term in enumerate(vocab):
        query_vector[i] = query_terms.count(term)
        
    # Generate phrase vectors
    phrase_vectors = []
    for i in range(len(query_terms)-1):
        if query_terms[i] == '"' and '"' in query_terms[i+1:]:
            j = i+1+query_terms[i+1:].index('"')
            phrase = ' '.join(query_terms[i:j+1])
            phrase_terms = phrase.split()
            phrase_vector = np.zeros(len(vocab))
            for k, term in enumerate(vocab):
                if term in phrase_terms:
                    phrase_vector[k] = phrase_terms.count(term)
            phrase_vectors.append((phrase, phrase_vector))
            
    # Generate wildcard regexes
    wildcard_regexes = []
    for term in query_terms:
        if '*' in term:
            regex = term.replace('*', '\w+')
            wildcard_regexes.append(regex)
            
            
    # Compute similarity scores
    scores = []
    for i,reviews_list in enumerate(dataframe['reviews_list']):######
        terms = reviews_list.split()
        doc_vector = np.zeros(len(vocab))
        for j, term in enumerate(vocab):
            doc_vector[j] = terms.count(term)
        # Check phrase queries
        phrase_match = True
        for phrase, phrase_vector in phrase_vectors:
            if phrase not in reviews_list:
                phrase_match = False
                break
            phrase_score = cosine_similarity(phrase_vector, doc_vector)
            phrase_match = phrase_match and (phrase_score > 0)
        if not phrase_match:
            continue
        # Check wildcard queries
        
        for regex in wildcard_regexes:
            if not any(re.match(regex, t) for t in terms):
                continue
        score = cosine_similarity(query_vector, doc_vector)
        scores.append((i, score))
        
        
    # Rank results
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    results = []
    for i, score in scores:
        reviews_list = dataframe.loc[i]['reviews_list']
        name = dataframe.loc[i]['name']
        results.append((name, reviews_list, score))
        

    # Re-rank results using relevance feedback
    if feedback:
        relevant_docs = []
        nonrelevant_docs = []
        for i, (name, reviews_list, score) in enumerate(results):
            print(f'Review {i+1}:')
            print(name)
            print(reviews_list)
    
            print(f'Similarity score: {score}')
            feedback = input('Is this review relevant? (y/n): ')
            if feedback.lower() == 'y':
                relevant_docs.append(i)
            else:
                nonrelevant_docs.append(i)
        relevant_scores = [score for i, score in enumerate(scores) if i in relevant_docs]
        nonrelevant_scores = [score for i, score in enumerate(scores) if i in nonrelevant_docs]
        if len(relevant_scores) > 0:
            mean_relevant_score = sum(relevant_scores) / len(relevant_scores)
        else:
            mean_relevant_score = 0
        if len(nonrelevant_scores) > 0:
            mean_nonrelevant_score = sum(nonrelevant_scores) / len(nonrelevant_scores)
        else:
            mean_nonrelevant_score = 0
        alpha = 0.1
        beta = 0.1
        new_scores = []
        for i, (name,reviews_list , score) in enumerate(results):
            if i in relevant_docs:
                new_score = (1-alpha)*score + alpha*mean_relevant_score
            elif i in nonrelevant_docs:
                new_score = (1-beta)*score - beta*mean_nonrelevant_score
            else:
                new_score = score
            new_scores.append((i, new_score))
        new_scores = sorted(new_scores, key=lambda x: x[1], reverse=True)
        results = []
        for i, score in new_scores:
            reviews_list = dataframe.loc[i]['review']
            name = dataframe.loc[i]['name']
            results.append((name, reviews_list, score))
    return results



# RESULTS OF SEARCH

In [None]:
search("tasty cake")

[('The Chocolate Heaven - Cakes',
  'tasty cake tasty cake nice decent place nice decent place nice decent place nice decent place tasty cake tasty cake chocolate heaven object',
  0.9999999999999998),
 ('Me And My Cake',
  'order order cake order order cake bad bad bad cake cake cake bad one worst one one one get cake get cake object',
  0.7071067811865476),
 ('The Cake Ville',
  'awesome cake awesome cake awesome cake awesome cake awesome cake awesome cake cake object',
  0.7071067811865476),
 ('Status',
  'tasty pocket tasty pocket tasty pocket tasty pocket best best tasty pocket tasty pocket tasty pocket best best best object',
  0.7071067811865476),
 ('Namms Cafe',
  'good tasty food good tasty food good tasty food good tasty food good tasty food good tasty food good tasty food object',
  0.7071067811865476),
 ('Tasty Bytes', 'nice nice nice tasty object', 0.7071067811865475),
 ('Cake of the Day',
  'good good resist resist resist resist pocket friendly cake pocket friendly cake r

# LOADING PRETRAINED MODEL FOR SEMANTIC MATCHING

In [None]:
%pip install gensim


import gensim.downloader as api

# List available models/datasets
print(api.info())

# Download a pre-trained Word2Vec model
model = api.load("word2vec-google-news-300")

Note: you may need to restart the kernel to use updated packages.
{'corpora': {'semeval-2016-2017-task3-subtaskBC': {'num_records': -1, 'record_format': 'dict', 'file_size': 6344358, 'reader_code': 'https://github.com/RaRe-Technologies/gensim-data/releases/download/semeval-2016-2017-task3-subtaskB-eng/__init__.py', 'license': 'All files released for the task are free for general research use', 'fields': {'2016-train': ['...'], '2016-dev': ['...'], '2017-test': ['...'], '2016-test': ['...']}, 'description': 'SemEval 2016 / 2017 Task 3 Subtask B and C datasets contain train+development (317 original questions, 3,169 related questions, and 31,690 comments), and test datasets in English. The description of the tasks and the collected data is given in sections 3 and 4.1 of the task paper http://alt.qcri.org/semeval2016/task3/data/uploads/semeval2016-task3-report.pdf linked in section “Papers” of https://github.com/RaRe-Technologies/gensim-data/issues/18.', 'checksum': '701ea67acd82e75f95e1d

In [None]:
def tokenize(query):
    query = preprocess(query)
    query_terms = query.split()
    # Generate vocab and query vector
    query_tokens = list(set(query_terms))
    return query_tokens
    

In [None]:
def semantic_matching(query, dataframe):
    # Tokenize query
    query_tokens = tokenize(query)
    # Convert query terms to vectors
    query_vectors = [model[word] for word in query_tokens if word in model.key_to_index]
    # Compute mean vector of query
    query_vector = np.mean(query_vectors, axis=0)
    # Compute similarity between query vector and document vectors
    results = []
    for i, row in dataframe.iterrows():
        document = row['reviews_list']
        name = row['name']
        document_tokens = tokenize(document)
        document_vectors = [model[word] for word in document_tokens if word in model.key_to_index]
        if len(document_vectors) > 0:
            document_vector = np.mean(document_vectors, axis=0)
            similarity = cosine_similarity(np.squeeze(np.asarray(query_vector)),np.squeeze(np.asarray(document_vector)))
            results.append((name, document, similarity))
    # Sort results by similarity score
    results = sorted(results, key=lambda x: x[2], reverse=True)
    return results


# RESULTS OF SEMANTIC MATCHING

In [None]:
results = semantic_matching("tasty cakes", dataframe)

In [None]:
results

[('Tasty Khaana Khazana',
  'delicious food delicious food tasty object',
  0.7783584),
 ('The Chocolate Heaven - Cakes',
  'tasty cake tasty cake nice decent place nice decent place nice decent place nice decent place tasty cake tasty cake chocolate heaven object',
  0.7212344),
 ('Apple Cafe', 'delicious yummy apple object', 0.7184376),
 ('99Foods', 'nice tasty food object', 0.6888607),
 ('Chicken Dera',
  'tasty food tasty food tasty food tasty food chicken object',
  0.68803644),
 ('Saatvikk',
  'food tasty food tasty food tasty food tasty food tasty food tasty food tasty food tasty food tasty food tasty object',
  0.6829584),
 ('Mathsya Darshini', 'tasty food object', 0.6829584),
 ('Svi Food And Juice', 'tasty food object', 0.6829584),
 ('Terra Bites',
  'tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy tasty food cosy object',
  0.6829584),
 ('Food Corner

In [None]:
import tkinter as tk
from tkinter import scrolledtext

class IRSystemGUI:
    def __init__(self, master):
        self.master = master
        master.title("Bengaluru Restaurant Choice Assistant")

        # Create title label
        self.title_label = tk.Label(master, text="Welcome to Restaurant Choice Assistant", font=("Helvetica", 16, "bold"))
        self.title_label.pack(pady=10)

        # Create text box for input query
        self.query_label = tk.Label(master, text="Enter Food item or (something related to food or restaurant):", font=("Helvetica", 12))
        self.query_label.pack()
        self.query_box = tk.Entry(master, font=("Helvetica", 12))
        self.query_box.pack(pady=5)




        # Create button to initiate search
        self.search_button = tk.Button(master, text="Go", command=self.search, font=("Helvetica", 12, "bold"))
        self.search_button.pack(pady=5)

        # Create text box for displaying results
        self.results_label = tk.Label(master, text="Results:", font=("Helvetica", 12))
        self.results_label.pack()
        self.results_box = scrolledtext.ScrolledText(master, height=15, width=60, font=("Helvetica", 12))
        self.results_box.pack(pady=5)

    def search(self):
        query = self.query_box.get()
        # Perform search operation using IR system and get results
        results = [search(query)[i][0] for i in range(10)]

        # Clear results box
        self.results_box.delete(1.0, tk.END)

        # Display results in results box
        for result in results:
            self.results_box.insert(tk.END, result + "\n")


# Create GUI instance and start main event loop
root = tk.Tk()
ir_system_gui = IRSystemGUI(root)
root.mainloop()
