# Homework 3 - Find the perfect place to stay in Texas!
### Group 14 - PavanKumar Alikana, Matteo Cavalletti, Francesca Porcu

The homework consists in analyzing the text of Airbnb property listings and building a search engine.

In [6]:
#Import required libraries
import pandas as pd
# For displaying search results in a table
from IPython.display import HTML, display
from os.path import join as pjoin
import csv

# For persisting indexes in an external file
import pickle
import math
import heapq
from pathlib import Path

# For word tokenization
import nltk
import re
import os
from nltk.tokenize import RegexpTokenizer
# For stop words list
from nltk.corpus import stopwords
# For word stemming
from nltk.stem.snowball import SnowballStemmer

#First we import stopwords from nltk
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
#To remove punctuation we use regexptokenizer, but we leave dollar symbol $ because maybe is used in some queries
tokenizer = RegexpTokenizer(r'\w+|\$')
#we create the stemmer
ps = SnowballStemmer('english')

# Path to the current working directory to refer to all the files relatively
my_path = os.path.dirname(os.path.realpath('__file__'))

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/vamsigunturi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Step 1: Data

In [16]:
#Reading the main CSV file
m = pd.read_csv("Airbnb_Texas_Rentals.csv")
doc_len = len(m)

#We found '\\t','\\n' and '\\r' in the dataset so we cleaned it, replacing them with spaces. This is necessary because
#they corrupt some words creating problems during stemming procedure.
m = m.replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace(r'\\n',  ' ', regex=True)
m = m.replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace(r'\\t',  ' ', regex=True)
m = m.replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace(r'\\r',  ' ', regex=True)

#To create tsv files we droped the first column because it's not useful
m = m.drop(['Unnamed: 0'], axis=1)

# Step 2: Create documents

In [None]:
#Here we create a separate csv file for every row in the reviews csv file
for i in range(len(m)):
    with open(os.path.join(my_path, 'docu_hw3/doc_' + str(i) + '.tsv', 'w', newline='',encoding='utf-8') as output:
        tsv_output = csv.writer(output, delimiter='\t')
        tsv_output.writerow(m.iloc[i])

# Step 3: Search Engine

At this point we create the ***vocabulary***. We don't modify directly tsv files because we need them for the output of the search engines, but we create a dictionary that assigns to each file words that would have contained if we had preprocessed them. In particular, we apply to the words contained in each file these procedures:
- *Removing stopwords*
- *Removing punctuation*
- *Stemming*
- *Lower-case letters*

## 3.1) Conjunctive query

### 3.1.1) Create your index!

In [7]:
review_content_persist = {}
vocabulary_persist = {}
words_persist = {}

# Retrieving persisted information for review content and word map
# Please create a directory(in your current working directory) with name 'indexes'  
content_file = Path(os.path.join(my_path, "indexes/review_content.pkl"))
vocabulary_file = Path(os.path.join(my_path, "indexes/vocabulary.pkl"))
words_file = Path(os.path.join(my_path, "indexes/words.pkl"))


# Retrieving already persisted information

# Check if the index file exists, 
#if yes load the previously persisted indexes and content
if content_file.is_file():
    with open(content_file, "rb") as review_content:
        review_content_persist = pickle.load(review_content)
        review_content.close()
        
# Check if the vocabulary file exists, 
#if yes load the previously persisted vocabulary
if vocabulary_file.is_file():
    with open(vocabulary_file, "rb") as vocabulary:
        vocabulary_persist = pickle.load(vocabulary)
        vocabulary.close()
        
# Check if the words file exists, 
#if yes load the previously persisted words
if words_file.is_file():
    with open(words_file, "rb") as words:
        words_persist = pickle.load(words)
        words.close()

if(len(review_content_persist.keys()) == 0):
    
    review_word_map = {}
    
    # We reach here if we don't have indexes already present
    print("Indexes are being created")
    
    #we create the vocabulary of preprocessed documents,but we don't modify the documents because 
    #we'll use them in search engine
    
    for i in range(doc_len):
        with open(os.path.join(my_path, 'docu_hw3/doc_' + str(i) + '.tsv'),encoding='utf8') as tsvfile:
             tsvreader = list(csv.reader(tsvfile, delimiter="\t"))
        
        # For review title
        l1 = tsvreader[0][4]
        
        # For review content
        l2 = tsvreader[0][7]
        
        l = l1+ ' ' +l2
        l = l.lower()
        l = tokenizer.tokenize(l)
        
        # This array will contain all the valid words in a given review after removing 
        # all the stop words, punctuations, stemming etc..,, we will use this information
        # to find out the term frequency there by tf-idf values
        file_words = []
        
        for r in l :
            if not r in stop_words:
                sr = ps.stem(r)
                
                file_words.append(sr)
                
                if not  sr in review_word_map:
                    review_word_map[sr] = [i]
                else:
                    review_word_map[sr]+=[i]
                    
                    
        review_content_persist[i] = ' '.join(file_words)
    
    # Saving the content and indexes for the first time
    # We made use of pickle python module
    #Saving content dictionary
    with open(content_file, "wb") as review_content:
        pickle.dump(review_content_persist, review_content)
        review_content.close()
    
    # Word and Vocabulary indexes based on word map
    c = 0
    for key in review_word_map:
        words_persist[key] = c
        vocabulary_persist[c] = review_word_map[key]
        c += 1
    
    #Save vocabulary and words
    with open(vocabulary_file, "wb") as vocabulary:
        pickle.dump(vocabulary_persist, vocabulary)
        vocabulary.close()
        
   
    with open(words_file, "wb") as words:
        pickle.dump(words_persist, words)
        words.close()
    
    
                

            

### 3.1.2) Execute the query

In [8]:
word = input('Enter a search query: ')

# Cleaning user input similar to what we did for creating indexes for words
def clean_input(w):
    w_list = []
    w = w.lower()
    w = tokenizer.tokenize(w)
    
    for r in w :
        if not r in stop_words:
            sr = ps.stem(r)
            if not  sr in w_list:
                w_list.append(sr)
    return w_list

# Show search results in tabular format
def show_results(results, doc_list, isScore):
    
    if(len(doc_list)):
        print('Found ' + str(len(doc_list))  + ' matching reviews to your query')
    
    if(len(results)):
        if(isScore):
            tableFormat = '<table border="1"><tr><th>Title</th><th>Description</th><th>City</th><th>URL</th><th>Score</th></tr><tr>{}</tr></table>'
        else:
            tableFormat = '<table border="1"><tr><th>Title</th><th>Description</th><th>City</th><th>URL</th></tr><tr>{}</tr></table>'
        
        
        display(HTML(tableFormat.format('</tr><tr>'.join('<td>{}</td>'.format('</td><td>'.join(str(_) for _ in row)) for row in results)
)))
    else:
        display(HTML('<h1>No results found. Please try a different query</h1>'))
    


word_list = clean_input(word)

# print("Cleaned word: ", word_list)

list_doc_list = []

for w in word_list:
        doc_list = []
        
        if w in words_persist: 
            doc_list = vocabulary_persist[words_persist[w]]
            
        list_doc_list.append(doc_list)  

# Initially assinging the list intersection to the matching documents of first word
list_intersect = list_doc_list[0]

def intersection(lst1, lst2): 
    lst3 = [value for value in lst1 if value in lst2] 
    return lst3 
        
for docList in list_doc_list:
    list_intersect = intersection(list_intersect, docList)
    

def process_text_common(text):
    textList = text.split(' ')
        
    if(len(textList) > 100):
        textList = textList[:100]
        
    return ' '.join(textList)    
        
results = []

# Removing the duplicates in the document intersection
list_intersect = list(set(list_intersect))

i_len = len(list_intersect)

if(i_len):
    
    r_limit = 10
    
    if(i_len < 10):
        r_limit = i_len
    
    # Showing at most ten results
    for doc in list_intersect[:r_limit]:
        
        # Reading each document based on document id in list intersect 
        with open(os.path.join(my_path, 'docu_hw3/doc_' + str(doc) + '.tsv'),encoding='utf8') as tsvfile:
             tsvreader = list(csv.reader(tsvfile, delimiter="\t"))

        title = tsvreader[0][7]        
        description = tsvreader[0][4]
        
        description = process_text_common(description)
        
        city = tsvreader[0][2]
        url = tsvreader[0][8]

        results.append([title, description, city, url])

# Displaying the results
show_results(results, list_intersect, False)

Enter a search query: beautiful house
Found 797 matching reviews to your query


Title,Description,City,URL
Unique Location! Alamo Heights - Designer Inspired,"Stylish, fully remodeled home in upscale NW – Alamo Heights Area. Amazing location - House conveniently located in quiet street, with beautiful seasoned trees, prestigious neighborhood and very close to the airport, 281, 410 loop and down-town area. Featuring an open floor plan, original hardwood floors, 3 bedrooms, 3 FULL bathrooms + an independent garden-TV room which can sleep 2 more European inspired kitchen and “top of the line” decor. Driveway can park 4 cars.",San Antonio,https://www.airbnb.com/rooms/17481455?location=Cibolo%2C%20TX
Friendly Private Room in َQuiet Neighborhood,"This is a beautiful bedroom with a queen size bed and closet. We do not have pets and the house is always clean. The bathroom is shared and supplies such as towels and shampoo are available. We are only some miles from Downtown, TCU, TCC, and Stockyards.",Fort Worth,https://www.airbnb.com/rooms/18977363?location=Cleburne%2C%20TX
East Austin Hillside Gem,"Beautiful and modern 3Br, 2.5Ba located minutes from downtown. Amenities include front deck, back patio, peaceful backyard with tiered gardens and fire pits. There is also gym equipment, wifi, Xbox, and cable with HBO, Showtime, Starz, etc. There is free offstreet parking for two cars. Perfect location for families(with kids or furry friends,) couples, business travelers or friends looking for a spacious and comfortable location with plenty of amenities. This house has beautiful sunset views of downtown Austin just 6 miles from Darrell K Royal Stadium, 8 miles from Zilker Park and 14 miles from Circuit of the",Austin,https://www.airbnb.com/rooms/17555039?location=Bastrop%20County%2C%20TX
A Cozy Home with a Beautiful Nature Views,"This home is on the North Side of San Antonio, 3 minutes away from a great golfing course and a JW Marriott Resort and Spa. This is a great stopping place for a family and comes with all amenities from cable, internet to a laundry room and 2 bathrooms. This house is against a green belt so you can enjoy the back yard and nature.This area is safe and my neighbors are also great! Hope you enjoy your stay and more so enjoy San Antonio!",San Antonio,https://www.airbnb.com/rooms/19190311?location=Cibolo%2C%20TX
Cottage in the Oaks,"This cottage sits on a 2 acre lot with beautiful swayed oaks. Large front patio which is GREAT for entertaining or just relaxing and watching the sun set. This house has a backyard with a small deck and ivy covered oaks. Inside, there are two bedrooms and two bathrooms. Smart TV in living room with ROKU technology. Additional smart TV in master bedroom. Fully stocked kitchen (just bring your own groceries!) Washer and dryer available. Only 200 yards from the bay! Great for families.",Rockport,https://www.airbnb.com/rooms/16585732?location=Bayside%2C%20TX
"The Woodlands, BEAUTIFUL HOME, 1 Floor, 2 BT, 3 BD","Attractions: The Woodlands, incredible views, golf courses, theme parks with tennis courts and lakes, pools, cycle paths, tranquility and quality of life for the family. You will love my house because of its cozy spaces with high ceilings in an excellent location, beautiful views, and proximity to the great shopping areas. It is a single story house with a large garden; ideal for couples, business travelers, and families with children.",Spring,https://www.airbnb.com/rooms/13065223?location=Conroe%2C%20TX
Super Bowl Bungalow Near Downtown & Stadium!!!,"Super Bowl Ready!!! Close to Downtown, Midtown, Montrose and the Stadium! Charming corner lot bungalow in quiet historic neighborhood. Jogging trail along bayou. Across from beautiful cemetery. One private bedroom with a queen and lock on door. Back bedroom with pull out queen and back door entrance. Middle bedroom with queen (must pass through middle bedroom to access back bedroom). Two large couches in living. Small house with only one bathroom that can sleep 8 adults.",Houston,https://www.airbnb.com/rooms/17006488?location=Channelview%2C%20TX
Beautiful Home with tons of Amenities,"Hey traveler! This listing is for a spacious 5 bedroom home. Four rooms are equipped with queen sized mattress, plenty of closet space, an wall to wall carpet. There's also a theater room, large backyard with a pool, hot tub, and space to hangout! The house is in a quiet neighborhood and just minutes away from shops, and 15 minutes from Downtown Dallas.",Dallas,https://www.airbnb.com/rooms/17920483?location=Addison%2C%20TX
Fortunata Casa Felicita,"3 bedroom 2 bath private home with sleeping accommodations for 10. Bedroom 1 consists of 1 King Bed, Bedroom 2 1 Queen Bed, Bedroom 3 2 Sets of Bunk Beds, Living Room has sleeper sofa. 2 Bathrooms, all rooms share full kitchen and beautiful outdoor patio! Booking is for whole house. All Villas provide free Wifi, Cable TV, private coffee bar, toiletries, plush robes, private entrance, parking, and access to the tasting room within walking distance.",Aubrey,https://www.airbnb.com/rooms/18462311?location=Corinth%2C%20TX
"2 bedrooms with exotic pool, home away from home","Beautiful home with 2 bedrooms available, a brand-new deluxe pool, parking space, and located in a very quiet and safe neighborhood. Moreover, if you would like to go grocery shopping, to the pharmacy, get some gas, to the bank, to get a quick bite, to have fun in a nightlife setting, or if in case of an accident you need to get a car part you will be able to find all of these locations only 3-5 minutes away from the house. The house and rooms are a perfect fit for a stay and rest getaway.",San Antonio,https://www.airbnb.com/rooms/19131969?location=Converse%2C%20TX


## 3.2) Conjunctive query & Ranking score

### 3.2.1) Inverted index

In [9]:
iindex_tf_idf_persist = {}


# Check if the index file exists, if yes load the previously persisted indexes and content
# Please create a directory(in your current working directory) with name indexes  
index_file = Path(os.path.join(my_path, "indexes/iindex_tf_idf.pkl"))

# Check if the index file exists, if yes load the previously persisted indexes
if index_file.is_file():
    # Retriving precreated inverted indexes
    with open(index_file, "rb") as iindex_tf_idf:
        iindex_tf_idf_persist = pickle.load(iindex_tf_idf)
        iindex_tf_idf.close()
        
    
if(len(iindex_tf_idf_persist.keys()) == 0):
    
    print("Inverted Indexes are being calculated")

    word_iindex = {}

    #Creating inverted index using tf-idf and consine similarity
    for word in words_persist:
        word_doc_list = vocabulary_persist[words_persist[word]]
        word_iindex[word] = []

        # Store indexes based on number of times a particular word is present in a given document
        for doc in word_doc_list:
            doc_content = review_content_persist[doc]
            # Pushing the term frequency with document id
            word_iindex[word].append([doc, doc_content.split().count(word)])

    # Store indexes based on tf-idf
    docs_length = len(review_content_persist.keys())
    iindex_tf_idf_persist = word_iindex

    for key, word in iindex_tf_idf_persist.items():
        # find out the relative importance of a particular terms relating it to document count
        idf= math.log10( docs_length / len(word) )

        for elem in word:
            # Add the document score corresponding to a particular term which we then use in the 
            # search results ranking of documents
            elem[1] = idf * elem[1]
    
    # Persisting the indexes calculated 
    with open(index_file, "wb") as iindex_tf_idf:
        pickle.dump(iindex_tf_idf_persist, iindex_tf_idf)
        iindex_tf_idf.close()
        


### 3.2.2) Execute the query

In [10]:
dict_qcos = {}
dict_norm = {}

#print("Search started")

# Need to remove this 100 hardcoding
for doc in list_intersect:
    
    num = 0
    
    #print("Current document ID: " + str(doc))
    
    # Calculating numerator of the cosine similarity equation
    
    for word in word_list:
        w_index = iindex_tf_idf_persist[word]
        w_i_len = len(w_index)
        for i in range(w_i_len):
            if w_index[i][0] == doc: 
                num +=  w_index[i][1]
                
    dict_qcos[doc]=num

    
    # Calculating denominator of the cosine similarity equation
    norm = 0
    for word in iindex_tf_idf_persist.values():
        for i in range(len(word)):
            if word[i][0] == doc:
                norm +=  word[i][1]**2
    
    dict_norm[doc]=math.sqrt(norm)

#print("Numerator and Denominator calculated")

# Once numerator and denominator is calculated find the score of each document in the intersection list 
# By applying the consine similarity formala
for doc,num in dict_qcos.items():
    # Eleminating divided by zero problem to check if the normalization value for a document is non-zero
    if dict_norm[doc] != 0:
        dict_qcos[doc] = num/(math.sqrt(len(word_list))*dict_norm[doc])

#print("Cosine similarity done")

In [11]:
# Applying heap data structure to print to top-k documents

h = []
results = []

for doc in dict_qcos.keys():
    
    # Reading the document meta data to print in the search results
    
    with open(os.path.join(my_path, 'docu_hw3/doc_' + str(doc) + '.tsv'),encoding='utf8') as tsvfile:
         tsvreader = list(csv.reader(tsvfile, delimiter="\t"))
    
    title = tsvreader[0][7]        
    description = tsvreader[0][4]
    description = process_text_common(description)
    city = tsvreader[0][2]
    url = tsvreader[0][8]
    
    # Pushing the document information to heap data structure
    
    heapq.heappush(h,(dict_qcos[doc], title, description, city, url))

#print("Applying Heap")    
# Applying max heap algorithm
heapq._heapify_max(h)

limit = 10
doc_len = len(dict_qcos.keys())

if(doc_len < 10):
    limit = doc_len

for i in range(limit):
    # Popping the document with maximum score at every step and adding it to the result list
    
    # Since the data structure used in heap is a tuple, we convert it in to list for ease of manipulation
    results.append(list(heapq.heappop(h)))
    
    # re-applying the max heap algorithm
    heapq._heapify_max(h)

#print("Applying Heap")  

#print(results)
results_formatted = []

for i in results:
    
    first = round(i.pop(0), 4)
    i.append(first) 
    #first = round(float(results[i].pop(0)), 4)
    #results[i] = results[i].append(first)
    results_formatted.append(i)


# Displaying the results
show_results(results_formatted, [], True)

Title,Description,City,URL,Score
"Beautiful, New Beach House","Our house is close to restaurants and dining, the beach, and family-friendly activities. You’ll love our house because of the coziness, the high ceilings, the views, the location, and the people. Our house is good for couples, solo adventurers, and families (with kids).",Crystal Beach,https://www.airbnb.com/rooms/15068378?location=Anahuac%2C%20TX,0.9222
Private room in spacious 3 bedroom house,"Me and a roommate live in this spacious 2,500 square feet single story house. The house is walking distance to Warren Sports Complex (sports park with multiple fields and a beautiful lake.) The house is also close to restaurants and dinning, and all of Frisco's new development. You will not be disappointed, the house has a private pool, an office space, and comfortable living areas. This place is great for business travelers, or any solo adventurers.",Frisco,https://www.airbnb.com/rooms/11591124?location=Celina%2C%20TX,0.7225
Quiet place,Beautiful house,New Caney,https://www.airbnb.com/rooms/16743790?location=Cleveland%2C%20TX,0.7002
Quite Spacious Private Room in Beautiful House,"Our house is in a small and quite family neighborhood. We have a very spacious family home. My wife and I work a lot and are rarely here but keep the house very clean. The house is located right off I-10 while quite a bit of restaurants, shopping centers, and enertainment close to our house.",Houston,https://www.airbnb.com/rooms/18784719?location=Cinco%20Ranch%2C%20TX,0.6543
Zilker House - Beautiful and Modern,"Wonderful house with 2 queen bedrooms and a bathroom. Completely updated. House is stocked with linens, kitchen and bath essentials. Quite and safe, but close to the action. House has wifi. Max 4 adults / 2 kids. Easy walking distance to lots of places to eat / drink and shop.",Austin,https://www.airbnb.com/rooms/911955?location=Colorado%20River%2C%20TX,0.6532
Zilker House - Beautiful and Modern,"Wonderful house with 2 queen bedrooms and a bathroom. Completely updated. House is stocked with linens, kitchen and bath essentials. Quite and safe, but close to the action. House has wifi. Max 4 adults / 2 kids. Easy walking distance to lots of places to eat / drink and shop.",Austin,https://www.airbnb.com/rooms/911955?location=Brazos%20River%2C%20TX,0.6532
Private house/beautiful grounds on Lake Granbury!,"The Pool House is a private 700 sq foot house with a bedroom/king bed, a galley kitchen with full size refrigerator, living room, and large bathroom with double vanities and a large glassed in shower. The Pool House is a short walk to downtown Granbury for shopping, dining and entertainment. You’ll love this house because of the lake views, the location (within walking distance to shopping and dining), the coziness, and the saltwater pool. My place is good for couples and business travelers.",Granbury,https://www.airbnb.com/rooms/13330174?location=Cleburne%2C%20TX,0.6238
Beautiful private room at the lake house,"Beautiful room with a queen size in my gorgeous lake home. Access to the kitchen,living room and laundry. I have a beautiful deck that overlooks the water and a boathouse with hammocks to fish on and relax in. It's a fabulous place to come and just relax.",Bullard,https://www.airbnb.com/rooms/17249703?location=Athens%2C%20TX,0.6095
The Beach House Too,"Beautiful new house with all the amenities of home! 3 bedrooms, 2 baths -- Stunning open floor plan , beautiful kitchen with bar area. Central Air/Heat, Microwave, Dishwasher, Washer/Dryer. No carpet , all tile floor . Features 4 flat screen tvs and a dvd player. Large covered decks front and back,downstairs outdoor shower , short walk to the beach, House is located in quiet area of beach that is wider and has less traffic. Great family house and area",Bolivar Peninsula,https://www.airbnb.com/rooms/18568358?location=Bolivar%20Peninsula%2C%20TX,0.5875
The Beach House Too,"Beautiful new house with all the amenities of home! 3 bedrooms, 2 baths -- Stunning open floor plan , beautiful kitchen with bar area. Central Air/Heat, Microwave, Dishwasher, Washer/Dryer. No carpet , all tile floor . Features 4 flat screen tvs and a dvd player. Large covered decks front and back,downstairs outdoor shower , short walk to the beach, House is located in quiet area of beach that is wider and has less traffic. Great family house and area",Bolivar Peninsula,https://www.airbnb.com/rooms/18568358?location=Anahuac%2C%20TX,0.5875


# Step 4: Define a new score!

New score is based on:
- *average price per night (av_r_n_u)*
- *n° of bedrooms (bedr_c_u)*
- *zone (zone_u)*

**Average price per night**: we simply use average price per night from the tsv files and from this variable any house can get at maximum 2 points and at least 0. Maximum number of points (2) is achieved if the price is lower than half of the price suggested by the user, instead if price is two times the requested price the house get 0. To get 1 point you need to have the same price or lower than 1.25 times the suggested price. Naturally, other intermediate marks can be achieved.

**n° of bedrooms**: if the n° of bedrooms requested are more or the same, house gets 1 point; if there are less bedrooms, house gets a lower mark. If there are three bedrooms less than requested or worse, house gets 0 point. 

**zone**: We found a set of coordinates (31,169621, -99,683617) that can be considered the center of texas; that is near to the city of Brady (County of McCulloch), so we divided Texas in four zones according to this center (like a cartesian coordinate system). Zones are: North East (NE), North West (NW), South East (SE) and South West (SW). If the house is in the requested zone it gets 1 point else 0. Is important to clarify that we didn't use cities because, given the fact that there are cities with the same name in different counties (e.g. we have 3 Austin), we can't use them because they can be confused.

Finally we can calculate the Score as sum/4 where sum is sum of the accomulated scores from individul fields based on user choices and 4 is maximum number points that can be accumulated.

In [14]:
#Here we define default values, necessary because the user could insert only a part of the need information (default 
#values give 0 points)
av_r_n_u = 0
bedr_c_u = 1000
zone_u = 'NA'
#User inserts, if he can, needed informations
try:
    av_r_n_u = int(input('please, enter maximum price that you can pay(e.g. 10): '))
except:
    print('No price suggested')
    pass
try:
    bedr_c_u = int(input('please, enter the number of bedrooms that you need(e.g 5): '))
except:
    print('No indication about the needed number of bedrooms')
    pass
try:
    zone_u = input('please enter the zone you want to reside(e.g. NE): ')
except:
    print('No zone was indicated')
    pass
list_ord =[]

n_h = []
#Computing score according to user information and characteristics of the houses
for doc in list_intersect:
    with open(os.path.join(my_path, 'docu_hw3/doc_' + str(doc) + '.tsv'),encoding='utf8') as tsvfile:
         tsvreader = list(csv.reader(tsvfile, delimiter="\t"))
    title = tsvreader[0][7]        
    description = tsvreader[0][4]
    description = process_text_common(description)
    latitude = tsvreader[0][5]
    longitude = tsvreader[0][6]
    url = tsvreader[0][8]
    price = list(tsvreader[0][0])
    eff_price = ''
    
    for i in range(1,len(price)):
        eff_price += price[i]
    
    
    try:
        av_r_n = int(eff_price)
        bedr_c = int(tsvreader[0][1])
    except ValueError:
        av_r_n = 1000000
        bedr_c = 1
    
    sum_values = 0
    if av_r_n >= 2*av_r_n_u :
               sum_values += 0          
    elif av_r_n < 2*av_r_n_u and av_r_n >= 1.75*av_r_n_u:
               sum_values += 0.25
    elif  av_r_n < 1.75*av_r_n_u and av_r_n >= 1.5*av_r_n_u:
               sum_values += 0.50  
    elif av_r_n < 1.5*av_r_n_u and av_r_n >= 1.25*av_r_n_u:
               sum_values += 0.75 
    elif av_r_n < 1.25*av_r_n_u and av_r_n >= av_r_n_u:
               sum_values += 1           
    elif av_r_n < av_r_n_u and av_r_n >=0.75*av_r_n_u:
               sum_values += 1.25
    elif av_r_n < 0.75*av_r_n_u and av_r_n >=0.5*av_r_n_u :
               sum_values += 1.5
    elif av_r_n < 0.5*av_r_n_u :
               sum_values += 2.
    
    
    if bedr_c >= bedr_c_u:
               sum_values += 1
    elif bedr_c == bedr_c_u-1:
               sum_values += 0.75
    elif bedr_c == bedr_c_u-2:
               sum_values += 0.5
    elif bedr_c <= bedr_c_u-3:
               sum_values += 0 
            
    try:
        la = float(latitude)
        lo = float(longitude)
        if la <=31.169621 and lo <= -99.683617:
             zone ='SW'
        elif la <=31.169621 and lo > -99.683617:
             zone ='SE'
        elif la >31.169621 and lo > -99.683617:
             zone ='NE'
        elif la >31.169621 and lo <= -99.683617:
             zone ='NW'
        if zone == zone_u:
            sum_values +=1
    except:
        pass
    
    
    score = round(sum_values/4,2)
    
    
    heapq.heappush(n_h,(score, title, description, city, url))

results = []

# Applying heapify max algorithm
heapq._heapify_max(n_h)

limit = 10
d_len = len(list_intersect)

if(d_len < 10):
    limit = d_len

for i in range(limit):
    results.append(list(heapq.heappop(n_h)))
    heapq._heapify_max(n_h)

#print("Applying Heap")  

#print(results)
results_formatted = []

for i in results:
    first = round(i.pop(0), 4)
    i.append(first) 
    results_formatted.append(i)
    

# Displaying the results
show_results(results_formatted, [], True)

please, enter maximum price that you can pay(e.g. 10): 30
please, enter the number of bedrooms that you need(e.g 5): 5
please enter the zone you want to reside(e.g. NE): NE


Title,Description,City,URL,Score
The Center of Divine Light,"renovated Mineral Bath House , Wonderful healing Waters for soaks , Labyrinth in Back yard for Walking meditations. Singing Chimes , FIre Pit , Grill for outdoor cooking, Beautiful clear night sky for star gazing. Hanging sky chairs for rest and relaxation. Massage Therapist on hand if you want to schedule some R and R time for yourself. Mysterious magical Orbs that have visit on occasion. Healing Vibro Accoustic Lounge ( sound vibrating table), quite and peaceful setting meditation areas.",Round Top,https://www.airbnb.com/rooms/15479650?location=Bridgeport%2C%20TX,0.62
House on the Creek,"A beautiful house on the Creek, conveniently located in Plano, TX. Close to the shopping centers, and UT Dallas. There is a bus stop in a walking distance from the house that can take you to the Dart Station.",Round Top,https://www.airbnb.com/rooms/17992009?location=Allen%2C%20TX,0.62
Friendly Private Room in َQuiet Neighborhood,"This is a beautiful bedroom with a queen size bed and closet. We do not have pets and the house is always clean. The bathroom is shared and supplies such as towels and shampoo are available. We are only some miles from Downtown, TCU, TCC, and Stockyards.",Round Top,https://www.airbnb.com/rooms/18977363?location=Burleson%2C%20TX,0.62
Friendly Private Room in َQuiet Neighborhood,"This is a beautiful bedroom with a queen size bed and closet. We do not have pets and the house is always clean. The bathroom is shared and supplies such as towels and shampoo are available. We are only some miles from Downtown, TCU, TCC, and Stockyards.",Round Top,https://www.airbnb.com/rooms/18977363?location=Benbrook%2C%20TX,0.62
"Not so ""in the city""","The bedroom has plenty of space, a large closet, windows that let natural light in, a personal ac unit to keep comfortable, a queen bed, and a light, fresh color on the walls. There is a side door which is the quickest way to get to the room and you will have a key for it of course so you can come and go as you please without coming through the front door and all through the house. The neighborhood is quiet, and full of trees, it's quite a beautiful sight. Any questions feel free to message",Round Top,https://www.airbnb.com/rooms/19208947?location=Balch%20Springs%2C%20TX,0.56
Friendly Private Room in َQuiet Neighborhood,"This is a beautiful bedroom with a queen size bed and closet. We do not have pets and the house is always clean. The bathroom is shared and supplies such as towels and shampoo are available. We are only some miles from Downtown, TCU, TCC, and Stockyards.",Round Top,https://www.airbnb.com/rooms/18977363?location=Cleburne%2C%20TX,0.56
luxurious house minutes from Bob Eden Park,"1 acre private cul-de-sac lot backing up to Bob Eden Park Georgian style home with walkout basement to pool spa decked living area main level features stunning hardwood floors in living ,kitchen,and dinning plus serene views from wall to wall windows. large deck overlooking bkyrd bdrm bath &grand kitchen that is a chefs delight. owners retreat &beautiful updated master bath up plus 2 more bdrms &full bath with 2nd washer dryer for ease tons of storage space office,living area,craft room",Round Top,https://www.airbnb.com/rooms/18873698?location=Colleyville%2C%20TX,0.5
luxurious house minutes from Bob Eden Park,"1 acre private cul-de-sac lot backing up to Bob Eden Park Georgian style home with walkout basement to pool spa decked living area main level features stunning hardwood floors in living ,kitchen,and dinning plus serene views from wall to wall windows. large deck overlooking bkyrd bdrm bath &grand kitchen that is a chefs delight. owners retreat &beautiful updated master bath up plus 2 more bdrms &full bath with 2nd washer dryer for ease tons of storage space office,living area,craft room",Round Top,https://www.airbnb.com/rooms/18873698?location=Bedford%2C%20TX,0.5
The Vintage room in Fort Worth,"Our place is a beautiful cozy open concept house in a quiet neighborhood surrounded by nature with easy access to many shops/restaurants and close enough to the city. We are centrally located about 24 minutes from DFW airport, about 21 minutes from Fort Worth Stockyards, Water Gardens( a must see in Texas) and about 26 minutes from Six Flags.",Round Top,https://www.airbnb.com/rooms/18959678?location=Colleyville%2C%20TX,0.5
The Vintage room in Fort Worth,"Our place is a beautiful cozy open concept house in a quiet neighborhood surrounded by nature with easy access to many shops/restaurants and close enough to the city. We are centrally located about 24 minutes from DFW airport, about 21 minutes from Fort Worth Stockyards, Water Gardens( a must see in Texas) and about 26 minutes from Six Flags.",Round Top,https://www.airbnb.com/rooms/18959678?location=Burleson%2C%20TX,0.5


# Bonus Step: Make a nice visualization!

An important feature of Airbnb is the search on the map. 

Our tool will works in the following way:

- Takes in input a set of coordinates and a maximum distance from the coordinates.
- Generate a map, with a circle of the given radius, where the center is represented by the coordinates given in input.
- Shows the houses that are inside the circle of the given radius.

In [17]:
#We drop rows where latitude and/or longitude column contains missing values (NaN)
m = m.dropna(subset=['latitude', 'longitude'])

In [18]:
#We ask as input latitude, longitude and a maximum distance to generate the radius
lat = float(input('Enter a latitude: '))
lon = float(input('Enter a longitude: '))
dis = float(input('Enter distance range (in km): '))

Enter a latitude: 30.02
Enter a longitude: -95.29
Enter distance range (in km): 10


In [19]:
#!pip install geopy
import folium
from geopy import distance

#We create the map with given coordinates
mp = folium.Map(location = [lat, lon], zoom_start = 12)

In [20]:
#Search represents the set of given coordinates
search = (lat, lon)

#Now, we create the marker associated with input set of coordinates, that we call origin
folium.Marker(location = [lat, lon], popup = 'origin', icon = folium.Icon(color = 'green', icon = 'home')).add_to(mp)

#Then, we generate the circle with input set of coordinates as center. 
folium.Circle(location = [lat, lon], radius = dis * 1000).add_to(mp)

#For each house that is in the given distance from the input set of coordinates, we create a marker with price that 
#can be clicked to access the house web page
for row in m.itertuples():
    if distance.distance(search, (row.latitude, row.longitude)).km <= dis:
        folium.Marker(location = [row.latitude, row.longitude], popup = folium.Popup('<a href=' + row.url + '>' + row.average_rate_per_night + ' </a>')).add_to(mp)
        
mp.save('map.html')