In [None]:
!pip install requests beautifulsoup4 pandas

In [None]:
import requests
from bs4 import BeautifulSoup

### 1. We'll use the requests package to download the Meqasa web page, in this case the first page of rental listings

In [None]:
# Plain string: nothing is interpolated, so no f-prefix is needed.
# The timeout stops the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an error page.
results = requests.get('https://meqasa.com/houses-for-rent-in-ghana?w=1', timeout=30)
results.raise_for_status()

### 2. We'll use beautiful soup to parse the web page

In [None]:
# Parse the downloaded HTML text; the 'lxml' parser is provided by the separate lxml package.
soup = BeautifulSoup(results.text,'lxml')

### 3. Print out the parsed html web page

In [None]:
soup

### 4. Select all listings on this page. These listings are embedded within div tags with class="mqs-featured-prop-inner-wrap"

In [None]:
# Every listing card is a <div> carrying this CSS class; the filter is given as an attrs dict.
listings = soup.find_all("div", attrs={"class": "mqs-featured-prop-inner-wrap"})
listings

### 5. Examine the first listed property

In [None]:
# Fail with a clear message if nothing was matched (page layout changed or the request
# was blocked) rather than with a bare IndexError.
assert listings, "No listings found - check the CSS class name and the HTTP response"
prop = listings[0]
prop

### 6. We can use various methods in BeautifulSoup to retrieve html elements. Let's find all 'h2' tags

In [None]:
prop.find_all('h2')

### 7. We can also retrieve the first occurrence of a tag

In [None]:
prop.find('h2')

### 8. Once we retrieve an element, we can access the text within it

In [None]:
# get_text() is the method behind the .text shortcut and returns the same string.
property_name = prop.find('h2').get_text()
property_name

### 9. Notice the property name includes newline characters, so we're going to replace them with the empty string, i.e. ''

In [None]:
# Grab the raw heading text first, then drop every newline character from it.
heading_text = prop.find('h2').text
property_name = heading_text.replace('\n', '')
property_name

### 10. Now, let's retrieve the property location

In [None]:
# Split on the standalone word ' at ' rather than the bare substring 'at': the substring
# also occurs inside words such as "flat" or "Estate", which made [1] pick the wrong piece.
# Whitespace is normalised first so newlines cannot glue words together, and partition()
# yields '' (instead of raising IndexError) when the heading has no ' at ' separator.
title = ' '.join(prop.find('h2').text.split())
location = title.partition(' at ')[2]

### 11. Next, the description

In [None]:
prop.find_all('p')

### The description seems to be the second element

In [None]:
# Collect the listing's <p> tags, then read the text of the second one.
paragraphs = prop.find_all('p')
description = paragraphs[1].text

### 12. Let's find the number of beds

In [None]:
prop.find('li', {'class': 'bed'})


In [None]:
# Locate the <li class="bed"> item, then read its text.
bed_item = prop.find('li', {'class': 'bed'})
beds = bed_item.text

### 13. When scraping, we may encounter a lot of errors due to missing data. In this context, some elements within a listing may be missing, so calling .text immediately may not be wise. We'll write a small utility function which will call .text on an element if it exists; otherwise, it'll return an empty string

In [None]:
prop.find('li', {'class': 'area'})

In [None]:
def if_exists(x):
    """Give back ``x.text`` when an element was found; give back '' when ``x`` is None."""
    return '' if x is None else x.text

### Try to retrieve these elements without using the 'if_exists' function. Any element that is missing comes back as None, so calling .text on it directly would raise an error

In [None]:
# Look up the three optional <li> items in one pass; a missing item comes back as None.
area, showers, garages = [
    prop.find('li', {'class': css_class}) for css_class in ('area', 'shower', 'garage')
]

for label, element in (('Area', area), ('Showers', showers), ('Garages', garages)):
    print(label, element)

### Let's use the utility function for safety

In [None]:
# Same three lookups, each passed through if_exists so a missing item yields '' instead of None.
area_item = prop.find('li', {'class': 'area'})
shower_item = prop.find('li', {'class': 'shower'})
garage_item = prop.find('li', {'class': 'garage'})

area = if_exists(area_item)
showers = if_exists(shower_item)
garages = if_exists(garage_item)

print('Area', area)
print('Showers', showers)
print('Garages', garages)

In [None]:
# The price sits in a <p class="h3"> tag; if_exists gives '' when that tag is absent.
price_tag = prop.find('p', {'class': 'h3'})
price = if_exists(price_tag)
price

### 14. Price looks messy, so we'll clean it 

In [None]:
# Drop newlines and the 'Price' label, then keep only what precedes the first '/'.
price_text = price.replace('\n', '').replace('Price', '')
price = price_text.partition('/')[0]
price

### 15. Next, the url

In [None]:
# Indexing a tag by attribute name reads from the same attrs mapping.
url = prop.find('a')['href']
url

### 16. Finally, place all the results into a python dictionary

In [None]:
# Bind the record to a name so later cells can reuse it; the bare name on the
# last line still displays it as the cell output.
listing = {
    'property': property_name,
    'location': location,
    'beds': beds,
    'showers': showers,
    'garages': garages,
    'area': area,
    'description': description,
    'price': price,
    'url': url,
}
listing