# Meqasa Scraper

In [6]:
import datetime
import os
import re

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [7]:
# Download the rental listings index page; the raw bytes are parsed in a later cell.
url = 'https://meqasa.com/properties-for-rent-in-ghana'
# Without a timeout requests waits indefinitely for an unresponsive server.
result = requests.get(url, timeout=30)
src = result.content

In [8]:
# Parse the downloaded page bytes (`src`) into a BeautifulSoup tree using the 'lxml' parser.
soup = BeautifulSoup(src, 'lxml')

In [9]:
# One <div class="mqs-featured-prop-inner-wrap"> per listing on the index page.
# find_all is the current BeautifulSoup method name; findAll is its legacy alias.
containers = soup.find_all('div', {'class': 'mqs-featured-prop-inner-wrap'})

In [15]:
base_url = 'https://meqasa.com'
def house_info(houses_list, max_houses=20):
    """Build a DataFrame describing the listings in ``houses_list``.

    For each listing container this reads the summary shown on the index
    page (bed and shower counts, price snippet, link) and calls
    ``extract_from_house_link`` on the listing's own page for the area,
    garage count, address, description and posting date.

    Parameters
    ----------
    houses_list : iterable
        Listing containers, i.e. the ``mqs-featured-prop-inner-wrap`` divs
        found on the index page.
    max_houses : int or None, default 20
        Maximum number of listings to process. Every processed listing
        triggers one extra HTTP request, so this bounds the run time.
        ``None`` processes every listing.

    Returns
    -------
    pandas.DataFrame
        One row per processed listing with the columns Property, Beds,
        Showers, Garages, Area, Description, Price, Currency, Rent Period,
        Url, Address and Time Posted. An empty input gives an empty frame
        that still has these columns.
    """
    columns = ['Property', 'Beds', 'Showers', 'Garages', 'Area',
               'Description', 'Price', 'Currency', 'Rent Period',
               'Url', 'Address', 'Time Posted']
    records = []

    for idx, link in enumerate(houses_list):
        # Check the limit before touching the listing so no request is made past it.
        if max_houses is not None and idx >= max_houses:
            break

        # Feature icons of the listing: the first <li> holds the bed count and the
        # second the shower count. Missing items fall back to the 'na' marker
        # instead of raising.
        features = link.find('ul', {'class': 'prop-features'})
        feature_items = features.find_all('li') if features is not None else []
        beds = feature_items[0].text if len(feature_items) > 0 else 'na'
        showers = feature_items[1].text if len(feature_items) > 1 else 'na'

        # Details that only exist on the listing's own page.
        page_link = base_url + link.find('a').attrs['href']
        areas, garages, addresses, descriptions, posted_times = extract_from_house_link(page_link)

        # e.g. 'Price$3,500 / month'; parsed into amount, currency and period.
        price_snip = link.find('p', {'class': 'h3'}).text.strip()

        records.append({
            # The href starts with '/', so index 1 of the split is the listing slug.
            'Property': link.h2.a['href'].split('/')[1],
            'Beds': beds,
            'Showers': showers,
            'Garages': garages,
            'Area': areas,
            'Description': descriptions,
            'Price': extract_price(price_snip),
            'Currency': extract_currency(price_snip),
            'Rent Period': extract_period(price_snip),
            'Url': page_link,
            'Address': addresses,
            'Time Posted': posted_times,
        })

    # Passing `columns` fixes the column order and keeps the headers for empty input.
    return pd.DataFrame(records, columns=columns)

In [16]:
# Scrape the listing containers collected above; house_info also requests each listing's own page.
df_house = house_info(containers)

In [17]:
# Preview the first rows of the scraped table.
df_house.head()

Unnamed: 0,Property,Beds,Showers,Garages,Area,Description,Price,Currency,Rent Period,Url,Address,Time Posted
0,townhouse-for-rent-at-Airport-Residential-Area...,4,4,0,na,4 bedroom townhouse for rent in Airport Reside...,28890,GH₵,month,https://meqasa.com/townhouse-for-rent-at-Airpo...,Airport Residential Area,"November 11, 2019"
1,apartment-for-rent-at-Airport-Area-104135?y=63...,3,2,2,na,Simply must be on your viewing list!! I’m exci...,3500,$,month,https://meqasa.com/apartment-for-rent-at-Airpo...,Airport Area,"November 11, 2019"
2,house-for-rent-at-Cantonments-101461?y=63823746,4,4,0,na,A contemporary 4-Bedroom Townhouse located in ...,4000,$,month,https://meqasa.com/house-for-rent-at-Cantonmen...,Cantonments,"November 11, 2019"
3,"apartment-for-rent-at-Shiashie,-Accra,-Ghana-0...",1,1,0,42 m2,"Price is GH¢ 7,506 ($1,390) per month inclusiv...",1390,$,month,https://meqasa.com/apartment-for-rent-at-Shias...,"Shiashie, Accra, Ghana","November 11, 2019"
4,apartment-for-rent-at-Cantonments-101436?y=638...,4,3,0,na,A true lifestyle experience offering our cheri...,4000,$,month,https://meqasa.com/apartment-for-rent-at-Canto...,Cantonments,"November 11, 2019"


In [18]:
def save_data(df, out_dir='data'):
    """Write ``df`` to ``<out_dir>/meqasa_<YYYY-MM-DD>.csv`` without the index.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to save.
    out_dir : str, default 'data'
        Target directory. It is created (including parents) when missing.

    Returns
    -------
    None

    Notes
    -----
    The file name carries today's date only, so a second call on the same
    day overwrites the earlier file.
    """
    file_date = datetime.datetime.now().strftime('%Y-%m-%d')
    file_name = 'meqasa_' + file_date + '.csv'

    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(out_dir, exist_ok=True)
    df.to_csv(os.path.join(out_dir, file_name), index=False)

In [19]:
# Write the scraped table to data/meqasa_<YYYY-MM-DD>.csv.
save_data(df_house)

In [1]:
# utils
def extract_from_house_link(house_url, timeout=30):
    """Fetch one listing page and pull out its detail fields.

    Parameters
    ----------
    house_url : str
        Absolute URL of the listing page.
    timeout : float, default 30
        Seconds to wait for the server before ``requests`` gives up; without
        it a stalled connection would block the whole scrape.

    Returns
    -------
    tuple
        ``(areas, garages, address, descriptions, posted_time)``. The first
        three come from the details table via ``extract_vals`` ('na' when the
        row is absent), the description is the text of the first ``<p>`` in
        the ``section.span8`` container, and the posted time is parsed from
        the ``listed-by-text`` paragraph by ``extract_date``.
    """
    response = requests.get(house_url, timeout=timeout)
    page = BeautifulSoup(response.content, 'lxml')
    container = page.find('section', {'class': 'span8'})

    # Details table rows as {label: [values]}.
    table_info = get_table_rows_todict(container)
    areas = extract_vals(table_info, 'Area')
    garages = extract_vals(table_info, 'Garage')
    address = extract_vals(table_info, 'Address')

    descriptions = container.p.text.strip()

    # The posting date is embedded in the "listed by" line of the page.
    date_ctr = page.find('p', {'class': 'listed-by-text'}).text.strip()
    posted_time = extract_date(date_ctr)

    return areas, garages, address, descriptions, posted_time

In [2]:
# utils
def extract_price(snip):
    """Return ``snip`` with every character that is not an ASCII digit removed.

    Thousands separators, currency symbols and the rent period all disappear,
    e.g. 'PriceGH₵28,890 / month' becomes '28890'. A snippet without digits
    yields the empty string.
    """
    non_digit = re.compile('[^0-9]')
    return non_digit.sub('', snip)

def extract_currency(snip):
    """Return the currency marker that sits between 'Price' and the amount.

    Parameters
    ----------
    snip : str
        Price snippet such as 'Price$2,650 / month' or
        'PriceGH₵28,890 / month'.

    Returns
    -------
    str
        The text between the first 'Price' and the first digit after it,
        with surrounding whitespace removed ('$', 'GH₵', ...). Returns 'na'
        when the snippet has no 'Price' prefix, no amount (for example
        'Price[Price disclosed on request]'), or nothing between the two.
    """
    # One pattern covers both markers, so a snippet lacking either of them
    # simply fails to match instead of raising on a None search result.
    match = re.search(r'Price([^0-9]*)[0-9]', snip or '')
    if match is None:
        return 'na'
    return match.group(1).strip() or 'na'

def extract_period(snip):
    """Return the rent period named after the '/' in a price snippet.

    Parameters
    ----------
    snip : str
        Price snippet such as 'Price$3,500 / month'.

    Returns
    -------
    str
        The last word following the final '/', e.g. 'month'. Returns 'na'
        when the snippet has no '/' (for example
        'Price[Price disclosed on request]', which previously produced the
        junk value 'request]') or nothing follows it.
    """
    if not snip or '/' not in snip:
        return 'na'
    words = snip.rsplit('/', 1)[1].split()
    return words[-1] if words else 'na'

In [3]:
def extract_date(snip):
    """Return the text between 'Updated on ' and ' by:' in ``snip``.

    Parameters
    ----------
    snip : str
        Text of the listing's "listed by" paragraph.

    Returns
    -------
    str
        The enclosed text (e.g. 'November 11, 2019'), or 'na' when either
        marker is missing.
    """
    # DOTALL lets the date part span a line break inside the paragraph text.
    match = re.search(r'Updated on (.*?) by:', snip or '', re.DOTALL)
    return match.group(1) if match else 'na'

def extract_vals(table_dict, str1):
    """Return the first value stored under ``str1`` in ``table_dict``.

    Parameters
    ----------
    table_dict : dict
        Mapping of row label to a list of cell values.
    str1 : str
        Row label to look up, e.g. 'Area'.

    Returns
    -------
    The first element of ``table_dict[str1]``, or 'na' when the label is
    absent or its value list is empty (a row that had a label cell only).
    """
    values = table_dict.get(str1)
    return values[0] if values else 'na'

In [4]:
def get_table_rows_todict(lnk):
    """Turn the first table under ``lnk`` into a ``{label: [values]}`` dict.

    Each ``<tr>`` becomes a list of its ``<td>`` texts with trailing
    whitespace removed. The first three rows are skipped; in the sample
    listing printed in the scratch section they hold the category, a blank
    cell and the price line rather than label/value pairs. Of the remaining
    rows the first cell is the key and the other cells are the value list.

    Parameters
    ----------
    lnk : bs4 Tag
        Element whose first descendant ``<table>`` is read.

    Returns
    -------
    dict
        Label to list-of-values mapping. Empty when there is no table or
        the table has three rows or fewer. Rows without any ``<td>`` cell
        are ignored instead of raising IndexError.
    """
    tables = lnk.find_all('table')
    if not tables:
        return {}

    parsed_rows = []
    for tr in tables[0].find_all('tr'):
        cells = [cell.text.rstrip() for cell in tr.find_all('td')]
        parsed_rows.append(cells)

    # Slicing (rather than popping three times) tolerates short tables.
    return {row[0]: row[1:] for row in parsed_rows[3:] if row}

## FIN

---

### SCRATCH

In [112]:
def extract_price_old(snip):
    """Legacy price parser kept in the scratch section.

    Returns the stripped text between the '\\nPriceGH₵' prefix and the
    ' / ' separator of ``snip`` (thousands separators are kept), or the
    integer 0 when ``snip`` is None.
    """
    # No snippet at all: the legacy contract is the integer 0.
    if snip is None:
        return 0

    begin = re.search(r'\nPriceGH₵', snip).end()
    finish = re.search(r' / ', snip).start()
    return snip[begin:finish].strip()

In [205]:
# Scratch: look at the raw price paragraph text of the first listing.
# find_all is the current BeautifulSoup method name; findAll is its legacy alias.
containers = soup.find_all('div', {'class': 'mqs-featured-prop-inner-wrap'})
a = containers[0]

aa1 = a.find('p', {'class': 'h3'})
aa1.text
# aa1.text.split(" ")[-1].strip()

'\nPriceGH₵28,890 / month\n'

In [204]:
# Scratch: run extract_price on the price paragraph text.
# NOTE(review): the recorded output '28,890' still contains the comma, while
# extract_price as defined in the utils cell removes every non-digit, so the
# output looks stale. Re-run to confirm.
a5 = extract_price(aa1.text)
a5

# n = None
# a6 = extract_price(n)
# a6

'28,890'

In [203]:
# Scratch: check that the digit-only substitution drops letters, spaces and punctuation.
aq = 'lkdfhisoe78347834 (())&/&745  '
# Stored under its own name so the `result` HTTP response from the fetch cell
# is not overwritten with a string.
digits_only = re.sub('[^0-9]','', aq)
digits_only

'78347834745'

In [208]:
# Scratch: the same substitution applied to a real price snippet.
qq = '\nPriceGH₵28,890 / month\n'
# Stored under its own name so the `result` HTTP response from the fetch cell
# is not overwritten with a string.
price_digits = re.sub('[^0-9]','', qq)
price_digits

'28890'

In [246]:
# Scratch: exercise the currency helper on the "price on request" snippet.
# NOTE(review): extract_currency2 is defined in the cell below this one, so on a
# fresh top-to-bottom run this call raises NameError. TODO: move the definition
# above this cell.
qq2 = 'Price$2,650 / month'  # not used in this cell
qq3 = 'Price[Price disclosed on request]'
c3 = extract_currency2(qq3)
c3

'na'

In [245]:

def extract_currency2(snip):
    """Scratch variant of the currency parser.

    Returns the text between the first 'Price' and the first digit after it,
    with surrounding whitespace removed (e.g. '$' for 'Price$2,650 / month').
    Returns 'na' when the snippet has no 'Price' prefix, no digit after it
    (for example 'Price[Price disclosed on request]'), or nothing between
    the two.
    """
    # A single pattern means a snippet lacking either marker just fails to
    # match instead of raising on a None search result.
    match = re.search(r'Price([^0-9]*)[0-9]', snip or '')
    if match is None:
        return 'na'
    return match.group(1).strip() or 'na'

In [144]:
# result.content
# soup.title.text
# for t in soup.find_all('a'):
#     print(t.text)
# Scratch: derive the listing slug from the first container's heading link.
# find_all is the current BeautifulSoup method name; findAll is its legacy alias.
containers = soup.find_all('div', {'class': 'mqs-featured-prop-inner-wrap'})
a = containers[0]
# a.h2.a['href']

# The href starts with '/', so index 1 of the split is the slug.
b = a.h2.a['href'].split('/')
b[1]
# a.h2.a['href'].split('/')[1]
# 'apartment-for-rent-at-Airport-Area-104135?y=11566372'

'townhouse-for-rent-at-Airport-Residential-Area-103217?y=2006959929'

In [145]:
# Scratch: the bed count is the text of the first <li> inside the prop-features list.
beds  = a.find('ul', {'class': 'prop-features'}).li.text
# beds  = link.find('ul', {'class': 'prop-features'}).div.div.text.split(',')[0].replace('Beds: ','').strip()
beds

'4'

In [80]:
# v = click_house_link('https://meqasa.com/apartment-for-rent-at-Cantonments-101436?y=770335285')
# NOTE(review): `v` is only assigned in the commented-out line above, and
# click_house_link is not defined in the visible part of this notebook, so this
# cell relies on leftover kernel state.

# v.find()
# v.p
v.select('li.bed')
v.table()[15].get_text()
v.select('li.shower')[0].find(text=True)

# v.select('table.table td')[0]
# First table on the page and its rows; `rows` is consumed by the following cell.
table = v.find_all('table')[0]
rows = table.find_all('tr')
# rows

In [115]:
# Scratch: print the <td> texts of every table row to see which rows hold
# label/value pairs. `rows` comes from the previous cell.
row_list = []
for tr in rows:
    td = tr.find_all('td')
    row = [i.text for i in td]
    row_list.append(row)
    print(row)

['Categories', 'Apartment for Rent\nApartment for Rent in East Cantonments']
['']
['\xa0\xa0\xa0\xa0 12 months @ $ 4,000/month (GH₵ 22,100)']
['Bedrooms', '4']
['Bathrooms', '3']
['Garage', '0']
['Air conditioning\nCooker\nRefrigerator\nMicrowave\nInternet\nGarden\nSwimming pool\nSecurity service\nGenerator\nWater reservoir\n']
['Address', 'Cantonments']
['101436']


In [119]:
# showers  = a.find('ul', {'class': 'prop-features'})
# showers

# Scratch: list the feature <li> items of listing `a`. The recorded output shows
# bed, shower, garage and a favourite-icon item.
a1  = a.find('ul', {'class': 'prop-features'})
a2 = a1.find_all('li')
a2
# a2[1].text
# a2[2].text

[<li class="bed"><span>3</span></li>,
 <li class="shower"><span>2</span></li>,
 <li class="garage" title="Garages"><span>2</span></li>,
 <li class="reduced sl104135 fav-icon"><a data-target="#fav-modal" data-toggle="modal" href=""><img alt="Unfavourite" src="https://dve7rykno93gs.cloudfront.net/assets2/images/fav-icon.png" title="Add to favourites"/></a></li>]

In [118]:
# Scratch: the same feature list for the second container. In the recorded
# output this listing has no garage item, so index 2 is the favourite icon.
c1 = containers[1]
c12 = c1.find('ul', {'class': 'prop-features'})
c122 = c12.find_all('li')
c122

[<li class="bed"><span>4</span></li>,
 <li class="shower"><span>4</span></li>,
 <li class="reduced sl103217 fav-icon"><a data-target="#fav-modal" data-toggle="modal" href=""><img alt="Unfavourite" src="https://dve7rykno93gs.cloudfront.net/assets2/images/fav-icon.png" title="Add to favourites"/></a></li>]

if c122 has title=='Garages':
    c122[2].text

In [136]:
# Scratch: check whether the third feature item is the garage entry.
# Attribute-style access (tag.title) looks up a child <title> tag rather than the
# HTML attribute, so the attribute is read with .get(), which returns None when
# it is absent.
# if c122[2].has_attr('title'):
if a2[2].get('title') == 'Garages':
    print('ok')

In [161]:
# Scratch: read the HTML `title` attribute of the third feature item.
a2[2].attrs['title']

'Garages'

`<li title="Garages" class="garage"><span>2</span></li>`

In [None]:
# listings = containers.find_all('div', {'class': 'mqs-prop-dt-wrapper'})
# containers'apartment-for-rent-at-Airport-Area-104135?y=11566372'
# listings
# listings = soup.find_all('li', {'class': 'normal--2QYVk gtm-normal-ad'})
# ref = listings.findAll('div', {'class': 'mqs-prop-dt-wrapper'})

In [134]:
# Scratch: pick the garage <li> out of the feature list by its class.
# Tag.find only searches below the element it is called on, so calling it on the
# <li> itself can never return that <li>; scan the list instead. BeautifulSoup
# exposes `class` as a list of names. The result is None when no item matches.
tag = next((li for li in a2 if li.get('class') == ['garage']), None)
# filter(bool, tag['class']) == ['value', 'price']

In [135]:
# Display the lookup result from the previous cell (no output was recorded for this cell).
tag

In [None]:
# NOTE(review): this is a CSS class name left in a code cell (it is the class
# looked up in the next cell). As Python it parses as a subtraction of undefined
# names and raises NameError if the cell is run.
mqs-prop-dt-wrapper

In [156]:
# px  = containers.findAll('div', {'class': 'mqs-prop-dt-wrapper'})
# Scratch: show the detail wrapper <div> of the first container to see its full markup.
px0 = containers[0]
px01 = px0.find('div', {'class': 'mqs-prop-dt-wrapper'})
# px01.p.span.text
px01

<div class="mqs-prop-dt-wrapper">
<h2>
<a href="/apartment-for-rent-at-Airport-Area-104135?y=11566372">3 bedroom furnished apartment for rent at Airport Area, Ghana</a>
</h2>
<p class="h3">
<span class="h3">Price</span>$3,500 <span>/ month</span>
</p>
<p>Simply must be on your viewing list!! I’m excited to market ...</p>
<ul class="prop-features">
<li class="bed"><span>3</span></li>
<li class="shower"><span>2</span></li>
<li class="garage" title="Garages"><span>2</span></li>
<li class="reduced sl104135 fav-icon"><a data-target="#fav-modal" data-toggle="modal" href=""><img alt="Unfavourite" src="https://dve7rykno93gs.cloudfront.net/assets2/images/fav-icon.png" title="Add to favourites"/></a></li>
</ul>
</div>

In [8]:
# Display the whole frame.
# NOTE(review): the recorded output has only Property/Beds/Showers/Garages/Url,
# fewer columns than house_info builds now, so it appears to come from an
# earlier version of the function.
df_house

Unnamed: 0,Property,Beds,Showers,Garages,Url
0,townhouse-for-rent-at-Airport-Residential-Area...,4,4,,https://meqasa.com/townhouse-for-rent-at-Airpo...
1,apartment-for-rent-at-Airport-Residential-Area...,2,2,120 m2,https://meqasa.com/apartment-for-rent-at-Airpo...
2,apartment-for-rent-at-Spintex-(-studio-apartme...,1,1,200 m2,https://meqasa.com/apartment-for-rent-at-Spint...
3,house-for-rent-at-Adjiringanor-104427?y=200695...,5,4,1,https://meqasa.com/house-for-rent-at-Adjiringa...
4,apartment-for-rent-at-Airport-Area-104135?y=20...,3,2,2,https://meqasa.com/apartment-for-rent-at-Airpo...
5,apartment-for-rent-at-Cantonments-101436?y=200...,4,3,,https://meqasa.com/apartment-for-rent-at-Canto...
6,townhouse-for-rent-at-Cantonments-074711?y=200...,4,4,,https://meqasa.com/townhouse-for-rent-at-Canto...
7,house-for-rent-at-Cantonments-101461?y=2006959929,4,4,,https://meqasa.com/house-for-rent-at-Cantonmen...
8,"apartment-for-rent-at-Shiashie,-Accra,-Ghana-0...",1,1,42 m2,https://meqasa.com/apartment-for-rent-at-Shias...
9,apartment-for-rent-at-West-Legon-104998?y=2006...,2,2,4,https://meqasa.com/apartment-for-rent-at-West-...


In [28]:
# Scratch: fetch a single listing page for inspection.
# NOTE(review): click_house_link is not defined in the visible part of this
# notebook. Confirm it still exists before re-running.
vc = click_house_link('https://meqasa.com/apartment-for-rent-at-Cantonments-101436?y=770335285')

<div class="property-box">
<div class="top">
<div class="row">
<div class="top-2-colums-details">
<div class="prop-top-header">
<div class="top-details-wrapper">
<span typeof="v:Breadcrumb"><a href="/" property="v:title" rel="v:url">Home</a></span> »
<span typeof="v:Breadcrumb"><a href="/properties-for-rent-in-ghana" property="v:title" rel="v:url">For Rent</a></span> »
<span typeof="v:Breadcrumb"><a href="/apartments-for-rent-in-ghana" property="v:title" rel="v:url">Apartments</a></span> »
<span typeof="v:Breadcrumb"><a href="/apartments-for-rent-in-Greater Accra-region" property="v:title" rel="v:url">Greater Accra region</a></span> »
<span typeof="v:Breadcrumb"><a href="/apartments-for-rent-in-East Cantonments" property="v:title" rel="v:url">East Cantonments</a></span> »
<span typeof="v:Breadcrumb"><a href="4-bedroom-apartments-for-rent-in-East Cantonments" property="v:title" rel="v:url">4-bedroom</a></span> »
<h1>4 bedroom apartment for rent at Cantonments</h1>
</div>
<div class="pri