# Land price web scraping project
The data to be scraped is at https://www.jumia.cm/en/land-plots. It contains the location, surface area (in square metres) and the price per square metre for various neighborhoods in the Littoral region, Cameroon.


## Steps involved
- Import libraries
- Create ETL functions
- Scrape the data
- Create CSV file of the data

### 1.) Import libraries

In [13]:
import requests
from bs4 import BeautifulSoup

### 2.) Create ETL functions

In [84]:
# Module-level accumulator: every call to get_page_urls() appends to this list
urls_list = []

# define first function
def get_page_urls(page):
    """ Collect the absolute URL of every listing found on one results page

    Every ``<article>`` element on the page is searched for its first link
    carrying an ``href``; that ``href`` is prefixed with the base URL and
    appended to the module-level ``urls_list``.

    Arg:
        page (int): the page number

    Returns:
        list: the module-level ``urls_list``. It is shared across calls, so
        it also holds the URLs gathered by earlier calls.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status
        requests.RequestException: on connection failure or timeout
    """
    base_url = 'https://www.jumia.cm'
    # A timeout keeps a stalled connection from hanging the scraper forever
    response = requests.get(f'{base_url}/en/land-plots?page={page}', timeout=30)
    # Fail loudly on an error status instead of silently parsing an error page
    response.raise_for_status()
    # Parse the HTML of the results page
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find the link of each article on the page and append it to urls_list
    for article in soup.find_all('article'):
        link = article.find('a', href=True)
        # An article without a usable link cannot yield a URL; skip it
        if link is None:
            continue
        urls_list.append(base_url + link['href'])
    return urls_list
        
    
    

In [90]:
# Module-level accumulator: every call to extract_transform() appends to this list
items_list = []

# define the second function
def extract_transform(url):
    """ Extract items from the URL and transform or clean them

    The page is fetched and parsed, four fields are read from it, and the
    resulting dictionary is appended to the module-level ``items_list``:

    - ``Location``: text of the ``span`` with ``itemprop="addressLocality"``
    - ``Area``: text of the second ``h3``, with ``'Area'`` and ``' m2'``
      removed, converted to ``float``
    - ``Price``: text of the ``span`` with ``itemprop="price"``, with commas
      removed, converted to ``int``
    - ``Seller``: text of the first ``dd`` element

    Arg:
        url (str): URL of the web page

    Returns:
        list: the module-level ``items_list`` of dictionaries. It is shared
        across calls, so it also holds the items gathered by earlier calls.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status
        requests.RequestException: on connection failure or timeout
        AttributeError: if the location or price ``span`` is missing
        IndexError: if the page has fewer than two ``h3`` or no ``dd``
        ValueError: if the area or price text is not numeric after cleaning
    """
    # A timeout keeps a stalled connection from hanging the scraper forever
    response = requests.get(url, timeout=30)
    # Fail loudly on an error status instead of silently parsing an error page
    response.raise_for_status()
    # Parse the HTML of the listing page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract items
    location = soup.find('span', {'itemprop': 'addressLocality'}).get_text()
    area_text = soup.find_all('h3')[1].get_text()
    area = float(area_text.replace('Area', '').replace(' m2', ''))
    price_text = soup.find('span', {'itemprop': 'price'}).get_text()
    price = int(price_text.replace(',', ''))
    seller = soup.find_all('dd')[0].get_text()

    # Create a dictionary to store items
    items = {
        'Location': location,
        'Area': area,
        'Price': price,
        'Seller': seller,
    }
    # Append items to items_list
    items_list.append(items)

    return items_list
             
    
    
    