In [1]:
# we first import all neccessary packages
# request is for creating a connection with the webpage(s)
import requests 
# BeautifulSoup is for parsing the html returned by request
from bs4 import BeautifulSoup as soup
# time module for getting the current time
from time import time
# sleep is to create a delay between request
from time import sleep
# randint generates random integers
from random import randint
# pandas for creating a dataframe
import pandas as pd
# warnings to alert us when request exceeds what we require
from warnings import warn

# we create empty list to load up our data later
description = []
brands = []
current_price = []
old_price = []
discount = []
reviews  = []
ratings = []

# time() is the currnet time at any point of running the code
start_time = time()

# initial request is set to zero and increased by 1 on every successful request
request = 0

# for multiple page scraping, we iterate through the pages. In this case, we want to scrape 15 pages
pages = [str(i) for i in range(1, 16)]


for page in pages:
    link = 'https://www.jumia.com.ng/phones-tablets/?operating_system=Android--iOS&rating=1-5&q=android+phones&page=' + page
    page = requests.get(link)
# sleep the process after getting response from the request for any number of seconds between 10 and 17
    sleep(randint(10, 18))

    request += 1
# calculate the elapsed time for getting response by subtracting start_time from the current_time
    elasped_time = time() - start_time
    
# get the frequency of the request displayed so we can monitor the process
    print('request: {}; frequency: {} request/s'.format(request, request/elasped_time))

# set a condition that if request isn't successful, alert us with a warning and if request exceeds 10, break the scraping
    if page.status_code != 200:
        warn('request: {}; status_code: {}'.format(requests, page.status_code))
    if request > 15:
        warn('Number of request greater than expected')
        break
    
# on successful request, parse the html returned with beautifulsoup
    page_html = soup(page.text, 'html.parser')
    
# check the internal html from webpage and get the main container of all the items
    containers = page_html.find_all('div', class_="sku -gallery")
# iterate each containers
    for container in containers:
            # phone brands
            brand = container.find('span', class_='brand').text.replace('\xa0','')
            brands.append(brand)
            # phone description
            title = container.a.h2.find('span', class_="name").text
            description.append(title)

            # current phone prices
            price = container.find('span', class_='price').text
            current_price.append(price)

            #  rating in percentage
            rating = container.find('div', class_='stars-container').div['style'].split(':')[-1].replace('%',"")
            ratings.append(rating)

            # phones old prices
            try:
                former_price = container.find('span', class_ ='price-box ri')
                former_price = former_price.find('span', class_= 'price -old ').text
                old_price.append(former_price)
            except Exception:
                former_price = None
                old_price.append(former_price)

            # discounts
            percent_off = container.find('div', class_='price-container clearfix').span.text
            discount.append(percent_off)

            # number of reviews
            no_rating = container.find('div', class_='total-ratings').text.replace("(","").replace(")","")
            reviews.append(no_rating)

# we create a pandas dataframe to store all the save lists in columns. 
jumia_phones = pd.DataFrame({'brand': brands, 'current_price': current_price,'old_price': old_price,'discount': discount,
                             'description': description,'ratings': ratings,'times_reviewed': reviews}, columns =[
    'brand','current_price','old_price','discount','description','ratings','times_reviewed'
])
                            

request: 1; frequency: 0.05508172161372757 request/s
request: 2; frequency: 0.05415518015663541 request/s
request: 3; frequency: 0.05266298284927965 request/s
request: 4; frequency: 0.05553517861167177 request/s
request: 5; frequency: 0.05769365718713864 request/s
request: 6; frequency: 0.058750851876968144 request/s
request: 7; frequency: 0.058123487851847074 request/s
request: 8; frequency: 0.059652127154240904 request/s
request: 9; frequency: 0.06003275359943356 request/s
request: 10; frequency: 0.06103421029780065 request/s
request: 11; frequency: 0.0612347645433021 request/s
request: 12; frequency: 0.06251285807828723 request/s
request: 13; frequency: 0.06237361884111677 request/s
request: 14; frequency: 0.06250630209698468 request/s
request: 15; frequency: 0.06275367057054947 request/s


In [2]:
jumia_phones.head(10)

Unnamed: 0,brand,current_price,old_price,discount,description,ratings,times_reviewed
0,Gionee,"₦ 48,000","₦ 73,630",-35%,Gionee K3 (M100) 4GB+64GB 6.22 Inch HD+ Androi...,70,10
1,Nokia,"₦ 59,520","₦ 119,660",-50%,"7， 5.2-inch (4GB, 64GB ROM) Android 7.1, 16MP ...",76,10
2,UMIDIGI,"₦ 25,990","₦ 29,990",-13%,"A3S Android 10 Global Band 3950mAh 5.7"" Smartp...",86,99
3,Nokia,"₦ 42,990","₦ 70,485",-39%,3.2 3GB RAM 32GB 6.26 Inch 13MP Camera Dual SI...,81,104
4,UMIDIGI,"₦ 25,990","₦ 29,990",-13%,"A3S Android 10 Global Band 3950mAh 5.7"" Smartp...",78,148
5,Gionee,"₦ 37,350","₦ 49,990",-25%,"S11 Lite 5.7-Inch HD (4GB,64GB ROM) Android 7....",85,1145
6,Gionee,"₦ 37,970","₦ 55,000",-31%,"S11 Lite 5.7-Inch HD (4GB,64GB ROM) Android 7....",85,668
7,Tecno,"₦ 36,000",,"₦ 36,000","Spark 3 (KB7) 6.2-Inch HD (1GB,16GB ROM) Andro...",75,11
8,Nokia,"₦ 32,300","₦ 59,670",-46%,3.1 Global Version Phone 5.2 Inch 3GB RAM+32GB...,87,36
9,Nokia,"₦ 60,699","₦ 80,000",-24%,"6.1 Plus (Nokia X6) 5.8"" (4GB RAM, 64GB ROM) A...",98,22


In [7]:
jumia_phones.shape

(508, 7)

In [24]:
jumia_phones.to_csv("jumia.csv", encoding="utf-8", index =False)

In [26]:
pd.read_csv('jumia.csv').head()

Unnamed: 0,brand,current_price,old_price,discount,description,ratings,times_reviewed
0,Gionee,"₦ 48,000","₦ 73,630",-35%,Gionee K3 (M100) 4GB+64GB 6.22 Inch HD+ Androi...,70,10
1,Nokia,"₦ 59,520","₦ 119,660",-50%,"7， 5.2-inch (4GB, 64GB ROM) Android 7.1, 16MP ...",76,10
2,UMIDIGI,"₦ 25,990","₦ 29,990",-13%,"A3S Android 10 Global Band 3950mAh 5.7"" Smartp...",86,99
3,Nokia,"₦ 42,990","₦ 70,485",-39%,3.2 3GB RAM 32GB 6.26 Inch 13MP Camera Dual SI...,81,104
4,UMIDIGI,"₦ 25,990","₦ 29,990",-13%,"A3S Android 10 Global Band 3950mAh 5.7"" Smartp...",78,148
