# Set-up

In [71]:
# load packages
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Define the URL of the site
# (the Ouedkniss "informatique" listing page that the request below downloads)
base_site = "https://www.ouedkniss.com/informatique"

In [3]:
# Send a GET request to the listing page.
# A timeout is passed explicitly: requests waits forever by default, which
# would hang the kernel if the server never answers.
response = requests.get(base_site, timeout=30)
# Display the HTTP status code so a failed download is visible right away
response.status_code

200

In [4]:
# get the HTML from the webpage
# (the response body; this is what gets passed to the parser below)
html = response.content

## Choosing a parser

### lxml

In [10]:
# Parse the downloaded HTML into a BeautifulSoup object with the lxml parser
soup = BeautifulSoup(html, features='lxml')

In [11]:
# Save a pretty-printed, UTF-8 encoded copy of the parsed page to disk
with open('Ouedkinss-informatique.html', mode='wb') as html_file:
    pretty_bytes = soup.prettify(encoding='utf-8')
    html_file.write(pretty_bytes)

## Extracting annonces

In [13]:
# Collect every <div class="annonce"> element of the page
divs = soup.find_all('div', class_='annonce')

In [61]:
# For each annonce div, look up its <li class="annonce_titre"> element
# (find() returns None when the div has no such element)
titles = [annonce.find('li', class_='annonce_titre') for annonce in divs]

In [32]:
# Inspect the first title element to see its markup
titles[0]

<li class="annonce_titre">
<a href="touchbar-13-3-2017-i5-1ghz-8g-256g-alger-bab-ezzouar-ain-naadja-setif-constantine-algerie-informatique-d22497120?utm_medium=listing" itemprop="url" target="_blank" title="TOUCHBAR 13.3 2017 I5 3.1GHZ 8G 256G">
<h2 itemprop="name">TOUCHBAR 13.3 2017 I5 3.1GHZ 8G 256G</h2> <span>- Informatique</span>
</a>
</li>

In [63]:
# Look up the <h2> tag nested inside the first title element
titles[0].find('h2')

<h2 itemprop="name">TOUCHBAR 13.3 2017 I5 3.1GHZ 8G 256G</h2>

In [64]:
# Wrap the <h2> text of every title element in a one-element list,
# skipping the entries where no title element was found.
# `is not None` is an identity test; `!= None` would instead go through the
# element's overloaded comparison operator.
title_names = [[t.find('h2').string] for t in titles if t is not None]
title_names

[['TOUCHBAR 13.3 2017 I5 3.1GHZ 8G 256G'],
 ['Cable Console Cisco 72-3383-01DB9>RJ45'],
 ["TOUCHBAR 13.3'' 2019 I5 1.4GHZ 8G 128G"],
 ['Asus'],
 ['ASUS DUAL RTX208TI OC 11G'],
 ['PC GAMING'],
 ['ASUS TUF 3GTX1660S O6G GAMING '],
 ['HP'],
 ['ASUS DUAL RTX2060 O6G'],
 ['Manette PS3 sans fil original'],
 ['ASUS TUF GTX1660 O6G GAMING '],
 ['Modem Huawei 4g lte'],
 ['ASUS VP278H'],
 ['Asus gaimer'],
 ['ASUS ROG STRIX XG32VQR'],
 ['Inité G2020'],
 ['MSI OPTIX MAG322 CQRV'],
 ['PC Gamer'],
 ['ASUS VG248QE'],
 ['Pc carte mer processor ram'],
 ['ASUS TUF Z390-PLUS GAMING WIFI'],
 ['Dell Inspiron 15 5567 i5 7eme gen'],
 ['ASUS TRIX B450-F GAMING '],
 ['PC Gamer'],
 ['MSI GTX1650 GAMING X 4G'],
 ['Imprimante canon mg5750 en panne '],
 ['ASUS VP248H'],
 ['PC GAMING - CONFIG I3 7100'],
 ['PC GAMING- I3 7100'],
 ['PC GAMING- CONFIG RYZEN7 3800X'],
 ['PC GAMING - CONFIG RYZEN9 3900X'],
 ['PC GAMING - CONFIG RYZEN 7 3800X'],
 ['PC GAMING - CONFIG RYZEN5 3600X'],
 ['PC GAMING - CONFIG RYZEN5 3600'],
 

In [66]:
# For each annonce div, look up its <span itemprop="price"> element
# (find() returns None when the div has no such element)
prices = [annonce.find('span', attrs={'itemprop': 'price'}) for annonce in divs]

In [70]:
# Wrap the text of every price element in a one-element list, skipping the
# entries where no price element was found.
# The comprehension has to iterate over `prices`: iterating over `titles`
# would read the text of the title <li> elements instead of the prices.
prices_values = [[p.string] for p in prices if p is not None]
prices_values

[['158000 DA Négociable'],
 ['4500 DA Négociable'],
 ['180000 DA Négociable'],
 ['25000 DA Fixe'],
 ['269000 DA Fixe'],
 ['58 DA Offert'],
 ['65900 DA Fixe'],
 ['40000 DA Fixe'],
 ['85000 DA Fixe'],
 ['2500 DA Négociable'],
 ['58000 DA Fixe'],
 ['4200 DA Fixe'],
 ['38500 DA Fixe'],
 ['75000 DA Négociable'],
 ['148000 DA Fixe'],
 ['13000 DA Négociable'],
 ['109000 DA Fixe'],
 ['1 DA Négociable'],
 ['47500 DA Fixe'],
 ['20000 DA Fixe'],
 ['42900 DA Fixe'],
 ['82000 DA Négociable'],
 ['27500 DA Fixe'],
 ['36000 DA Fixe'],
 ['27500 DA Fixe'],
 ['77900 DA Fixe'],
 ['82900 DA Fixe'],
 ['191000 DA Fixe'],
 ['275900 DA Fixe'],
 ['209000 DA Fixe'],
 ['141900 DA Fixe'],
 ['128900 DA Fixe'],
 ['66900 DA Fixe'],
 ['71000 DA Fixe'],
 ['144900 DA Fixe'],
 ['122900 DA Fixe'],
 ['83900 DA Fixe'],
 ['74900 DA Fixe'],
 ['58000 DA Fixe'],
 ['79000 DA Négociable'],
 ['87000 DA Négociable'],
 ['82000 DA Négociable'],
 ['40000 DA Fixe'],
 ['67000 DA Négociable'],
 ['59000 DA Négociable'],
 ['7000 DA Négocia

In [95]:
# Build one {'name': ..., 'price': ...} record per annonce div.
items = []
for div in divs:
    if div is None:
        continue

    # Name: text of the <h2> inside <li class="annonce_titre">.
    item_title = div.find('li', {'class': 'annonce_titre'})
    title_tag = item_title.find('h2') if item_title is not None else None
    if title_tag is not None:
        item_name = title_tag.string
    else:
        # The placeholder must go into item_name (not item_price); otherwise
        # item_name is undefined on the first div or keeps the name of the
        # previous listing, and the placeholder is overwritten just below.
        item_name = "Article sans titre ???"

    # Price: text of <span itemprop="price">, empty string when absent.
    price = div.find('span', {'itemprop': 'price'})
    item_price = price.string if price is not None else ""

    items.append({'name': item_name, 'price': item_price})

In [97]:
# Turn the scraped records into a DataFrame.
# The columns are named explicitly so that an empty `items` list still gives
# a frame (and later a CSV header) with the 'name' and 'price' columns.
df = pd.DataFrame(items, columns=['name', 'price'])

In [99]:
# Preview the first rows of the table
df.head()

Unnamed: 0,name,price
0,TOUCHBAR 13.3 2017 I5 3.1GHZ 8G 256G,158000 DA Négociable
1,Cable Console Cisco 72-3383-01DB9>RJ45,4500 DA Négociable
2,TOUCHBAR 13.3'' 2019 I5 1.4GHZ 8G 128G,180000 DA Négociable
3,Asus,25000 DA Fixe
4,ASUS DUAL RTX208TI OC 11G,269000 DA Fixe


In [101]:
# Write the table to CSV with a header row and without the index column
df.to_csv('ouedkniss_informatique_test.csv', header=True, index=False)