# Importing Libraries

In [33]:
import pandas

from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError
from bs4 import BeautifulSoup

# Requesting smartphone info

In [34]:
# Handle IncompleteRead errors raised while reading the response body.
# This is a problem on the zoom server side, so this workaround is needed to keep using zoom.
# It is not necessary when scraping other sites.
from http.client import IncompleteRead

def html_reader(response):
    """Read an HTTP response body and return it as text.

    Args:
        response: Object exposing ``read()`` that returns the body as bytes
            (here, the object returned by ``urlopen``).

    Returns:
        str: The body decoded as UTF-8. If the read is cut short with
        ``IncompleteRead``, the bytes received so far are decoded and returned
        instead, so the result is always a ``str``.
    """
    try:
        body = response.read()
    except IncompleteRead as e:
        print("error when reading treated")
        # A truncated body can end in the middle of a multi-byte character,
        # so undecodable bytes are replaced rather than raising.
        return e.partial.decode("utf-8", errors="replace")
    return body.decode("utf-8")

In [35]:
from urllib.parse import urlencode

def request_smartphone_info(device_model, timeout=30):
    """Search zoom.com.br for a device and return the parsed results page.

    Args:
        device_model: Search text; it is URL-encoded into the ``q`` query
            parameter.
        timeout: Seconds to wait on blocking network operations before the
            request is abandoned.

    Returns:
        The ``BeautifulSoup`` document built from the response HTML, or
        ``None`` when the request or parsing fails (the error is printed).
    """
    search_params = { "q": device_model }
    base_url = f"https://www.zoom.com.br/search?{urlencode(search_params)}"
    # Send a browser-like User-Agent header with the request.
    headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36" }

    try:
        request = Request(base_url, headers=headers)
        # The context manager closes the connection even if reading fails.
        with urlopen(request, timeout=timeout) as response:
            html = html_reader(response)
        return BeautifulSoup(html, "html.parser")
    except HTTPError as error:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(f"HTTPError: {error}")
    except URLError as error:
        print(f"URLError: {error}")
    except Exception as exception:
        # Best-effort: any other failure is reported and treated as "no page".
        print(f"Exception: {exception}")
    return None

# Fetch and parse the search results page for the "moto g7" query.
# NOTE: request_smartphone_info prints the error and yields None when the
# request fails, so `soup` can be None here.
soup = request_smartphone_info("moto g7")
# Show the parsed document as the cell output.
soup

error when reading treated


111,3.60161945 L5.96844235,0.317735293 C6.11250453,0.035745241 6.4664385,-0.080250667 6.75898704,0.058811094 C6.87553639,0.114275804 6.96996487,0.205339258 7.02747914,0.317735293 L8.71004804,3.60695259 C8.71571651,3.61761888 8.7266387,3.6246853 8.73908169,3.6256186 L12.4940999,4.15893312 C12.8160968,4.20319822 13.0399324,4.49092141 12.9940316,4.80144379 C12.9757819,4.92570607 12.9150877,5.04063535 12.8217653,5.12823226 L10.0926026,7.68680867 C10.0838925,7.69467506 10.0801596,7.70627465 10.0829247,7.71747425 L10.7299603,11.3333467 C10.7849859,11.6440024 10.5683396,11.938792 10.2462044,11.9919901 C10.2137144,11.9973233 10.1809478,11.9998565 10.1479048,12"></path></svg></span></span><span style="cursor:inherit;display:inline-block;position:relative"><svg class="Rating_star__qjDRP" height="17.4" stroke-width="1" version="1.1" viewbox="0 0 15 13" width="17.4"><path d="M10.1479048,12 C10.0514025,12.0003898 9.95642098,11.9774573 9.87139387,11.9333255 L6.51593395,10.2267191 C6.50459701,10.2213

# Extracting Raw Data

In [36]:
def data_extractor(card):
    """Collect the raw text fields of one product card.

    Args:
        card: Element exposing ``find(tag, attrs)``; in this notebook, one of
            the product-card elements found in the search results.

    Returns:
        dict: Keys ``"price"``, ``"name"`` and ``"store_count"`` (in that
        order), each holding the text of the matching element or ``None``
        when the card has no such element.
    """
    def text_or_none(tag_name, css_class):
        # Look the element up and return its text only when it was found.
        node = card.find(tag_name, { "class": css_class })
        if node:
            return node.get_text()
        return None

    # (output key, tag name, CSS class) for every field pulled from the card.
    selectors = (
        ("price", "span", "mainValue"),
        ("name", "a", "name"),
        ("store_count", "a", "storeCount"),
    )
    return {key: text_or_none(tag_name, css_class) for key, tag_name, css_class in selectors}

In [37]:
# Locate the search results container, then every product card inside it.
search_result_element = soup.find("div", attrs={ "id": "pageSearchResultsBody" })
cards = search_result_element.find_all("div", attrs={ "class": "card card--prod" })

# One dict of raw text fields per product card.
smartphones_raw_data = [data_extractor(card) for card in cards]

# Tabulate the raw records and show them as the cell output.
raw_data_frame = pandas.DataFrame(data=smartphones_raw_data)
raw_data_frame

Unnamed: 0,price,name,store_count
0,R$ 2.551,Smartphone Apple iPhone 8 64GB 12.0 MP Apple A...,em 16 lojas
1,R$ 2.551,Smartphone Apple iPhone 8 128GB 12.0 MP Apple ...,em 8 lojas
2,R$ 3.254,Smartphone Apple iPhone 8 256GB 12.0 MP Apple ...,em 9 lojas
3,R$ 2.006,Smartphone Apple iPhone 8 Usado 64GB 12.0 MP i...,em 1 lojas
4,R$ 2.969,Smartphone Apple iPhone 8 Plus 64GB Câmera Dup...,em 12 lojas
5,R$ 4.054,Smartphone Apple iPhone 11 64GB Câmera Dupla A...,em 8 lojas
6,,Smartphone Apple iPhone 8 Plus 256GB Câmera Du...,


# Formatting Data

In [38]:
import re as regex
from copy import deepcopy

def _digits_to_int(text):
    """Return the digits found in ``text`` as an int, or None if there are none."""
    if not text:
        return None
    digits = regex.sub(r"[^0-9]", "", text)
    return int(digits) if digits else None


def _price_to_cents(text):
    """Convert a price such as "R$ 2.551" or "R$ 2.551,99" to integer cents."""
    if not text:
        return None
    # "." groups thousands and "," separates the cents, so split on the comma
    # before stripping non-digits; otherwise the cents would be read as units.
    whole_part, _, cents_part = text.partition(",")
    whole_digits = regex.sub(r"[^0-9]", "", whole_part)
    if not whole_digits:
        return None
    # Keep at most two decimal digits and pad "9" to "90"; no comma means "00".
    cents_digits = regex.sub(r"[^0-9]", "", cents_part)[:2].ljust(2, "0")
    return int(whole_digits) * 100 + int(cents_digits)


def data_parser(smartphone):
    """Turn one raw scraped record into a record with numeric fields.

    Args:
        smartphone: dict with the raw text fields ``"price"``, ``"name"`` and
            ``"store_count"``; missing or empty values are allowed.

    Returns:
        dict: A copy of the record (the input is not modified) where
        ``"price"`` is replaced by ``"price_cents"`` (int cents or None) and
        ``"store_count"`` is an int or None. Text without any digit becomes
        None for that field. Returns None, after printing the error and the
        record, if the record cannot be processed at all.
    """
    try:
        parsed = deepcopy(smartphone)

        # Popping "price" and adding "price_cents" renames the key and moves
        # it to the end of the record.
        parsed["price_cents"] = _price_to_cents(parsed.pop("price", None))
        parsed["store_count"] = _digits_to_int(parsed.get("store_count"))

        return parsed
    except Exception as e:
        # Best-effort: report the offending record instead of stopping the run.
        print(f"Exception: {e}")
        print(f"smartphone: {smartphone}")
        return None

In [39]:
# Parse every raw record into its numeric form.
smartphones_formatted_data = [data_parser(record) for record in smartphones_raw_data]

# Tabulate the parsed records and show them as the cell output.
formatted_data_frame = pandas.DataFrame(data=smartphones_formatted_data)
formatted_data_frame

Unnamed: 0,name,store_count,price_cents
0,Smartphone Apple iPhone 8 64GB 12.0 MP Apple A...,16.0,255100.0
1,Smartphone Apple iPhone 8 128GB 12.0 MP Apple ...,8.0,255100.0
2,Smartphone Apple iPhone 8 256GB 12.0 MP Apple ...,9.0,325400.0
3,Smartphone Apple iPhone 8 Usado 64GB 12.0 MP i...,1.0,200600.0
4,Smartphone Apple iPhone 8 Plus 64GB Câmera Dup...,12.0,296900.0
5,Smartphone Apple iPhone 11 64GB Câmera Dupla A...,8.0,405400.0
6,Smartphone Apple iPhone 8 Plus 256GB Câmera Du...,,


# Exporting data

In [40]:
# Write both tables next to the notebook; the row index is left out of the files.
formatted_data_frame.to_csv(path_or_buf="./formatted.csv", index=False)
raw_data_frame.to_csv(path_or_buf="./raw.csv", index=False)