# Importing Libraries

In [8]:
import pandas

from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError
from bs4 import BeautifulSoup

# Requesting smartphone info

In [9]:
# Search URL and the browser-like User-Agent header sent with the request.
base_url = "https://www.zoom.com.br/search?q=iphone+8"
headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36" }

# Reset first so a failed request can never leave a stale `soup` from a
# previous run of this cell alive in the kernel.
soup = None

try:
    request = Request(base_url, headers=headers)
    # The context manager closes the HTTP response even if reading or
    # decoding raises.
    with urlopen(request) as response:
        html = response.read().decode("utf-8")
    soup = BeautifulSoup(html, "html.parser")
except HTTPError as error:
    # HTTPError is listed before URLError so it gets its own message.
    print(f"HTTPError: {error}")
except URLError as error:
    print(f"URLError: {error}")
except Exception as exception:
    # Best-effort: report anything else (e.g. a decoding failure) and continue.
    print(f"Exception: {exception}")




# Extracting Raw Data

In [10]:
def data_extractor(card):
    """Collect the raw price, name and store-count texts from one product card.

    Args:
        card: An object exposing ``find(tag_name, attrs)``; presumably a
            BeautifulSoup tag for a single search-result card.

    Returns:
        dict: Keys ``"price"``, ``"name"`` and ``"store_count"``. Each value is
        the text of the matching element, or ``None`` when the lookup yields
        nothing.
    """
    def text_or_none(tag_name, css_class):
        # Look the element up by tag and class; a falsy result means "absent".
        node = card.find(tag_name, { "class": css_class })
        if not node:
            return None
        return node.get_text()

    extracted = {}
    extracted["price"] = text_or_none("span", "mainValue")
    extracted["name"] = text_or_none("a", "name")
    extracted["store_count"] = text_or_none("a", "storeCount")
    return extracted

In [11]:
# Locate the container that wraps the search results.
search_result_element = soup.find("div", { "id": "pageSearchResultsBody" })

# `find` yields None when nothing matches; fail with a clear message instead
# of an opaque AttributeError on the next line.
if search_result_element is None:
    raise RuntimeError(
        "Search results container 'pageSearchResultsBody' was not found; "
        "the page layout may have changed or the request may have been blocked."
    )

# NOTE(review): the class filter is the exact string "card card--prod";
# confirm it still matches if the site changes its class list.
cards = search_result_element.find_all("div", { "class": "card card--prod" })

# One raw record (dict of strings / None) per product card.
smartphones_raw_data = [data_extractor(card) for card in cards]

raw_data_frame = pandas.DataFrame(smartphones_raw_data)
raw_data_frame

Unnamed: 0,price,name,store_count
0,R$ 2.551,Smartphone Apple iPhone 8 64GB 12.0 MP Apple A...,em 14 lojas
1,R$ 2.903,Smartphone Apple iPhone 8 128GB 12.0 MP iOS 11,em 6 lojas
2,R$ 3.254,Smartphone Apple iPhone 8 256GB 12.0 MP Apple ...,em 5 lojas
3,R$ 2.998,Smartphone Apple iPhone 8 Plus 64GB Câmera Dup...,em 11 lojas
4,R$ 4.399,Smartphone Apple iPhone 11 64GB Câmera Dupla A...,em 8 lojas
5,R$ 3.648,Smartphone Apple iPhone 8 Plus 128GB Câmera Du...,em 11 lojas
6,R$ 4.084,Smartphone Apple iPhone X 64GB Câmera Dupla Ap...,em 5 lojas
7,R$ 3.519,Smartphone Apple iPhone 8 Plus 256GB Câmera Du...,em 3 lojas
8,R$ 6.159,Smartphone Apple iPhone 11 Pro 64GB Câmera Tri...,em 8 lojas
9,R$ 1.436,Smartphone Apple iPhone 6 6 32GB 32GB 8.0 MP A...,em 5 lojas


# Formatting Data

In [12]:
import re as regex
from copy import deepcopy

def _extract_int(text):
    """Return the digits found in ``text`` as an int, or None when there are none."""
    digits = regex.sub(r"[^0-9]", "", text or "")
    return int(digits) if digits else None


def _price_to_cents(text):
    """Convert a price label such as "R$ 2.551" to an integer number of cents.

    A comma, when present, is treated as the decimal separator (assumed
    Brazilian formatting, e.g. "R$ 1.299,90" -- confirm against the site).
    Returns None when ``text`` is missing or has no digits before the comma.
    """
    if not text:
        return None
    whole_part, _, fraction_part = text.partition(",")
    whole_units = _extract_int(whole_part)
    if whole_units is None:
        return None
    # Keep at most two fractional digits and right-pad, so ",9" means 90 cents.
    fraction_digits = regex.sub(r"[^0-9]", "", fraction_part)[:2].ljust(2, "0")
    return whole_units * 100 + int(fraction_digits)


def data_parser(smartphone):
    """Turn one raw smartphone record into typed values.

    Args:
        smartphone: dict with the string (or None) values under the keys
            ``"price"`` and ``"store_count"``; other keys are carried over
            untouched. The input dict is not modified.

    Returns:
        dict: A copy of the record where ``"price"`` is replaced by
        ``"price_cents"`` (int, or None when the price is missing) and
        ``"store_count"`` is an int (or None when missing or digit-free).
    """
    parsed = deepcopy(smartphone)

    # Rename "price" to "price_cents" while converting the value.
    parsed["price_cents"] = _price_to_cents(parsed.pop("price", None))
    parsed["store_count"] = _extract_int(parsed.get("store_count"))

    return parsed

In [13]:
# Parse every raw record into its typed form (the raw records are left as-is).
smartphones_formatted_data = [
    data_parser(raw_record) for raw_record in smartphones_raw_data
]

# Build the table from the parsed records and display it as the cell output.
formatted_data_frame = pandas.DataFrame(data=smartphones_formatted_data)
formatted_data_frame

Unnamed: 0,name,store_count,price_cents
0,Smartphone Apple iPhone 8 64GB 12.0 MP Apple A...,14,255100
1,Smartphone Apple iPhone 8 128GB 12.0 MP iOS 11,6,290300
2,Smartphone Apple iPhone 8 256GB 12.0 MP Apple ...,5,325400
3,Smartphone Apple iPhone 8 Plus 64GB Câmera Dup...,11,299800
4,Smartphone Apple iPhone 11 64GB Câmera Dupla A...,8,439900
5,Smartphone Apple iPhone 8 Plus 128GB Câmera Du...,11,364800
6,Smartphone Apple iPhone X 64GB Câmera Dupla Ap...,5,408400
7,Smartphone Apple iPhone 8 Plus 256GB Câmera Du...,3,351900
8,Smartphone Apple iPhone 11 Pro 64GB Câmera Tri...,8,615900
9,Smartphone Apple iPhone 6 6 32GB 32GB 8.0 MP A...,5,143600


# Exporting data

In [14]:
# Write both tables next to the notebook, formatted first and raw second,
# leaving the row index out of each file.
for frame, csv_path in (
    (formatted_data_frame, "./formatted.csv"),
    (raw_data_frame, "./raw.csv"),
):
    frame.to_csv(csv_path, index=False)