-
Notifications
You must be signed in to change notification settings - Fork 0
/
filters_future_new.py
79 lines (60 loc) · 2.54 KB
/
filters_future_new.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import bs4
import requests
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
from time import sleep
from random import randint
# Scrape the Future Electronics "filters" product listing page by page and
# save one semicolon-separated record per product row to future_filters.csv.

pages = [str(i) for i in range(0, 23)]
filename = "future_filters.csv"
header = "manufacturer_part; manufacturer; description; pricing; qty_available; Lead_Time; Package\n"

# Request settings that do not change between pages; the page number is
# appended to base_url for each request.
base_url = 'https://www.futureelectronics.com/c/passives/filters/products?q=%3Arelevance&text=&pageSize=100&page='
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
headers = {'User-Agent': user_agent}


def _cell_text(container, tag, css_class, index=0):
    """Return the cleaned text of one element found under *container*.

    Looks up every ``tag`` element carrying the CSS class ``css_class`` and
    takes the one at position ``index``.

    Returns:
        The element's text with ';' replaced by a space and every run of
        whitespace (including line breaks) collapsed to a single space, so
        the value cannot split a record of the ';'-separated output file.
        The placeholder string 'null' when no element exists at ``index``.
    """
    matches = container.find_all(tag, {"class": css_class})
    try:
        raw = matches[index].text
    except IndexError:
        return 'null'
    return " ".join(raw.replace(';', ' ').split())


# The context manager closes the file even if a request or parse step raises.
with open(filename, "w", encoding="utf-8") as f:
    # The header belongs at the top of the file once, not once per page.
    f.write(header)

    for page in pages:
        my_url = base_url + page

        # Fetch the page; the timeout keeps a stalled connection from
        # hanging the whole run.
        uClient = requests.get(my_url, headers=headers, timeout=60)
        page_html = uClient.content
        status_code = uClient.status_code
        uClient.close()

        # Random 30-50 second pause after every request.
        sleep(randint(30, 50))

        # HTML parsing
        page_soup = soup(page_html, "html.parser")
        table_body = page_soup.find("table")
        if table_body is None:
            # No product table in the response: report it and move on to the
            # next page instead of aborting the run.
            print("page " + page + ": no product table found (HTTP "
                  + str(status_code) + "), skipped")
            continue

        containers = table_body.find_all("tr", {"class": "list-row"})
        for container in containers:
            fields = [
                _cell_text(container, "a", "product__list--code"),
                _cell_text(container, "div", "product__list--name"),
                _cell_text(container, "div", "product__list--description"),
                _cell_text(container, "td", "product_price"),
                _cell_text(container, "td", "product_stock"),
                # The first and third "product_pkg" cells are written under
                # the Lead_Time and Package header columns respectively.
                _cell_text(container, "td", "product_pkg", 0),
                _cell_text(container, "td", "product_pkg", 2),
            ]
            f.write(";".join(fields) + "\n")

        # Progress indicator: the page number that was just written.
        print(page)