/
houseplant-bot.py
81 lines (68 loc) · 2.92 KB
/
houseplant-bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import json
import re
import random
class HouseplantBot:
    """Scrape houseplant listings from a shop site and save them as JSON.

    The bot drives an already-created Selenium WebDriver. It walks the
    houseplant category pages to collect each plant's name, price and image
    URL, visits the product pages for a short description, and dumps the
    collected records to a JSON file.

    NOTE(review): this class uses the Selenium 3 style
    ``find_element(s)_by_*`` helpers, which recent Selenium 4 releases no
    longer provide -- confirm the pinned Selenium version before upgrading.
    """

    # Category slug shared by the listing URL and the product CSS class.
    _CATEGORY = "houseplants-planters-madison-wi"
    # CSS class of the short-description element on a product page.
    _DETAILS_CLASS = "woocommerce-product-details__short-description"

    def __init__(self, driver, base_url: str) -> None:
        """Store the WebDriver and site root; start with no plants.

        Args:
            driver: Selenium WebDriver instance used for all page loads.
            base_url: Site root without a trailing slash.
        """
        self.driver = driver
        self.base_url = base_url
        self.plant_list = []

    def getNameAndPriceList(self) -> None:
        """Collect id, name, price and image URL from category pages 1-3.

        One dict per product is appended to ``self.plant_list``. When a
        product's text is not exactly "name\\nprice ...", a random
        placeholder price is generated instead.
        """
        for page in range(1, 4):
            self.driver.get(
                f'{self.base_url}/product-category/{self._CATEGORY}/page/{page}/')
            cards = self.driver.find_elements_by_class_name(
                f"product_cat-{self._CATEGORY}")
            for counter, plant in enumerate(cards, start=1):
                # The image is looked up by its position in the product list.
                plant_image = self.driver.find_element_by_xpath(
                    f'//*[@id="left-area"]/ul/li[{counter}]/a/span[1]/img'
                ).get_attribute('src')
                lines = plant.text.split("\n")
                name = lines[0].title()
                if len(lines) == 2:
                    # Keep only the first space-separated token of the price line.
                    price = lines[1].split(" ")[0]
                else:
                    # Placeholder price; cents are zero-padded so that
                    # 5 cents renders as ".05" rather than ".5".
                    price = (f'{random.randint(5, 20)}'
                             f'.{random.randint(0, 99):02d}')
                self.plant_list.append({
                    "id": f"12a3b{page}{counter}",
                    "name": name,
                    "price": price,
                    "image": plant_image,
                })
            print(page)

    def _fetchDetails(self, slug: str) -> str:
        """Load ``/product/<slug>`` and return its short-description text."""
        self.driver.get(f'{self.base_url}/product/{slug}')
        return self.driver.find_element_by_class_name(
            self._DETAILS_CLASS).text

    def getDetailsUpdateList(self) -> None:
        """Add a ``"details"`` entry to every plant in ``self.plant_list``.

        The product page is looked up by the hyphen-joined name (with any
        parenthesised suffix removed). If that fails, the first word of the
        name is tried. If both fail, the description is the empty string.
        """
        for plant in self.plant_list:
            # Drop a parenthesised suffix such as " (4 Inch)".
            name = re.sub(r' \([^)]*\)', "", plant["name"])
            words = name.split(" ")
            candidates = ["-".join(words)]
            if len(words) > 1:
                # Fallback slug: the first word only. Skipped for single-word
                # names, where it would just repeat the same request.
                candidates.append(words[0])

            details = ""
            for slug in candidates:
                try:
                    details = self._fetchDetails(slug)
                except Exception:
                    # Best effort: a missing page or element is not fatal.
                    # Unlike a bare ``except``, this lets KeyboardInterrupt
                    # and SystemExit propagate.
                    continue
                break
            plant["details"] = details.strip().capitalize()

    def addToJson(self, path: str = "plantDB.json") -> None:
        """Write ``self.plant_list`` to *path* as indented JSON.

        Args:
            path: Output file; defaults to ``plantDB.json`` in the current
                working directory.
        """
        with open(path, "w", encoding="utf-8") as out_file:
            json.dump(self.plant_list, out_file, indent=6)

    def createPlantData(self) -> None:
        """Run the full scrape, write the JSON file, then quit the driver.

        The driver is quit even when a step raises, so a failed run does not
        leave a browser process behind.
        """
        try:
            self.getNameAndPriceList()
            self.getDetailsUpdateList()
            self.addToJson()
        finally:
            self.driver.quit()
# Script entry: start Chrome, scrape the shop and write plantDB.json.
chrome_options = Options()
# Uncomment to run Chrome without a visible window.
# chrome_options.add_argument("--headless")
# Pass the options object to the driver; it was previously built but never
# used, so enabling the headless flag above would have had no effect.
driver = webdriver.Chrome(
    ChromeDriverManager().install(), options=chrome_options)
BASE_URL = 'https://redsquareflowers.com'
bot = HouseplantBot(driver, BASE_URL)
bot.createPlantData()