In [1]:
import requests

import os
import pickle
import time
import random
from dotenv import load_dotenv
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import InvalidArgumentException
from selenium.common.exceptions import ElementClickInterceptedException

import csv



In [2]:
class Scraper:
	"""Convenience wrapper around a Selenium Chrome driver.

	Creating an instance builds the Chrome options, starts the browser and opens
	``url``. The helper methods wait for elements before using them and pause for
	a random 1-2 seconds before most actions.
	"""

	# This time is used when we are waiting for element to get loaded in the html
	wait_element_time = 30

	def __init__(self, url):
		"""Remember ``url``, prepare the driver options and start the browser."""
		self.url = url

		self.setup_driver_options()
		self.setup_driver()

	# Automatically close driver on destruction of the object
	def __del__(self):
		self.close()

	def close(self):
		"""Shut the browser down. Safe to call repeatedly and before a driver exists.

		Uses ``quit()`` so the whole driver session is ended, not only the current
		window. Errors raised while quitting are ignored because this also runs
		from ``__del__``.
		"""
		driver = getattr(self, 'driver', None)
		if driver is None:
			return

		# Drop the reference first so a second call (explicit or from __del__) is a no-op
		self.driver = None
		try:
			driver.quit()
		except Exception:
			# Best-effort cleanup: the browser may already be gone
			pass

	def get_current_url(self):
		"""Return the URL currently loaded in the browser."""
		return self.driver.current_url

	# Add these options in order to make chrome driver appear as a human instead of detecting it as a bot
	# Also change the 'cdc_' string in the chromedriver.exe with Notepad++ for example with 'abc_' to prevent detecting it as a bot
	def setup_driver_options(self):
		"""Build ``self.driver_options`` with the arguments and experimental options below."""
		self.driver_options = Options()

		arguments = [
			'--disable-blink-features=AutomationControlled'
		]

		experimental_options = {
			'excludeSwitches': ['enable-automation', 'enable-logging'],
			'prefs': {'profile.default_content_setting_values.notifications': 2}
		}

		for argument in arguments:
			self.driver_options.add_argument(argument)

		for key, value in experimental_options.items():
			self.driver_options.add_experimental_option(key, value)

	# Setup chrome driver with predefined options
	def setup_driver(self):
		"""Install/locate chromedriver, start Chrome, open ``self.url`` and maximize the window."""
		chrome_driver_path = ChromeDriverManager().install()
		self.driver = webdriver.Chrome(service=ChromeService(chrome_driver_path), options = self.driver_options)
		self.driver.get(self.url)
		self.driver.maximize_window()

	def add_login_functionality(self, login_url, is_logged_in_selector):
		"""Store the login page URL and the logged-in marker selector, then log in.

		``is_logged_in_selector`` is a CSS selector for an element that is only
		visible to logged in users.
		"""
		self.login_url = login_url
		self.is_logged_in_selector = is_logged_in_selector

		self.login()

	# Check if user is logged in based on a html element that is visible only for logged in users
	def login(self):
		"""Open the login page, submit the credentials and wait for the logged-in marker.

		Credentials are read from the ``USERNAME`` and ``PASSWORD`` environment
		variables after ``load_dotenv()`` has been called.

		Raises:
			RuntimeError: if either credential is missing or empty.
		"""
		self.go_to_page(self.login_url)

		# NOTE(review): by default load_dotenv() does not override variables that already
		# exist in the environment, and some systems predefine USERNAME - confirm the
		# value read here is the Gumtree login and not the OS user name.
		load_dotenv()
		username = os.getenv('USERNAME')
		password = os.getenv('PASSWORD')

		if not username or not password:
			raise RuntimeError('USERNAME and PASSWORD must be set in the environment or in the .env file')

		self.element_send_keys('input[name="loginMail"]', username)
		self.element_send_keys('input[name="password"]', password)

		self.element_click_by_xpath('//span[text()="Sign In"]')

		# Use the selector handed to add_login_functionality() instead of a hard-coded copy
		self.find_element(self.is_logged_in_selector, wait_element_time = 5)

	# Wait random amount of seconds before taking some action so the server won't be able to tell if you are a bot
	def wait_random_time(self):
		"""Sleep for a random duration between 1 and 2 seconds (rounded to 2 decimals)."""
		random_sleep_seconds = round(random.uniform(1, 2), 2)

		time.sleep(random_sleep_seconds)

	# Goes to a given page and waits random time before that to prevent detection as a bot
	def go_to_page(self, page):
		"""Wait a random time, then load ``page`` in the browser."""
		self.wait_random_time()

		self.driver.get(page)

	def _wait_for_clickable(self, by, value, description, exit_on_missing_element, wait_element_time):
		"""Wait until the element located by ``(by, value)`` is clickable and return it.

		On failure either return ``False`` (``exit_on_missing_element`` falsy) or
		print an error naming ``description`` and raise ``SystemExit``.
		"""
		if wait_element_time is None:
			wait_element_time = self.wait_element_time

		wait_until = EC.element_to_be_clickable((by, value))

		try:
			return WebDriverWait(self.driver, wait_element_time).until(wait_until)
		except Exception:
			# 'Exception' rather than a bare except so Ctrl+C / SystemExit are not swallowed
			if not exit_on_missing_element:
				return False

			print('ERROR: Timed out waiting for the element with ' + description + ' "' + value + '" to load')
			# Raise explicitly: the interactive exit() helper is not guaranteed to stop execution here
			raise SystemExit(1)

	def _click(self, element):
		"""Click ``element``; fall back to a JavaScript click when the native click is intercepted."""
		try:
			element.click()
		except ElementClickInterceptedException:
			self.driver.execute_script("arguments[0].click();", element)

	def find_element(self, selector, exit_on_missing_element = True, wait_element_time = None):
		"""Wait for the element matching CSS ``selector`` to become clickable.

		Returns the element, or ``False`` when it did not appear in time and
		``exit_on_missing_element`` is falsy. Otherwise a missing element ends the
		program via ``SystemExit``. ``wait_element_time`` defaults to the class value.
		"""
		return self._wait_for_clickable(By.CSS_SELECTOR, selector, 'css selector', exit_on_missing_element, wait_element_time)

	def find_element_by_xpath(self, xpath, exit_on_missing_element = True, wait_element_time = None):
		"""Same as ``find_element`` but the element is located by ``xpath``."""
		return self._wait_for_clickable(By.XPATH, xpath, 'xpath', exit_on_missing_element, wait_element_time)

	# Wait random time before clicking on the element
	def element_click(self, selector, delay = True):
		"""Optionally wait a random time, then click the element matching CSS ``selector``."""
		if delay:
			self.wait_random_time()

		self._click(self.find_element(selector))

	# Wait random time before clicking on the element
	def element_click_by_xpath(self, xpath, delay = True):
		"""Optionally wait a random time, then click the element located by ``xpath``."""
		if delay:
			self.wait_random_time()

		self._click(self.find_element_by_xpath(xpath))

	# Wait random time before sending the keys to the element
	def element_send_keys(self, selector, text, delay = True):
		"""Click the element matching CSS ``selector``, wait a random time and type ``text``."""
		if delay:
			self.wait_random_time()

		element = self.find_element(selector)

		self._click(element)
		self.wait_random_time()
		element.send_keys(text)

	# Wait random time before sending the keys to the element
	def element_send_keys_by_xpath(self, xpath, text, delay = True):
		"""Click the element located by ``xpath`` and type ``text`` into it."""
		if delay:
			self.wait_random_time()

		element = self.find_element_by_xpath(xpath)

		self._click(element)
		element.send_keys(text)

	def input_file_add_files(self, selector, files):
		"""Send ``files`` (a string of file paths) to the file input matching CSS ``selector``.

		Only presence of the input is awaited, not clickability. Ends the program
		via ``SystemExit`` when the input does not appear or the paths are rejected.
		"""
		wait_until = EC.presence_of_element_located((By.CSS_SELECTOR, selector))

		try:
			input_file = WebDriverWait(self.driver, self.wait_element_time).until(wait_until)
		except Exception:
			print('ERROR: Timed out waiting for the input_file with selector "' + selector + '" to load')
			# End the program execution because we cannot find the input_file
			raise SystemExit(1)

		self.wait_random_time()

		try:
			input_file.send_keys(files)
		except InvalidArgumentException:
			print('ERROR: Exiting from the program! Please check if these file paths are correct:\n' + files)
			raise SystemExit(1)

	def scroll_to_element(self, selector):
		"""Scroll the element matching CSS ``selector`` into view."""
		element = self.find_element(selector)

		self.driver.execute_script('arguments[0].scrollIntoView(true);', element)

	def scroll_to_element_by_xpath(self, xpath):
		"""Scroll the element located by ``xpath`` into view."""
		element = self.find_element_by_xpath(xpath)

		self.driver.execute_script('arguments[0].scrollIntoView(true);', element)

	def element_delete_text(self, selector, delay = True):
		"""Clear the text of the input matching CSS ``selector`` using keyboard input.

		Sends Shift+Home followed by Backspace.
		NOTE(review): Shift+Home selects from the caret to the start of the field, so
		this presumably relies on the caret being at the end of the text - confirm.
		"""
		if delay:
			self.wait_random_time()

		element = self.find_element(selector)

		# Select the text in the input
		element.send_keys(Keys.LEFT_SHIFT + Keys.HOME)
		# Remove the selected text with backspace
		element.send_keys(Keys.BACK_SPACE)

In [3]:
def get_data_from_csv(csv_file_name):
	"""Read ``./<csv_file_name>.csv`` and return its rows as a list of dicts.

	The file is decoded as UTF-8 with an optional BOM and parsed with
	``csv.DictReader`` using ``,`` as delimiter, so every row maps the header
	names to the string values of that row.

	If the file cannot be opened an error is printed and ``SystemExit`` is
	raised. Parsing or decoding errors are not hidden behind a "not found"
	message; they propagate unchanged.
	"""
	file_path = "." + os.path.sep + csv_file_name + '.csv'

	try:
		# newline='' leaves newline handling to the csv module, so line breaks
		# inside quoted fields are preserved as written
		with open(file_path, encoding="UTF-8-SIG", newline='') as csv_file:
			return list(csv.DictReader(csv_file, delimiter=','))
	except OSError as error:
		print('ERROR: Could not open csv file ' + file_path + ': ' + str(error))
		# Raise explicitly: the interactive exit() helper is not guaranteed to stop execution here
		raise SystemExit(1)

In [4]:

# Publish each listing from a fresh "post an ad" page
def update_listings(listings, type, scraper, server):
	"""Publish every entry of ``listings`` one after another.

	For each entry the scraper first navigates to the Gumtree "post an ad" page
	and then ``publish_listing`` is called with the entry, ``type``, ``scraper``
	and ``server``. No removal of an existing listing happens in this function.
	"""
	post_ad_url = "https://www.gumtree.com.au/web/syi"

	for entry in listings:
		scraper.go_to_page(post_ad_url)
		publish_listing(entry, type, scraper, server)


def publish_listing(data, listing_type, scraper, server):
	"""Fill in and submit the Gumtree "post an ad" flow for one listing.

	Args:
		data: mapping with the keys read below: 'Title', 'Category2', 'Category3',
			'Photos Folder', 'Photos Names', 'Description', 'Condition', 'Price', 'Id'.
		listing_type: accepted for interface compatibility; not used here.
		scraper: object providing the Scraper helper methods used below.
		server: object whose ``post(id)`` is called after a successful publish.

	Raises:
		RuntimeError: if the success message does not show up after posting, or
			if ``server.post`` fails (the original error is chained as the cause).
	"""
	# Click on create new listing button
	scraper.element_click('a[href="/web/syi/title"]')

	scraper.element_send_keys('input[name="preSyiTitle"]', data['Title'])
	scraper.element_click_by_xpath('//button[text()="Next"]')

	# The first category level is selected through a hard-coded button id
	# scraper.element_click_by_xpath('//span[text()="' + data['Category1'] + '"]')
	scraper.element_click('button[id="9299"]')

	scraper.element_click_by_xpath('//span[text()="' + data['Category2'] + '"]')
	scraper.element_click_by_xpath('//span[text()="' + data['Category3'] + '"]')
	scraper.element_click_by_xpath('//button[text()="Next"]')

	images_path = generate_multiple_images_path(data['Photos Folder'], data['Photos Names'])
	scraper.input_file_add_files('input[accept="image/gif,image/jpg,image/jpeg,image/pjpeg,image/png,image/x-png"]', images_path)

	scraper.scroll_to_element_by_xpath('//h2[text()="Description"]')
	scraper.element_send_keys('textarea[name="description"]', data['Description'])

	scraper.element_click_by_xpath(f'//span[text()="{data["Condition"]}"]')

	scraper.scroll_to_element_by_xpath('//h2[text()="Price"]')

	scraper.element_send_keys('input[name="price.amount"]', data['Price'])

	scraper.scroll_to_element('label[for="mapAddress"]')
	scraper.element_click_by_xpath('//button[text()="Next"]')

	scraper.element_click('button[value="0"]')

	scraper.scroll_to_element_by_xpath('//h2[text()="Optional extra"]')
	scraper.element_click_by_xpath('//button[text()="Post"]')

	# Wait up to 3 seconds for the confirmation text; returns False when it never appears
	success_mess_elm = scraper.find_element_by_xpath('//span[text()="Occasionally ads may take a few hours to go live."]', False, 3)

	if not success_mess_elm:
		raise RuntimeError(f"Listing {data['Id']} is not published successfully")

	# The messages previously contained a stray '$' before the id ("${...}" is not f-string syntax)
	print(f"Listing {data['Id']} is published successfully")
	try:
		server.post(data['Id'])
		time.sleep(1)
	except Exception as error:
		# Chain the original error so the real cause stays visible in the traceback
		raise RuntimeError(f"Listing {data['Id']} is not updated to database successfully") from error

def generate_multiple_images_path(path, images):
	"""Build one string with the full path of every image, separated by newlines.

	Args:
		path: folder containing the images; a trailing '/' is added when missing.
			An empty ``path`` is left empty, so the names are returned unprefixed.
		images: image file names separated by ';'. Whitespace around each name is
			stripped and empty entries (e.g. from a trailing ';') are skipped.

	Returns:
		The joined paths, or an empty string when ``images`` holds no names.
	"""
	# Last character must be '/' because after that we are adding the name of the image
	if path and path[-1] != '/':
		path += '/'

	# Split image names by ";" and drop blanks so no bare folder path is emitted
	image_names = [image_name.strip() for image_name in images.split(';')]

	# "\n" between the paths indicates a new file
	return '\n'.join(path + image_name for image_name in image_names if image_name)

In [5]:
# Start the Chrome driver (done in Scraper.__init__) and open the Gumtree home page
scraper = Scraper('https://www.gumtree.com.au')

In [6]:
# Add login functionality to the scraper: this stores the login URL and the
# logged-in marker selector, then performs the login immediately
scraper.add_login_functionality('https://www.gumtree.com.au/t-login-form.html', 'span.header__my-gumtree-trigger-text')
# scraper.go_to_page('https://www.gumtree.com.au/web/syi')

# Get data for item type listings from items.csv (get_data_from_csv reads from the current directory)
# item_listings = get_data_from_csv('items')
# # Publish all of the items (NOTE(review): the original note said "facebook marketplace"; this notebook drives gumtree.com.au)

In [7]:
###### DELETE ITEMS
# Load the listings to delete from ./delete_items.csv; the output below shows
# one dict per row with the 'Id' and 'Reason' columns
delete_listing = get_data_from_csv('delete_items')
delete_listing

[{'Id': '1325554932', 'Reason': 'I sold it elsewhere'},
 {'Id': '1325554944', 'Reason': 'I shipped it to someone on Gumtree'},
 {'Id': '1325554569', 'Reason': 'I sold it elsewhere'}]

In [8]:
# Displays the same list as the previous cell (redundant re-display)
delete_listing

[{'Id': '1325554932', 'Reason': 'I sold it elsewhere'},
 {'Id': '1325554944', 'Reason': 'I shipped it to someone on Gumtree'},
 {'Id': '1325554569', 'Reason': 'I sold it elsewhere'}]

In [9]:
def delete_item_by_id(data, scraper: Scraper):
    """Delete one Gumtree ad through its "my ad" page.

    Args:
        data: mapping with 'Id' (the ad id) and 'Reason' (text of the deletion
            reason label to select).
        scraper: logged-in Scraper used to drive the browser.

    Prints whether the ad was deleted or could not be found. The delete link is
    awaited for 2 seconds; when it is missing the ad is reported as not present.
    """
    ad_id = data['Id']
    # Built once: the same link is checked, scrolled to and clicked
    delete_link_xpath = f'//a[@href="/m-delete-ad.html?adId={ad_id}"]'

    # Single slash after the host, matching the other "my ad" URL used in this notebook
    scraper.go_to_page(f"https://www.gumtree.com.au/m-my-ad.html?adId={ad_id}")

    if not scraper.find_element_by_xpath(delete_link_xpath, False, 2):
        print(f"Listing {ad_id} is not on Gumtree anymore to be deleted")
        return

    scraper.scroll_to_element_by_xpath(delete_link_xpath)
    scraper.element_click_by_xpath(delete_link_xpath)
    scraper.element_click_by_xpath(f'//label[text()="{data["Reason"]}"]')
    scraper.element_click_by_xpath('//button[@id="delete-ad-confirm"]')

    # Pause after confirming before the success message is printed
    time.sleep(2)
    print(f"Listing {ad_id} is deleted successfully")

In [10]:
# Delete every listing loaded from the csv file
try:
    for item in delete_listing:
        delete_item_by_id(item, scraper)
finally:
    # Close the browser even when a deletion fails part-way through.
    # Scraper's teardown lives in __del__, so it is invoked directly here.
    scraper.__del__()

In [7]:
from flask import Flask, request, jsonify

# Flask application that receives the price update requests
app = Flask(__name__)
# Both the Celery broker and the result backend point at a local Redis, database 0
app.config.update(
    CELERY_BROKER_URL='redis://localhost:6379/0',
    CELERY_RESULT_BACKEND='redis://localhost:6379/0'
)

# NOTE(review): make_celery is not defined or imported in the visible part of this
# notebook - it must be provided before this cell can run on a fresh kernel
celery = make_celery(app)

@celery.task(bind=True)
def update_price(self, id, price):
    """Celery task: log in to Gumtree and set a new price on the ad ``id``.

    A new Scraper (and therefore a new browser) is created for every call. Any
    exception raised along the way is caught and printed, so the task itself
    never raises.

    NOTE(review): the browser is only closed when the Scraper object is
    destroyed; there is no explicit teardown here.
    """
    home_url = 'https://www.gumtree.com.au'
    login_url = 'https://www.gumtree.com.au/t-login-form.html'
    logged_in_marker = 'span.header__my-gumtree-trigger-text'
    price_input = 'input[name="price.amount"]'

    try:
        browser = Scraper(home_url)

        # Login
        browser.add_login_functionality(login_url, logged_in_marker)

        # Open the ad's management page and start editing the price
        browser.go_to_page(f'https://www.gumtree.com.au/m-my-ad.html?adId={id}')
        browser.scroll_to_element('h1[id="ad-title"]')
        browser.element_click_by_xpath('//span[text()="Edit price"]')

        # Replace the old amount with the new one and confirm
        browser.element_delete_text(price_input)
        browser.element_send_keys(price_input, price)
        browser.element_click_by_xpath("//button[span[text()='Confirm']]")
    except Exception as error:
        print(f"Error updating price: {error}")

@app.route('/facebook_gummtree/', methods=['POST'])
def update():
    """Queue a background price update for an item that is not sold.

    Expects a JSON object with 'isSold' and, for unsold items, 'price' and
    'productId'. 'isSold' counts as "not sold" when it is the JSON boolean
    false or the string 'False' (compared case-insensitively).

    Returns:
        400 with an 'error' message when the body is empty, is not a JSON
        object, or lacks a required field; otherwise 200 with a 'message'
        stating whether an update was queued, plus the received 'data'.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400

    # Reject malformed payloads with a 400 instead of failing with a 500
    if not isinstance(data, dict) or 'isSold' not in data:
        return jsonify({'error': 'Missing required field: isSold'}), 400

    is_sold = data['isSold']
    not_sold = is_sold is False or str(is_sold).strip().lower() == 'false'

    if not_sold:
        missing = [field for field in ('price', 'productId') if field not in data]
        if missing:
            return jsonify({'error': 'Missing required fields: ' + ', '.join(missing)}), 400

        # Queue the task with Celery
        update_price.apply_async(args=[data['productId'], data['price']])
        message = 'Updating price in the background'
    else:
        # Only claim an update is running when one was actually queued
        message = 'Item is marked as sold; no price update was queued'

    # Return a response immediately
    response = {
        'message': message,
        'data': data
    }

    return jsonify(response), 200

# Start Flask's built-in server on port 5000. The output below shows this guard is
# true when the cell runs in the notebook ("Serving Flask app '__main__'").
if __name__ == '__main__':
    app.run(port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
[2024-06-18 18:14:16,149] ERROR in app: Exception on /facebook_gummtree/ [POST]
Traceback (most recent call last):
  File "/media/james/2b44c141-eec6-4c63-a888-30e9ac5660bd/git/gummiebot/.venv/lib/python3.10/site-packages/redis/connection.py", line 276, in connect
    sock = self.retry.call_with_retry(
  File "/media/james/2b44c141-eec6-4c63-a888-30e9ac5660bd/git/gummiebot/.venv/lib/python3.10/site-packages/redis/retry.py", line 46, in call_with_retry
    return do()
  File "/media/james/2b44c141-eec6-4c63-a888-30e9ac5660bd/git/gummiebot/.venv/lib/python3.10/site-packages/redis/connection.py", line 277, in <lambda>
    lambda: self._connect(), lambda error: self.disconnect(error)
  File "/media/james/2b44c141-eec6-4c63-a888-30e9ac5660bd/git/gummiebot/.venv/lib/python3.10/site-packages/redis/connection.py", line 639, in _connect
    raise err
  File "/media/james/2b44c141-eec6-4c63-a888-30e9ac5660bd/git/gummiebot/.venv/li