In [13]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Scrape van listings from Camplify NZ search pages with headless Chrome, then
# replace the contents of the first worksheet of the "Van Listings" Google
# Sheets document with the scraped rows.


def _field_text(card, class_name):
    """Return the stripped text of the first `class_name` element inside `card`.

    Returns "" when the card has no such element, so a single incomplete
    listing (presumably e.g. one with no reviews yet -- TODO confirm against
    the live markup) does not abort the whole multi-page scrape.

    NOTE: with an implicit wait configured on the driver, a missing element
    costs up to that timeout before the lookup gives up.
    """
    try:
        return card.find_element(By.CLASS_NAME, class_name).text.strip()
    except NoSuchElementException:
        return ""


# Set up Selenium Chrome driver
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode (without GUI)
service = Service("path/to/chromedriver")  # Replace with the actual path to chromedriver executable
driver = webdriver.Chrome(service=service, options=chrome_options)

# Define the URL pattern for the pages
url_pattern = "https://www.camplify.co.nz/s?seed=8031&page={}"

# One row per listing: [title, description, location, star rating, price]
vans_data = []

try:
    # The implicit wait is a session-wide setting, so it is configured once
    # here instead of being re-sent to the driver on every page.
    driver.implicitly_wait(10)

    # Iterate over the result pages 1..40
    for page_number in range(1, 41):
        driver.get(url_pattern.format(page_number))

        # Find all the van listings on the page
        van_listings = driver.find_elements(By.CLASS_NAME, "RvCard__Content")

        for van_listing in van_listings:
            vans_data.append([
                _field_text(van_listing, "RvCard__Title"),
                _field_text(van_listing, "RvCard__Description"),
                _field_text(van_listing, "RvCard__LocationText"),
                _field_text(van_listing, "StarRatingWithCount__TotalReviews"),
                _field_text(van_listing, "RvCard__PriceValue"),
            ])

        print("Scraped data from page", page_number)
finally:
    # Always shut the browser down, even if a page load or lookup raised;
    # otherwise the headless Chrome process is left running.
    driver.quit()

# Authenticate with Google Sheets API
# NOTE(review): the key file name ends in ".json.json" and lives under
# ".ipynb_checkpoints" -- confirm this path is intended.
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
credentials = ServiceAccountCredentials.from_json_keyfile_name(r"C:\Users\USER\.ipynb_checkpoints\credentials.json.json", scope)
client = gspread.authorize(credentials)

# Open the Google Sheets document and select its first sheet
spreadsheet = client.open("Van Listings")
sheet = spreadsheet.get_worksheet(0)

# Clear the existing content in the sheet
sheet.clear()

# Write the header row and all scraped rows in a single request. Building one
# range that always includes the header keeps the range valid ("A1:E1") even
# when nothing was scraped. Keyword arguments are used because the positional
# order of `range_name` and `values` differs between gspread 5.x and 6.x.
headers = ["Van Title", "Description", "Location", "Star Rating", "Price"]
sheet.update(
    range_name="A1:E{}".format(len(vans_data) + 1),
    values=[headers] + vans_data,
)

print("Data has been successfully scraped and saved to Google Sheets.")
print("Number of scraped vans:", len(vans_data))


Scraped data from page 1
Scraped data from page 2
Scraped data from page 3
Scraped data from page 4
Scraped data from page 5
Scraped data from page 6
Scraped data from page 7
Scraped data from page 8
Scraped data from page 9
Scraped data from page 10
Scraped data from page 11
Scraped data from page 12
Scraped data from page 13
Scraped data from page 14
Scraped data from page 15
Scraped data from page 16
Scraped data from page 17
Scraped data from page 18
Scraped data from page 19
Scraped data from page 20
Scraped data from page 21
Scraped data from page 22
Scraped data from page 23
Scraped data from page 24
Scraped data from page 25
Scraped data from page 26
Scraped data from page 27
Scraped data from page 28
Scraped data from page 29
Scraped data from page 30
Scraped data from page 31
Scraped data from page 32
Scraped data from page 33
Scraped data from page 34
Scraped data from page 35
Scraped data from page 36
Scraped data from page 37
Scraped data from page 38
Scraped data from pag