In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import *
import time
import pandas as pd
from tqdm import tqdm
import concurrent.futures
import numpy as np
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# Configure and launch the Chrome browser used for scraping

options = webdriver.ChromeOptions()
# Exclude Chrome's 'enable-logging' switch (commonly done to quiet console log output)
options.add_experimental_option('excludeSwitches', ['enable-logging'])

# ChromeDriverManager().install() supplies the chromedriver path handed to the Service
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Shared ActionChains instance; the scraping helpers use it to hover/click elements
action = ActionChains(driver)

In [4]:
# Activity page of the 'boredapeyachtclub' collection on OpenSea; the query string
# sets search[eventTypes][0]=OFFER_ENTERED (presumably restricting the feed to offers)
homepage_url = 'https://opensea.io/collection/boredapeyachtclub/activity?search[eventTypes][0]=OFFER_ENTERED'

In [5]:
# Empty frame that the scraping helpers fill with one row per scraped offer
df = pd.DataFrame(
    columns=[
        'nft_id',
        'price_crypto',
        'price_usd',
        'quantity',
        'offer_from_name',
        'offer_from_link',
        'time_uploaded',
    ]
)
df

Unnamed: 0,nft_id,price_crypto,price_usd,quantity,offer_from_name,offer_from_link,time_uploaded


In [6]:
# Retry helpers: nudge the scroll position and re-read a row after a StaleElementReferenceException

def _read_offer_row(web_elem, hover_elem):
    """Read one offer row and return its values in the DataFrame's column order.

    Args:
        web_elem: WebElement for the row's list item.
        hover_elem: WebElement that is hovered/clicked before the absolute
            timestamp is read.

    Returns:
        list: [nft_id, price_crypto, price_usd, quantity, offer_from_name,
        offer_from_link, time_uploaded].

    Raises:
        StaleElementReferenceException: if an element is no longer attached
            to the DOM.
        IndexError: if the row text has fewer than eight lines.
    """
    items_list = web_elem.text.split('\n')
    # The leading '.' keeps the XPath relative to this row; a bare '//' would
    # search the whole document and return the first account link on the page.
    item_link = web_elem.find_element(
        by=By.XPATH,
        value=".//*[@class='sc-1f719d57-0 hoTuIF sc-29427738-0 ikrGyo AccountLink--ellipsis-overflow']",
    ).get_attribute('href')
    action.move_to_element(hover_elem).click().perform()
    abs_time = driver.find_element(
        by=By.XPATH,
        value="//*[@class='sc-29427738-0 sc-f95e0497-0 ikrGyo frmfzd']/div",
    ).text
    return [items_list[1], items_list[3], items_list[4], items_list[6],
            items_list[7], item_link, abs_time]


def _append_row(df, row):
    """Append `row` to `df` in place under the next unused integer label."""
    # max()+1 (not len(df)+1) never leaves a gap that a later write can
    # collide with, so an existing row is never overwritten.
    next_label = int(df.index.max()) + 1 if len(df) else 0
    df.loc[next_label] = row


def _retry_after_scroll(scroll_script, web_elem, hover_elem, df):
    """Run `scroll_script`, wait, then try to read and store the row.

    Returns:
        bool: True if the row was appended, False if the element was stale.
    """
    driver.execute_script(scroll_script)
    time.sleep(3)
    try:
        row = _read_offer_row(web_elem, hover_elem)
    except StaleElementReferenceException:
        return False
    _append_row(df, row)
    return True


def scroll_up_data(web_elem,hover_elem,df):
    """Retry reading a row after adjusting the scroll position.

    If the element is still stale, falls back to scroll_down_data.

    Args:
        web_elem: WebElement for the row's list item.
        hover_elem: WebElement hovered/clicked to reveal the timestamp.
        df: DataFrame that receives the row (mutated in place).
    """
    stored = _retry_after_scroll(
        'window.scrollTo(document.body.scrollHeight,document.body.scrollHeight-2);',
        web_elem, hover_elem, df)
    if not stored:
        scroll_down_data(web_elem, hover_elem, df)


def scroll_down_data(web_elem,hover_elem,df):
    """Last-chance retry for a row after adjusting the scroll position.

    Args:
        web_elem: WebElement for the row's list item.
        hover_elem: WebElement hovered/clicked to reveal the timestamp.
        df: DataFrame that receives the row (mutated in place).
    """
    # Best effort: if the element is still stale the row is skipped on purpose.
    _retry_after_scroll(
        'window.scrollTo(document.body.scrollHeight,document.body.scrollHeight+4);',
        web_elem, hover_elem, df)
    

In [7]:
def store_data(all_items,hover_button,df):
    """Scrape every listed row and append it to `df`.

    Args:
        all_items: list of WebElements, one per activity row.
        hover_button: list of WebElements hovered/clicked to reveal each
            row's absolute timestamp; paired with `all_items` by position.
        df: DataFrame that receives the rows (mutated in place).

    Rows whose elements have gone stale are handed to scroll_up_data for a
    retry.
    """
    time.sleep(3)
    # zip pairs the lists by position and stops at the shorter one, so a
    # length mismatch no longer raises IndexError part-way through.
    for item, hover in zip(all_items, hover_button):
        try:
            items_list = item.text.split('\n')
            # The leading '.' keeps the XPath relative to this row; a bare '//'
            # would search the whole document and always return the first link.
            item_link = item.find_element(
                by=By.XPATH,
                value=".//*[@class='sc-1f719d57-0 hoTuIF sc-29427738-0 ikrGyo AccountLink--ellipsis-overflow']",
            ).get_attribute('href')
            action.move_to_element(hover).click().perform()
            abs_time = driver.find_element(
                by=By.XPATH,
                value="//*[@class='sc-29427738-0 sc-f95e0497-0 ikrGyo frmfzd']/div",
            ).text
        except StaleElementReferenceException:
            # The retry helper needs the target frame as its third argument.
            scroll_up_data(item, hover, df)
            continue

        # Enter the extracted data into the dataframe under the next unused
        # label, so rows added by the retry helpers are never overwritten.
        next_label = int(df.index.max()) + 1 if len(df) else 0
        df.loc[next_label] = [items_list[1], items_list[3], items_list[4],
                              items_list[6], items_list[7], item_link, abs_time]
        
    

In [8]:
# Scroll the loaded page towards the bottom, scraping rows after each scroll
def get_webpage(url, max_scrolls=10):
    """Scrape the currently loaded page, scrolling down to reveal more rows.

    Rows are appended to the module-level `df` via store_data.

    Args:
        url: Not used by this function; the page must already be open in
            `driver` (the notebook calls driver.get(...) beforehand).
        max_scrolls: Maximum number of scroll-and-scrape passes after the
            initial scrape. Defaults to 10.
    """
    item_xpath = "//*[@role='listitem']"
    hover_xpath = "//*[@class='sc-29427738-0 sc-f95e0497-0 ikrGyo frmfzd']"

    driver.maximize_window()
    driver.implicitly_wait(3)
    height = driver.execute_script('return document.body.scrollHeight;')

    # Scrape the rows that are visible before any scrolling
    all_items = driver.find_elements(by=By.XPATH, value=item_xpath)
    hover_button = driver.find_elements(by=By.XPATH, value=hover_xpath)
    store_data(all_items, hover_button, df)

    for _ in range(max_scrolls):
        # Scroll down, then give the page time to load more rows
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight);')
        time.sleep(3)

        # Stop once scrolling no longer grows the page
        new_height = driver.execute_script('return document.body.scrollHeight;')
        if new_height == height:
            break
        height = new_height

        # Locate rows only after the wait, using the same row selector as the
        # first pass (the role attribute was previously compared to a class string).
        # NOTE(review): rows still rendered from an earlier pass will match
        # again; de-duplicate df afterwards if needed — confirm on the live page.
        all_items = driver.find_elements(by=By.XPATH, value=item_xpath)
        hover_button = driver.find_elements(by=By.XPATH, value=hover_xpath)
        store_data(all_items, hover_button, df)

In [9]:
# Open the activity page in the browser; get_webpage scrapes whatever is loaded
driver.get(homepage_url)

In [10]:
get_webpage(homepage_url) # Scroll through the loaded page and append the scraped rows to df

In [11]:
# Display the scraped offers
df

Unnamed: 0,nft_id,price_crypto,price_usd,quantity,offer_from_name,offer_from_link,time_uploaded
0,3338,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:24 AM"
1,1486,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:24 AM"
2,6362,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:23 AM"
3,3718,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:23 AM"
4,2717,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:24 AM"
5,829,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:24 AM"
6,2998,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:26 AM"
7,461,61.0777 WETH,"$111,303.73",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:24 AM"
8,5491,63.3636 WETH,"$115,469.39",1,NFTButlerGPT,https://opensea.io/NFTButlerGPT,"March 30, 2023 at 11:54 AM"


In [12]:
# Write the scraped offers to data_opensea.csv, omitting the index column
df.to_csv('data_opensea.csv',index=False)