In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Scrape the De.Fi Rekt Database table with Selenium and save it to
# "rekt_database_full.csv".
#
# The browser session runs inside try/finally so that Chrome is always shut
# down, even when a wait times out or a selector stops matching. Rows collected
# before a failure in the page loop are still written to the CSV.

# Parallel lists, one entry per successfully extracted table row
tokens, chains, categories, issue_types, funds_lost, dates = [], [], [], [], [], []

# Initialize WebDriver
driver = webdriver.Chrome()

try:
    # Open the De.Fi Rekt Database webpage
    driver.get("https://de.fi/rekt-database")

    # Wait for the page-size dropdown to be present before interacting with it
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CLASS_NAME, "mono-selector-header")))

    # Click the dropdown menu to open options
    dropdown = driver.find_element(By.CLASS_NAME, "mono-selector-header")
    dropdown.click()

    # Wait for dropdown options to appear
    WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "item")))

    # Find and click the "Show by 200" option
    options = driver.find_elements(By.CLASS_NAME, "item")
    for option in options:
        if "Show by 200" in option.text:
            option.click()
            break
    else:
        # Without this the scrape would silently run with the default page size.
        print('Warning: "Show by 200" option not found; continuing with the default page size.')

    # Wait for the table to refresh
    time.sleep(5)
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CLASS_NAME, "scam-database-body")))

    # Loop through up to 20 pages
    for page in range(1, 21):
        print(f"Scraping Page {page}...")

        # Ensure rows are loaded; stop paging (but keep what was collected so far)
        # if they never show up, mirroring the next-button handling below.
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "grid-row-template.minimized"))
            )
        except Exception as e:
            print(f"Rows did not load on page {page}, stopping early: {e}")
            break

        # Extract all table rows
        rows = driver.find_elements(By.CLASS_NAME, "grid-row-template.minimized")

        # Loop through each row and extract data
        for row in rows:
            try:
                token = row.find_element(By.CSS_SELECTOR, ".name").text
                chain = row.find_element(By.CSS_SELECTOR, ".column.tokens.with-extra-info.network").text
                # NOTE(review): this selector is a subset of the issue-type selector on
                # the next line, so it matches that element too; find_element returns
                # the first match -- confirm the category column comes first in the row.
                category = row.find_element(By.CSS_SELECTOR, ".column.column-rekt-function").text
                issue_type = row.find_element(By.CSS_SELECTOR, ".column.with-extra-info.column-rekt-function").text
                fund_lost = row.find_element(By.CSS_SELECTOR, ".funds-lost").text
                date = row.find_element(By.CSS_SELECTOR, ".column.date").text

                # Append only after every field was read so the lists stay aligned
                tokens.append(token)
                chains.append(chain)
                categories.append(category)
                issue_types.append(issue_type)
                funds_lost.append(fund_lost)
                dates.append(date)

            except Exception as e:
                print(f"Error extracting row: {e}")

        # Find and click the right arrow to go to the next page
        try:
            next_page_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "arrow.right"))
            )
            next_page_button.click()
            time.sleep(3)  # Allow time for page transition
        except Exception as e:
            print("No next page button found or error clicking:", e)
            break  # Exit loop if there's an issue
finally:
    # Close the browser no matter how the scrape ended
    driver.quit()

# Create a DataFrame
df = pd.DataFrame({
    "Token Name": tokens,
    "Chain": chains,
    "Category": categories,
    "Type of Issue": issue_types,
    "Funds Lost": funds_lost,
    "Date": dates
})

# Save to CSV
df.to_csv("rekt_database_full.csv", index=False)

# Display the DataFrame
print(df)


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Scrape the De.Fi Rekt Database table with Selenium, drop the rows whose
# "Type of Issue" is in `issues_to_remove`, and save the remainder to
# "rekt_database_filtered.csv".
#
# The browser session runs inside try/finally so that Chrome is always shut
# down, even when a wait times out or a selector stops matching. Rows collected
# before a failure in the page loop are still filtered and written to the CSV.

# Parallel lists, one entry per successfully extracted table row
tokens, chains, categories, issue_types, funds_lost, dates = [], [], [], [], [], []

# Initialize WebDriver
driver = webdriver.Chrome()

try:
    # Open the De.Fi Rekt Database webpage
    driver.get("https://de.fi/rekt-database")

    # Wait for the page-size dropdown to be present before interacting with it
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CLASS_NAME, "mono-selector-header")))

    # Click the dropdown menu to open options
    dropdown = driver.find_element(By.CLASS_NAME, "mono-selector-header")
    dropdown.click()

    # Wait for dropdown options to appear
    WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "item")))

    # Find and click the "Show by 200" option
    options = driver.find_elements(By.CLASS_NAME, "item")
    for option in options:
        if "Show by 200" in option.text:
            option.click()
            break
    else:
        # Without this the scrape would silently run with the default page size.
        print('Warning: "Show by 200" option not found; continuing with the default page size.')

    # Wait for the table to refresh
    time.sleep(5)
    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CLASS_NAME, "scam-database-body")))

    # Loop through up to 20 pages
    for page in range(1, 21):
        print(f"Scraping Page {page}...")

        # Ensure rows are loaded; stop paging (but keep what was collected so far)
        # if they never show up, mirroring the next-button handling below.
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "grid-row-template.minimized"))
            )
        except Exception as e:
            print(f"Rows did not load on page {page}, stopping early: {e}")
            break

        # Extract all table rows
        rows = driver.find_elements(By.CLASS_NAME, "grid-row-template.minimized")

        # Loop through each row and extract data
        for row in rows:
            try:
                token = row.find_element(By.CSS_SELECTOR, ".name").text
                chain = row.find_element(By.CSS_SELECTOR, ".column.tokens.with-extra-info.network").text
                # NOTE(review): this selector is a subset of the issue-type selector on
                # the next line, so it matches that element too; find_element returns
                # the first match -- confirm the category column comes first in the row.
                category = row.find_element(By.CSS_SELECTOR, ".column.column-rekt-function").text
                issue_type = row.find_element(By.CSS_SELECTOR, ".column.with-extra-info.column-rekt-function").text
                fund_lost = row.find_element(By.CSS_SELECTOR, ".funds-lost").text
                date = row.find_element(By.CSS_SELECTOR, ".column.date").text

                # Append only after every field was read so the lists stay aligned
                tokens.append(token)
                chains.append(chain)
                categories.append(category)
                issue_types.append(issue_type)
                funds_lost.append(fund_lost)
                dates.append(date)

            except Exception as e:
                print(f"Error extracting row: {e}")

        # Find and click the right arrow to go to the next page
        try:
            next_page_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "arrow.right"))
            )
            next_page_button.click()
            time.sleep(3)  # Allow time for page transition
        except Exception as e:
            print("No next page button found or error clicking:", e)
            break  # Exit loop if there's an issue
finally:
    # Close the browser no matter how the scrape ended
    driver.quit()

# Create a DataFrame
df = pd.DataFrame({
    "Token Name": tokens,
    "Chain": chains,
    "Category": categories,
    "Type of Issue": issue_types,
    "Funds Lost": funds_lost,
    "Date": dates
})

# Issue types excluded from the filtered export (exact-match on the scraped text)
issues_to_remove = ["Access Control", "Other", "Flash Loan Attack", "Oracle Issue", "Phishing", "Reentrancy"]

# Filter out unwanted issue types
df = df[~df["Type of Issue"].isin(issues_to_remove)]

# Save the filtered dataset to a CSV file
df.to_csv("rekt_database_filtered.csv", index=False)

print(df)