In [None]:
import dataclasses
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from tqdm import tqdm

In [None]:
@dataclasses.dataclass
class Row:
    """One scraped table record, with every column kept as raw cell text.

    Instances are built positionally (``Row(*cells)``), so the order of the
    field declarations below is the order in which the cell values must be
    supplied. The ``dataclass`` decorator generates the ``__init__`` that
    accepts those 21 values, as well as ``__repr__`` and ``__eq__``.
    """

    _id: str
    case_number: str
    date: str
    block: str
    # NOTE(review): the source dataset's column is probably "IUCR"; the
    # existing name is kept because it is also the exported column name.
    IUSR: str
    primary_type: str
    description: str
    location_description: str
    arrest: str
    domestic: str
    beat: str
    district: str
    ward: str
    community_area: str
    fbi_code: str
    x_coordinate: str
    y_coordinate: str
    year: str
    updated_on: str
    latitude: str
    longitude: str

    def location(self) -> tuple:
        """Return the coordinates as a ``(longitude, latitude)`` pair."""
        return self.longitude, self.latitude

    def to_dict(self) -> dict:
        """Return the record as a plain dict suitable for a DataFrame row.

        The keys are the field names in declaration order, followed by a
        ``'location'`` key holding the ``(longitude, latitude)`` tuple.
        """
        record = dataclasses.asdict(self)
        # Call the method: storing ``self.location`` would put the bound
        # method object into the dict instead of the coordinate pair.
        record['location'] = self.location()
        return record

In [None]:
# Scrape the visible cells of the dashboard table, group them into Row
# records, print them, and export them to 'testingData2.csv'.
website = 'https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present-Dashboard/5cd6-ry5g'
# Raw string: '\D' is not a valid escape sequence and triggers a warning on
# recent Python versions. The runtime value is unchanged.
path = r'\Desktop'

# Each record consumes 22 consecutive cells: the first 21 are passed to Row
# and the 22nd is dropped (presumably a combined location column -- confirm
# against the page).
fields_per_row = 21

options = Options()
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36")
driver = webdriver.Chrome(options=options)

attrs = []
rows = []

# Everything that talks to the browser runs inside try/finally so the
# browser process is shut down even when scraping fails part-way.
try:
    driver.get(website)
    time.sleep(10)  # fixed pause, presumably to let the dashboard render
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)

    the_table = driver.find_element(By.CLASS_NAME, 'table-body')
    driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", the_table)
    scroll_increment = 20000
    scroll_script = f"arguments[0].scrollTop += {scroll_increment};"
    for _ in range(5):
        driver.execute_script(scroll_script, the_table)
        time.sleep(5)  # Wait for the new data to load

    time.sleep(60)
    # Look the cells up once, after the final wait, so the progress-bar total
    # always matches the list that is actually iterated.
    cells = driver.find_elements(By.CLASS_NAME, 'cell-content')
    total_elements = len(cells)

    pbar = tqdm(total=total_elements, desc='Progress', unit='col')
    try:
        for cell in cells:
            # Only the Selenium call is guarded. Row construction stays
            # outside the try so a field-count mismatch raises instead of
            # being silently swallowed.
            try:
                text = cell.text
            except WebDriverException:
                # Keep a placeholder so later cells stay in their columns;
                # dropping the cell would shift every following value.
                text = ''

            attrs.append(text)
            if len(attrs) > fields_per_row:
                attrs.pop()
                rows.append(Row(*attrs))
                attrs = []

            pbar.update(1)
    finally:
        pbar.close()
finally:
    driver.quit()

for row in rows:
    print(row)

df = pd.DataFrame([obj.to_dict() for obj in rows])
df.to_csv('testingData2.csv')