### This is a snippet of the table that I am scraping.
![image](table_Screenshot.png)

In [1]:
# Import Libraries
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

### Web Scraping: Extracting Table Rows

In [2]:
# Initialize the WebDriver (SeleniumManager will handle the driver for you)
driver = webdriver.Chrome()

# Store all finisher rows; each entry is the list of cell texts of one <tr>
all_rows = []

try:
    # Implicit wait: element lookups poll for up to 10 seconds before failing.
    # It is a lookup timeout (not a sleep), so set it before navigating.
    driver.implicitly_wait(10)

    # Navigate to the webpage
    url = 'https://ultrasignup.com/results_event.aspx?did=102259'
    driver.get(url)

    # Locate the table by its class name
    table = driver.find_element(By.CLASS_NAME, 'ui-jqgrid-btable')

    # Extract rows and cells
    rows = table.find_elements(By.TAG_NAME, 'tr')

    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        row_data = [cell.text for cell in cells]

        # Filter for valid finisher rows: first cell reads "results" and the
        # second cell (place) is not "0". The length check prevents an
        # IndexError on a row that has fewer than two cells.
        if len(row_data) > 1 and row_data[0] == "results" and row_data[1] != "0":
            all_rows.append(row_data)
finally:
    # Always close the browser, even when a lookup above raises
    driver.quit()

In [3]:
# Preview the first two scraped rows
all_rows[0:2]

[['results',
  '1',
  'Daniel',
  'Wilson',
  'Tulsa',
  'OK',
  '35',
  'M',
  '1',
  '8:23:01',
  '68.01',
  '',
  '',
  '',
  ''],
 ['results',
  '2',
  'Eric',
  'Davis',
  'Greenwood',
  'IN',
  '38',
  'M',
  '2',
  '8:57:54',
  '92.17',
  '',
  '',
  '',
  '']]

In [4]:
# Build a DataFrame from the scraped rows (one list of cell texts per row);
# no column names are given, so columns get default integer labels
df = pd.DataFrame(data=all_rows)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,results,1,Daniel,Wilson,Tulsa,OK,35,M,1,8:23:01,68.01,,,,
1,results,2,Eric,Davis,Greenwood,IN,38,M,2,8:57:54,92.17,,,,
2,results,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,3,9:24:35,89.34,,,,
3,results,4,Ron,Hammett,Montverde,FL,53,M,4,9:24:36,82.28,,,,
4,results,5,Seth,Cain,Geneva,FL,44,M,5,9:42:17,76.17,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,results,105,David,Buning,Winter Park,FL,49,M,82,15:03:26,50.21,,,,
105,results,106,Kelly,Stoner Baker,Woodstock,GA,53,F,24,15:09:19,68.46,,,,
106,results,107,Chad,Ward,Woodstock,GA,39,M,83,15:10:30,61.92,,,,
107,results,108,Dennis,Stadelman,Cicero,NY,62,M,84,15:20:26,55.87,,,,


In [5]:
# Keep only columns 1-10 (place through rank): column 0 holds the "results"
# marker and columns 11-14 are empty.
# Rebuild from all_rows instead of re-slicing df so this cell is idempotent:
# re-running `df = df.iloc[:, 1:11]` would drop one more column each time.
df = pd.DataFrame(all_rows).iloc[:, 1:11]
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
0,1,Daniel,Wilson,Tulsa,OK,35,M,1,8:23:01,68.01
1,2,Eric,Davis,Greenwood,IN,38,M,2,8:57:54,92.17
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,3,9:24:35,89.34
3,4,Ron,Hammett,Montverde,FL,53,M,4,9:24:36,82.28
4,5,Seth,Cain,Geneva,FL,44,M,5,9:42:17,76.17
...,...,...,...,...,...,...,...,...,...,...
104,105,David,Buning,Winter Park,FL,49,M,82,15:03:26,50.21
105,106,Kelly,Stoner Baker,Woodstock,GA,53,F,24,15:09:19,68.46
106,107,Chad,Ward,Woodstock,GA,39,M,83,15:10:30,61.92
107,108,Dennis,Stadelman,Cicero,NY,62,M,84,15:20:26,55.87


### Web Scraping: Extracting Table Headers

In [6]:
# Initialize the WebDriver (SeleniumManager will handle the driver for you)
driver = webdriver.Chrome()

try:
    # Implicit wait: element lookups poll for up to 10 seconds before failing.
    # It is a lookup timeout (not a sleep), so set it before navigating.
    driver.implicitly_wait(10)

    # Navigate to the webpage
    url = 'https://ultrasignup.com/results_event.aspx?did=102259'
    driver.get(url)

    # Find the table with class 'ui-jqgrid-htable'
    table_h = driver.find_element(By.CLASS_NAME, 'ui-jqgrid-htable')

    # Extract the text of every header cell (th tags) from the table
    headers = [th.text for th in table_h.find_elements(By.TAG_NAME, 'th')]
finally:
    # Always close the browser window, even when a lookup above raises
    driver.quit()

# Print result
headers

[' ',
 ' Place',
 ' First',
 ' Last',
 ' City',
 ' ',
 ' Age',
 ' Division',
 ' DP',
 ' Time',
 ' Rank',
 '',
 '',
 '',
 '']

In [7]:
# Keep only the header labels at positions 1 through 10, matching the
# columns kept in the DataFrame
first_col, last_col = 1, 10
headers = headers[first_col:last_col + 1]
headers

[' Place',
 ' First',
 ' Last',
 ' City',
 ' ',
 ' Age',
 ' Division',
 ' DP',
 ' Time',
 ' Rank']

In [8]:
# The one header that is a single blank character gets the label 'State'
headers = [col if col != ' ' else 'State' for col in headers]
headers

[' Place',
 ' First',
 ' Last',
 ' City',
 'State',
 ' Age',
 ' Division',
 ' DP',
 ' Time',
 ' Rank']

In [9]:
# Use the cleaned 'headers' list as the DataFrame's column labels
df.columns = pd.Index(headers)
df.columns

Index([' Place', ' First', ' Last', ' City', 'State', ' Age', ' Division',
       ' DP', ' Time', ' Rank'],
      dtype='object')

In [10]:
# Trim leading/trailing whitespace from every column label
df.columns = df.columns.map(str.strip)
df.columns

Index(['Place', 'First', 'Last', 'City', 'State', 'Age', 'Division', 'DP',
       'Time', 'Rank'],
      dtype='object')

### Extracted Table

In [11]:
# Display the table with its final column names
df

Unnamed: 0,Place,First,Last,City,State,Age,Division,DP,Time,Rank
0,1,Daniel,Wilson,Tulsa,OK,35,M,1,8:23:01,68.01
1,2,Eric,Davis,Greenwood,IN,38,M,2,8:57:54,92.17
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,3,9:24:35,89.34
3,4,Ron,Hammett,Montverde,FL,53,M,4,9:24:36,82.28
4,5,Seth,Cain,Geneva,FL,44,M,5,9:42:17,76.17
...,...,...,...,...,...,...,...,...,...,...
104,105,David,Buning,Winter Park,FL,49,M,82,15:03:26,50.21
105,106,Kelly,Stoner Baker,Woodstock,GA,53,F,24,15:09:19,68.46
106,107,Chad,Ward,Woodstock,GA,39,M,83,15:10:30,61.92
107,108,Dennis,Stadelman,Cicero,NY,62,M,84,15:20:26,55.87


In [12]:
# Summarize the index range, per-column non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 109 entries, 0 to 108
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Place     109 non-null    object
 1   First     109 non-null    object
 2   Last      109 non-null    object
 3   City      109 non-null    object
 4   State     109 non-null    object
 5   Age       109 non-null    object
 6   Division  109 non-null    object
 7   DP        109 non-null    object
 8   Time      109 non-null    object
 9   Rank      109 non-null    object
dtypes: object(10)
memory usage: 8.6+ KB


In [13]:
# Every column is stored as object (text), so describe() reports
# count / unique / top / freq rather than numeric statistics
df.describe()

Unnamed: 0,Place,First,Last,City,State,Age,Division,DP,Time,Rank
count,109,109,109,109,109,109,109,109,109,109.0
unique,109,88,104,88,23,37,2,85,109,107.0
top,1,Matthew,Wilson,Woodstock,FL,35,M,1,8:23:01,66.29
freq,1,4,2,4,52,6,85,2,1,2.0


### Save table 

In [14]:
# Build the destination from the current user's home directory instead of a
# hard-coded absolute path, so the notebook is not tied to one account.
# For the original user this resolves to the same Documents\LOOKER folder.
output_path = Path.home() / 'Documents' / 'LOOKER' / 'Race_50.csv'

# Create the target folder if it does not exist yet, so the write cannot fail
# on a missing directory
output_path.parent.mkdir(parents=True, exist_ok=True)

# Write the table without the DataFrame index column
df.to_csv(output_path, index=False)