# This notebook gathers data from the NISA team standings tables

In [1]:
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# Optional cell: stretches the notebook's code cells to the full page width
# so that wide DataFrame printouts are easier to read.
# `display` is imported explicitly (instead of relying on the name IPython
# injects into notebooks) and both names come from the public
# `IPython.display` module rather than the internal `IPython.core.display`.
from IPython.display import HTML, display

custom_css = """
<style>
.container { width: 100% !important; }
.code_cell { flex-grow: 1; width: 100% !important; }
.code_cell .input_area { width: 100% !important; }
</style>
"""

display(HTML(custom_css))

In [3]:
def extract_standings_table(soup):
    """Build a standings DataFrame from the first ``<table>`` found in ``soup``.

    The first ``<tr>`` supplies the column names (its ``<th>`` cells). Every
    following row is paired with those names positionally; ``zip`` silently
    drops surplus cells or surplus headers when the lengths differ.

    Args:
        soup: Parsed page (a BeautifulSoup object); only ``soup.find('table')``
            is called on it.

    Returns:
        pandas.DataFrame with one row per table row after the header row.
        Cell text is kept as text with newline characters removed; the
        ``TEAM`` column, when present, is additionally whitespace-stripped.

    Raises:
        ValueError: If the page contains no ``<table>`` or the table has no
            ``<tr>`` rows (for example when the page has not finished loading).
    """
    table = soup.find('table')
    if table is None:
        raise ValueError('No <table> element found in the page source')

    rows = table.find_all('tr')
    if not rows:
        raise ValueError('The standings table contains no rows')

    # The header row names the columns; the remaining rows hold the data
    header_row, *body_rows = rows
    headers = [header.get_text(strip=True) for header in header_row.find_all('th')]

    # Extract the table data into a list of dictionaries (one per row)
    data = []
    for row in body_rows:
        values = (cell.get_text(strip=True) for cell in row.find_all(['td', 'th']))
        data.append(dict(zip(headers, values)))

    # Creating DataFrame
    team_standings_df = pd.DataFrame(data)

    # Cleaning up DataFrame
    # get_text(strip=True) only trims the ends of each text fragment, so
    # newlines embedded inside a cell still have to be removed here.
    team_standings_df = team_standings_df.replace(r'\n', '', regex=True)
    if 'TEAM' in team_standings_df.columns:
        # Strips leading/trailing spaces left over after newline removal
        team_standings_df['TEAM'] = team_standings_df['TEAM'].str.strip()

    return team_standings_df

In [4]:
def create_df(csv_filename):
    """Scrape the standings table from the current page and save it as a CSV.

    Parses the page source of the notebook-level Selenium ``driver``, converts
    the table with ``extract_standings_table``, writes the result to
    ``data/<csv_filename>`` (without the index column) and prints the
    DataFrame.

    Args:
        csv_filename: File name, including the ``.csv`` extension, to create
            inside the ``data`` folder.
    """
    soup = BS(driver.page_source, 'html.parser')

    team_standings_df = extract_standings_table(soup)

    # Creates a "data" folder if it doesn't exist; exist_ok avoids the
    # check-then-create race of testing os.path.exists first
    data_folder = 'data'
    os.makedirs(data_folder, exist_ok=True)

    # Saves the dataframe locally to a csv
    csv_filepath = os.path.join(data_folder, csv_filename)
    team_standings_df.to_csv(csv_filepath, index=False)

    print(team_standings_df)

In [5]:
def _find_table_option_id(desired_table, year):
    """Return the element id of the dropdown entry for ``desired_table`` listed under ``year``, or None."""
    # Every <li> of the open dropdown: year groups as well as their nested entries
    for option in driver.find_elements(By.CSS_SELECTOR, 'li.select2-results__option'):
        # Year groups carry their label in a "select2-results__group" child
        group_labels = [
            group.get_attribute('innerText')
            for group in option.find_elements(By.CLASS_NAME, 'select2-results__group')
        ]
        if year not in group_labels:
            continue

        # Within the matching year group, look for the requested table
        for nested_option in option.find_elements(By.CLASS_NAME, 'select2-results__option'):
            if desired_table in nested_option.get_attribute('innerText'):
                option_id = nested_option.get_attribute('id')
                if option_id:
                    return option_id
                # First text match has no usable id: give up on this group
                # and carry on with the next top-level option
                break
    return None


def download_table(desired_table, year, file_name):
    """Select a standings table in the open dropdown and save it as a CSV.

    Expects the notebook-level ``driver`` to be on the standings page with the
    table dropdown already opened. Prints a message when no matching entry is
    found. The browser is always closed before returning, including when a
    wait times out or scraping raises, so no Chrome process is left behind.

    Args:
        desired_table: Text of the dropdown entry to pick (substring match).
        year: Exact label of the year group that contains the entry.
        file_name: CSV file name passed on to ``create_df``.
    """
    try:
        dynamic_part = _find_table_option_id(desired_table, year)

        if not dynamic_part:
            print(f"No option found for {desired_table} in {year}")
            return

        # Locate by id rather than an f-string '#id' CSS selector, which
        # breaks for ids containing characters that need CSS escaping
        table_select = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.ID, dynamic_part))
        )
        table_select.click()

        create_df(file_name)
    finally:
        driver.quit()

### Run the cell below to see all the tables you can download.
#### (Independent Cups for 2023 and 2022 are currently unavailable on the NISA website)

In [6]:
# Starting Chrome with Selenium
driver = webdriver.Chrome()

try:
    # Navigating to the page
    URL = 'https://nisaofficial.com/standings'
    driver.get(URL)

    # Interacting with the page to get to the dropdown selector
    button1 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#selectSeason')))
    button1.click()

    button2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#select2-year-container')))
    button2.click()

    # Extracting the HTML code of the dropdown options
    dropdown_html_code = driver.find_element(By.CLASS_NAME, 'select2-results').get_attribute('outerHTML')
finally:
    # Close the browser even if a wait times out or an element is missing,
    # so a failed run does not leave a Chrome process behind
    driver.quit()

# Finding all the options in the dropdown selector; the browser is no longer
# needed because the dropdown's HTML has already been captured
dropdown_soup = BS(dropdown_html_code, 'html.parser')
options = dropdown_soup.select('li.select2-results__option')

for option in options:
    print(option.text)

2019-2020Fall ShowcaseSpring Season
Fall Showcase
Spring Season
2020-2021Fall ChampionshipFall SeasonIndependent CupLegends CupSpring Season
Fall Championship
Fall Season
Independent Cup
Legends Cup
Spring Season
2021Independent CupFall Season
Independent Cup
Fall Season
20222022 Season2022 Independent Cup
2022 Season
2022 Independent Cup
20232023 Season2023 Independent Cup
2023 Season
2023 Independent Cup


### Table Selection
#### Run both cells for every table you want to download. All you need to change are the arguments of the 'download_table()' function. The first argument is the table you want to download, the second argument is the year you want to pull the table from, and the third is the name you want to give the file. The file name must end with '.csv', and the file will be saved into a directory called 'data'. You do not have to create this directory yourself; the 'create_df()' function will do this for you. Finally, make sure your arguments are enclosed in apostrophes or quotation marks.

In [7]:
# Launch a fresh Chrome session and load the NISA standings page.
# The browser is intentionally left open: the next cell's download_table()
# call uses this `driver` and closes it when it is done.
URL = 'https://nisaofficial.com/standings'
driver = webdriver.Chrome()
driver.get(URL)

# Open the season picker first, then the year dropdown inside it, waiting up
# to 15 seconds for each control to become clickable
season_picker = (By.CSS_SELECTOR, '#selectSeason')
button1 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable(season_picker))
button1.click()

year_dropdown = (By.CSS_SELECTOR, '#select2-year-container')
button2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable(year_dropdown))
button2.click()

In [8]:
# Pick the table, the year group it is listed under, and the output file name
download_table(
    desired_table='2023 Season',
    year='2023',
    file_name='2023_standings.csv',
)

   P                    TEAM  GP   W  D   L H (W-D-L) A (W-D-L) Latest  GF  \
0  1          Chattanooga FC  17  12  5   0     8-3-0     4-2-0  WTWWT  33   
1  2     Maryland Bobcats FC  19   9  2   8     5-1-4     4-1-4  WWLLW  26   
2  3        ALBION San Diego  17   8  5   4     4-4-1     4-1-3  TTWLW  19   
3  4       Los Angeles Force  17   8  4   5     5-2-0     3-2-5  LWTTW  23   
4  5       Michigan Stars FC  15   7  5   3     4-2-2     3-3-1  WTTLW  18   
5  6         Club de Lyon FC  16   5  4   7     2-1-3     3-3-4  TTLLL  19   
6  7       Flower City Union  18   5  2  11     3-1-5     2-1-6  LLLTT  17   
7  8    Gold Star FC Detroit  16   3  2  11     1-1-6     2-1-5  TLLTL  17   
8  9     Savannah Clovers FC  17   2  5  10     0-2-6     2-3-4  TTTWL  10   

   GA   GD PTS  
0   6   27  41  
1  18    8  29  
2  16    3  29  
3  17    6  28  
4  12    6  26  
5  29  -10  19  
6  25   -8  17  
7  25   -8  11  
8  34  -24  11  


In [9]:
# Parallel lists: entry k of each list describes one table to download
season = ['Spring Season', 'Fall Season', 'Spring Season', 'Fall Season', '2022 Season', '2023 Season']
year = ['2019-2020', '2020-2021', '2020-2021', '2021', '2022', '2023']
# NOTE(review): '2020_fall_standing.csv' lacks the trailing "s" used by the
# other names; left unchanged in case other files read it under this name.
file_name = ['2020_spring_standings.csv', '2020_fall_standing.csv', '2021_spring_standings.csv', '2021_fall_standings.csv', '2022_standings.csv', '2023_standings.csv']

URL = 'https://nisaofficial.com/standings'

# Loop through the years and download tables
for season_name, year_label, csv_name in zip(season, year, file_name):
    print(f'\nLeague Standings for {year_label}')

    # A new browser per table: download_table() closes the driver when done
    driver = webdriver.Chrome()

    try:
        driver.get(URL)

        # Interacting with the page to get to the dropdown selector
        button1 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#selectSeason')))
        button1.click()

        button2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#select2-year-container')))
        button2.click()
    except Exception:
        # Setup failed before download_table() could take over closing the
        # browser, so close it here and let the error surface
        driver.quit()
        raise

    download_table(season_name, year_label, csv_name)


League Standings for 2019-2020
   P                      TEAM GP  W  D  L H (W-D-L) A (W-D-L) Latest GF GA  \
0  1          Oakland Roots SC  2  1  1  0     1-1-0     0-0-0     WT  3  2   
1  2    Cal United Strikers FC  2  1  1  0     1-0-0     0-1-0     TW  1  0   
2  3           Detroit City FC  1  1  0  0     0-0-0     1-0-0      W  2  0   
3  4        Stumptown Athletic  2  0  2  0     0-1-0     0-1-0     TT  3  3   
4  5          ALBION San Diego  2  0  2  0     0-1-0     0-1-0     TT  2  2   
5  6            Chattanooga FC  1  0  1  0     0-0-0     0-1-0      T  1  1   
6  7         Los Angeles Force  2  0  1  1     0-1-1     0-0-0     TL  1  3   
7  8         Michigan Stars FC  2  0  0  2     0-0-0     0-0-2     LL  1  3   

   GD PTS  
0   1   4  
1   1   4  
2   2   3  
3   0   2  
4   0   2  
5   0   1  
6  -2   1  
7  -2   0  

League Standings for 2020-2021
   P                      TEAM GP  W  D  L H (W-D-L) A (W-D-L) Latest GF  GA  \
0  1            Chattanooga FC  4  3