# This notebook gathers data from the NISA team standings tables
### (completed)

In [1]:
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [1]:
# Optional cell, stretches cell width for better readability.
# `display` is imported explicitly so the cell does not depend on the
# notebook front end injecting it as a global.
from IPython.display import HTML, display

# CSS rules that force the page container and the code cells to full width
custom_css = """
<style>
.container { width: 100% !important; }
.code_cell { flex-grow: 1; width: 100% !important; }
.code_cell .input_area { width: 100% !important; }
</style>
"""

# Displaying the HTML object emits the <style> block as this cell's output
display(HTML(custom_css))

In [3]:
def extract_standings_table(soup):
    """Parse the first HTML table found in ``soup`` into a DataFrame.

    The ``<th>`` cells of the first ``<tr>`` supply the column names; every
    following row becomes one record keyed by those names.

    Args:
        soup: Parsed page, e.g. a BeautifulSoup object (anything exposing the
            same ``find`` / ``find_all`` / ``get_text`` calls works).

    Returns:
        pandas.DataFrame with one row per table row after the header. Cell
        text is kept as strings; newline characters are removed and the
        ``TEAM`` column, when present, is stripped of surrounding whitespace.

    Raises:
        ValueError: If the page has no ``<table>`` or the table has no rows.
    """
    table = soup.find('table')
    if table is None:
        # Fail with a readable message instead of an AttributeError on None
        raise ValueError('No <table> element found in the page source')

    rows = table.find_all('tr')
    if not rows:
        raise ValueError('The standings table contains no rows')

    header_row, *body_rows = rows
    headers = [header.get_text(strip=True) for header in header_row.find_all('th')]

    # One dict per body row; body rows may mix <td> and <th> cells
    data = [
        dict(zip(headers, (cell.get_text(strip=True) for cell in row.find_all(['td', 'th']))))
        for row in body_rows
    ]

    team_standings_df = pd.DataFrame(data)

    # Cleaning up DataFrame
    team_standings_df = team_standings_df.replace(r'\n', '', regex=True)  # Removes newline characters
    if 'TEAM' in team_standings_df.columns:
        # Only clean the TEAM column when the table actually has one
        team_standings_df['TEAM'] = team_standings_df['TEAM'].str.strip()

    return team_standings_df

In [4]:
def create_df(csv_filename):
    """Scrape the standings table from the current page and save it as a CSV.

    Reads the page source from the module-level Selenium ``driver``, converts
    its first table into a DataFrame with ``extract_standings_table``, writes
    it (without the index) to ``data/<csv_filename>`` and prints the DataFrame.

    Args:
        csv_filename: Name of the output file inside the ``data`` folder,
            including the ``.csv`` extension.
    """
    soup = BS(driver.page_source, 'html.parser')
    team_standings_df = extract_standings_table(soup)

    # Creates a "data" folder if it doesn't exist; exist_ok avoids the
    # separate existence check, which could race with another process
    data_folder = 'data'
    os.makedirs(data_folder, exist_ok=True)

    # Saves the dataframe locally to a csv
    csv_filepath = os.path.join(data_folder, csv_filename)
    team_standings_df.to_csv(csv_filepath, index=False)

    print(team_standings_df)

In [5]:
def _find_option_id(desired_table, year):
    """Return the element id of the dropdown entry for ``desired_table`` under ``year``, or None."""
    for option in driver.find_elements(By.CSS_SELECTOR, 'li.select2-results__option'):
        # Year headings are the elements with class "select2-results__group"
        year_groups = option.find_elements(By.CLASS_NAME, 'select2-results__group')
        if not any(group.get_attribute('innerText') == year for group in year_groups):
            continue

        # This option belongs to the requested year: look at its nested entries
        for nested_option in option.find_elements(By.CLASS_NAME, 'select2-results__option'):
            if desired_table in nested_option.get_attribute('innerText'):
                option_id = nested_option.get_attribute('id')
                if option_id:
                    return option_id
                # First match had no usable id: move on to the next top-level option
                break
    return None


def download_table(desired_table, year, file_name):
    """Select a standings table in the open dropdown and save it via ``create_df``.

    Uses the module-level Selenium ``driver``; the season dropdown must already
    be open (the setup cell clicks it). The driver is always quit afterwards,
    so the setup cell has to be re-run before the next download.

    Args:
        desired_table: Text that must appear in the dropdown entry, e.g.
            ``'Independent Cup'``.
        year: Exact text of the year heading the entry sits under, e.g. ``'2021'``.
        file_name: CSV file name passed on to ``create_df``.
    """
    try:
        option_id = _find_option_id(desired_table, year)
        if not option_id:
            print(f"No option found for {desired_table} in {year}")
            return

        # Locate by id rather than a '#id' CSS selector, which fails when the
        # id contains characters that are not valid in a CSS identifier
        table_select = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.ID, option_id))
        )
        table_select.click()

        create_df(file_name)
    finally:
        # Close the browser even if the wait, the click or the CSV export fails
        driver.quit()

### Run the below cell to see all the tables you can download.
#### (Independent cups for 2023 and 2022 are currently unavailable on the NISA website)

In [9]:
# This may need to be changed based on the file path to your chrome application.
# Raw string: the backslashes of the Windows path must not be read as escape sequences.
chrome_driver_path = r'C:\Program Files (x86)\Google\Chrome\Application\chrome'

# Starting Chrome with Selenium
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service)

try:
    # Navigating to the page
    URL = 'https://nisaofficial.com/standings'
    driver.get(URL)

    # Interacting with the page to get to the dropdown selector
    button1 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#selectSeason')))
    button1.click()

    button2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#select2-year-container')))
    button2.click()

    # Extracting the HTML code of the dropdown options
    dropdown_html_code = driver.find_element(By.CLASS_NAME, 'select2-results').get_attribute('outerHTML')

    # Finding all the options in the dropdown selector
    dropdown_soup = BS(dropdown_html_code, 'html.parser')
    options = dropdown_soup.select('li.select2-results__option')

    # Prints the text of every option, one per line
    for option in options:
        print(option.text)
finally:
    # Always close the browser, even when a wait times out or an element is missing
    driver.quit()

2019-2020Fall ShowcaseSpring Season
Fall Showcase
Spring Season
2020-2021Fall ChampionshipFall SeasonIndependent CupLegends CupSpring Season
Fall Championship
Fall Season
Independent Cup
Legends Cup
Spring Season
2021Independent CupFall Season
Independent Cup
Fall Season
20222022 Season2022 Independent Cup
2022 Season
2022 Independent Cup
20232023 Season2023 Independent Cup
2023 Season
2023 Independent Cup


### Table Selection
#### Run both cells for every table you want to download. All you need to change are the arguments in the 'download_table()' function. The first argument is the table you want to download, the second argument is the year you want to pull a table from, and the third is what you want the file to be called. You will need to end the file name with '.csv', and the file will be downloaded into a directory called 'data'. You do not have to create this directory yourself; the 'create_df()' function will do this for you. Finally, make sure your arguments are enclosed in apostrophes or quotation marks.

In [7]:
# This may need to be changed based on the file path to your chrome application.
# Raw string: the backslashes of the Windows path must not be read as escape sequences.
chrome_driver_path = r'C:\Program Files (x86)\Google\Chrome\Application\chrome'

# Starting Chrome with Selenium; the driver is left open on purpose because
# the following download_table() cell uses it and quits it when done
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service)

# Navigating to the page
URL = 'https://nisaofficial.com/standings'
driver.get(URL)

# Interacting with the page to get to the dropdown selector
button1 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#selectSeason')))
button1.click()

button2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#select2-year-container')))
button2.click()

In [8]:
# Saves the 2021 "Independent Cup" standings to data/2021_IndependentCup.csv
download_table('Independent Cup', '2021', '2021_IndependentCup.csv')

     P                      TEAM GP  W  D  L H (W-D-L) A (W-D-L) Latest  GF  \
0    1           Detroit City FC  3  3  0  0     2-0-0     1-0-0    WWW  17   
1    2      FC Milwaukee Torrent  3  3  0  0     2-0-0     1-0-0    WWW  11   
2    3            Chattanooga FC  3  3  0  0     2-0-0     1-0-0    WWW  10   
3    4       Maryland Bobcats FC  3  2  1  0     1-1-0     1-0-0    WWT   2   
4    5        Louisiana Krewe II  3  2  1  0     2-0-0     0-1-0    TWW   2   
5    6                FC Buffalo  3  2  0  1     2-0-0     0-0-1    WWL  11   
6    7    Cal United Strikers FC  2  2  0  0     1-0-0     1-0-0     WW   5   
7    8         Los Angeles Force  3  2  0  1     1-0-0     1-0-1    WLW   5   
8    9      Lansdowne Yonkers FC  2  2  0  0     1-0-0     1-0-0     WW   3   
9   10            Steel Pulse FC  3  1  2  0     1-1-0     0-1-0    WTT   4   
10  11          Chicago House AC  3  1  1  1     0-0-0     1-1-1    WTL   6   
11  12      Magia Futbol Academy  3  1  1  1     0-0