# This notebook gathers data of NISA player stats tables

In [None]:
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

In [None]:
# Optional cell, stretches cell width for better readability.
# `display` is imported explicitly (instead of relying on the notebook kernel
# injecting it) and both names come from the public `IPython.display` module.
from IPython.display import HTML, display

# CSS overrides applied to the notebook page: container and code cells are
# forced to the full page width.
custom_css = """
<style>
.container { width: 100% !important; }
.code_cell { flex-grow: 1; width: 100% !important; }
.code_cell .input_area { width: 100% !important; }
</style>
"""

display(HTML(custom_css))

In [None]:
def extract_stats_table(soup):
    """Parse the first ``<table>`` found in *soup* into a DataFrame.

    Column names are taken from the ``<th>`` cells of the first ``<tr>`` in
    the table's ``<thead>``. Every ``<tr>`` in the ``<tbody>`` becomes one
    row, with its ``<td>`` texts paired to the headers by position.

    Args:
        soup: Parsed page (e.g. a BeautifulSoup object) exposing ``find``.

    Returns:
        pandas.DataFrame with one row per body row. Values are the
        whitespace-stripped cell strings; no type conversion is done here.

    Raises:
        ValueError: If the page has no ``<table>``, or the table has no
            header row or no ``<tbody>``.
    """
    table = soup.find('table')
    if table is None:
        raise ValueError('No <table> element found in the page source.')

    thead = table.find('thead')
    header_row = thead.find('tr') if thead is not None else None
    tbody = table.find('tbody')
    if header_row is None or tbody is None:
        raise ValueError('Stats table is missing its <thead> row or its <tbody>.')

    # Extract the table headers from the header row
    headers = [th.get_text(strip=True) for th in header_row.find_all('th')]

    # Extract the table data from the data rows. zip() pairs by position and
    # stops at the shorter side, so extra cells or extra headers are dropped.
    data = [
        dict(zip(headers, (td.get_text(strip=True) for td in row.find_all('td'))))
        for row in tbody.find_all('tr')
    ]

    # Passing the columns explicitly keeps the header names even when the
    # table body is empty; dict.fromkeys() de-duplicates repeated header
    # texts while preserving their order.
    return pd.DataFrame(data, columns=list(dict.fromkeys(headers)))

In [None]:
def create_df(csv_filename):
    """Scrape the table currently shown in the browser and save it as a CSV.

    Reads the page source from the module-level ``driver`` (which must
    already exist and be on the desired page), parses it with
    ``extract_stats_table``, writes the result to ``data/<csv_filename>``
    and prints the DataFrame.

    Args:
        csv_filename: File name (including extension) to write inside the
            ``data`` folder.
    """
    soup = BS(driver.page_source, 'html.parser')

    player_stats_df = extract_stats_table(soup)

    # Create the "data" folder if it doesn't exist; exist_ok avoids the
    # separate existence check.
    data_folder = 'data'
    os.makedirs(data_folder, exist_ok=True)

    # Save the dataframe locally to a csv
    csv_filepath = os.path.join(data_folder, csv_filename)
    player_stats_df.to_csv(csv_filepath, index=False)

    print(player_stats_df)

In [None]:
def _choose_dropdown_option(container_id, option_text, timeout=15):
    """Open one drop-down on the page and click the entry with the given text.

    Uses the module-level ``driver``. Both the drop-down button and the
    option are waited for until clickable, for up to *timeout* seconds each.
    """
    wait = WebDriverWait(driver, timeout)

    # Click the button that opens the drop-down
    button = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, f'span#{container_id}'))
    )
    button.click()

    # Click the list entry whose text matches exactly
    option = wait.until(
        EC.element_to_be_clickable((By.XPATH, f'//li[text()="{option_text}"]'))
    )
    option.click()


def download_table(desired_year, desired_season, desired_type, file_name):
    """Select year, table type and season on the page, then save the table.

    Operates on the module-level ``driver``, which must already be on the
    player-stats page. The table is written by ``create_df`` to
    ``data/<file_name>_<desired_year>.csv`` (dashes in the year replaced by
    underscores). The browser is always closed afterwards, even if a step
    fails, so a new ``driver`` is needed for every call.

    Args:
        desired_year: Text of the year option to select (e.g. ``'2023'``).
        desired_season: Text of the season option to select.
        desired_type: Text of the table-type option to select.
        file_name: Prefix of the CSV file name; the year and ``.csv`` are
            appended automatically.
    """
    try:
        # Same order as the page expects here: year, then type, then season
        _choose_dropdown_option('select2-year-container', desired_year)
        _choose_dropdown_option('select2-type-container', desired_type)
        _choose_dropdown_option('select2-season-container', desired_season)

        # Make sure a table is present before reading the page source.
        # NOTE(review): this does not prove the table has finished refreshing
        # for the new filters - confirm against the live site.
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, 'table'))
        )

        # Call create_df function to download the table data
        create_df(f"{file_name}_{desired_year.replace('-','_')}.csv")
    finally:
        # Close the browser even when a selection or the scrape fails
        driver.quit()

In [None]:
def _print_dropdown_options(driver, title, container_css, select_css, timeout=15):
    """Open a drop-down and print *title* followed by its option texts.

    The drop-down button (*container_css*) is clicked through JavaScript,
    then the ``<option>`` elements of the matching ``<select>``
    (*select_css*) are printed one per line.
    """
    wait = WebDriverWait(driver, timeout)

    # Wait for the button to be clickable, then click it via JavaScript
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, container_css)))
    driver.execute_script("arguments[0].click();", button)

    # Wait for the underlying <select> element to be present
    dropdown = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, select_css)))

    # Find the drop-down options and print their text
    print(title)
    for option in dropdown.find_elements(By.TAG_NAME, 'option'):
        print(option.text)


def print_options_for_year(desired_year):
    """Print the table-type and season filters offered for *desired_year*.

    Opens its own Chrome session on the player-stats page, selects the year,
    prints the options of the type and season drop-downs, and always closes
    the browser afterwards, even if a step fails.

    Args:
        desired_year: Text of the year option to select (e.g. ``'2023'``).
    """
    driver = webdriver.Chrome()

    try:
        # Navigating to the page
        URL = 'https://nisaofficial.com/player-stats'
        driver.get(URL)

        wait = WebDriverWait(driver, 15)

        # Wait for and click the button that opens the year drop-down
        year_button = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'span#select2-year-container'))
        )
        year_button.click()

        # Find and click the specific year option in the drop-down
        year_option = wait.until(
            EC.element_to_be_clickable((By.XPATH, f'//li[text()="{desired_year}"]'))
        )
        year_option.click()

        _print_dropdown_options(
            driver, 'TABLE TYPE OPTIONS:', 'span#select2-type-container', 'select#type'
        )

        print()
        _print_dropdown_options(
            driver, 'SEASON OPTIONS:', 'span#select2-season-container', 'select#season'
        )
    finally:
        # Close the browser even when one of the waits times out
        driver.quit()

## Run the below cell to see all the available years.

In [None]:
# Start Chrome with Selenium
driver = webdriver.Chrome()

try:
    # Navigate to the page with player stats
    player_stats_url = 'https://nisaofficial.com/player-stats'
    driver.get(player_stats_url)

    # Interact with the first button (year selection)
    year_button = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.ID, 'select2-year-container')))
    year_button.click()

    # Get the available year options from the drop-down menu. Waiting for the
    # entries avoids reading an empty list before the drop-down has rendered.
    year_options = WebDriverWait(driver, 15).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'li.select2-results__option'))
    )
    years = [option.text for option in year_options]
    print(years)
finally:
    # Quit the browser, even if one of the steps above failed
    driver.quit()

## Run the below cell to see the filters you can choose based on the year you're interested in.

In [None]:
# Print the table-type and season options offered for the 2023 year filter
print_options_for_year('2023')

## Table Selection
### Run both cells for every table you want to download (for some reason it doesn't like it all in one cell). All you need to change are the arguments in the 'download_table()' function. The first argument is the year you want to pull the table from. The second and third arguments are the season and the table type (the available options come from the 'print_options_for_year()' function). The final argument is what you want the file to be called. Do not add '.csv' yourself: the function appends the year and the '.csv' extension to the name, and the file is saved into a directory called 'data'. You do not have to create this directory yourself; the 'create_df()' function will do this for you. Finally, make sure your arguments are enclosed in apostrophes or quotation marks.

In [None]:
# Address of the NISA player-stats page
URL = 'https://nisaofficial.com/player-stats'

# Launch a Chrome session through Selenium and open that page
driver = webdriver.Chrome()
driver.get(URL)

In [None]:
# Arguments: year, season, table type, file-name prefix.
# download_table() appends the year and '.csv' to the prefix, so this call
# saves the table as data/all_playerstat_2023_2023.csv.
download_table('2023', '2023 Season', 'All', 'all_playerstat_2023')

## Below will retrieve all player stats

In [None]:
# Three parallel lists, one entry per table to download:
#   years  - text of the year option to select on the site
#   year   - calendar year used to label the data
#   season - text of the season option to select on the site
years = ['2019-2020', '2020-2021', '2020-2021', '2021', '2022', '2023']
year = ['2020', '2020', '2021', '2021', '2022', '2023']
season = ['Spring Season', 'Fall Season', 'Spring Season', 'Fall Season', '2022 Season', '2023 Season']

# Loop through the years and download tables
for site_year, label_year, season_name in zip(years, year, season):
    print(f'\nPlayer Stats for {label_year} {season_name}')

    # download_table() works on the global `driver` and quits it when done,
    # so a fresh browser session is started for every table.
    driver = webdriver.Chrome()

    URL = 'https://nisaofficial.com/player-stats'
    driver.get(URL)

    download_table(site_year, season_name, 'All', f'all_playerstat_{season_name.replace(" ", "_")}')

## Below will concatenate all the tables into one, provided you used the same file naming as above.

In [None]:
# Load every per-season CSV written by the download loop, tag each row with
# its calendar year and season, and stack the tables into one DataFrame.
data_frames = []

for site_year, label_year, season_name in zip(years, year, season):
    # Rebuild the file name the same way the download step composed it
    file = f'all_playerstat_{season_name.replace(" ", "_")}_{site_year.replace("-","_")}'
    df = pd.read_csv(os.path.join('data', f'{file}.csv'))

    df['Year'] = label_year
    df['Season'] = season_name

    data_frames.append(df)

# ignore_index=True gives the combined table one continuous index instead of
# repeating each file's own 0..n labels.
complete_playerstats_df = pd.concat(data_frames, ignore_index=True)

In [None]:
# Mapping from the abbreviated column headers to descriptive names
column_names_update = {
    '#': 'Jersey Number',
    'Pos': 'Position',
    'GP': 'Games Played',
    'GS': 'Games Started',
    'MIN': 'Minutes Played',
    'G': 'Goals',
    'A': 'Assists',
    'SH': 'Shots',
    'Y': 'Yellow Cards',
    'R': 'Red Cards',
}


# Changing team abbreviations to their full names (replace() is used because
# mapping didn't work); abbreviations not listed here are left unchanged.
complete_playerstats_df['Team'] = complete_playerstats_df['Team'].replace({
    'OAK': 'Oakland Roots SC',
    'STP': 'Stumptown Athletic',
    'DCFC': 'Detroit City FC',
    'CUSFC': 'Cal United Strikers FC',
    'CFC': 'Chattanooga FC',
    'ALB': 'ALBION San Diego',  # NOTE(review): flagged by the original author - confirm this name
    'LAF': 'Los Angeles Force',
    'MICH': 'Michigan Stars FC',
    'NYC': 'New York Cosmos',
    'NAFC': 'New Amsterdam FC',
    'SAC': 'Stumptown AC',
    'MBFC': 'Maryland Bobcats FC',
    'CHAC': 'Chicago House AC',
    'ACSP': 'Syracuse Pulse',
    'FCU': 'Flower City Union',
    'BCFC': 'Bay Cities FC',
    'CDL': 'Club de Lyon FC',
    'GSFC': 'Gold Star FC Detroit',
    'SAV': 'Savannah Clovers FC',
})

complete_playerstats_df = complete_playerstats_df.rename(columns=column_names_update)

# New column 'Goal Shot Ratio'. Values are coerced to numbers first, and zero
# shots are turned into NaN so the ratio is NaN rather than inf for players
# without a shot.
goals = pd.to_numeric(complete_playerstats_df['Goals'], errors='coerce')
shots = pd.to_numeric(complete_playerstats_df['Shots'], errors='coerce')
complete_playerstats_df['Goal Shot Ratio'] = goals / shots.where(shots != 0)

# Save to .csv locally
complete_playerstats_df.to_csv('complete_player_stats.csv', index=False)

print(complete_playerstats_df)