# This notebook gathers data from NISA player stats tables
### (in progress)

In [None]:
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [None]:
# Optional cell: stretches the notebook's code cells to the full page width
# for better readability.
# HTML and display are imported from the public IPython.display module rather
# than the internal IPython.core.display path, so this cell does not depend on
# the notebook injecting `display` into the namespace.
from IPython.display import HTML, display

custom_css = """
<style>
.container { width: 100% !important; }
.code_cell { flex-grow: 1; width: 100% !important; }
.code_cell .input_area { width: 100% !important; }
</style>
"""

# Rendering the <style> element as cell output applies the CSS to the page.
display(HTML(custom_css))

In [None]:
def extract_stats_table(soup):
    """Parse the first ``<table>`` found in *soup* into a DataFrame.

    The ``<th>`` cells of the table's first row are used as column names.
    Every following row becomes one record, pairing its ``<td>``/``<th>``
    cells with the headers by position.

    Args:
        soup: Parsed page exposing the BeautifulSoup ``find`` /
            ``find_all`` / ``get_text`` API.

    Returns:
        pandas.DataFrame with one row per non-empty table row. Cell values
        are kept as text, with newline characters removed. A table that has
        only a header row yields an empty frame that still carries the
        header names as columns.

    Raises:
        ValueError: If the page contains no ``<table>`` element, or the
            table contains no ``<tr>`` rows.
    """
    table = soup.find('table')
    if table is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on the next line.
        raise ValueError('No <table> element found in the page')

    rows = table.find_all('tr')
    if not rows:
        raise ValueError('The stats table contains no rows')

    headers = [th.get_text(strip=True) for th in rows[0].find_all('th')]

    # Extract the table data into a list of dictionaries
    records = []
    for row in rows[1:]:
        values = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        if not values:
            # Rows without any cells would otherwise become all-NaN records.
            continue
        records.append(dict(zip(headers, values)))

    # Creating DataFrame
    if records:
        player_stats_df = pd.DataFrame(records)
    else:
        # Keep the header names (de-duplicated, original order) so an empty
        # result still has the expected columns.
        player_stats_df = pd.DataFrame(columns=list(dict.fromkeys(headers)))

    # Cleaning up DataFrame
    player_stats_df = player_stats_df.replace(r'\n', '', regex=True)  # Removes newline characters
    if 'TEAM' in player_stats_df.columns:
        # Not every table has a TEAM column; only tidy it when present.
        player_stats_df['TEAM'] = player_stats_df['TEAM'].str.strip()

    return player_stats_df

In [None]:
def create_df(csv_filename, data_folder='data'):
    """Scrape the stats table from the current page and save it as a CSV.

    Reads ``page_source`` from the module-level ``driver`` (presumably the
    Selenium WebDriver created elsewhere in this notebook -- confirm it is
    defined before calling), parses it with ``extract_stats_table``, writes
    the result to ``<data_folder>/<csv_filename>`` and prints the DataFrame.

    Args:
        csv_filename: Name of the CSV file to write inside ``data_folder``.
        data_folder: Directory that receives the CSV; created if missing.
            Defaults to ``'data'``.
    """
    soup = BS(driver.page_source, 'html.parser')

    player_stats_df = extract_stats_table(soup)

    # Creates the output folder if it doesn't exist. exist_ok avoids the
    # check-then-create race of os.path.exists() followed by os.makedirs().
    os.makedirs(data_folder, exist_ok=True)

    # Saves the dataframe locally to a csv
    csv_filepath = os.path.join(data_folder, csv_filename)
    player_stats_df.to_csv(csv_filepath, index=False)

    print(player_stats_df)