# Import Libraries

In [265]:
from bs4 import BeautifulSoup

import urllib
import urllib.request
import re

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

import math

import requests

import matplotlib.pyplot as plt
from pylab import rcParams


### HTML Code for reference

In [266]:
# The 2 boxes
    #class="boxStats full fbIndStats season centerText"
# First Box
    #class="stat_table BBIndvStats"
# Second Box
    #table class="stat_table BBIndvStatsSecond"
# Rows of the boxes: players' info.
    #class="indStatsStarterRow"
#print(page.content[:])

## Both functions, individual_stats_box_1 and individual_stats_box_2, create a DataFrame with all of their respective Box data for each player.
* Parameter: URL to the UCM Basketball website page for individual stats
* A Box is how the UCM Basketball website separates the data (like a table). There are 2 Boxes.

In [267]:
def individual_stats_box_1(URL):
    """Scrape the first individual-stats box of a season page into a DataFrame.

    Downloads ``URL``, finds the ``<table class="stat_table BBIndvStats">``
    element and reads every ``<tr class="indStatsStarterRow">`` row inside it.

    Parameters
    ----------
    URL : str
        Address of the individual-stats page to download.

    Returns
    -------
    pandas.DataFrame
        One row per ``indStatsStarterRow`` with the columns, in this order:
        ``Number, Name, GP, GS, MIN, MIN/G, FGM, FGA, FG%, 3FG-M, 3FG-A,
        3FG%, FTM, FTA, FT%``. Every value is the raw cell text (a string);
        ``%`` signs are removed from the percentage columns and the
        "made-attempted" cells are split into two columns.

    Raises
    ------
    ValueError
        If the page has no ``stat_table BBIndvStats`` table.
    IndexError
        If a row has fewer cells than expected, or a "made-attempted" cell
        contains no ``-``.
    """
    # A list (not a set) so the column order is deterministic; a set has no
    # defined order and recent pandas versions refuse it as ``columns``.
    columns = ['Number', 'Name', 'GP', 'GS', 'MIN', 'MIN/G',
               'FGM', 'FGA', 'FG%',
               '3FG-M', '3FG-A', '3FG%',
               'FTM', 'FTA', 'FT%']

    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'lxml')

    box_1 = soup.find('table', class_="stat_table BBIndvStats")
    if box_1 is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'No table with class "stat_table BBIndvStats" found at ' + str(URL))

    rows = []
    for player_row in box_1.find_all('tr', class_="indStatsStarterRow"):
        cells = player_row.find_all('td')

        # Cells 8, 10 and 12 hold "made-attempted" pairs such as "12-30".
        field_goals = cells[8].get_text().split('-')
        three_pointers = cells[10].get_text().split('-')
        free_throws = cells[12].get_text().split('-')

        rows.append([
            cells[0].get_text(),                      # Number
            cells[1].get_text(),                      # Name
            cells[2].get_text(),                      # GP
            cells[3].get_text(),                      # GS
            # cells[4] is a whitespace-only td
            cells[5].get_text(),                      # MIN
            cells[6].get_text(),                      # MIN/G
            # cells[7] is a whitespace-only td
            field_goals[0],                           # FGM
            field_goals[1],                           # FGA
            cells[9].get_text().replace('%', ''),     # FG%
            three_pointers[0],                        # 3FG-M
            three_pointers[1],                        # 3FG-A
            cells[11].get_text().replace('%', ''),    # 3FG%
            free_throws[0],                           # FTM
            free_throws[1],                           # FTA
            cells[13].get_text().replace('%', ''),    # FT%
        ])

    return pd.DataFrame(rows, columns=columns)

In [268]:
def individual_stats_box_2(URL):
    """Scrape the second individual-stats box of a season page into a DataFrame.

    Downloads ``URL``, finds the
    ``<table class="stat_table BBIndvStatsSecond">`` element and reads every
    ``<tr class="indStatsStarterRow">`` row inside it.

    Parameters
    ----------
    URL : str
        Address of the individual-stats page to download.

    Returns
    -------
    pandas.DataFrame
        One row per ``indStatsStarterRow`` with the columns, in this order:
        ``Number, Name, GP, GS, REB, REB/G, OREB, DREB, BLKS, STL, AST, TO,
        A/TO, PF, TF, DQ, PTS, PTS/G``. Every value is the raw cell text
        (a string).

    Raises
    ------
    ValueError
        If the page has no ``stat_table BBIndvStatsSecond`` table.
    IndexError
        If a row has fewer than 20 ``td`` cells.
    """
    # A list (not a set) so the column order is deterministic; a set has no
    # defined order and recent pandas versions refuse it as ``columns``.
    columns = ['Number', 'Name', 'GP', 'GS',
               'REB', 'REB/G', 'OREB', 'DREB',
               'BLKS', 'STL', 'AST', 'TO', 'A/TO',
               'PF', 'TF', 'DQ', 'PTS', 'PTS/G']

    # td position feeding each column above, in the same order.
    # Positions 4 and 9 are whitespace-only td cells and are skipped.
    cell_positions = (0, 1, 2, 3,
                      5, 6, 7, 8,
                      10, 11, 12, 13, 14,
                      15, 16, 17, 18, 19)

    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'lxml')

    box_2 = soup.find('table', class_="stat_table BBIndvStatsSecond")
    if box_2 is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'No table with class "stat_table BBIndvStatsSecond" found at '
            + str(URL))

    rows = []
    for player_row in box_2.find_all('tr', class_="indStatsStarterRow"):
        cells = player_row.find_all('td')
        rows.append([cells[position].get_text() for position in cell_positions])

    return pd.DataFrame(rows, columns=columns)

## Creating the DataFrames for Box 1 and 2 by using individual_stats_box_1 and individual_stats_box_2

In [269]:

# One individual-stats page per season; only the numeric segment after
# "seasonStats/" differs between the URLs.
URL_2011_2012 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/9/1/1854.php#tabs-2'
URL_2012_2013 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/10/1/1854.php#tabs-2'
URL_2013_2014 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/11/1/1854.php#tabs-2'
URL_2014_2015 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/12/1/1854.php#tabs-2'
URL_2015_2016 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/13/1/1854.php#tabs-2'
URL_2016_2017 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/14/1/1854.php#tabs-2'
URL_2017_2018 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/15/1/1854.php#tabs-2'
URL_2018_2019 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/16/1/1854.php#tabs-2'
URL_2019_2020 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/17/1/1854.php#tabs-2'

# Box 1 DataFrames, scraped season by season from oldest to newest.
(
    players_stats_2011_2012_box_1,
    players_stats_2012_2013_box_1,
    players_stats_2013_2014_box_1,
    players_stats_2014_2015_box_1,
    players_stats_2015_2016_box_1,
    players_stats_2016_2017_box_1,
    players_stats_2017_2018_box_1,
    players_stats_2018_2019_box_1,
    players_stats_2019_2020_box_1,
) = [
    individual_stats_box_1(season_url)
    for season_url in (
        URL_2011_2012, URL_2012_2013, URL_2013_2014,
        URL_2014_2015, URL_2015_2016, URL_2016_2017,
        URL_2017_2018, URL_2018_2019, URL_2019_2020,
    )
]

# Box 2 DataFrames for the same seasons, in the same order.
(
    players_stats_2011_2012_box_2,
    players_stats_2012_2013_box_2,
    players_stats_2013_2014_box_2,
    players_stats_2014_2015_box_2,
    players_stats_2015_2016_box_2,
    players_stats_2016_2017_box_2,
    players_stats_2017_2018_box_2,
    players_stats_2018_2019_box_2,
    players_stats_2019_2020_box_2,
) = [
    individual_stats_box_2(season_url)
    for season_url in (
        URL_2011_2012, URL_2012_2013, URL_2013_2014,
        URL_2014_2015, URL_2015_2016, URL_2016_2017,
        URL_2017_2018, URL_2018_2019, URL_2019_2020,
    )
]



## Combine both DataFrames for each season

In [270]:
# Join each season's Box 1 and Box 2 frames into one frame per season.
# Rows are paired where all four shared columns (Number, Name, GP, GS)
# match; the join type is pandas' default (inner).
players_stats_2011_2012 = players_stats_2011_2012_box_1.merge(
    players_stats_2011_2012_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2012_2013 = players_stats_2012_2013_box_1.merge(
    players_stats_2012_2013_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2013_2014 = players_stats_2013_2014_box_1.merge(
    players_stats_2013_2014_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2014_2015 = players_stats_2014_2015_box_1.merge(
    players_stats_2014_2015_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2015_2016 = players_stats_2015_2016_box_1.merge(
    players_stats_2015_2016_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2016_2017 = players_stats_2016_2017_box_1.merge(
    players_stats_2016_2017_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2017_2018 = players_stats_2017_2018_box_1.merge(
    players_stats_2017_2018_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2018_2019 = players_stats_2018_2019_box_1.merge(
    players_stats_2018_2019_box_2, on=["Number", "Name", "GP", "GS"])

players_stats_2019_2020 = players_stats_2019_2020_box_1.merge(
    players_stats_2019_2020_box_2, on=["Number", "Name", "GP", "GS"])


## Final Steps: Make Them Into a CSV File
###   * Store them in the data folder 

In [296]:
# Destination folder for the per-season CSV files.
# Every backslash is escaped explicitly: a lone backslash before a letter
# (e.g. "\D") is an invalid escape sequence that current Python versions warn
# about. A raw string cannot be used because the path ends in a backslash.
path = 'C:\\Users\\Jairo\\Desktop\\hackmerced_2021\\data\\'

# Write one CSV per season, named players_stats_<season>.csv, without the
# DataFrame index column.
for season_label, season_stats in (
        ('2011_2012', players_stats_2011_2012),
        ('2012_2013', players_stats_2012_2013),
        ('2013_2014', players_stats_2013_2014),
        ('2014_2015', players_stats_2014_2015),
        ('2015_2016', players_stats_2015_2016),
        ('2016_2017', players_stats_2016_2017),
        ('2017_2018', players_stats_2017_2018),
        ('2018_2019', players_stats_2018_2019),
        ('2019_2020', players_stats_2019_2020),
):
    season_stats.to_csv(path + 'players_stats_' + season_label + '.csv',
                        index=False)
