# Table of Content 
- <a href='#0'>1. Importing Libraries </a> 
- <a href='#1'>2. Information about functions </a> 
- <a href='#2'>3. Function 1: individual_stats_box_1 </a> 
- <a href='#3'>4. Function 2: individual_stats_box_2 </a> 
- <a href='#4'>5. Creating DataFrames for Each BasketBall Season  </a> 
- <a href='#5'>6. Data Type Changed  </a> 
- <a href='#6'>7. Creating CSV Files  </a> 



# <a id='0'> Importing Libraries </a>

In [358]:
from bs4 import BeautifulSoup

import urllib
import urllib.request
import re

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

import math

import requests

import matplotlib.pyplot as plt
from pylab import rcParams


# <a id='1'> Information about Functions </a>

Both Functions:
* individual_stats_box_1 
* individual_stats_box_2

Take in a URL link as a parameter especifically the UCM Basketball Website for Individuals StatsReturn and returns a DataFrame with all of their perpective *Box data for each player.
* A Box is how the UCM Basketball's website separates the data (like a table).
* Box 1 stores the minutes and shooting data
* Box 2 stores rebounds and other stats data

# <a id='2'> Function 1: individual_stats_box_1  </a>

In [359]:
def individual_stats_box_1(URL):
    
    page = requests.get(URL)

    soup = BeautifulSoup(page.content, 'lxml')

    box_1 = soup.find('table', class_ = "stat_table BBIndvStats")

    players_stats_html = box_1.find_all('tr', class_ = "indStatsStarterRow")
    
    # Column's Names Len= 12
    col = {'Number','Name', 'GP', 'GS', 'MIN', 'MIN/G','FGM','FGA', 'FG%', '3FG-M','3FG-A', '3FG%', 'FTM', 'FTA', 'FT%'}

    # index =  len(players_stats_html[:]) = 16
    index = range(0, len(players_stats_html[:]))

    players_stats = pd.DataFrame(np.nan, index=index, columns= col)
    
    tds_per_player = []
    for player in players_stats_html:
        tds_per_player.append(player.find_all('td'))

    player_numbers = []
    player_names = []
    player_games_played = []
    player_games_started = []
    player_minutes = []
    player_min_per_game = []
    player_field_goal_made = []
    player_field_goal_attempted = []
    player_field_goal_percentage = []
    player_3_point_field_goal_made = []
    player_3_point_field_goal_attempted = []
    player_3_point_field_goal_percentage = []
    player_free_throws_made = []
    player_free_throws_attempted = []
    player_free_throws_percentage = []


    for player in tds_per_player:
        player_numbers.append(player[:][0].get_text())
        player_names.append(player[:][1].get_text())
        player_games_played.append(player[:][2].get_text())
        player_games_started.append(player[:][3].get_text())
        # player[:][4] is a whitespace td tag
        player_minutes.append(player[:][5].get_text())
        player_min_per_game.append(player[:][6].get_text())
        # player[:][7] is a whitespace td tag
        player_field_goal_made.append(player[:][8].get_text().split('-')[0])
        player_field_goal_attempted.append(player[:][8].get_text().split('-')[1])
        player_field_goal_percentage.append(player[:][9].get_text().replace('%', ''))
        player_3_point_field_goal_made.append(player[:][10].get_text().split('-')[0])
        player_3_point_field_goal_attempted.append(player[:][10].get_text().split('-')[1])
        player_3_point_field_goal_percentage.append(player[:][11].get_text().replace('%', ''))
        player_free_throws_made.append(player[:][12].get_text().split('-')[0])
        player_free_throws_attempted.append(player[:][12].get_text().split('-')[1])
        player_free_throws_percentage.append(player[:][13].get_text().replace('%', ''))

    players_stats['Number'] = player_numbers
    players_stats['Name'] = player_names
    players_stats['GP'] = player_games_played
    players_stats['GS'] = player_games_started
    players_stats['MIN'] = player_minutes
    players_stats['MIN/G'] = player_min_per_game
    players_stats['FGM'] = player_field_goal_made
    players_stats['FGA'] = player_field_goal_attempted
    players_stats['FG%'] = player_field_goal_percentage
    players_stats['3FG-M'] = player_3_point_field_goal_made
    players_stats['3FG-A'] = player_3_point_field_goal_attempted
    players_stats['3FG%'] = player_3_point_field_goal_percentage
    players_stats['FTM'] = player_free_throws_made
    players_stats['FTA'] = player_free_throws_attempted
    players_stats['FT%'] = player_free_throws_percentage



    return players_stats

# <a id='3'> Function 2: individual_stats_box_2  </a>

In [360]:
def individual_stats_box_2(URL):
    
    page = requests.get(URL)

    soup = BeautifulSoup(page.content, 'lxml')

    box_2 = soup.find('table', class_ = "stat_table BBIndvStatsSecond")

    players_stats_html = box_2.find_all('tr', class_ = "indStatsStarterRow")
    
    # Column's Names Len= 19
    col = {'Number','Name', 'GP', 'GS', 'REB', 'REB/G','OREB','DREB', 'BLKS', 'STL','AST', 'TO', 'A/TO', 'PF', 'TF','DQ','PTS','PTS/G'}

    # index =  len(players_stats_html[:]) = 16
    index = range(0, len(players_stats_html[:]))

    players_stats = pd.DataFrame(np.nan, index=index, columns= col)
    
    tds_per_player = []
    for player in players_stats_html:
        tds_per_player.append(player.find_all('td'))

    player_numbers = []
    player_names = []
    player_games_played = []
    player_games_started = []
    player_REB = []
    player_REB_PER_G = []
    player_O_REB = []
    player_D_REB = []
    player_BLKS = []
    player_STL = []
    player_AST = []
    player_TO = []
    player_A_PER_TO = []
    player_PF = []
    player_TF = []
    player_DQ = []
    player_PTS = []
    player_PTS_PER_G = []

    for player in tds_per_player:
        player_numbers.append(player[:][0].get_text())
        player_names.append(player[:][1].get_text())
        player_games_played.append(player[:][2].get_text())
        player_games_started.append(player[:][3].get_text())
        # player[:][4] is a whitespace td tag
        player_REB.append(player[:][5].get_text())
        player_REB_PER_G.append(player[:][6].get_text())
        player_O_REB.append(player[:][7].get_text())
        player_D_REB.append(player[:][8].get_text())
        # player[:][9] is a whitespace td tag
        player_BLKS.append(player[:][10].get_text())
        player_STL.append(player[:][11].get_text())
        player_AST.append(player[:][12].get_text())
        player_TO.append(player[:][13].get_text())
        player_A_PER_TO.append(player[:][14].get_text())
        player_PF.append(player[:][15].get_text())
        player_TF.append(player[:][16].get_text())
        player_DQ.append(player[:][17].get_text())
        player_PTS.append(player[:][18].get_text())
        player_PTS_PER_G.append(player[:][19].get_text())




    players_stats['Number'] = player_numbers
    players_stats['Name'] = player_names
    players_stats['GP'] = player_games_played
    players_stats['GS'] = player_games_started
    players_stats['REB'] = player_REB
    players_stats['REB/G'] = player_REB_PER_G
    players_stats['OREB'] = player_O_REB
    players_stats['DREB'] = player_D_REB
    players_stats['BLKS'] = player_BLKS
    players_stats['STL'] = player_STL
    players_stats['AST'] = player_AST
    players_stats['TO'] = player_TO
    players_stats['A/TO'] = player_A_PER_TO
    players_stats['PF'] = player_PF
    players_stats['TF'] = player_TF
    players_stats['DQ'] = player_DQ
    players_stats['PTS'] = player_PTS
    players_stats['PTS/G'] = player_PTS_PER_G



    return players_stats

# <a id='4'>Creating DataFrames for Each BasketBall Season</a>

## Mens' individual Statistics Box DFs

In [361]:
URL_2011_2012 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/9/1/1854.php#tabs-2'
URL_2012_2013 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/10/1/1854.php#tabs-2'
URL_2013_2014 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/11/1/1854.php#tabs-2'
URL_2014_2015 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/12/1/1854.php#tabs-2'
URL_2015_2016 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/13/1/1854.php#tabs-2'
URL_2016_2017 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/14/1/1854.php#tabs-2'
URL_2017_2018 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/15/1/1854.php#tabs-2'
URL_2018_2019 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/16/1/1854.php#tabs-2'
URL_2019_2020 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/17/1/1854.php#tabs-2'

players_stats_2011_2012_box_1 = individual_stats_box_1(URL_2011_2012)
players_stats_2012_2013_box_1 = individual_stats_box_1(URL_2012_2013)
players_stats_2013_2014_box_1 = individual_stats_box_1(URL_2013_2014)
players_stats_2014_2015_box_1 = individual_stats_box_1(URL_2014_2015)
players_stats_2015_2016_box_1 = individual_stats_box_1(URL_2015_2016)
players_stats_2016_2017_box_1 = individual_stats_box_1(URL_2016_2017)
players_stats_2017_2018_box_1 = individual_stats_box_1(URL_2017_2018)
players_stats_2018_2019_box_1 = individual_stats_box_1(URL_2018_2019)
players_stats_2019_2020_box_1 = individual_stats_box_1(URL_2019_2020)

players_stats_2011_2012_box_2 = individual_stats_box_2(URL_2011_2012)
players_stats_2012_2013_box_2 = individual_stats_box_2(URL_2012_2013)
players_stats_2013_2014_box_2 = individual_stats_box_2(URL_2013_2014)
players_stats_2014_2015_box_2 = individual_stats_box_2(URL_2014_2015)
players_stats_2015_2016_box_2 = individual_stats_box_2(URL_2015_2016)
players_stats_2016_2017_box_2 = individual_stats_box_2(URL_2016_2017)
players_stats_2017_2018_box_2 = individual_stats_box_2(URL_2017_2018)
players_stats_2018_2019_box_2 = individual_stats_box_2(URL_2018_2019)
players_stats_2019_2020_box_2 = individual_stats_box_2(URL_2019_2020)

## Women's Individual Statistics Box DFs

In [362]:

URL_2012_2013 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/10/2/1854.php#tabs-2'
URL_2013_2014 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/11/2/1854.php#tabs-2'
URL_2014_2015 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/12/2/1854.php#tabs-2'
URL_2015_2016 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/13/2/1854.php#tabs-2'
URL_2016_2017 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/14/2/1854.php#tabs-2'
URL_2017_2018 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/15/2/1854.php#tabs-2'
URL_2018_2019 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/16/2/1854.php#tabs-2'
URL_2019_2020 = 'https://www.ucmercedbobcats.com/SIDHelp/seasonStats/17/2/1854.php#tabs-2'

players_stats_2012_2013_box_1_w = individual_stats_box_1(URL_2012_2013)
players_stats_2013_2014_box_1_w = individual_stats_box_1(URL_2013_2014)
players_stats_2014_2015_box_1_w = individual_stats_box_1(URL_2014_2015)
players_stats_2015_2016_box_1_w = individual_stats_box_1(URL_2015_2016)
players_stats_2016_2017_box_1_w = individual_stats_box_1(URL_2016_2017)
players_stats_2017_2018_box_1_w = individual_stats_box_1(URL_2017_2018)
players_stats_2018_2019_box_1_w = individual_stats_box_1(URL_2018_2019)
players_stats_2019_2020_box_1_w = individual_stats_box_1(URL_2019_2020)

players_stats_2012_2013_box_2_w = individual_stats_box_2(URL_2012_2013)
players_stats_2013_2014_box_2_w = individual_stats_box_2(URL_2013_2014)
players_stats_2014_2015_box_2_w = individual_stats_box_2(URL_2014_2015)
players_stats_2015_2016_box_2_w = individual_stats_box_2(URL_2015_2016)
players_stats_2016_2017_box_2_w = individual_stats_box_2(URL_2016_2017)
players_stats_2017_2018_box_2_w = individual_stats_box_2(URL_2017_2018)
players_stats_2018_2019_box_2_w = individual_stats_box_2(URL_2018_2019)
players_stats_2019_2020_box_2_w = individual_stats_box_2(URL_2019_2020)

# Combined Both DataFrames

## Men's Individual Statistics DF

In [363]:
players_stats_2011_2012 = pd.merge(players_stats_2011_2012_box_1,
                                   players_stats_2011_2012_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2012_2013 = pd.merge(players_stats_2012_2013_box_1,
                                   players_stats_2012_2013_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2013_2014 = pd.merge(players_stats_2013_2014_box_1,
                                   players_stats_2013_2014_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2014_2015 = pd.merge(players_stats_2014_2015_box_1,
                                   players_stats_2014_2015_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2015_2016 = pd.merge(players_stats_2015_2016_box_1,
                                   players_stats_2015_2016_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2016_2017 = pd.merge(players_stats_2016_2017_box_1,
                                   players_stats_2016_2017_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2017_2018 = pd.merge(players_stats_2017_2018_box_1,
                                   players_stats_2017_2018_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2018_2019 = pd.merge(players_stats_2018_2019_box_1,
                                   players_stats_2018_2019_box_2, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2019_2020 = pd.merge(players_stats_2019_2020_box_1,
                                   players_stats_2019_2020_box_2, 
                                   on=["Number", "Name","GP","GS"])


## Women's Individual Statistics DF

In [364]:
players_stats_2012_2013_w = pd.merge(players_stats_2012_2013_box_1_w,
                                   players_stats_2012_2013_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2013_2014_w = pd.merge(players_stats_2013_2014_box_1_w,
                                   players_stats_2013_2014_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2014_2015_w = pd.merge(players_stats_2014_2015_box_1_w,
                                   players_stats_2014_2015_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2015_2016_w = pd.merge(players_stats_2015_2016_box_1_w,
                                   players_stats_2015_2016_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2016_2017_w = pd.merge(players_stats_2016_2017_box_1_w,
                                   players_stats_2016_2017_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2017_2018_w = pd.merge(players_stats_2017_2018_box_1_w,
                                   players_stats_2017_2018_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2018_2019_w = pd.merge(players_stats_2018_2019_box_1_w,
                                   players_stats_2018_2019_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

players_stats_2019_2020_w = pd.merge(players_stats_2019_2020_box_1_w,
                                   players_stats_2019_2020_box_2_w, 
                                   on=["Number", "Name","GP","GS"])

# <a id='5'>Data Type Changed</a>

In [365]:
# Columns to be converted from str -> int
columns_to_int = ['3FG-M', 'FTA', 'MIN', 'GP', 'FGM','FTM', 'FGA', '3FG-A','GS','TO','STL', 'BLKS', 'PF', 'DREB', 'TF', 'PTS', 'DQ', 'REB', 'OREB','AST']

# Columns to be converted from str -> float 
columns_to_float = ['MIN/G','FT%','FG%', '3FG%','PTS/G','A/TO','REB/G']

# the Remaininding columns stay as str

## Data Type Changed: Men

In [367]:
#### str ----> float ####
players_stats_2011_2012[columns_to_float] = players_stats_2011_2012[columns_to_float].astype(float)
players_stats_2012_2013[columns_to_float] = players_stats_2012_2013[columns_to_float].astype(float)
players_stats_2013_2014[columns_to_float] = players_stats_2013_2014[columns_to_float].astype(float)
players_stats_2014_2015[columns_to_float] = players_stats_2014_2015[columns_to_float].astype(float)
players_stats_2015_2016[columns_to_float] = players_stats_2015_2016[columns_to_float].astype(float)
players_stats_2016_2017[columns_to_float] = players_stats_2016_2017[columns_to_float].astype(float)
players_stats_2017_2018[columns_to_float] = players_stats_2017_2018[columns_to_float].astype(float)
players_stats_2018_2019[columns_to_float] = players_stats_2018_2019[columns_to_float].astype(float)
players_stats_2019_2020[columns_to_float] = players_stats_2019_2020[columns_to_float].astype(float)


#### str -----> int ####
players_stats_2011_2012[columns_to_int] = players_stats_2011_2012[columns_to_int].astype(int)
players_stats_2012_2013[columns_to_int] = players_stats_2012_2013[columns_to_int].astype(int)
players_stats_2013_2014[columns_to_int] = players_stats_2013_2014[columns_to_int].astype(int)
players_stats_2014_2015[columns_to_int] = players_stats_2014_2015[columns_to_int].astype(int)
players_stats_2015_2016[columns_to_int] = players_stats_2015_2016[columns_to_int].astype(int)
players_stats_2016_2017[columns_to_int] = players_stats_2016_2017[columns_to_int].astype(int)
players_stats_2017_2018[columns_to_int] = players_stats_2017_2018[columns_to_int].astype(int)
players_stats_2018_2019[columns_to_int] = players_stats_2018_2019[columns_to_int].astype(int)
players_stats_2019_2020[columns_to_int] = players_stats_2019_2020[columns_to_int].astype(int)

## Data Type Changed: Women

In [368]:
#### str ----> float ####
players_stats_2012_2013_w[columns_to_float] = players_stats_2012_2013_w[columns_to_float].astype(float)
players_stats_2013_2014_w[columns_to_float] = players_stats_2013_2014_w[columns_to_float].astype(float)
players_stats_2014_2015_w[columns_to_float] = players_stats_2014_2015_w[columns_to_float].astype(float)
players_stats_2015_2016_w[columns_to_float] = players_stats_2015_2016_w[columns_to_float].astype(float)
players_stats_2016_2017_w[columns_to_float] = players_stats_2016_2017_w[columns_to_float].astype(float)
players_stats_2017_2018_w[columns_to_float] = players_stats_2017_2018_w[columns_to_float].astype(float)
players_stats_2018_2019_w[columns_to_float] = players_stats_2018_2019_w[columns_to_float].astype(float)
players_stats_2019_2020_w[columns_to_float] = players_stats_2019_2020_w[columns_to_float].astype(float)

#### str -----> int ####
players_stats_2012_2013_w[columns_to_int] = players_stats_2012_2013_w[columns_to_int].astype(int)
players_stats_2013_2014_w[columns_to_int] = players_stats_2013_2014_w[columns_to_int].astype(int)
players_stats_2014_2015_w[columns_to_int] = players_stats_2014_2015_w[columns_to_int].astype(int)
players_stats_2015_2016_w[columns_to_int] = players_stats_2015_2016_w[columns_to_int].astype(int)
players_stats_2016_2017_w[columns_to_int] = players_stats_2016_2017_w[columns_to_int].astype(int)
players_stats_2017_2018_w[columns_to_int] = players_stats_2017_2018_w[columns_to_int].astype(int)
players_stats_2018_2019_w[columns_to_int] = players_stats_2018_2019_w[columns_to_int].astype(int)
players_stats_2019_2020_w[columns_to_int] = players_stats_2019_2020_w[columns_to_int].astype(int)


# <a id='6'> Creating CSV Files</a>
###   * Store them in the data folder 

### Men's Individual Stats CSV Files

In [369]:
path = 'C:\\Users\\Jairo\Desktop\\hackmerced_2021\\data\\Men_Individual_Data\\'

players_stats_2011_2012.to_csv(path+'players_stats_2011_2012.csv', index=False)
players_stats_2012_2013.to_csv(path+'players_stats_2012_2013.csv', index=False)
players_stats_2013_2014.to_csv(path+'players_stats_2013_2014.csv', index=False)
players_stats_2014_2015.to_csv(path+'players_stats_2014_2015.csv', index=False)
players_stats_2015_2016.to_csv(path+'players_stats_2015_2016.csv', index=False)
players_stats_2016_2017.to_csv(path+'players_stats_2016_2017.csv', index=False)
players_stats_2017_2018.to_csv(path+'players_stats_2017_2018.csv', index=False)
players_stats_2018_2019.to_csv(path+'players_stats_2018_2019.csv', index=False)
players_stats_2019_2020.to_csv(path+'players_stats_2019_2020.csv', index=False)


### Women's Individual Stats CSV Files

In [370]:
path = 'C:\\Users\\Jairo\Desktop\\hackmerced_2021\\data\\Women_Individual_Data\\'

players_stats_2012_2013_w.to_csv(path+'players_stats_2012_2013_w.csv', index=False)
players_stats_2013_2014_w.to_csv(path+'players_stats_2013_2014_w.csv', index=False)
players_stats_2014_2015_w.to_csv(path+'players_stats_2014_2015_w.csv', index=False)
players_stats_2015_2016_w.to_csv(path+'players_stats_2015_2016_w.csv', index=False)
players_stats_2016_2017_w.to_csv(path+'players_stats_2016_2017_w.csv', index=False)
players_stats_2017_2018_w.to_csv(path+'players_stats_2017_2018_w.csv', index=False)
players_stats_2018_2019_w.to_csv(path+'players_stats_2018_2019_w.csv', index=False)
players_stats_2019_2020_w.to_csv(path+'players_stats_2019_2020_w.csv', index=False)


In [371]:
players_stats_2012_2013_w

Unnamed: 0,Number,3FG-M,FTA,MIN,GP,Name,FGM,MIN/G,FT%,FTM,...,BLKS,REB/G,PF,DREB,TF,PTS,DQ,REB,OREB,AST
0,1,7,116,780,24,Leticia Oceguera,80,32.5,60.3,70,...,7,3.5,64,65,0,237,1,83,18,80
1,3,1,71,732,24,Kayla Polman,60,30.5,49.3,35,...,11,7.0,93,115,0,156,8,168,53,17
2,4,0,8,269,22,Karina Rodriguez,8,12.2,62.5,5,...,1,1.7,17,30,0,21,0,37,7,5
3,5,53,72,747,22,Shelby Howard,122,34.0,77.8,56,...,1,3.3,36,55,0,353,0,72,17,33
4,10,0,8,175,22,Daman Singh,9,8.0,50.0,4,...,3,1.2,18,18,0,22,0,26,8,0
5,15,0,10,172,10,Nelly Kilpatrick,15,17.2,40.0,4,...,7,1.9,18,14,0,34,1,19,5,4
6,20,0,39,695,24,Adrienne Carmona,45,29.0,59.0,23,...,27,5.6,62,90,0,113,2,135,45,17
7,21,13,36,513,24,Samantha Oshiro,30,21.4,75.0,27,...,1,2.4,44,46,1,100,1,58,12,10
8,22,14,27,457,22,Maya Campo,38,20.8,70.4,19,...,1,1.8,18,31,0,109,0,39,8,22
9,24,0,10,181,20,Ifeyinwa Iwekaogwu,8,9.1,40.0,4,...,0,0.8,23,13,0,20,0,16,3,4
