In [1]:
import time

import pandas as pd

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Teams: proballers.com team pages, one URL constant per national team
Brazil = 'https://www.proballers.com/basketball/team/1651/brazil'
France = 'https://www.proballers.com/basketball/team/196/france'
Germany = 'https://www.proballers.com/basketball/team/198/germany'
Japan = 'https://www.proballers.com/basketball/team/1652/japan'

In [3]:
# Setup Chrome WebDriver
# ChromeDriverManager().install() returns the path of the ChromeDriver executable to use;
# that path is wrapped in a Service, which is then used to start Chrome.
chromedriver_path = ChromeDriverManager().install()
service = Service(chromedriver_path)
driver = webdriver.Chrome(service=service)

In [4]:
# First Team: open the Germany team page (the `Germany` URL constant) in the browser
driver.get(Germany)

In [5]:
# Navigate through page
# Schedule -> Preparation
# Each control is clicked only once it is clickable: the Preparation entry may not be ready
# the instant the Schedule tab has been clicked. A TimeoutException is raised after 10 seconds.
wait = WebDriverWait(driver, 10)
wait.until(
    EC.element_to_be_clickable((By.XPATH, '/html/body/main/div/div[2]/div/section[2]/div/ul/li[2]/a'))
).click()
wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="league-Preparation-list"]'))
).click()

In [6]:
# Wait until the Game 1 link (the element clicked in the next cell) is clickable.
# An explicit wait continues as soon as the link is ready and raises TimeoutException after
# 10 seconds, instead of always pausing for a fixed 5 seconds and hoping the table has rendered.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="league-370"]/div[1]/div/table/tbody/tr[3]/td[4]/a')
    )
)

In [6]:
# Game 1: locate the link in row 3 / column 4 of the league-370 table, then follow it
game_link = driver.find_element(
    By.XPATH,
    '//*[@id="league-370"]/div[1]/div/table/tbody/tr[3]/td[4]/a',
)
game_link.click()

In [7]:
# Create DataFrame for first team
table = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[1]/div/div/div/div')
headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
# Look up each row's <td> cells once and reuse the result for both the "has data cells" filter
# and the text extraction; every find_elements call is a separate WebDriver request.
cells_by_row = [row.find_elements(By.TAG_NAME, 'td') for row in table.find_elements(By.TAG_NAME, 'tr')]
# Rows without any <td> are skipped.
rows = [[cell.text for cell in cells] for cells in cells_by_row if cells]
df1 = pd.DataFrame(rows, columns=headers)

In [8]:
# Drop the totals (the last row of the scraped table)
df1 = df1.drop(index=df1.index[-1:])

# Split shots made from attempted: each "made-attempted" text column becomes two columns
for source, made, attempted in (
    ('2M-2A', '2PM', '2PA'),
    ('3M-3A', '3PM', '3PA'),
    ('1M-1A', 'FTM', 'FTA'),
):
    df1[[made, attempted]] = df1[source].str.split('-', expand=True)

# Remove the combined columns and the scraped percentage columns
df1 = df1.drop(columns=['2M-2A', '3M-3A', '1M-1A', 'FG%', '1%'])

In [9]:
# Change types into integers: every column except the player name
numeric_columns = [name for name in df1.columns if name != 'PLAYER']
for name in numeric_columns:
    df1[name] = df1[name].astype(int)

In [10]:
# Create Field Goal columns (2-point plus 3-point shots)
df1['FGM'] = df1['2PM'] + df1['3PM']
df1['FGA'] = df1['2PA'] + df1['3PA']

# Create percentage columns
# Scale to percent *before* rounding: rounding the ratio to 2 decimals and then multiplying by
# 100 leaves floating-point noise in the result (0.57 * 100 == 56.99999999999999).
# 0 made / 0 attempted yields NaN, which is reported as 0.
for pct_col, made_col, att_col in (
    ('FG%', 'FGM', 'FGA'),
    ('2P%', '2PM', '2PA'),
    ('3P%', '3PM', '3PA'),
    ('FT%', 'FTM', 'FTA'),
):
    ratio = df1[made_col].div(df1[att_col], fill_value=0).fillna(0)
    df1[pct_col] = ratio.mul(100).round()

In [11]:
# Drop duplicated columns: keep only the first column for each repeated column name
repeated_name = df1.columns.duplicated()
df1 = df1.loc[:, ~repeated_name]

In [12]:
# Add the Team Name (read from the first team's heading on the game page) and the Game ID
team_heading = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[1]/div/div/a/h3')
df1 = df1.assign(TEAM=team_heading.accessible_name, GAME='GER3')

In [13]:
# Reorder the DataFrame columns (any column not listed here is dropped)
column_order = [
    'GAME', 'TEAM', 'PLAYER', 'MIN',
    'PTS', 'REB', 'AST', 'STL', 'BLK', 'TO', 'FO',
    'FGM', 'FGA', 'FG%',
    '2PM', '2PA', '2P%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'OR', 'DR', '+/-', 'EFF',
]
df1 = df1[column_order]

In [14]:
# Create DataFrame for second team
table = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[2]/div/div/div/div')
headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
# Look up each row's <td> cells once and reuse the result for both the "has data cells" filter
# and the text extraction; every find_elements call is a separate WebDriver request.
cells_by_row = [row.find_elements(By.TAG_NAME, 'td') for row in table.find_elements(By.TAG_NAME, 'tr')]
# Rows without any <td> are skipped.
rows = [[cell.text for cell in cells] for cells in cells_by_row if cells]
df2 = pd.DataFrame(rows, columns=headers)

In [15]:
# Drop the totals (the last row of the scraped table)
df2 = df2.drop(index=df2.index[-1:])

# Split shots made from attempted: each "made-attempted" text column becomes two columns
for source, made, attempted in (
    ('2M-2A', '2PM', '2PA'),
    ('3M-3A', '3PM', '3PA'),
    ('1M-1A', 'FTM', 'FTA'),
):
    df2[[made, attempted]] = df2[source].str.split('-', expand=True)

# Remove the combined columns and the scraped percentage columns
df2 = df2.drop(columns=['2M-2A', '3M-3A', '1M-1A', 'FG%', '1%'])

In [16]:
# Change types into integers: every column except the player name
numeric_columns = [name for name in df2.columns if name != 'PLAYER']
for name in numeric_columns:
    df2[name] = df2[name].astype(int)

In [17]:
# Create Field Goal columns (2-point plus 3-point shots)
df2['FGM'] = df2['2PM'] + df2['3PM']
df2['FGA'] = df2['2PA'] + df2['3PA']

# Create percentage columns
# Scale to percent *before* rounding: rounding the ratio to 2 decimals and then multiplying by
# 100 leaves floating-point noise in the result (0.57 * 100 == 56.99999999999999).
# 0 made / 0 attempted yields NaN, which is reported as 0.
for pct_col, made_col, att_col in (
    ('FG%', 'FGM', 'FGA'),
    ('2P%', '2PM', '2PA'),
    ('3P%', '3PM', '3PA'),
    ('FT%', 'FTM', 'FTA'),
):
    ratio = df2[made_col].div(df2[att_col], fill_value=0).fillna(0)
    df2[pct_col] = ratio.mul(100).round()

In [18]:
# Drop duplicated columns: keep only the first column for each repeated column name
repeated_name = df2.columns.duplicated()
df2 = df2.loc[:, ~repeated_name]

In [19]:
# Add the Team Name (read from the second team's heading on the game page) and the Game ID
team_heading = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[2]/div/div/a/h3')
df2 = df2.assign(TEAM=team_heading.accessible_name, GAME='GER3')

In [20]:
# Reorder the DataFrame columns (any column not listed here is dropped)
column_order = [
    'GAME', 'TEAM', 'PLAYER', 'MIN',
    'PTS', 'REB', 'AST', 'STL', 'BLK', 'TO', 'FO',
    'FGM', 'FGA', 'FG%',
    '2PM', '2PA', '2P%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'OR', 'DR', '+/-', 'EFF',
]
df2 = df2[column_order]

In [21]:
# Return page: go one step back in the browser history, leaving the game page that was just scraped
driver.back()

In [22]:
# Navigate through page
# Schedule -> Preparation
# After going back, the Preparation entry is clicked only once it is clickable;
# a TimeoutException is raised if that does not happen within 10 seconds.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="league-Preparation-list"]'))
).click()

In [25]:
# Wait until the Game 2 link (the element clicked in the next cell) is clickable.
# An explicit wait continues as soon as the link is ready and raises TimeoutException after
# 10 seconds, instead of always pausing for a fixed 5 seconds and hoping the table has rendered.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="league-370"]/div[1]/div/table/tbody/tr[4]/td[4]/a')
    )
)

In [23]:
# Game 2: locate the link in row 4 / column 4 of the league-370 table, then follow it
game_link = driver.find_element(
    By.XPATH,
    '//*[@id="league-370"]/div[1]/div/table/tbody/tr[4]/td[4]/a',
)
game_link.click()

In [24]:
# Create DataFrame for first team
table = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[1]/div/div/div/div')
headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
# Look up each row's <td> cells once and reuse the result for both the "has data cells" filter
# and the text extraction; every find_elements call is a separate WebDriver request.
cells_by_row = [row.find_elements(By.TAG_NAME, 'td') for row in table.find_elements(By.TAG_NAME, 'tr')]
# Rows without any <td> are skipped.
rows = [[cell.text for cell in cells] for cells in cells_by_row if cells]
df3 = pd.DataFrame(rows, columns=headers)

In [25]:
# Drop the totals (the last row of the scraped table)
df3 = df3.drop(index=df3.index[-1:])

# Split shots made from attempted: each "made-attempted" text column becomes two columns
for source, made, attempted in (
    ('2M-2A', '2PM', '2PA'),
    ('3M-3A', '3PM', '3PA'),
    ('1M-1A', 'FTM', 'FTA'),
):
    df3[[made, attempted]] = df3[source].str.split('-', expand=True)

# Remove the combined columns and the scraped percentage columns
df3 = df3.drop(columns=['2M-2A', '3M-3A', '1M-1A', 'FG%', '1%'])

In [26]:
# Change types into integers: every column except the player name
numeric_columns = [name for name in df3.columns if name != 'PLAYER']
for name in numeric_columns:
    df3[name] = df3[name].astype(int)

In [27]:
# Create Field Goal columns (2-point plus 3-point shots)
df3['FGM'] = df3['2PM'] + df3['3PM']
df3['FGA'] = df3['2PA'] + df3['3PA']

# Create percentage columns
# Scale to percent *before* rounding: rounding the ratio to 2 decimals and then multiplying by
# 100 leaves floating-point noise in the result (0.57 * 100 == 56.99999999999999).
# 0 made / 0 attempted yields NaN, which is reported as 0.
for pct_col, made_col, att_col in (
    ('FG%', 'FGM', 'FGA'),
    ('2P%', '2PM', '2PA'),
    ('3P%', '3PM', '3PA'),
    ('FT%', 'FTM', 'FTA'),
):
    ratio = df3[made_col].div(df3[att_col], fill_value=0).fillna(0)
    df3[pct_col] = ratio.mul(100).round()

In [28]:
# Drop duplicated columns: keep only the first column for each repeated column name
repeated_name = df3.columns.duplicated()
df3 = df3.loc[:, ~repeated_name]

In [29]:
# Add the Team Name (read from the first team's heading on the game page) and the Game ID
team_heading = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[1]/div/div/a/h3')
df3 = df3.assign(TEAM=team_heading.accessible_name, GAME='GER1')

In [30]:
# Reorder the DataFrame columns (any column not listed here is dropped)
column_order = [
    'GAME', 'TEAM', 'PLAYER', 'MIN',
    'PTS', 'REB', 'AST', 'STL', 'BLK', 'TO', 'FO',
    'FGM', 'FGA', 'FG%',
    '2PM', '2PA', '2P%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'OR', 'DR', '+/-', 'EFF',
]
df3 = df3[column_order]

In [31]:
# Create DataFrame for second team
table = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[2]/div/div/div/div')
headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
# Look up each row's <td> cells once and reuse the result for both the "has data cells" filter
# and the text extraction; every find_elements call is a separate WebDriver request.
cells_by_row = [row.find_elements(By.TAG_NAME, 'td') for row in table.find_elements(By.TAG_NAME, 'tr')]
# Rows without any <td> are skipped.
rows = [[cell.text for cell in cells] for cells in cells_by_row if cells]
df4 = pd.DataFrame(rows, columns=headers)

In [32]:
# Drop the totals (the last row of the scraped table)
df4 = df4.drop(index=df4.index[-1:])

# Split shots made from attempted: each "made-attempted" text column becomes two columns
for source, made, attempted in (
    ('2M-2A', '2PM', '2PA'),
    ('3M-3A', '3PM', '3PA'),
    ('1M-1A', 'FTM', 'FTA'),
):
    df4[[made, attempted]] = df4[source].str.split('-', expand=True)

# Remove the combined columns and the scraped percentage columns
df4 = df4.drop(columns=['2M-2A', '3M-3A', '1M-1A', 'FG%', '1%'])

In [33]:
# Change types into integers: every column except the player name
numeric_columns = [name for name in df4.columns if name != 'PLAYER']
for name in numeric_columns:
    df4[name] = df4[name].astype(int)

In [34]:
# Create Field Goal columns (2-point plus 3-point shots)
df4['FGM'] = df4['2PM'] + df4['3PM']
df4['FGA'] = df4['2PA'] + df4['3PA']

# Create percentage columns
# Scale to percent *before* rounding: rounding the ratio to 2 decimals and then multiplying by
# 100 leaves floating-point noise in the result (0.57 * 100 == 56.99999999999999).
# 0 made / 0 attempted yields NaN, which is reported as 0.
for pct_col, made_col, att_col in (
    ('FG%', 'FGM', 'FGA'),
    ('2P%', '2PM', '2PA'),
    ('3P%', '3PM', '3PA'),
    ('FT%', 'FTM', 'FTA'),
):
    ratio = df4[made_col].div(df4[att_col], fill_value=0).fillna(0)
    df4[pct_col] = ratio.mul(100).round()

In [35]:
# Drop duplicated columns: keep only the first column for each repeated column name
repeated_name = df4.columns.duplicated()
df4 = df4.loc[:, ~repeated_name]

In [36]:
# Add the Team Name (read from the second team's heading on the game page) and the Game ID
team_heading = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[2]/div/div/a/h3')
df4 = df4.assign(TEAM=team_heading.accessible_name, GAME='GER1')

In [37]:
# Reorder the DataFrame columns (any column not listed here is dropped)
column_order = [
    'GAME', 'TEAM', 'PLAYER', 'MIN',
    'PTS', 'REB', 'AST', 'STL', 'BLK', 'TO', 'FO',
    'FGM', 'FGA', 'FG%',
    '2PM', '2PA', '2P%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'OR', 'DR', '+/-', 'EFF',
]
df4 = df4[column_order]

In [38]:
# Return page: go one step back in the browser history, leaving the game page that was just scraped
driver.back()

In [40]:
# Navigate through page
# Schedule -> Preparation
# After going back, the Preparation entry is clicked only once it is clickable;
# a TimeoutException is raised if that does not happen within 10 seconds.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="league-Preparation-list"]'))
).click()

In [43]:
# Wait until the Game 3 link (the element clicked in the next cell) is clickable.
# An explicit wait continues as soon as the link is ready and raises TimeoutException after
# 10 seconds, instead of always pausing for a fixed 5 seconds and hoping the table has rendered.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="league-370"]/div[1]/div/table/tbody/tr[5]/td[4]/a')
    )
)

In [41]:
# Game 3: locate the link in row 5 / column 4 of the league-370 table, then follow it
game_link = driver.find_element(
    By.XPATH,
    '//*[@id="league-370"]/div[1]/div/table/tbody/tr[5]/td[4]/a',
)
game_link.click()

In [42]:
# Create DataFrame for first team
table = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[1]/div/div/div/div')
headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
# Look up each row's <td> cells once and reuse the result for both the "has data cells" filter
# and the text extraction; every find_elements call is a separate WebDriver request.
cells_by_row = [row.find_elements(By.TAG_NAME, 'td') for row in table.find_elements(By.TAG_NAME, 'tr')]
# Rows without any <td> are skipped.
rows = [[cell.text for cell in cells] for cells in cells_by_row if cells]
df5 = pd.DataFrame(rows, columns=headers)

In [43]:
# Drop the totals (the last row of the scraped table)
df5 = df5.drop(index=df5.index[-1:])

# Split shots made from attempted: each "made-attempted" text column becomes two columns
for source, made, attempted in (
    ('2M-2A', '2PM', '2PA'),
    ('3M-3A', '3PM', '3PA'),
    ('1M-1A', 'FTM', 'FTA'),
):
    df5[[made, attempted]] = df5[source].str.split('-', expand=True)

# Remove the combined columns and the scraped percentage columns
df5 = df5.drop(columns=['2M-2A', '3M-3A', '1M-1A', 'FG%', '1%'])

In [44]:
# Change types into integers: every column except the player name
numeric_columns = [name for name in df5.columns if name != 'PLAYER']
for name in numeric_columns:
    df5[name] = df5[name].astype(int)

In [45]:
# Create Field Goal columns (2-point plus 3-point shots)
df5['FGM'] = df5['2PM'] + df5['3PM']
df5['FGA'] = df5['2PA'] + df5['3PA']

# Create percentage columns
# Scale to percent *before* rounding: rounding the ratio to 2 decimals and then multiplying by
# 100 leaves floating-point noise in the result (0.57 * 100 == 56.99999999999999).
# 0 made / 0 attempted yields NaN, which is reported as 0.
for pct_col, made_col, att_col in (
    ('FG%', 'FGM', 'FGA'),
    ('2P%', '2PM', '2PA'),
    ('3P%', '3PM', '3PA'),
    ('FT%', 'FTM', 'FTA'),
):
    ratio = df5[made_col].div(df5[att_col], fill_value=0).fillna(0)
    df5[pct_col] = ratio.mul(100).round()

In [46]:
# Drop duplicated columns: keep only the first column for each repeated column name
repeated_name = df5.columns.duplicated()
df5 = df5.loc[:, ~repeated_name]

In [47]:
# Add the Team Name (read from the first team's heading on the game page) and the Game ID
team_heading = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[1]/div/div/a/h3')
df5 = df5.assign(TEAM=team_heading.accessible_name, GAME='USA1')

In [48]:
# Reorder the DataFrame columns (any column not listed here is dropped)
column_order = [
    'GAME', 'TEAM', 'PLAYER', 'MIN',
    'PTS', 'REB', 'AST', 'STL', 'BLK', 'TO', 'FO',
    'FGM', 'FGA', 'FG%',
    '2PM', '2PA', '2P%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'OR', 'DR', '+/-', 'EFF',
]
df5 = df5[column_order]

In [49]:
# Create DataFrame for second team
table = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[2]/div/div/div/div')
headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
# Look up each row's <td> cells once and reuse the result for both the "has data cells" filter
# and the text extraction; every find_elements call is a separate WebDriver request.
cells_by_row = [row.find_elements(By.TAG_NAME, 'td') for row in table.find_elements(By.TAG_NAME, 'tr')]
# Rows without any <td> are skipped.
rows = [[cell.text for cell in cells] for cells in cells_by_row if cells]
df6 = pd.DataFrame(rows, columns=headers)

In [50]:
# Drop the totals (the last row of the scraped table)
df6 = df6.drop(index=df6.index[-1:])

# Split shots made from attempted: each "made-attempted" text column becomes two columns
for source, made, attempted in (
    ('2M-2A', '2PM', '2PA'),
    ('3M-3A', '3PM', '3PA'),
    ('1M-1A', 'FTM', 'FTA'),
):
    df6[[made, attempted]] = df6[source].str.split('-', expand=True)

# Remove the combined columns and the scraped percentage columns
df6 = df6.drop(columns=['2M-2A', '3M-3A', '1M-1A', 'FG%', '1%'])

In [51]:
# Change types into integers: every column except the player name
numeric_columns = [name for name in df6.columns if name != 'PLAYER']
for name in numeric_columns:
    df6[name] = df6[name].astype(int)

In [52]:
# Create Field Goal columns (2-point plus 3-point shots)
df6['FGM'] = df6['2PM'] + df6['3PM']
df6['FGA'] = df6['2PA'] + df6['3PA']

# Create percentage columns
# Scale to percent *before* rounding: rounding the ratio to 2 decimals and then multiplying by
# 100 leaves floating-point noise in the result (0.57 * 100 == 56.99999999999999).
# 0 made / 0 attempted yields NaN, which is reported as 0.
for pct_col, made_col, att_col in (
    ('FG%', 'FGM', 'FGA'),
    ('2P%', '2PM', '2PA'),
    ('3P%', '3PM', '3PA'),
    ('FT%', 'FTM', 'FTA'),
):
    ratio = df6[made_col].div(df6[att_col], fill_value=0).fillna(0)
    df6[pct_col] = ratio.mul(100).round()

In [53]:
# Drop duplicated columns: keep only the first column for each repeated column name
repeated_name = df6.columns.duplicated()
df6 = df6.loc[:, ~repeated_name]

In [54]:
# Add the Team Name (read from the second team's heading on the game page) and the Game ID
team_heading = driver.find_element(By.XPATH, '/html/body/main/div/div[2]/div[3]/div/div[2]/div/div/a/h3')
df6 = df6.assign(TEAM=team_heading.accessible_name, GAME='USA1')

In [55]:
# Reorder the DataFrame columns (any column not listed here is dropped)
column_order = [
    'GAME', 'TEAM', 'PLAYER', 'MIN',
    'PTS', 'REB', 'AST', 'STL', 'BLK', 'TO', 'FO',
    'FGM', 'FGA', 'FG%',
    '2PM', '2PA', '2P%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'OR', 'DR', '+/-', 'EFF',
]
df6 = df6[column_order]

In [56]:
# Export dataframe: stack the six per-team frames under a fresh 0..n-1 index
# and write the result to a CSV file without the index column
team_frames = [df1, df2, df3, df4, df5, df6]
germany_boxscores = pd.concat(team_frames, ignore_index=True)
germany_boxscores.to_csv('germany_boxscores.csv', index=False)

In [57]:
# Quit WebDriver: end the session and close the browser window(s) it opened
driver.quit()