In [None]:
from bs4 import BeautifulSoup
import requests
import urllib.request
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import re
import os

Install ChromeDriver (download and unzip it into the notebook's working directory, so that `./chromedriver` exists):

```
wget https://chromedriver.storage.googleapis.com/2.41/chromedriver_linux64.zip
unzip chromedriver_linux64.zip
```

In [None]:
# identifier of the championship on bfv.de; it is inserted into the competition URL
this_championship_id = '023T295LHO000024VS54898DVST4NHAM-G'

## Access championship page and accept cookies

In [None]:
# overview page of the championship; the '#tabelle' fragment presumably selects the table view
this_championship_overview_url = ''.join([
    'https://www.bfv.de/wettbewerbe/meisterschaften/',
    this_championship_id,
    '#tabelle',
])

In [None]:
# display the assembled URL as a quick sanity check
this_championship_overview_url

In [None]:
# chrome session
# start Chrome through the chromedriver binary in the working directory and load the overview page
driver = webdriver.Chrome(executable_path='./chromedriver')
driver.get(this_championship_overview_url)
# element look-ups made after this call wait up to 10 seconds for the element to appear
driver.implicitly_wait(10)

In [None]:
# accept cookies
# locate the consent banner's accept button by its element id and click it
python_button = driver.find_element_by_id('uc-btn-accept-banner')
python_button.click()

## Get final championship table

In [None]:
# parse the HTML currently rendered in the browser
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [None]:
# every <tr> carrying the table-entry data classes; the loop below treats each one as one team
all_teams_container_list = soup.find_all('tr', {'class': 'bfv-table-entry bfv-table-entry--data'})

In [None]:
# Build one pd.Series per table row (team) and combine them in a single concat.
# The result keeps teams as columns (it is transposed for display afterwards)
# and stays None when no rows were found.
all_team_results = None

# collected per-team Series; concatenating once avoids re-copying the growing
# frame on every iteration
team_series_list = []

for this_team_container in all_teams_container_list:

    # extract team name and url from the first link in the row
    this_team_anchor = this_team_container.find('a')
    this_team_link = this_team_anchor.get('href')
    this_team_name = this_team_anchor.get_text().replace('\n', '').strip()

    # extract values from table
    comp_names = []
    comp_values = []

    for this_component in this_team_container.find_all('td'):

        # the column label is the part after '--' in the cell's second CSS class
        # (assumes every <td> has at least two classes of that shape)
        this_component_name = this_component.get('class')[1].split('--')[1]
        this_component_value = this_component.get_text().replace('\n', '').strip()

        comp_names.append(this_component_name)
        comp_values.append(this_component_value)

    comp_names.append('link')
    comp_values.append(this_team_link)

    this_team_table_values = pd.Series(comp_values, index=comp_names, name=this_team_name)
    team_series_list.append(this_team_table_values)

if team_series_list:
    # one column per team, also when there is only a single team
    all_team_results = pd.concat(team_series_list, axis=1)

In [None]:
# transpose for display so that each team becomes one row
all_team_results.T

## Get fairness table / list of teams

In [None]:
# find fairness navigation link
all_navigation_links = driver.find_elements_by_class_name('tab-navigation__link')

fairness_link = None

for this_navigation_link in all_navigation_links:

    if this_navigation_link.get_attribute('title') == 'Fairness':
        fairness_link = this_navigation_link

# fail with a clear message instead of clicking an unrelated tab
if fairness_link is None:
    raise RuntimeError("No navigation tab with title 'Fairness' found on the page")

# click the matched tab (not the loop variable, which is just the last tab iterated)
fairness_link.click()

In [None]:
# re-parse the page source after the tab click so the soup reflects the current DOM
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [None]:
# every <td> with the collapsed-entry inner-table class; the loop below treats each one as one team
all_teams_container_list = soup.find_all('td', {'class': 'bfv-table-collapsed-entry__inner-table'})

In [None]:
# labels for the fairness values of one team; they are matched by position to
# the values scraped from the page, so the order here matters
list_of_metrics = [
    'matches',
    'yellowcards',
    'yellowredcards',
    'redcards',
    'timepenalties',
    'unsportsmanlike',
    'gamefailures',
    'gamecrashes',
    'fscore',
    'quote',
]

In [None]:
# Build one pd.Series of fairness values per team and combine them in a single
# concat. Teams end up as columns (the result is transposed for display
# afterwards); the result stays None when no team containers were found.
all_team_metrics = None

# collected per-team Series; concatenating once avoids re-copying the growing
# frame on every iteration
team_metric_rows = []

for this_team_container in all_teams_container_list:

    # extract team name
    this_team_name_raw = this_team_container.find('div', {'class': 'bfv-table-collapsed-entry__team-name'}).get_text()
    this_team_name = this_team_name_raw.replace('\n', '').strip()

    # extract team metrics
    this_team_metrics_list = this_team_container.find_all('div', {'class': 'bfv-table-collapsed-entry__data-value'})
    # NOTE(review): float() raises on values such as '1,5' - confirm the page uses '.' as decimal separator
    metrics = [float(this_metrics.get_text().replace('\n', '').strip()) for this_metrics in this_team_metrics_list]

    # pd.Series raises if the number of values differs from len(list_of_metrics)
    this_team_row = pd.Series(metrics, name=this_team_name, index=list_of_metrics)
    team_metric_rows.append(this_team_row)

if team_metric_rows:
    # one column per team, also when there is only a single team
    all_team_metrics = pd.concat(team_metric_rows, axis=1)

In [None]:
# transpose for display so that each team becomes one row
all_team_metrics.T

## Get match day matches

In [None]:
# championship and match day to open
this_championship_id = '023T295LHO000024VS54898DVST4NHAM-G'
this_match_day = '4'

# the match day goes into the URL fragment after '#spieltag='
this_match_day_url = ''.join([
    'https://www.bfv.de/wettbewerbe/meisterschaften/',
    this_championship_id,
    '#spieltag=',
    str(this_match_day),
])

In [None]:
# chrome session
# Quit the browser left open by an earlier section (if there is one); otherwise
# each section leaves an orphaned Chrome/chromedriver process behind.
try:
    driver.quit()
except NameError:
    # no earlier session exists in this kernel
    pass

# start a fresh Chrome through the chromedriver binary in the working directory
driver = webdriver.Chrome(executable_path='./chromedriver')
driver.get(this_match_day_url)
# element look-ups made after this call wait up to 10 seconds for the element to appear
driver.implicitly_wait(10)

In [None]:
# accept cookies
# locate the consent banner's accept button by its element id and click it
python_button = driver.find_element_by_id('uc-btn-accept-banner')
python_button.click()

In [None]:
# parse the HTML currently rendered in the browser
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [None]:
# all <div> elements with the match-result body class
all_match_info_list = soup.body.find_all('div', {'class': 'bfv-matchdata-result__body'})
# keep the first one for inspection (raises IndexError if none was found)
this_match_info = all_match_info_list[0]

In [None]:
# all <a> elements with the match-link class
all_game_links = soup.body.find_all('a', {'class': 'bfv-spieltag-eintrag__match-link'})

In [None]:
# display the href of every match link found above
[this_game_link.get('href') for this_game_link in all_game_links]

In [None]:
# display the markup of the first match-result container
this_match_info

## Single match

In [None]:
# page of the single match to open in this section
this_match_url = 'https://www.bfv.de/spiele/027IC4HE40000000VS5489B3VVQN7UE9'

In [None]:
# chrome session
# Quit the browser left open by an earlier section (if there is one); otherwise
# each section leaves an orphaned Chrome/chromedriver process behind.
try:
    driver.quit()
except NameError:
    # no earlier session exists in this kernel
    pass

# start a fresh Chrome through the chromedriver binary in the working directory
driver = webdriver.Chrome(executable_path='./chromedriver')
driver.get(this_match_url)
# element look-ups made after this call wait up to 10 seconds for the element to appear
driver.implicitly_wait(10)

In [None]:
# accept cookies
# locate the consent banner's accept button by its element id and click it
python_button = driver.find_element_by_id('uc-btn-accept-banner')
python_button.click()

In [None]:
# find from class tab-navigation__link the one with title "Aufstellung" and click it
all_tabs = driver.find_elements_by_class_name('tab-navigation__link')

# select the tab by its title rather than by a fixed position
aufstellung_tab = next(
    (this_tab for this_tab in all_tabs if this_tab.get_attribute('title') == 'Aufstellung'),
    None,
)

# fall back to the third tab (the previously hard-coded position) if no title matches
if aufstellung_tab is None:
    aufstellung_tab = all_tabs[2]

aufstellung_tab.click()

In [None]:
# get all players

In [None]:
# get second team

In [None]:
# re-parse the page source after the tab click so the soup reflects the current DOM
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [None]:
# title attribute of the <img> nested in the first team link (first <span>, then its <img>)
soup.find("a", {'class': 'bfv-composition-entry__team-link'}).span.img.get('title')

In [None]:
# look up the first element with this class through Selenium (raises if none is present)
driver.find_element_by_class_name('bfv-composition-entry')

In [None]:
# spans holding the text of the tab-switch tabs; the next cell reads team names from them
teams_span_list = soup.find_all("span", {'class': 'bfv-tab-switch__tab-text'})
teams_span_list

In [None]:
# text content of each tab span (not stripped of surrounding whitespace)
team_names = [this_team_span.get_text() for this_team_span in teams_span_list]
team_names

In [None]:
# lineup panels; the loop below indexes positions 0-5 of this list
team_contents = soup.find_all('div', {'class': 'bfv-composition__team js-bfv-tab-switch__content'})

In [None]:
# Collect every person listed in the six lineup panels into one DataFrame with
# the columns player_name, player_url, type, team and team_id.
all_people = None

# one DataFrame per panel; combined in a single concat after the loop
people_frames = []

# reset the team info so that a missing team header yields None instead of a
# NameError or a stale value left over from an earlier run of this cell
home_team_name = home_team_id = None
away_team_name = away_team_id = None

# team_contents is indexed 0-5: even positions are treated as the home team,
# odd positions as the away team; the pairs (0, 1), (2, 3), (4, 5) are
# labelled starting, bench and trainer respectively
for this_iter in [0, 1, 2, 3, 4, 5]:

    all_player_names = []
    all_player_urls = []

    if this_iter in [0, 1]:
        this_type = "starting"
    elif this_iter in [2, 3]:
        this_type = 'bench'
    else:
        this_type = 'trainer'

    is_home_team = this_iter % 2 == 0

    # raises IndexError if the page delivered fewer than six panels
    this_team_contents = team_contents[this_iter]

    for this_team_entry in this_team_contents.find_all('a'):

        team_info_raw = this_team_entry.find_all('span', {'class': 'bfv-composition-entry__team-name'})

        if len(team_info_raw) > 0:
            # this link is the team header, not a person; team name and id are
            # only read from the first two panels
            if this_iter == 0:
                home_team_name = team_info_raw[0].get_text()
                home_team_id = team_info_raw[0].get('id')
            elif this_iter == 1:
                away_team_name = team_info_raw[0].get_text()
                away_team_id = team_info_raw[0].get('id')

        else:
            this_player_name = this_team_entry.get_text().replace('\n', '').strip()
            this_player_url = this_team_entry.get('href')

            all_player_names.append(this_player_name)
            all_player_urls.append(this_player_url)

    these_people = pd.concat([pd.Series(all_player_names, name='player_name'), pd.Series(all_player_urls, name='player_url')], axis=1)
    these_people['type'] = this_type

    if is_home_team:
        these_people['team'] = home_team_name
        these_people['team_id'] = home_team_id
    else:
        these_people['team'] = away_team_name
        these_people['team_id'] = away_team_id

    people_frames.append(these_people)

# stack all panels; each panel keeps its own 0-based row labels
all_people = pd.concat(people_frames, axis=0)


In [None]:
# display the combined table of people
all_people

In [None]:
# shut down the browser session held in `driver`
driver.quit()

Requires Selenium (the `selenium` Python package) and a ChromeDriver binary at `./chromedriver`.

## Look-up names

In [None]:
# example URL of a name look-up page (first name 'Predrag'); nothing is fetched here
'https://www.vorname.com/name,Predrag.html'

## DEV