## 📈 This notebook shows various plots about a submission's games: score change, win rate change.

It uses [Selenium](https://selenium-python.readthedocs.io/) and [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
to download and parse the submission page (e.g. https://www.kaggle.com/c/lux-ai-2021/leaderboard?dialog=episodes-submission-23032370). The Meta Kaggle dataset is not used because it is extremely slow to load.

In [None]:
%%capture
# %%capture (must stay the first line of the cell) hides the installer output.
# Selenium drives the browser from Python.
!pip install selenium
# Refresh the apt package index before installing the system package below.
!apt-get update 
# Chromium plus its matching chromedriver binary, used by webdriver.Chrome.
!apt install chromium-chromedriver -y

In [None]:
from selenium import webdriver
from bs4 import BeautifulSoup
import time
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# newer releases no longer accept the old names, so use whichever spelling
# this installation provides and fall back to the default style otherwise.
plt.style.use(next(
    (style_name
     for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if style_name in plt.style.available),
    'default'))

In [None]:
def getSoup(sub_id, n_scrolls=100, scroll_pause=0.0):
    """Open a submission's episodes dialog in headless Chrome and parse its HTML.

    Parameters
    ----------
    sub_id : int or str
        Submission id; it is appended to the episodes-dialog URL.
    n_scrolls : int, optional
        Number of times the dialog is scrolled to its bottom (default 100).
    scroll_pause : float, optional
        Seconds to sleep after each scroll. The default of 0 performs the
        scrolls back to back; a small positive value may help if episodes
        appear to be missing (presumably the dialog loads more rows as it is
        scrolled -- not verified here).

    Returns
    -------
    bs4.BeautifulSoup
        The page source parsed with the 'html.parser' backend.
    """
    options = webdriver.ChromeOptions()
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
        options.add_argument(flag)
    browser = webdriver.Chrome(options=options)

    URL = 'https://www.kaggle.com/c/lux-ai-2021/submissions?dialog=episodes-submission-'

    try:
        print('Loading submission page...')
        browser.get(URL + str(sub_id))
        time.sleep(2)  # give the dialog time to render before looking it up

        print('Scrolling results...')
        scrolling_element = browser.find_element(
            webdriver.common.by.By.XPATH,
            "//div[@class='mdc-dialog__surface']")
        for _ in tqdm(range(n_scrolls)):
            browser.execute_script(
                'arguments[0].scrollTop = arguments[0].scrollHeight',
                scrolling_element)
            if scroll_pause > 0:
                time.sleep(scroll_pause)
        time.sleep(1)

        print('Parsing page...')
        html_source = browser.page_source
    finally:
        # Always shut the browser down, even if loading or scrolling failed;
        # otherwise every call leaves a headless Chrome process behind.
        browser.quit()

    soup = BeautifulSoup(html_source, 'html.parser')
    print('Done!')

    return soup

def getStats(soup):
    """Extract the submission's per-game scores, outcomes and score deltas.

    Every ``span`` whose class contains ``sc-`` is inspected; texts that
    contain 'vs' and '[' but not 'ago' are treated as game results of the form
    ``'[Win] Team A 1234 (+5) vs [Loss] Team B 1200 (-5)'``. The team whose
    name occurs most often across all games is taken to be the submission's
    own team, and only sides whose parsed name equals it exactly are kept.
    Sides whose last token is 'Validation' are skipped.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed submission page (anything with a compatible ``select`` works).

    Returns
    -------
    scores : numpy.ndarray
        Scores in reverse page order, followed by one extra value: the first
        listed score plus its delta (presumably the current score after the
        most recent game -- assumes the page lists newest games first).
        One element longer than ``outcomes``.
    outcomes : numpy.ndarray
        1 for 'Win', 0 for 'Loss', 0.5 for anything else (tie), reverse page
        order.
    scores_delta : numpy.ndarray
        Score change of each game, reverse page order.

    Raises
    ------
    ValueError
        If the page contains no game results, or none with a numeric score
        for the submission's team.
    """
    from collections import Counter

    # One (team name, tokens) pair per side of every game found on the page.
    sides = []
    for span in soup.select('span[class*="sc-"]'):
        text = span.get_text()
        if 'vs' in text and '[' in text and 'ago' not in text:
            for part in text.split(' vs '):
                tokens = part.split(' ')
                # tokens: '[Outcome]', name words..., score, '(delta)'
                sides.append((' '.join(tokens[1:-2]), tokens))

    if not sides:
        raise ValueError('No game results were found on the submission page')

    # The submission takes part in every game, so its team is the most common.
    team_name = Counter(name for name, _ in sides).most_common(1)[0][0]

    outcome_values = {'Win': 1, 'Loss': 0}
    scores = []
    outcomes = []
    scores_delta = []
    for name, tokens in sides:
        # Exact comparison: a substring test would also pick up opponents
        # whose name merely contains ours (e.g. 'Bot' inside 'RoboBot').
        if name != team_name:
            continue
        delta = tokens[-1].strip('()+')
        if delta == 'Validation':
            continue
        scores.append(int(tokens[-2]))
        scores_delta.append(int(delta))
        # Anything that is neither a win nor a loss counts as a tie.
        outcomes.append(outcome_values.get(tokens[0].strip('[]'), 0.5))

    if not scores:
        raise ValueError(
            'No scored games were found for team %r on the submission page'
            % team_name)

    # Prepend the score that results from applying the first listed delta,
    # then flip everything into reverse page order.
    scores.insert(0, scores[0] + scores_delta[0])
    scores = np.array(scores[::-1])
    outcomes = np.array(outcomes[::-1])
    scores_delta = np.array(scores_delta[::-1])

    return scores, outcomes, scores_delta

# Setting up
`SUB_ID` is the number at the end of a link of the form https://www.kaggle.com/c/lux-ai-2021/leaderboard?dialog=episodes-submission-23032370. It can also be seen on the submission's page:

![SUB_ID](https://i.imgur.com/vniyMkL.png)

In [None]:
# Id of the submission whose games are analysed.
SUB_ID = 23032370

# Download and parse the page once; every plot below reads these three arrays.
soup = getSoup(SUB_ID)
scores, outcomes, scores_delta = getStats(soup)

## Score growth plot

In [None]:
plt.figure(figsize=(15, 8))
plt.plot(scores, label='scores')
# Horizontal reference lines for the mean and the median score.
plt.hlines(np.mean(scores), 0, len(scores), color='tab:orange', label=f'mean score {np.mean(scores):.2f}')
plt.hlines(np.median(scores), 0, len(scores), color='tab:olive', label=f'median score {np.median(scores):.0f}')

# Highlight the first occurrence of the highest score.
plt.scatter(np.argmax(scores), np.max(scores), color='tab:green', label=f'top score {np.max(scores)}')
# Title and axis labels so the figure can be read on its own, like the
# other plots in this notebook.
plt.title('score growth')
plt.xlabel('match')
plt.ylabel('score')
plt.legend()
plt.show()

## Score changes (delta) plot

In [None]:
plt.figure(figsize=(15, 8))
plt.plot(scores_delta)

# Mark gains and losses with separate colours; zero deltas get no marker.
for sign_mask, marker_colour, legend_name in (
        (scores_delta > 0, 'tab:green', 'Positive'),
        (scores_delta < 0, 'tab:red', 'Negative')):
    plt.scatter(np.argwhere(sign_mask), scores_delta[sign_mask],
                c=marker_colour, label=legend_name)

# Dashed zero line as a visual reference.
plt.hlines(0, 0, len(scores_delta), color='black', linestyles='--')
plt.title('score delta')
plt.legend()
plt.show()

## Win/Loss/Tie plot by match

In [None]:
plt.figure(figsize=(15, 8))
plt.plot(outcomes, c='lightgray', linestyle='--')

# One scatter series per outcome value (1 = win, 0 = loss, 0.5 = tie).
for outcome_value, marker_colour, legend_name in (
        (1, 'tab:green', 'Win'),
        (0, 'tab:red', 'Loss'),
        (0.5, 'tab:blue', 'Tie')):
    outcome_mask = outcomes == outcome_value
    plt.scatter(np.argwhere(outcome_mask), outcomes[outcome_mask],
                c=marker_colour, label=legend_name)

# The mean of the outcomes is the overall win rate (ties count as half).
plt.hlines(np.mean(outcomes), 0, len(outcomes), color='tab:orange', label='win rate')
plt.legend()
plt.title(f'win rate = {np.mean(outcomes):.3f}')
plt.show()

## Win rate change by match

In [None]:
plt.figure(figsize=(15, 8))

# Running win rate after each match: cumulative sum of the outcomes divided by
# the number of matches played so far. A single cumulative sum replaces
# re-summing every prefix, which was quadratic in the number of matches.
match_numbers = np.arange(1, len(outcomes) + 1)
running_win_rate = np.cumsum(outcomes) / match_numbers

plt.plot(match_numbers, running_win_rate, label='win rate')
plt.hlines(np.mean(outcomes), 1, len(outcomes), color='tab:orange', label='current win rate')
plt.title('win rate change')
plt.xlabel('match')
plt.ylabel('win rate')
plt.legend()
plt.show()