In [1]:
%matplotlib inline
import os
import requests
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from itertools import permutations
import numpy as np
from ipywidgets import interact
import ipywidgets as widgets
sns.set()

In [2]:
KT = "https://www.kicktipp.de"

In [3]:
class Scraper:
    def __init__(self):
        form_data = {'kennung': os.environ['EMAIL'], 'passwort': os.environ['PASSWORD']} 
        login_post_url = f"{KT}/info/profil/loginaction"
        self.session = requests.Session()
        self.session.post(login_post_url, data=form_data)
        
    def fetch_url(self, url):
        response = self.session.get(url)
        html = response.text
        self.soup = BeautifulSoup(html, "html5lib")

    def get_predictions(self):
        scraper.fetch_url(f"{KT}/twitter-kicktipp/tippuebersicht?&spieltagIndex=1")
        pairings = self._get_pairings()
        predictions = {p: {'result': self._get_match_result(i), 'predictions': self._get_match_predictions(i)} for i,p in enumerate(pairings)}
        if len(self.soup.select('.blaettern .down')):
            offset = 40
            scraper.fetch_url(f"{KT}/twitter-kicktipp/tippuebersicht?&spieltagIndex=1&offset={offset}")
            for i,p in enumerate(pairings):
                predictions[p]['predictions'] = pd.concat([predictions[p]['predictions'], self._get_match_result(i)])
            while len(self.soup.select('.blaettern .down')) == 2:
                print(f'getting offset {offset}')
                offset += 20
                scraper.fetch_url(f"{KT}/twitter-kicktipp/tippuebersicht?&spieltagIndex=1&offset={offset}")
                for i,p in enumerate(pairings):
                    predictions[p]['predictions'] = pd.concat([predictions[p]['predictions'], self._get_match_result(i)])
        return predictions
                
    def _get_pairings(self):
        home = self._get_text(self.soup.select('.headerEreignis.heim .ereignis'))
        away = self._get_text(self.soup.select('.headerEreignis.gast .ereignis'))
        return (f'{h}-{a}' for h,a in zip(home, away))

    def _get_match_result(self, match_index):
        home = list(self._get_text(self.soup.select(f'th.ereignis{match_index} .kicktipp-heim')))[0]
        away = list(self._get_text(self.soup.select(f'th.ereignis{match_index} .kicktipp-gast')))[0]
        return (int(home), int(away))
    
    def _get_match_predictions(self, match_index):
        preds = self._get_text(self.soup.select(f'td.nw.ereignis.ereignis{match_index}'))
        pred_pairs = list(map(lambda p: tuple(map(lambda x: int(x), p.split(':') if p else ())), preds))
        if len(self.soup.select('.blaettern .up')):
            pred_pairs = pred_pairs[20:]
        filtered = (p for p in pred_pairs if p)
        return pd.DataFrame(filtered, columns=["HOME", "AWAY"])
    
    @staticmethod
    def _get_text(soups):
        return map(lambda x: x.contents[0] if x.contents else '', soups)

In [4]:
scraper = Scraper()

In [5]:
predictions = scraper.get_predictions()

getting offset 40
getting offset 60
getting offset 80
getting offset 100
getting offset 120
getting offset 140
getting offset 160
getting offset 180


In [6]:
predictions

{'RUS-SAR': {'result': (5, 0), 'predictions':     HOME  AWAY
  0      1     0
  1      1     0
  2      2     1
  3      2     1
  4      1     1
  5      1     1
  6      3     1
  7      2     0
  8      1     0
  9      2     1
  10     2     1
  11     2     0
  12     2     1
  13     2     1
  14     2     0
  15     1     0
  16     2     1
  17     3     1
  18     2     0
  19     2     1
  20     1     0
  21     2     1
  22     2     0
  23     1     0
  24     2     0
  25     3     0
  26     1     0
  27     2     0
  28     2     1
  29     1     0
  30     2     1
  31     2     1
  32     2     0
  33     1     0
  34     2     0
  35     1     1
  36     2     1
  37     2     1
  38     3     1}, 'EGY-URU': {'result': (0, 1), 'predictions':     HOME  AWAY
  0      0     1
  1      1     2
  2      0     1
  3      0     3
  4      1     3
  5      1     2
  6      1     2
  7      1     3
  8      1     2
  9      0     3
  10     0     2
  11     0     1
  12     0

In [484]:
class Plotter:
    def __init__(self, predictions):
        self.predictions = predictions
        self.pairings = predictions.keys()
        
    def plot_predictions(self, pairing):
        pds = predictions[pairing]['predictions']
        res = predictions[pairing]['result']
        max_home = max(res[0], pds.HOME.max())
        max_away = max(res[1], pds.AWAY.max())

        pds = pds.groupby(['HOME', 'AWAY']).size()
        for i in range(max_home+1):
            for j in range(max_away+1):
                if (i,j) not in pds.index:
                    pds[(i,j)] = 0

        pds = pd.DataFrame({'COUNT' : pds}).reset_index()
        pds = pds.pivot('HOME', 'AWAY', 'COUNT')
        pds.sort_index(level=0, ascending=False, inplace=True)

        f, ax = plt.subplots(figsize=(9, 6))
        sns.heatmap(pds, annot=True, fmt="d", linewidths=.5, ax=ax, cmap="BuPu")
        rec = plt.Rectangle((res[1],max_home - res[0]),1,1, fill=False,
                            edgecolor="crimson", lw=2 )
        ax.add_artist(rec)

In [485]:
plotter = Plotter(predictions)

In [486]:
interact(plotter.plot_predictions, pairing=plotter.pairings);

interactive(children=(Dropdown(description='pairing', options=('RUS-SAR', 'EGY-URU', 'MAR-IRN', 'POR-SPA', 'FR…