In [1]:
%matplotlib inline
import os
import requests
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from itertools import permutations
import numpy as np
from ipywidgets import interact
import ipywidgets as widgets
sns.set()

In [2]:
# Root URL of the kicktipp site; the login and overview URLs below are built from it.
KT = "https://www.kicktipp.de"

In [34]:
class Scraper:
    """Log in to kicktipp and scrape one matchday's tip overview of a group.

    Credentials are read from the ``EMAIL`` and ``PASSWORD`` environment
    variables when the object is created (``KeyError`` if one is missing).

    Attributes:
        session: ``requests.Session`` used for the login POST and every later GET.
        spieltag: Matchday index sent as the ``spieltagIndex`` query parameter.
        group: Name of the tip group (first path segment of the overview URL).
        base_url: Site root all URLs are built from.
        soup: Parsed HTML of the most recently fetched page, ``None`` before
            the first ``fetch_url`` call.
    """

    # Paging parameters of the overview table. The numbers are the ones the
    # scraper has always used; presumably every follow-up page repeats 20 rows
    # of the page before it -- TODO confirm against the live markup.
    _SECOND_PAGE_OFFSET = 40
    _OFFSET_STEP = 20
    _REPEATED_ROWS = 20

    def __init__(self, spieltag=1, group='twitter-kicktipp', base_url=None):
        """Create a session and log in.

        Args:
            spieltag: Matchday index to scrape.
            group: Tip group whose overview is scraped.
            base_url: Site root; defaults to the module-level ``KT``.

        Raises:
            KeyError: ``EMAIL`` or ``PASSWORD`` is not set in the environment.
            requests.HTTPError: The login request returned an error status.
        """
        form_data = {'kennung': os.environ['EMAIL'], 'passwort': os.environ['PASSWORD']}
        self.base_url = KT if base_url is None else base_url
        self.spieltag = spieltag
        self.group = group
        self.soup = None
        self.session = requests.Session()
        response = self.session.post(f"{self.base_url}/info/profil/loginaction", data=form_data)
        response.raise_for_status()

    def fetch_url(self, url):
        """GET ``url`` with the logged-in session and store the parsed page in ``self.soup``.

        Raises:
            requests.HTTPError: The server answered with an error status, so
                an error page is never parsed as if it were data.
        """
        response = self.session.get(url)
        response.raise_for_status()
        self.soup = BeautifulSoup(response.text, "html5lib")

    def get_predictions(self):
        """Collect results and all tips of the matchday, following the pagination.

        Returns:
            dict: ``{'HOME-AWAY': {'result': (home, away), 'predictions': DataFrame}}``
            where the frame has the integer columns ``HOME`` and ``AWAY`` and a
            fresh 0..n-1 index. ``result`` is ``(0, 0)`` when no numeric
            result could be read from the page.
        """
        # Always go through self: relying on a notebook-level ``scraper``
        # variable breaks on a fresh kernel and for any second instance.
        self.fetch_url(self._overview_url())
        pairings = list(self._get_pairings())
        match_indices = range(len(pairings))
        results = [self._get_match_result(i) for i in match_indices]
        chunks = [[self._get_match_predictions(i)] for i in match_indices]

        if self.soup.select('.blaettern .down'):
            offset = self._SECOND_PAGE_OFFSET
            while True:
                self.fetch_url(self._overview_url(offset))
                for i, frames in enumerate(chunks):
                    frames.append(self._get_match_predictions(i))
                # Exactly two "down" links are taken to mean that yet another
                # page follows; any other count ends the paging.
                if len(self.soup.select('.blaettern .down')) != 2:
                    break
                offset += self._OFFSET_STEP

        # One concat per pairing instead of re-concatenating on every page.
        return {
            pairing: {'result': result,
                      'predictions': pd.concat(frames, ignore_index=True)}
            for pairing, result, frames in zip(pairings, results, chunks)
        }

    def _overview_url(self, offset=None):
        """Build the tip-overview URL, optionally for a later page."""
        url = f"{self.base_url}/{self.group}/tippuebersicht?&spieltagIndex={self.spieltag}"
        if offset is not None:
            url += f"&offset={offset}"
        return url

    def _get_pairings(self):
        """Yield one ``'HOME-AWAY'`` label per match column of the current page."""
        home = self._get_text(self.soup.select('.headerEreignis.heim .ereignis'))
        away = self._get_text(self.soup.select('.headerEreignis.gast .ereignis'))
        return (f'{h}-{a}' for h, a in zip(home, away))

    def _get_match_result(self, match_index):
        """Return the ``(home, away)`` result of a match column.

        Falls back to ``(0, 0)`` when a score header is missing or either
        score is not a plain number.
        """
        home = self._first_text(f'th.ereignis{match_index} .kicktipp-heim')
        away = self._first_text(f'th.ereignis{match_index} .kicktipp-gast')
        if self._is_number(home) and self._is_number(away):
            return (int(home), int(away))
        return (0, 0)

    def _get_match_predictions(self, match_index):
        """Return the tips of one match column on the current page as a DataFrame.

        Empty cells and cells that are not of the form ``a:b`` are skipped; a
        non-numeric side of a tip counts as 0.
        """
        texts = list(self._get_text(self.soup.select(f'td.nw.ereignis.ereignis{match_index}')))
        # Pages that have an "up" link start with rows already seen, so drop
        # them by position *before* empty cells are filtered out.
        if self.soup.select('.blaettern .up'):
            texts = texts[self._REPEATED_ROWS:]
        rows = [tip for tip in map(self._parse_tip, texts) if tip is not None]
        # Explicit dtype keeps an empty page from yielding object columns.
        return pd.DataFrame(rows, columns=["HOME", "AWAY"], dtype=int)

    def _first_text(self, selector):
        """Return the text of the first element matching ``selector``, or ``''``."""
        return next(iter(self._get_text(self.soup.select(selector))), '')

    @staticmethod
    def _is_number(text):
        """True if ``text`` is a string consisting only of digits."""
        return isinstance(text, str) and text.isdigit()

    @staticmethod
    def _parse_tip(text):
        """Turn ``'2:1'`` into ``(2, 1)``; return ``None`` for unusable cells."""
        if not isinstance(text, str) or not text:
            return None
        parts = text.split(':')
        if len(parts) != 2:
            return None
        return tuple(int(part) if part.isdigit() else 0 for part in parts)

    @staticmethod
    def _get_text(soups):
        """Map each element to its first child (``''`` for childless elements)."""
        return map(lambda x: x.contents[0] if x.contents else '', soups)

In [40]:
# Scrape matchday 4 of the default group; pass e.g. group='grobefamilie' for another group.
scraper = Scraper(spieltag=4)

In [41]:
# Download every overview page of the matchday (network access) and collect the tips per pairing.
predictions = scraper.get_predictions()

In [37]:
class Plotter:
    """Draw heatmaps of how often each score line was tipped for a pairing.

    Attributes:
        predictions: Mapping ``pairing -> {'result': (home, away),
            'predictions': DataFrame with HOME and AWAY columns}``.
        pairings: List of the available pairing labels (the mapping's keys).
        max_goals: Tips with more goals than this on either side are ignored.
        min_grid: The heatmap always covers at least 0..min_grid goals per side.
    """

    def __init__(self, predictions, max_goals=10, min_grid=8):
        self.predictions = predictions
        self.pairings = list(predictions)
        self.max_goals = max_goals
        self.min_grid = min_grid

    def score_grid(self, pairing):
        """Count the tips of ``pairing`` per score line.

        Returns:
            DataFrame of ints with HOME goals as index (highest first, the
            row order of the heatmap) and AWAY goals as columns. Both axes are
            contiguous from 0 and large enough to hold ``min_grid``, every
            kept tip and the actual result, so no cell is ever missing.
        """
        # Read from the instance; a notebook-level ``predictions`` variable
        # may be stale or absent.
        entry = self.predictions[pairing]
        tips = entry['predictions']
        in_range = (tips['HOME'].between(0, self.max_goals)
                    & tips['AWAY'].between(0, self.max_goals))
        tips = tips.loc[in_range]
        result_home, result_away = entry['result']

        top_home = self._axis_max(tips['HOME'], result_home)
        top_away = self._axis_max(tips['AWAY'], result_away)

        # Dense counting avoids the NaN holes a pivot leaves when a score
        # beyond the pre-filled range occurs (NaN turns the frame into floats,
        # which the integer annotation format cannot render).
        counts = np.zeros((top_home + 1, top_away + 1), dtype=int)
        np.add.at(counts,
                  (tips['HOME'].to_numpy(dtype=int), tips['AWAY'].to_numpy(dtype=int)),
                  1)
        grid = pd.DataFrame(counts,
                            index=pd.Index(range(top_home + 1), name='HOME'),
                            columns=pd.Index(range(top_away + 1), name='AWAY'))
        return grid.iloc[::-1]

    def plot_predictions(self, pairing):
        """Plot the tip heatmap of ``pairing`` and frame the actual result in red."""
        grid = self.score_grid(pairing)
        result_home, result_away = self.predictions[pairing]['result']

        fig, ax = plt.subplots(figsize=(9, 6))
        sns.heatmap(grid, annot=True, fmt="d", linewidths=.5, ax=ax, cmap="BuPu", cbar=False)
        # Rows run from the highest HOME value down to 0, so the row position
        # of the result is measured from the grid's real top, not a constant.
        top_home = grid.index.max()
        marker = plt.Rectangle((result_away, top_home - result_home), 1, 1, fill=False,
                               edgecolor="crimson", lw=2)
        ax.add_patch(marker)
        ax.set_title(pairing)

    def _axis_max(self, goals, actual):
        """Largest goal count one axis must show (``goals`` is already filtered)."""
        tipped = int(goals.max()) if len(goals) else 0
        return max(self.min_grid, tipped, int(actual))

In [42]:
# Hand the scraped predictions to the plotting helper.
plotter = Plotter(predictions)

In [43]:
# Dropdown over the pairings; the selected pairing is passed to plot_predictions.
# The trailing ';' suppresses the repr of interact's return value.
interact(plotter.plot_predictions, pairing=plotter.pairings);

interactive(children=(Dropdown(description='pairing', options=('DEN-AUS', 'FRA-PER', 'ARG-KRO', 'BRA-CRC', 'NI…