In [46]:
import json
import pathlib

from IPython import display
import matplotlib.pyplot as plt
import pandas as pd
import yaml

In [4]:
with pathlib.Path('../config.yml').open('r') as in_stream:
    config = yaml.load(in_stream, Loader=yaml.CLoader)

## helpers

In [38]:
class Competition:
    def __init__(self, root_path, base_heading_level=3):
        self.root_path = pathlib.Path(root_path).expanduser()
        self.files = {'description': self.root_path/'raw/description.json',
                      'leaderboard': self.root_path/'raw/publicleaderboarddata.zip',
                     }
    
    def description(self):
        with self.files['description'].open('r') as in_stream:
            return json.load(in_stream)
    
    def leaderboard(self):
        return pd.read_csv(self.files['leaderboard'])
    
    def _show_heading(self, content, rel_level=1):
        prefix = '#' * (self.base_heading_level - 1 + rel_level)
        display.display(display.Markdown(f'{prefix} {content}'))
    
    def _ipython_display_(self):
        description = self.description()
        leaderboard = self.leaderboard()
        self._show_heading(''.format(**description))
        
    
    
class CompetitionFinder:
    def __init__(self, root_path):
        self.root_path = pathlib.Path(root_path).expanduser()
    
    def keys(self):
        candidates = (c.name for c in self.root_path.iterdir())
        yield from (k
                    for k in candidates
                    if all(f.exists() for f in self[k].files.values()))
    
    def __getitem__(self, k):
        return Competition(self.root_path/k)
        

In [39]:
comps = CompetitionFinder(pathlib.Path(config['dat_root'])/'comps')
list(comps.keys())

['mlsp-2013-birds',
 'covid19-local-us-ca-forecasting-week-1',
 'predict-who-is-more-influential-in-a-social-network',
 'halite',
 'google-quest-challenge',
 'kobe-bryant-shot-selection',
 'GiveMeSomeCredit',
 'hpa-single-cell-image-classification',
 'text-normalization-challenge-english-language',
 'how-much-did-it-rain-ii',
 'denoising-dirty-documents',
 'tensorflow2-question-answering',
 'icdar2013-gender-prediction-from-handwriting',
 'springleaf-marketing-response',
 'sberbank-russian-housing-market',
 'imaterialist-challenge-fashion-2018',
 'R',
 'Raising-Money-to-Fund-an-Organizational-Mission',
 'expedia-hotel-recommendations',
 'dont-call-me-turkey',
 'planet-understanding-the-amazon-from-space',
 'yelp-restaurant-photo-classification',
 'second-annual-data-science-bowl',
 'ranzcr-clip-catheter-line-classification',
 'nips-2017-defense-against-adversarial-attack',
 'facebook-recruiting-iv-human-or-bot',
 'emvic',
 'airbnb-recruiting-new-user-bookings',
 'rsna-intracranial-hemo

In [40]:
comp = comps['R']

In [44]:
comp.description()

{'ref': 'R',
 'tags': [],
 'description': 'The aim of this competition is to develop a recommendation engine for R libraries (or packages). (R is opensource statistics software.)',
 'id': 2454,
 'title': 'R Package Recommendation Engine',
 'url': 'https://www.kaggle.com/c/R',
 'deadline': '2011-02-08T09:00:00',
 'category': 'Featured',
 'reward': '$150',
 'organizationName': None,
 'organizationRef': None,
 'kernelCount': 0,
 'teamCount': 57,
 'userHasEntered': False,
 'userRank': None,
 'mergerDeadline': None,
 'newEntrantDeadline': None,
 'enabledDate': '2010-10-10T04:00:28',
 'maxDailySubmissions': 2,
 'maxTeamSize': None,
 'evaluationMetric': 'Area Under Receiver Operating Characteristic Curve',
 'awardsPoints': True,
 'isKernelsSubmissionsOnly': False,
 'submissionsDisabled': True}

In [42]:
comp.leaderboard()

Unnamed: 0,TeamId,TeamName,SubmissionDate,Score
0,1362,One Old Dog,2011-02-08 08:48:41,0.9879
1,2973,Record Me Men,2011-02-08 08:24:03,0.98639
2,1418,Machine,2011-02-07 16:31:39,0.98553
3,3247,libGUNDAM,2011-02-08 08:42:28,0.98492
4,2868,Jeremy Howard,2011-01-21 04:48:50,0.98327
5,1291,ivank,2010-10-20 18:43:40,0.98232
6,1558,JAV,2010-12-20 02:59:44,0.98161
7,1282,UC Irvine,2010-10-23 07:51:55,0.97943
8,1448,teamD,2010-11-17 02:37:31,0.97868
9,2935,n0name,2011-01-22 08:06:12,0.97708


In [37]:
f = comp.files['leaderboard']

import zipfile

with zipfile.ZipFile(f) as archive:
    print(dir(archive))
    print(archive.namelist())




['NameToInfo', '_RealGetContents', '__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_allowZip64', '_comment', '_didModify', '_extract_member', '_filePassed', '_fileRefCnt', '_fpclose', '_lock', '_open_to_write', '_sanitize_windows_name', '_seekable', '_strict_timestamps', '_windows_illegal_name_trans_table', '_write_end_record', '_writecheck', '_writing', 'close', 'comment', 'compression', 'compresslevel', 'debug', 'extract', 'extractall', 'filelist', 'filename', 'fp', 'getinfo', 'infolist', 'mode', 'namelist', 'open', 'printdir', 'pwd', 'read', 'setpassword', 'start_dir', 'testzip', 'write', 'writestr']
['R-publicleaderboard.csv']


## comp

In [7]:
comp_name = 'recursion-cellular-image-classification'

In [12]:
paths = {'dat_root': pathlib.Path(config['dat_root']).expanduser()/'comps'/comp_name}
paths['raw'] = paths['dat_root']/'raw'

## Description

In [13]:
with paths['raw'].joinpath('description.json').open('r') as in_stream:
    description = json.load(in_stream)

In [14]:
description

{'ref': 'recursion-cellular-image-classification',
 'tags': ['research', 'biology', 'image data', 'classification'],
 'description': 'CellSignal: Disentangling biological signal from experimental noise in cellular images',
 'id': 14420,
 'title': 'Recursion Cellular Image Classification',
 'url': 'https://www.kaggle.com/c/recursion-cellular-image-classification',
 'deadline': '2019-09-26T23:59:00',
 'category': 'Research',
 'reward': '$13,000',
 'organizationName': 'Recursion Pharmaceuticals',
 'organizationRef': 'recursionpharma',
 'kernelCount': 0,
 'teamCount': 865,
 'userHasEntered': False,
 'userRank': None,
 'mergerDeadline': '2019-07-22T23:59:00',
 'newEntrantDeadline': '2019-09-19T23:59:00',
 'enabledDate': '2019-06-27T19:46:50',
 'maxDailySubmissions': 5,
 'maxTeamSize': 5,
 'evaluationMetric': 'Categorization Accuracy',
 'awardsPoints': True,
 'isKernelsSubmissionsOnly': False,
 'submissionsDisabled': False}

## leaderboard