In [None]:
import os

from collections import defaultdict
from itertools import product

import pandas as pd
import requests

In [None]:
# Root directory of the course checkout; raises KeyError when the
# BUG_FREE_EUREKA_BASE environment variable is not set.
base = os.environ['BUG_FREE_EUREKA_BASE']

# The roster is stored at <base>/grading/roster.tsv.
grading_dir = os.path.join(base, 'grading')
roster_path = os.path.join(grading_dir, 'roster.tsv')

In [None]:
# Problem IDs grouped by assignment; the same IDs are looked up both as
# GitHub file stems and as Rosalind success badges further down.
assignment_1 = [
    'DNA', 'RNA', 'GC', 'HAMM', 'REVC',
]
assignment_2 = [
    'INI', 'DBPR', 'FIB', 'IPRB',
]
assignment_3 = [
    'FRMT', 'PROT', 'SUBS', 'CONS', 'FIBD', 'GRPH',
    'IEV', 'MPRT', 'MRNA', 'PERM', 'PRTM',
]

# Flat list of every problem ID, in assignment order.
assignment_all = [
    task
    for group in (assignment_1, assignment_2, assignment_3)
    for task in group
]

In [None]:
# Load the tab-separated roster, using its first column as the index.
# Later cells unpack every row as (index, GitHub user, Rosalind URL).
roster = pd.read_csv(roster_path, sep='\t', index_col=0)

# Preview the first rows to confirm the file parsed as expected.
roster.head()

# Checking GitHub

In [None]:
# Ask GitHub how many core API calls are still available to this client
# (the request is sent without explicit credentials).
gh_check = requests.get('https://api.github.com/rate_limit')
# Fail with a clear HTTP error instead of a KeyError on an error payload.
gh_check.raise_for_status()
gh_check_json = gh_check.json()

gh_core_limits = gh_check_json['resources']['core']

# The next cell issues one contents request per roster entry, so refuse to
# start when the remaining quota cannot cover the whole roster.
if gh_core_limits['remaining'] < len(roster.index):
    # 'reset' is an epoch timestamp in seconds; show it as a readable time
    # rather than a raw number that looks like a duration.
    gh_reset_time = pd.to_datetime(gh_core_limits['reset'], unit='s')
    raise Exception('Not enough GitHub API calls left. Wait until {} UTC'.format(gh_reset_time))

In [None]:
# Contents listing of the root of each student's BioDb2016 repository.
gh_fmt = 'https://api.github.com/repos/{}/BioDb2016/contents/'

# Maps roster index value -> list of file names found in the repository root.
gh_result_dict = defaultdict(list)

for name, github_user, rosalind_user in roster.itertuples():
    print('Checking GitHub: {} ({})'.format(name, github_user))

    # Create the entry up front so a student without any files still shows
    # up downstream with zero submissions instead of disappearing.
    student_files = gh_result_dict[name]

    response = requests.get(gh_fmt.format(github_user))

    # A missing (or empty) repository answers 404 with a JSON object; looping
    # over that object used to raise a misleading 'Missing data.' error and
    # abort the run for every remaining student.
    if response.status_code == 404:
        print('  no repository contents found for {}'.format(github_user))
        continue
    # Any other HTTP failure (e.g. rate limiting) is a real error.
    response.raise_for_status()

    entries = response.json()
    if not isinstance(entries, list):
        raise Exception('Missing data.')

    for d in entries:
        if 'type' not in d:
            raise Exception('Missing data.')
        # Only plain files count; directories and other entry types are skipped.
        if 'file' == d['type']:
            student_files.append(d['name'])

In [None]:
# student -> {problem ID -> 1 if a file with that stem was found, else 0}
gh_results = defaultdict(dict)

for student, uploaded in gh_result_dict.items():
    # Compare on the part of each file name before its first dot.
    stems = {fname.partition('.')[0] for fname in uploaded}
    for task in assignment_all:
        gh_results[student][task] = int(task in stems)

# One row per student, one column per problem ID.
gh_result_df = pd.DataFrame.from_dict(gh_results, orient='index')

# Checking Rosalind

In [None]:
# student -> {problem ID -> solved flag}; every roster entry starts at 0 for
# every known problem so unsolved work is recorded explicitly.
rosalind_result_dict = defaultdict(dict)

for student in roster.index:
    for task in assignment_all:
        rosalind_result_dict[student][task] = 0

In [None]:
# Fetch each student's Rosalind page and mark every problem that is shown
# with a success badge.
for name, github, rosalind in roster.itertuples():
    print('Checking Rosalind: {} ({})'.format(name, rosalind))

    res = requests.get(rosalind)

    # Do not parse an error page as if it were a profile: a failed fetch
    # would otherwise be indistinguishable from "solved nothing".
    if not res.ok:
        print('  could not fetch page (HTTP {}); no problems recorded'.format(res.status_code))
        continue

    for line in res.iter_lines():
        line = line.decode('utf-8').strip()
        if 'badge-success' not in line:
            continue
        # Take the text between the first '>' and the next '<' found from
        # index 2 onwards (i.e. skipping a '<' that opens the line).
        # NOTE(review): this assumes the badge element starts the stripped
        # line - confirm against the current page markup.
        success = line[1 + line.find('>'): line.find('<', 2)]
        rosalind_result_dict[name][success] = 1

In [None]:
# Build a students x problems table from the nested result dict.
rosalind_result_df = (
    pd.DataFrame(rosalind_result_dict)
    .T
    .fillna(0)     # sanitize from missing assignments
    .astype(int)
)

# Summary

In [None]:
print('Assignment 1 has {} tasks'.format(len(assignment_1)))

# Per-student count of assignment 1 problems found on each platform.
github_done = gh_result_df[assignment_1].sum(axis='columns')
rosalind_done = rosalind_result_df[assignment_1].sum(axis='columns')

pd.DataFrame({'GitHub': github_done, 'Rosalind': rosalind_done})

In [None]:
print('Assignment 2 has {} tasks'.format(len(assignment_2)))

# Per-student count of assignment 2 problems found on each platform.
github_done = gh_result_df[assignment_2].sum(axis='columns')
rosalind_done = rosalind_result_df[assignment_2].sum(axis='columns')

pd.DataFrame({'GitHub': github_done, 'Rosalind': rosalind_done})

In [None]:
print('Assignment 3 has {} tasks'.format(len(assignment_3)))

# Per-student count of assignment 3 problems found on each platform.
github_done = gh_result_df[assignment_3].sum(axis='columns')
rosalind_done = rosalind_result_df[assignment_3].sum(axis='columns')

pd.DataFrame({'GitHub': github_done, 'Rosalind': rosalind_done})