In [1]:
from datasets import load_dataset
import polars as pl
from IPython.display import display, Markdown, Latex, clear_output
from collections import defaultdict
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from rich.syntax import Syntax
from rich.console import Console
from enum import Enum
from functools import partial
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sys
sys.path.append('..')
from evaluation.code_execution import evaluate_sample_codecontests, evaluate_python_code_exec

In [3]:
# Training split of the CodeContests dataset, requested directly by split name.
cc_dataset = load_dataset("deepmind/code_contests", split="train")

In [4]:
# Show the dataset summary (feature names and row count).
cc_dataset

Dataset({
    features: ['name', 'description', 'public_tests', 'private_tests', 'generated_tests', 'source', 'difficulty', 'solutions', 'incorrect_solutions', 'cf_contest_id', 'cf_index', 'cf_points', 'cf_rating', 'cf_tags', 'is_description_translated', 'untranslated_description', 'time_limit', 'memory_limit_bytes', 'input_file', 'output_file'],
    num_rows: 13328
})

In [21]:
# console = Console()  # rich console rendering is currently switched off

# Column-wise accumulator: field name -> list of that field's values, with one
# entry appended per dataset sample by the main loop further down in this cell.
# NOTE(review): despite the name, nothing in this cell caps how many samples are
# kept, so the lists grow for as long as the loop runs.
first_n_samples = {}


class Language(Enum):
    """Programming language of a solution.

    Members are looked up by value from the integer codes found in
    ``sample["solutions"]["language"]``, so the numbers must stay in sync with
    the dataset's own encoding.
    """
    UNKNOWN = 0
    # NOTE(review): presumably Python 2, given the separate PYTHON3 member —
    # confirm against the dataset card.
    PYTHON = 1
    CPP = 2
    # The only language whose solutions are executed by the loop in this cell.
    PYTHON3 = 3
    JAVA = 4

class ProblemSource(Enum):
    """Origin (contest site) of a problem.

    Members are looked up by value from the integer code found in
    ``sample["source"]``, so the numbers must stay in sync with the dataset's
    own encoding.
    """
    UNKNOWN = 0
    CODECHEF = 1
    CODEFORCES = 2
    HACKEREARTH = 3
    CODEJAM = 4
    ATCODER = 5
    AIZU = 6

def print_code_snippet(
    snippet: str,
    console: Console,
    language: str = "python",
    theme: str = "monokai",
):
    """Print a source snippet with syntax highlighting and line numbers.

    Args:
        snippet: Source code to render.
        console: Rich console the highlighted snippet is printed to.
        language: Lexer name handed to ``rich.syntax.Syntax``. Defaults to
            ``"python"`` (the previously hard-coded value); pass e.g. ``"cpp"``
            or ``"java"`` to highlight solutions in other languages.
        theme: Highlighting theme name. Defaults to ``"monokai"`` (the
            previously hard-coded value).

    Returns:
        None. The snippet is written to ``console`` as a side effect.
    """
    console.print(Syntax(snippet, language, theme=theme, line_numbers=True))


# Tally of solutions per Language over every problem visited so far.
sample_lang_counts = defaultdict(int)

# One record per (problem, solution, test case) execution, so the runs are no
# longer thrown away. The expected output is stored next to the raw harness
# result because the return type of evaluate_python_code_exec is not visible
# from this notebook.
# NOTE(review): derive pass/fail from "expected" vs "actual" once that contract
# is confirmed.
execution_results = []

for i, sample in enumerate(tqdm(cc_dataset)):
    # Keep a column-wise copy of every field of every visited sample.
    for k, v in sample.items():  # type: ignore
        first_n_samples.setdefault(k, []).append(v)

    # Public and private tests are evaluated together, public ones first.
    test_inputs = sample["public_tests"]["input"] + sample["private_tests"]["input"]
    test_outputs = sample["public_tests"]["output"] + sample["private_tests"]["output"]
    # Enum lookup by value: raises ValueError on a source code the enum lacks.
    problem_source = ProblemSource(sample["source"])

    for j, (lang_txt, solution_code) in enumerate(
        zip(sample["solutions"]["language"], sample["solutions"]["solution"])
    ):
        language = Language(lang_txt)
        # Every language is counted, but only Python 3 solutions are executed.
        sample_lang_counts[language] += 1
        if language != Language.PYTHON3:
            continue

        # NOTE(review): presumably rewrites sys.exit so the execution harness
        # can cope with early termination — confirm against the harness.
        solution_code = solution_code.replace("sys.exit", "exit")

        # Replace the previous solution's rendering instead of stacking output.
        clear_output(wait=True)
        display(f"problem {i}, solution {j}")
        display(Markdown(f"```python\n{solution_code}\n```"))

        for t, (inputs, outputs) in enumerate(zip(test_inputs, test_outputs)):
            # NOTE(review): the harness presumably feeds `inputs` to the
            # solution's input() calls one value at a time — confirm.
            out = evaluate_python_code_exec(solution_code, inputs)
            execution_results.append(
                {
                    "problem": i,
                    "source": problem_source,
                    "solution": j,
                    "test": t,
                    "expected": outputs,
                    "actual": out,
                }
            )


'problem 23, solution 340'

```python
n,k = map(int, input().split())
oo = list()
oa = list()
ob = list()
for i in range(n):
	t,a,b = map(int, input().split())
	if a == 1 and b == 1:
		oo.append(t)
	elif a == 0 and b == 1:
		ob.append(t)
	elif a == 1 and b == 0:
		oa.append(t)

oo = sorted(oo)
oa = sorted(oa)
ob = sorted(ob)

oo_p = 0
oa_p = 0
ob_p = 0

ca = 0
cb = 0
ans = 0
MAX = 23942034809238409823048
if max(0, max(k-len(oa), k-len(ob))) > len(oo):
	print("-1")
	exit(0)

def get_first_elem_from_list(l, pos):
	if pos < len(l):
		return l[pos]
	else:
		return MAX

def remove_first_elem_from_list(l, pos):
	if len(l)>pos:
		pos += 1
	return pos
while ca < k or cb < k:
	oo_f = get_first_elem_from_list(oo, oo_p)
	oa_f = get_first_elem_from_list(oa, oa_p)
	ob_f = get_first_elem_from_list(ob, ob_p)
	if ca < k and cb < k:
		if oo_f <= oa_f + ob_f:
			if oo_f == MAX:
				print("-1")
				exit(0)
			else:
				ca += 1
				cb += 1
				ans+=oo_f
				oo_p = remove_first_elem_from_list(oo, oo_p)
		elif oa_f + ob_f < oo_f:
			if oa_f + ob_f >= MAX:
				print("-1")
				exit(0)
			else:
				ca += 1
				cb += 1
				ans+=oa_f+ob_f
				oa_p = remove_first_elem_from_list(oa, oa_p)
				ob_p = remove_first_elem_from_list(ob, ob_p)
	elif ca < k:
		if oo_f <= oa_f:
			if oo_f == MAX:
				print("-1")
				exit(0)
			else:
				ca += 1
				ans+=oo_f
				oo_p = remove_first_elem_from_list(oo, oo_p)
		elif oa_f < oo_f:
			if oa_f >= MAX:
				print("-1")
				exit(0)
			else:
				ca += 1
				ans+=oa_f
				oa_p = remove_first_elem_from_list(oa, oa_p)
	else:
		if oo_f <= ob_f:
			if oo_f == MAX:
				print("-1")
				exit(0)
			else:
				cb += 1
				ans+=oo_f
				oo_p = remove_first_elem_from_list(oo, oo_p)
		elif ob_f < oo_f:
			if ob_f >= MAX:
				print("-1")
				exit(0)
			else:
				cb += 1
				ans+=ob_f
				ob_p = remove_first_elem_from_list(ob, ob_p)
print(ans)


```

  0%|          | 23/13328 [00:18<2:56:50,  1.25it/s]


KeyboardInterrupt: 

In [None]:
# Per-language solution tally built by the loop above; it only covers the
# problems visited before the loop stopped (the recorded run was interrupted).
sample_lang_counts