In [2]:
from datasets import load_dataset

# Hub identifier of the dataset used throughout this notebook.
DATASET_ID = "deepmind/code_contests"

# Fetches the dataset (downloading it on first use, as the progress bars in
# this cell's output show) and returns a mapping of split name -> split.
dataset = load_dataset(DATASET_ID)

  from .autonotebook import tqdm as notebook_tqdm
Downloading data: 100%|██████████| 39/39 [07:49<00:00, 12.04s/files]
Downloading data: 100%|██████████| 63.1M/63.1M [00:05<00:00, 10.9MB/s]
Downloading data: 100%|██████████| 51.8M/51.8M [00:04<00:00, 10.4MB/s]
Generating train split: 100%|██████████| 13328/13328 [00:25<00:00, 515.05 examples/s]
Generating test split: 100%|██████████| 165/165 [00:00<00:00, 726.17 examples/s]
Generating valid split: 100%|██████████| 117/117 [00:00<00:00, 514.06 examples/s]


In [3]:
import numpy as np

# Work on the 'test' split only; the rest of the notebook reads `some_data`.
some_data = dataset['test']

# Show the full split overview first, then the selected split, one per line.
print(dataset, some_data, sep='\n')

DatasetDict({
    train: Dataset({
        features: ['name', 'description', 'public_tests', 'private_tests', 'generated_tests', 'source', 'difficulty', 'solutions', 'incorrect_solutions', 'cf_contest_id', 'cf_index', 'cf_points', 'cf_rating', 'cf_tags', 'is_description_translated', 'untranslated_description', 'time_limit', 'memory_limit_bytes', 'input_file', 'output_file'],
        num_rows: 13328
    })
    test: Dataset({
        features: ['name', 'description', 'public_tests', 'private_tests', 'generated_tests', 'source', 'difficulty', 'solutions', 'incorrect_solutions', 'cf_contest_id', 'cf_index', 'cf_points', 'cf_rating', 'cf_tags', 'is_description_translated', 'untranslated_description', 'time_limit', 'memory_limit_bytes', 'input_file', 'output_file'],
        num_rows: 165
    })
    valid: Dataset({
        features: ['name', 'description', 'public_tests', 'private_tests', 'generated_tests', 'source', 'difficulty', 'solutions', 'incorrect_solutions', 'cf_contest_id', 'cf_ind

In [9]:
import datetime
import re

class TestCase:
  """One input / expected-output pair attached to a problem.

  Attributes:
    problem_id: Identifier of the problem this case belongs to.
    is_public: Flag stored as given by the caller (public vs. private case).
    input: Text fed to a solution.
    output: Text the solution is expected to produce.
  """
  problem_id: int
  is_public: bool
  input: str
  output: str

  def __init__(self, problem_id, input, output, is_public):
    """Store the four fields unchanged; no validation is performed."""
    self.problem_id, self.input = problem_id, input
    self.output, self.is_public = output, is_public

  def to_dict(self):
    """Return the fields as a plain dict (keys in serialisation order)."""
    field_order = ('problem_id', 'input', 'output', 'is_public')
    return {field: getattr(self, field) for field in field_order}

class Problem:
  """A problem record flattened from one dataset row.

  The constructor reads the keys 'name', 'description', 'public_tests',
  'private_tests', 'difficulty' and 'source' from `problem_obj`, plus the
  optional key 'cf_tags' (looked up with `.get`, defaulting to an empty list).

  Attributes:
    problem_id: Identifier supplied by the caller.
    title: `name` with its leading id prefix removed (see `clean_title`).
    description: Problem statement, stored unchanged.
    created_by: Class-level default of 0; never set per instance here.
    test_cases: Public test cases followed by private ones.
    difficulty: Bucket from `classify_difficulty` (0, 1, 2, or -1 if unknown).
    cf_tags: Tags taken from the record, or [] when absent.
    source: `source` value, stored unchanged.
  """
  problem_id: int
  title: str
  description: str
  created_by: int = 0
  # Quoted so the annotation is not evaluated when the class body runs.
  test_cases: "list[TestCase]"
  difficulty: int
  cf_tags: list[str]
  source: str

  # Letter grade -> difficulty bucket (0 = easy, 1 = medium, 2 = hard), using
  # the same integer codes as the numeric branch of `classify_difficulty`.
  # NOTE(review): the letter cut-offs and the numeric cut-offs were written
  # independently; confirm both agree with the dataset's difficulty encoding.
  _LETTER_BUCKETS = {
      **dict.fromkeys('abcde', 0),
      **dict.fromkeys('fgh', 1),
      **dict.fromkeys('ijkl', 2),
  }
  _UNKNOWN_DIFFICULTY = -1

  def __init__(self, problem_obj, problem_id):
    """Build a problem from a dataset record and an externally chosen id."""
    # problem_id must be set first: parse_tests stamps it on every TestCase.
    self.problem_id = problem_id
    self.title = self.clean_title(problem_obj['name'])
    self.description = problem_obj['description']
    public_tests = self.parse_tests(problem_obj['public_tests'], is_public=True)
    private_tests = self.parse_tests(problem_obj['private_tests'])
    self.difficulty = self.classify_difficulty(problem_obj['difficulty'])
    self.test_cases = public_tests + private_tests
    self.cf_tags = problem_obj.get('cf_tags', [])
    self.source = problem_obj['source']

  def classify_difficulty(self, difficulty_txt) -> int:
    """Map a raw difficulty value to a bucket: 0, 1, 2, or -1 when unknown.

    Values convertible with `int()` are bucketed by range (0-14 -> 0,
    15-18 -> 1, 19-20 -> 2, anything else -> -1). Strings that are not
    integers are treated as a single letter grade, case-insensitively and
    ignoring surrounding whitespace ('a'-'e' -> 0, 'f'-'h' -> 1,
    'i'-'l' -> 2). Everything else, including None, yields -1.
    """
    try:
      difficulty = int(difficulty_txt)
    except (TypeError, ValueError):
      # Only text can be a letter grade; None and other objects are unknown.
      if not isinstance(difficulty_txt, str):
        return self._UNKNOWN_DIFFICULTY
      letter = difficulty_txt.strip().lower()
      return self._LETTER_BUCKETS.get(letter, self._UNKNOWN_DIFFICULTY)

    if 0 <= difficulty <= 14:
      return 0
    if 15 <= difficulty <= 18:
      return 1
    if 19 <= difficulty <= 20:
      return 2
    return self._UNKNOWN_DIFFICULTY

  def clean_title(self, title: str) -> str:
    """Strip a leading '<digits>_<LETTER>[digits]. ' prefix and whitespace.

    For example '1575_A. Another Sorting Problem' becomes
    'Another Sorting Problem'; a multi-part index such as '1178_F1. ' is
    removed as well. Titles without such a prefix are only stripped.
    """
    return re.sub(r'^\d+_[A-Z]\d*\.\s*', '', title).strip()

  def parse_tests(self, tests, is_public = False) -> "list[TestCase]":
    """Pair up tests['input'] and tests['output'] into TestCase objects.

    Pairs are formed positionally with `zip`, so any surplus entries in the
    longer of the two sequences are ignored.
    """
    return [
        TestCase(self.problem_id, test_input, test_output, is_public)
        for test_input, test_output in zip(tests['input'], tests['output'])
    ]

  def get_tests(self):
    """Return the list of test cases (public first, then private)."""
    # test_cases is a list attribute, not a method, so it must not be called.
    return self.test_cases

  def to_dict(self):
    """Return the serialisable problem fields; test cases are not included."""
    return {
        'problem_id': self.problem_id,
        'title': self.title,
        'description': self.description,
        'difficulty': self.difficulty,
        'cf_tags': self.cf_tags,
        'source': self.source,
    }


In [10]:
import json

# Flatten every record of the selected split into two parallel tables:
# one row per problem, and one row per test case (linked by problem_id).
problems = []
test_cases = []
for problem_number, raw_problem in enumerate(some_data, start=1):
    # Problem ids are 1-based positions within the split.
    problem = Problem(raw_problem, problem_number)
    problems.append(problem.to_dict())
    test_cases.extend(case.to_dict() for case in problem.test_cases)

problems_json = json.dumps(problems, indent=4)
test_cases_json = json.dumps(test_cases, indent=4)

# Echo both documents in full before persisting them.
print("Problems JSON:\n", problems_json)
print("\nTest Cases JSON:\n", test_cases_json)

# Write each document to its own file in the current working directory.
for out_path, payload in (
    ('problems.json', problems_json),
    ('test_cases.json', test_cases_json),
):
    with open(out_path, 'w') as out_file:
        out_file.write(payload)


Problems JSON:
 [
    {
        "problem_id": 1,
        "title": "Another Sorting Problem",
        "description": "Andi and Budi were given an assignment to tidy up their bookshelf of n books. Each book is represented by the book title \u2014 a string s_i numbered from 1 to n, each with length m. Andi really wants to sort the book lexicographically ascending, while Budi wants to sort it lexicographically descending.\n\nSettling their fight, they decided to combine their idea and sort it asc-desc-endingly, where the odd-indexed characters will be compared ascendingly, and the even-indexed characters will be compared descendingly.\n\nA string a occurs before a string b in asc-desc-ending order if and only if in the first position where a and b differ, the following holds:\n\n  * if it is an odd position, the string a has a letter that appears earlier in the alphabet than the corresponding letter in b; \n  * if it is an even position, the string a has a letter that appears later in the 


Test Cases JSON:
 [
    {
        "problem_id": 1,
        "input": "5 2\nAA\nAB\nBB\nBA\nAZ\n",
        "output": "5 2 1 3 4 \n",
        "is_public": true
    },
    {
        "problem_id": 2,
        "input": "8 4\n-3 1\n-4 4\n1 5\n2 2\n2 -2\n-2 -4\n-1 -1\n-6 0\n",
        "output": "3.162277660\n",
        "is_public": true
    },
    {
        "problem_id": 2,
        "input": "1 1\n0 0\n",
        "output": "0.000000000\n",
        "is_public": true
    },
    {
        "problem_id": 2,
        "input": "2 1\n864 22752\n864 -22752\n",
        "output": "11384.199576606\n",
        "is_public": false
    },
    {
        "problem_id": 2,
        "input": "4 2\n-100000 100000\n100000 100000\n-100000 -100000\n100000 -100000\n",
        "output": "100000.000000000\n",
        "is_public": false
    },
    {
        "problem_id": 2,
        "input": "10 2\n92740 -45467\n-15266 24717\n29183 -23121\n20557 71595\n8980 69456\n-12480 -40491\n9766 18573\n78361 -88657\n43043 68859\n-23309 -

In [80]:
import bcrypt

def hash_password(password):
    """Hash `password` with bcrypt using a freshly generated salt.

    Args:
        password: Plain-text password as a str; it is UTF-8 encoded first.

    Returns:
        The bcrypt hash decoded to a str. A new salt is drawn on every call,
        so hashing the same password twice gives different strings.
    """
    password_bytes = password.encode('utf-8')
    digest = bcrypt.hashpw(password_bytes, bcrypt.gensalt())
    return digest.decode('utf-8')

# Seed account written to users.json.
# NOTE(review): the credentials below are hardcoded and the password is the
# trivially guessable 'admin' — confirm this file is only used for local
# seeding and is never deployed as-is.
username, email, password = 'admin', 'hello@world.com', 'admin'
hashed_password = hash_password(password)

# Only the bcrypt hash is persisted, never the plain-text password.
user_data = dict(
  user_id=1,
  username=username,
  email=email,
  password=hashed_password,
)
users_json = json.dumps(user_data, indent=4)
with open('users.json', 'w') as users_file:
  users_file.write(users_json)
