In [680]:
from __future__ import annotations
import catboost
import collections
import dataclasses
import functools
import io
import itertools
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import re
import requests
import scipy
import sklearn.cluster
import sklearn.datasets
import sklearn.decomposition
import sklearn.linear_model
import statsmodels.stats.proportion
import subprocess
import string
import sys
import tokenizers
import torch
import torch.nn as nn
import transformers
import time
import tqdm
import typing as tp
import urllib

In [2]:
def run_executable(path: str, arguments: str = "", input_: str = "") -> str:
    """Run an executable and return everything it wrote to stdout.

    Args:
        path: Path to the executable.
        arguments: Command-line arguments as one shell-style string. It is
            split with ``shlex.split``, so an empty string adds no arguments
            (previously an empty string was passed as a bogus ``argv[1]``).
        input_: Text fed to the process on stdin.

    Returns:
        The captured stdout as text. The exit code and stderr are ignored.
    """
    import shlex  # local import: the notebook's import cell does not provide it

    completed_process = subprocess.run(
        [path, *shlex.split(arguments)], input=input_, text=True, capture_output=True
    )
    return completed_process.stdout


def factorial(n):
    """Return n! for an integer n; any n below 2 (including negatives) gives 1."""
    product = 1
    # Multiplying by 1 is a no-op, so the factors 2..n are enough.
    for factor in reversed(range(2, n + 1)):
        product = product * factor
    return product


def get_pairwise_difference_matrix(array):
    """Return the matrix M with M[i, j] = array[i] - array[j] (via broadcasting)."""
    as_column = array[:, np.newaxis]
    as_row = array[np.newaxis, :]
    return as_column - as_row


def permute(array, first, second):
    """Swap, in place, the items stored at indices ``first`` and ``second``."""
    incoming = array[second]
    displaced = array[first]
    array[first] = incoming
    array[second] = displaced

def get_yadisk_download_url(
    yadisk_url: str,
    base_url="https://cloud-api.yandex.net/v1/disk/public/resources/download?",
) -> str:
    """Resolve a public Yandex.Disk link to a direct download URL.

    Args:
        yadisk_url: Public link; sent as the ``public_key`` query parameter.
        base_url: API endpoint the encoded query string is appended to.

    Returns:
        The ``href`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status, instead
            of failing later with an unrelated ``KeyError`` on ``"href"``.
    """
    # `import urllib` alone does not guarantee that the `parse` submodule is loaded.
    import urllib.parse

    final_url = base_url + urllib.parse.urlencode({"public_key": yadisk_url})
    response = requests.get(final_url)
    response.raise_for_status()
    return response.json()["href"]

def write_url_response_content_to_file(url: str, filename: str) -> None:
    """Download ``url`` and write the raw response body to ``filename``.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            check runs before the file is opened, so a failed download no
            longer creates or overwrites the file with an error page.
    """
    download_response = requests.get(url)
    download_response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(download_response.content)
        
def set_torch_device_order_pci_bus():
    """Set the ``CUDA_DEVICE_ORDER`` environment variable to ``PCI_BUS_ID``."""
    os.environ.update({"CUDA_DEVICE_ORDER": "PCI_BUS_ID"})

In [3]:
# Directory with the compiled C++ solutions. Override it through the
# CODERUN_CPP_DIR environment variable instead of editing this
# machine-specific default.
CPP_DIR = os.environ.get(
    "CODERUN_CPP_DIR", "/Users/dimakoshman/CLionProjects/coderun_ml/cmake-build-debug"
)

In [4]:
@dataclasses.dataclass
class Timer:
    enter_time: list[float] = dataclasses.field(default_factory=list)
    exit_time: list[float] = dataclasses.field(default_factory=list)

    def __enter__(self):
        self.enter_time.append(time.time())
        return self

    def __exit__(self, *args, **kwargs):
        self.exit_time.append(time.time())

    def wrap(self, function: tp.Callable) -> tp.Callable:
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            with self:
                return function(*args, **kwargs)

        return wrapper

    def stats(self) -> dict[str, float]:
        timings = [a - b for a, b in zip(self.exit_time, self.enter_time, strict=True)]
        stats = dict(mean=np.mean(timings), std=np.std(timings), max=max(timings))
        return stats

    def formatted_stats(
        self, time_unit: tp.Literal["ms", "s"] = "ms", precision: int = 3
    ) -> str:
        formatted_stats: list[str] = []
        for k, v in self.stats().items():
            match time_unit:
                case "ms":
                    v *= 1000
                case "s":
                    pass
                case _:
                    raise ValueError(f"Unknown time unit {time_unit}.")
            formatted_stats.append(f"\t{k}:\t{v:.{precision}f} {time_unit}")

        formatted_stats = "\n".join(formatted_stats)
        return formatted_stats

In [5]:
@dataclasses.dataclass
class Input:
    """Template for a problem's parsed input; this base version has no fields."""

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Parse an ``Input`` from a text stream. Not implemented in the template."""
        raise NotImplementedError


@dataclasses.dataclass
class Output:
    """Template for a problem's answer; every method is a placeholder."""

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Parse an ``Output`` from a text stream. Not implemented in the template."""
        raise NotImplementedError

    def __str__(self) -> str:
        """Render the answer as text. Not implemented in the template."""
        raise NotImplementedError
        
    def __eq__(self, output_: Output) -> bool:
        """Compare two answers. ``is_correct`` treats NotImplementedError as "no comparison"."""
        raise NotImplementedError


def generate_random_input(test_case: int) -> Input | tp.Tuple[Input, Output]:
    """Template hook: build a random input, optionally paired with its expected output.

    ``random_test`` calls it with ``test_case`` taken from ``range(100)``.
    """
    raise NotImplementedError


def generate_stress_input() -> Input:
    """Template hook: build an input for ``stress_test``. Not implemented here."""
    raise NotImplementedError


def brute_force_solve(input_: Input) -> Output:
    """Template hook: reference solver; ``check`` skips it while it raises NotImplementedError."""
    raise NotImplementedError


def solve(input_: Input) -> Output:
    """Template hook: the solution under test. Not implemented here."""
    raise NotImplementedError


def cpp_solve(input_: Input) -> Output:
    """Template hook: C++ reference solver; ``check`` skips it while it raises NotImplementedError."""
    raise NotImplementedError
    
    
def is_correct(input_: Input, output: Output, expected_output: Output|None):
    """Return the result of ``expected_output == output``.

    Raises:
        NotImplementedError: When there is no expected output, or when the
            comparison itself raises NotImplementedError.
    """
    if expected_output is None:
        raise NotImplementedError
    try:
        verdict = expected_output == output
    except NotImplementedError:
        pass
    else:
        return verdict
    raise NotImplementedError

    
def check(
    solve: tp.Callable,
    input_: Input | str,
    expected_output: Output | str | None = None,
) -> None:
    """Run ``solve`` on one input and validate its answer.

    The answer is compared against ``expected_output`` (when given) and against
    every reference solver (``brute_force_solve``, ``cpp_solve``) that is
    implemented. String arguments are parsed with ``Input.from_string_io`` /
    ``Output.from_string_io`` first.

    Raises:
        RuntimeError: If any comparison reports a wrong answer.
        NotImplementedError: If an expected output is given but cannot be compared.
    """
    if isinstance(input_, str):
        input_ = Input.from_string_io(io.StringIO(input_))

    output = solve(input_)

    if isinstance(expected_output, str):
        expected_output = Output.from_string_io(io.StringIO(expected_output))

    try:
        is_accepted = is_correct(input_, output, expected_output)
    except NotImplementedError:
        if expected_output is not None:
            raise
        # No expected output and no custom checker: nothing to compare here.
        # The run still counts (e.g. for timing) and the reference solvers
        # below are still consulted.
        is_accepted = True

    if not is_accepted:
        message = f"Wrong answer, input:\n{input_}\nOutput:\n{output}"
        if expected_output is not None:
            message += f"\nExpected output:\n{expected_output}"
        raise RuntimeError(message)

    for label, reference_solve in (
        ("brute force", brute_force_solve),
        ("cpp", cpp_solve),
    ):
        try:
            reference_output = reference_solve(input_)
        except NotImplementedError:
            continue  # this reference solver is not provided for the problem
        if not is_correct(input_, output, reference_output):
            raise RuntimeError(
                f"Wrong answer, {label} output:\n{reference_output}\nReceived:\n{output}"
            )


def simple_test():
    """Template for the hand-written sample tests; uncomment and fill in the checks."""
    #     check(solve, """""", """""")
    #     check(solve, """""", """""")
    #     check(solve, """""", """""")

    print("Simple tests OK")


def random_test():
    random_tests_timer = Timer()
    timed_solve = random_tests_timer.wrap(solve)

    for test_case in range(100):
        match generate_random_input(test_case):
            case input_, expected_output:
                pass
            case input_:
                expected_output = None

        check(solve=timed_solve, input_=input_, expected_output=expected_output)

    print(f"Random tests OK\n{random_tests_timer.formatted_stats()}")


def stress_test(n_tests: int = 10):
    """Run ``solve`` on ``n_tests`` stress inputs and print timing statistics.

    Args:
        n_tests: Number of inputs drawn from ``generate_stress_input``.
    """
    stress_tests_timer = Timer()
    timed_solve = stress_tests_timer.wrap(solve)

    for _ in range(n_tests):
        check(solve=timed_solve, input_=generate_stress_input())

    print(f"Stress tests OK\n{stress_tests_timer.formatted_stats()}")


def test():
    """Run the sample, random and stress test suites, in that order."""
    simple_test()
    random_test()
    stress_test()


def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))

In [None]:
### []()
from __future__ import annotations
import dataclasses
import io
import scipy.special
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Solution template: parsed problem input (no fields defined yet)."""

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Build an ``Input``; the template reads nothing from the stream."""
        return Input()


@dataclasses.dataclass
class Output:
    """Solution template: problem answer (no fields defined yet)."""

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Build an ``Output``; the template reads nothing from the stream."""
        return Output()

    def __str__(self) -> str:
        # Placeholder value: replace the Ellipsis with the real formatting.
        return ...

    def __eq__(self, output: Output) -> bool:
        """Two outputs are equal when their string forms match."""
        return str(self) == str(output)


def solve(input_: Input) -> Output:
    """Solution template: returns an empty ``Output``."""
    return Output()
    
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))
    
def simple_test():
    """Template for the sample tests; uncomment and fill in the checks below."""
    # check(solve, """""", """""")
    # check(solve, """""", """""")
    # check(solve, """""", """""")

    print("Simple tests OK")
    
simple_test()

### [Захар и загадочные совпадения](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/qx-d?currentPage=1&pageSize=10&rowNumber=1&compiler=python-ml)

In [86]:
from __future__ import annotations
import dataclasses
import io
import json
import sys
import typing as tp
import pandas as pd
import numpy as np

def main(path: str = "data/coderun/data.json"):
    """Print the dates whose daily maximum ``count`` is the smallest.

    The JSON file is loaded into a DataFrame with ``date`` and ``count``
    columns. For every date the maximum count is taken; the dates whose
    maximum equals the overall minimum of those maxima are printed one per
    line in chronological order. Nothing is printed for an empty table.

    Args:
        path: Location of the JSON file (defaults to the original hard-coded path).
    """
    # Use a context manager so the file handle is closed instead of leaked.
    with open(path) as data_file:
        df = pd.DataFrame(json.load(data_file))

    if not df.empty:
        max_daily_count = df.groupby("date")["count"].max()
        days = max_daily_count[max_daily_count == max_daily_count.min()].index.values
        # Sort by the parsed dates rather than by the raw strings.
        index = np.argsort(pd.to_datetime(days).values)
        days = days[index]
        print(*days, sep="\n")

### [Популярность страниц в энциклопедии](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/qx-e?currentPage=1&pageSize=10&rowNumber=2)

In [296]:
from __future__ import annotations
import dataclasses
import io
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Square table of page-to-page transition counts, one row per line."""

    transitions: tp.List[tp.List[int]]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Read the row count, then that many lines of whitespace-separated integers."""
        row_count = int(string_io.readline())
        rows = [
            [int(token) for token in string_io.readline().split()]
            for _ in range(row_count)
        ]
        return Input(transitions=rows)


@dataclasses.dataclass
class Output:
    """Page probabilities; printed with two decimals and compared within 1e-2."""

    probs: tp.List[float]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Parse one line of whitespace-separated floats."""
        tokens = string_io.readline().split()
        return Output(probs=[float(token) for token in tokens])

    def __str__(self) -> str:
        return " ".join(format(prob, ".2f") for prob in self.probs)

    def __eq__(self, output_: Output) -> bool:
        """Element-wise closeness with an absolute tolerance of 1e-2."""
        close_mask = np.isclose(self.probs, output_.probs, atol=1e-2)
        return np.all(close_mask)

def solve(input_: Input) -> Output:
    """Find a stationary distribution of the row-normalised transition table.

    Rows are divided by their sums (all-zero rows are left as zeros). The
    stationary distribution is taken from an eigenvector of the transposed
    matrix whose eigenvalue is close to 1, rescaled to sum to 1.

    Raises:
        ValueError: If no such eigenvector yields a non-negative distribution.
    """
    # Components that should be exactly 0 may come out as tiny negatives
    # (e.g. -1e-17) from the eigen-solver; accept them and clip to zero
    # instead of rejecting an otherwise valid distribution.
    negative_tolerance = 1e-9

    transitions = np.array(input_.transitions, dtype=float)
    row_sums = transitions.sum(1, keepdims=True)
    row_sums[row_sums == 0] = 1  # avoid dividing all-zero rows by zero
    transition_probs = transitions / row_sums
    eigenvalues, eigenvectors = np.linalg.eig(transition_probs.T)

    # np.linalg.eig stores eigenvectors as columns, hence the transpose.
    for eigenvalue, eigenvector in zip(eigenvalues, eigenvectors.T):
        if not np.isclose(eigenvalue, 1):
            continue
        eigenvector = np.real(eigenvector)
        total = eigenvector.sum()
        if np.isclose(total, 0):
            continue  # cannot be rescaled into a probability vector
        probs = eigenvector / total
        if np.all(probs >= -negative_tolerance):
            probs = np.clip(probs, 0, None)
            return Output(probs=probs / probs.sum())

    raise ValueError("No stationary distribution found.")
    
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))

In [297]:
def simple_test():
    """Check ``solve`` on one 4x4 transition table with a known answer."""
    check(solve,
          """4
55 20 15 10
25 20 35 20
50 5 25 20
65 20 10 5""",
          "0.50 0.17 0.20 0.13")

    print("Simple tests OK")

In [298]:
simple_test()

Simple tests OK


### [Уголки](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/corners?currentPage=1&pageSize=10&rowNumber=3)

In [None]:
from __future__ import annotations
import dataclasses
import heapq
import io
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """List of corners, each parsed from one line of integers into a tuple."""

    corners: tp.List[tp.Tuple[int, int, int, int]]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Read the corner count, then one corner per line."""
        corner_count = int(string_io.readline())
        parsed_corners = [
            tuple(int(token) for token in string_io.readline().split())
            for _ in range(corner_count)
        ]
        return Input(corners=parsed_corners)


@dataclasses.dataclass
class Output:
    """Single-line textual answer."""

    answer: str

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Take the first line of the stream, stripped of surrounding whitespace."""
        first_line = string_io.readline()
        return Output(answer=first_line.strip())

    def __str__(self) -> str:
        return self.answer

    def __eq__(self, output_: Output) -> bool:
        return self.answer == output_.answer

    
def lazy_remove(heap, trash_heap):
    """Pop both heaps for as long as they are non-empty and their tops are equal."""
    while True:
        if not heap or not trash_heap:
            return
        if heap[0] != trash_heap[0]:
            return
        heapq.heappop(heap)
        heapq.heappop(trash_heap)


class ScanLine:
    """Two heaps of coordinates with lazy deletion.

    Negative-facing values are stored negated, so the top of that heap is the
    largest one; positive-facing values are stored as-is, so the top is the
    smallest one. Each heap has a companion "trash" heap of pending removals.
    """

    def __init__(self):
        self.negative_facing_corners = []
        self.negative_facing_corners_trash = []
        self.positive_facing_corners = []
        self.positive_facing_corners_trash = []

    def add(self, corner, is_negative_facing):
        """Insert a coordinate into the heap matching its direction."""
        if is_negative_facing:
            target, key = self.negative_facing_corners, -corner
        else:
            target, key = self.positive_facing_corners, corner
        heapq.heappush(target, key)

    def remove(self, corner, is_negative_facing):
        """Schedule a coordinate for removal and drop any removed tops."""
        if is_negative_facing:
            live = self.negative_facing_corners
            trash = self.negative_facing_corners_trash
            key = -corner
        else:
            live = self.positive_facing_corners
            trash = self.positive_facing_corners_trash
            key = corner
        heapq.heappush(trash, key)
        lazy_remove(live, trash)

    def does_cover_everything(self):
        """Truthy when both heaps are non-empty and the largest negative-facing
        coordinate is at least the smallest positive-facing one."""
        lows = self.negative_facing_corners
        highs = self.positive_facing_corners
        return lows and highs and -lows[0] >= highs[0]
        
        
def get_y_coordinate_and_direction(corner):
    """Return ``(corner[1], flag)`` where ``flag`` tells whether ``corner[-2]`` is -1."""
    y_coordinate = corner[1]
    is_negative_facing = corner[-2] == -1
    return y_coordinate, is_negative_facing
        
    
def solve(input_: Input) -> Output:
    """Sweep over the sorted corners and answer "YES" if coverage never breaks.

    Corners whose last component is -1 are active from the start and are
    removed when the sweep reaches them; all other corners are added when
    reached. Coverage is verified before the sweep and after the last corner
    of every group sharing the same first coordinate.

    NOTE(review): the meaning of the two direction components is inferred from
    how they are indexed here; confirm against the problem statement.
    """
    corners = sorted(input_.corners)
    scanline = ScanLine()
    for corner in corners:
        if corner[-1] == -1:
            scanline.add(*get_y_coordinate_and_direction(corner))

    if not scanline.does_cover_everything():
        return Output(answer="NO")

    last_index = len(corners) - 1
    for i, corner in enumerate(corners):
        if corner[-1] == -1:
            scanline.remove(*get_y_coordinate_and_direction(corner))
        else:
            scanline.add(*get_y_coordinate_and_direction(corner))

        # `and` binds tighter than `or`: without these parentheses the last
        # corner answered "NO" unconditionally, so the function could never
        # return "YES".
        is_group_end = i == last_index or corner[0] < corners[i + 1][0]
        if is_group_end and not scanline.does_cover_everything():
            return Output(answer="NO")

    return Output(answer="YES")
    
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))
    

In [304]:
    
def simple_test():
    """Check ``solve`` on a four-corner case expecting "YES" and a two-corner case expecting "NO"."""
    check(solve, """4
        0 0 1 -1
        0 0 -1 1
        0 0 1 1
        0 0 -1 -1""",
          "YES")
    
    check(solve, """2
        0 0 1 1
        0 0 -1 -1""",
          "NO")


    print("Simple tests OK")

In [305]:
simple_test()

Simple tests OK


### [Рассчитать pFound](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/calculate-pfound?currentPage=1&pageSize=10&rowNumber=4)

In [357]:
# Directory (relative to the working directory) holding the pFound input tables.
folder = "data/coderun/pfound/hidden_task"

In [358]:
# Load the three tab-separated tables; they have no header row, so the column
# names are supplied here.
hostid_url = pd.read_csv(os.path.join(folder, "hostid_url.tsv"), sep="\t", header=None, names=["hostid", "url"])
qid_query = pd.read_csv(os.path.join(folder, "qid_query.tsv"), sep="\t", header=None, names=["qid", "query"])
qid_url_rating = pd.read_csv(os.path.join(folder, "qid_url_rating.tsv"), sep="\t", header=None, names=["qid", "url", "rating"])

# Left-join the query text (on qid) and the host id (on url) onto every rating row.
df = pd.merge(pd.merge(qid_url_rating, qid_query, on="qid", how="left"), hostid_url, on="url", how="left")

In [359]:
def compute_p_found(top_p_relevance, p_break = 0.15):
    """Accumulate pFound over relevance values given in ranking order.

    Each position contributes ``p_look * p_relevance``; afterwards ``p_look``
    is multiplied by ``(1 - p_relevance) * (1 - p_break)``.
    """
    p_found, p_look = 0, 1
    continue_probability = 1 - p_break
    for p_relevance in top_p_relevance:
        p_found = p_found + p_look * p_relevance
        p_look = p_look * ((1 - p_relevance) * continue_probability)
    return p_found

In [361]:
# For every query: keep the best rating per host, take the top_k highest of
# those and compute pFound over them; report the query with the largest value.
top_k = 10
max_p_found = 0
max_query = ""
for qid, query in qid_query.values:
    query_df = df[df["qid"] == qid]
    # One value per host: the maximum rating among that host's urls.
    query_df = query_df.groupby("hostid")["rating"].max()
    query_df = query_df.sort_values(ascending=False)
    top_p_relevance = query_df.values[:top_k]
    p_found = compute_p_found(top_p_relevance)
    # `>=` means that among equal pFound values the last query wins.
    if p_found >= max_p_found:
        max_p_found = p_found
        max_query = query
        
print(max_query)

туту


### [Формирование заплывов](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/make-heats/description?currentPage=1&pageSize=10&rowNumber=5)

In [396]:
# Inline CSV without a header row; the columns are named heat, id and time.
df = pd.read_csv(io.StringIO("""0,301,25.3
0,302,24.2
0,303,29.2
0,304,28.4
0,305,27.3
0,306,27.1
0,307,28.0
0,308,28.2
1,201,27.8
1,202,27.5
1,203,27.2
1,204,26.8
1,205,27.0
1,206,27.4
1,207,27.6
1,208,27.9
2,101,26.6
2,102,26.3
2,103,25.5
2,104,25.0
2,105,25.1
2,106,25.9
2,107,26.4
2,108,26.7"""), header=None, names=["heat", "id", "time"])

In [399]:
# Swimmers listed under heat 0 are the ones whose new placement is reported.
original_team = {k: None for k in df[df["heat"] == 0]["id"].values}
# Slowest first; note that this rebinds `df` to the sorted frame.
df = df.sort_values(by="time", ascending=False)
# Lane given to the 1st, 2nd, ... swimmer within each block of eight.
lane_order = np.array([8, 1, 7, 2, 6, 3, 5, 4])
for heat in range(len(df) // 8):
    heat_ids = df.iloc[heat * 8: (heat + 1) * 8]["id"].values
    for lane, swimmer_id in zip(lane_order, heat_ids):
        if swimmer_id in original_team:
            # Heats are numbered from 1 in the output.
            original_team[swimmer_id] = heat + 1, lane

In [400]:
# One "id,heat,lane" line per tracked swimmer, ordered by swimmer id.
print("\n".join(f"{i},{a},{b}" for i, (a, b) in sorted(list(original_team.items()))))

301,3,6
302,3,4
303,1,8
304,1,1
305,2,1
306,2,2
307,1,2
308,1,7


### [Документы на выдачe](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/revenue?currentPage=1&pageSize=10&rowNumber=6)

In [444]:
from __future__ import annotations
import dataclasses
import io
import sys
import typing as tp
import numpy as np
import itertools
import pandas as pd
import json


def compute_cost(costs):
    """Return the sum of ``costs[i] / sqrt(i + 1)`` over 0-based positions ``i``."""
    cost = np.sum(costs / np.arange(1, len(costs) + 1) ** 0.5)
    return cost

def compute_quality(relevance):
    """Return the sum of ``relevance[i] / (i + 1)`` over 0-based positions ``i``."""
    quality = np.sum(relevance / np.arange(1, len(relevance) + 1))
    return quality

def main():
    """Read a JSON document from stdin and print the best total cost.

    For every entry of ``serpset`` the existing results (ordered by
    ``position``) give a baseline cost and quality. Every way of inserting a
    non-empty subset of that query's new documents at distinct positions is
    tried; the list is cut back to its original length, and the highest cost
    whose quality is not below the baseline is kept. The per-query best costs
    are summed and printed with two decimals.
    """
    doc = json.loads(sys.stdin.read())
    new_documents = pd.DataFrame(doc["new_documents"])
    total_cost = 0

    for serpset in doc["serpset"]:
        query = serpset["query"]
        results = pd.DataFrame(serpset["results"]).sort_values(by="position")
        cost = list(results["cost"])
        relevance = list(results["relevance"])
        best_cost = compute_cost(cost)
        base_quality = compute_quality(relevance)

        if new_documents.empty:
            # An empty frame has no "query" column to filter on.
            new_cost, new_relevance = [], []
        else:
            new_results = new_documents[new_documents["query"] == query]
            new_cost = list(new_results["cost"])
            new_relevance = list(new_results["relevance"])

        n_results = len(cost)
        n_new_results = len(new_cost)

        for inserts in range(1, n_new_results + 1):
            for chosen in itertools.combinations(range(n_new_results), inserts):
                for positions in itertools.permutations(range(n_results), inserts):
                    # Pair each chosen document with its position and insert
                    # in ascending position order so that each position is
                    # the document's final index. The pairing is rebuilt from
                    # `chosen` on every iteration: the previous code rebound
                    # the loop variable, so a reordering from one permutation
                    # leaked into the next and some assignments were skipped.
                    placements = sorted(zip(positions, chosen))
                    cost_i = cost.copy()
                    relevance_i = relevance.copy()
                    for position, document in placements:
                        cost_i.insert(position, new_cost[document])
                        relevance_i.insert(position, new_relevance[document])

                    quality = compute_quality(relevance_i[:n_results])
                    cost_v = compute_cost(cost_i[:n_results])
                    if cost_v > best_cost and quality >= base_quality:
                        best_cost = cost_v

        total_cost += best_cost

    print(f"{total_cost:.2f}")

### [Весы и гири](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/scales-and-weights?currentPage=1&pageSize=10&rowNumber=7)

In [447]:
from __future__ import annotations
import dataclasses
import io
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Largest weight to measure plus the list of available weights."""

    max_weight: int
    weights: tp.List[int]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """First line: the maximum weight; second line: the weights."""
        limit = int(string_io.readline())
        parsed_weights = [int(token) for token in string_io.readline().split()]
        return Input(max_weight=limit, weights=parsed_weights)


@dataclasses.dataclass
class Output:
    """Single-line textual answer."""

    answer: str

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Take the first line of the stream, stripped of surrounding whitespace."""
        return Output(answer=string_io.readline().strip())

    def __str__(self) -> str:
        return self.answer

    def __eq__(self, output_: Output) -> bool:
        return self.answer == output_.answer
    

def solve(input_: Input) -> Output:
    """Answer "Yes" when every integer 0..max_weight is a reachable difference.

    A difference is reachable when it equals a signed sum in which every
    weight is added, subtracted or left out. The reachable differences are
    kept in a set, so duplicates are stored once instead of growing a list to
    3**n entries; the answers are unchanged.
    """
    reachable = {0}
    for weight in input_.weights:
        reachable |= {
            diff + signed_weight
            for diff in reachable
            for signed_weight in (weight, -weight)
        }

    is_coverable = all(
        target in reachable for target in range(input_.max_weight + 1)
    )
    return Output("Yes") if is_coverable else Output("No")
    
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))
    

In [448]:
def simple_test():
    """Check ``solve`` on three hand-written cases."""
    # NOTE: `"Yes"""` below is the literal "Yes" followed by an empty string
    # literal; implicit concatenation makes the value just "Yes".
    check(solve, """5
1 3 4""", "Yes""")
    check(solve, """9
1 3 4""", """No""")
    check(solve, """94
34 7 4 25 30 27 39""", """Yes""")

    print("Simple tests OK")

In [449]:
simple_test()

Simple tests OK


### [Поехавшие фичи](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/tricky-features-distributions?currentPage=1&pageSize=10&rowNumber=8)

In [519]:
# Tab-separated file without a header row; columns: day, test, feature, value.
df = pd.read_csv("~/Downloads/dataset.tsv", sep="\t", header=None, names=["day", "test", "feature", "value"])

# Per feature: two-sample Kolmogorov-Smirnov statistic between the values of
# the rows with test == 1 and those with test == 2.
result = df.groupby("feature").apply(lambda df: 
        scipy.stats.ks_2samp(df[df["test"] == 1]["value"].values,
                             df[df["test"] == 2]["value"].values).statistic)


# The ten features with the largest statistic, printed in ascending id order.
print(",".join(map(str, sorted(result.sort_values()[-10:].index))))

493,677,709,814,940,988,1076,1289,1401,1423


### [Лавки в одномерном городе](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/lavka-in-the-city?currentPage=1&pageSize=10&rowNumber=10)

In [545]:
class Distribution:
    """Pointwise maximum of the densities of six fixed normal distributions."""

    def __init__(self):
        # (mean, standard deviation) of each component.
        parameters = [
            (150, 80),
            (310, 60),
            (450, 70),
            (550, 70),
            (730, 85),
            (900, 90),
        ]
        self.distributions = [
            scipy.stats.norm(mean, std) for mean, std in parameters
        ]

    def __call__(self, x):
        """Return the largest component density at ``x`` (scalar or array)."""
        densities = np.stack([component.pdf(x) for component in self.distributions])
        return densities.max(0)

In [528]:
# For every integer center c in 100..900, integrate the density envelope over
# the window [c - 100, c + 100]; quad returns (value, error estimate), and
# only the value is kept.
distribution = Distribution()
centers = np.arange(100, 901)
areas = []

for c in centers:
    areas.append(scipy.integrate.quad(distribution, c - 100, c + 100)[0])

### [N-гранные кубики](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/n-sided-dice?currentPage=2&pageSize=10&rowNumber=11)

In [612]:
def play(n):
    """Simulate one game: n rolls of an n-sided die, scored per rolled face.

    A face ``cube`` rolled by ``n_people`` players contributes
    ``n_people * (n_people * cube - cube - n_people**2)`` points.
    """
    cubes = np.random.randint(1, n + 1, n)
    face_counts = collections.Counter(cubes)
    return sum(
        n_people * (n_people * cube - cube - n_people**2)
        for cube, n_people in face_counts.items()
    )

In [None]:
def get_expected_points(n):
    """Evaluate the closed form n^2 / 2 - 5n + 5.5 - 2 / n."""
    quadratic_term = n**2 / 2
    linear_term = 5 * n
    return quadratic_term - linear_term + 5.5 - 2 / n

In [641]:
# Monte-Carlo estimate: average score over 100000 simulated games with n = 50.
# No random seed is set, so the value changes between runs.
np.mean([play(50) for _ in range(100000)])

1006.16868

In [639]:
# Closed-form value for n = 50, to compare with the simulated mean.
get_expected_points(50)

1005.46

### [Игра с числами](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/num-game?currentPage=2&pageSize=10&rowNumber=12)

In [654]:
from __future__ import annotations
import dataclasses
import io
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Sequence of numbers plus the limit ``max_erase`` read from the header line."""

    numbers: tp.List[int]
    max_erase: int

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """First line: ``<count> <max_erase>``; second line: the numbers.

        The count is parsed but not used afterwards.
        """
        header_tokens = string_io.readline().split()
        _n_numbers, erase_limit = map(int, header_tokens)
        parsed_numbers = [int(token) for token in string_io.readline().split()]
        return Input(numbers=parsed_numbers, max_erase=erase_limit)


@dataclasses.dataclass
class Output:
    """Single-line textual answer."""

    answer: str

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Take the first line of the stream, stripped of surrounding whitespace."""
        stripped_line = string_io.readline().strip()
        return Output(answer=stripped_line)

    def __str__(self) -> str:
        return self.answer

    def __eq__(self, output_: Output) -> bool:
        return self.answer == output_.answer

def get_points(points, i):
    """Return ``points[i]``, or 0 when ``i`` is negative."""
    if i < 0:
        return 0
    return points[i]

def solve(input_: Input) -> Output:
    """Dynamic programme over prefixes of the reversed number list.

    For prefix ``numbers[0..i]``, ``first_points[i]`` is the best total found
    for the player to move and ``second_points[i]`` the total of the other
    player under that choice. The answer is "0" when the mover's total on the
    full list is below the opponent's, otherwise "1".
    """
    numbers = input_.numbers[::-1]
    first_points = []
    second_points = []
    
    for i in range(len(numbers)):
        delta = 0
        max_first_points = -np.inf
        second_p = 0
        
        # `erase` is the number of extra elements taken beyond numbers[i];
        # the move covers numbers[index..i], at most max_erase elements.
        for erase in range(min(input_.max_erase, i + 1)):
            index = i - erase
            delta += numbers[index]
            # After the move the roles swap on the remaining prefix
            # numbers[0..index-1].
            first_p = delta + (second_points[index - 1] if index >= 1 else 0)
            if max_first_points < first_p:
                max_first_points = first_p
                second_p = first_points[index - 1] if index >= 1 else 0

        first_points.append(max_first_points)
        second_points.append(second_p)
                
    output = Output("0") if first_points[-1] < second_points[-1] else Output("1")
    return output
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))

In [655]:
def simple_test():
    """Check ``solve`` on two hand-written cases, then report success."""
    check(solve, """4 3
1 2 3 9""", """0""")
    check(solve, """4 3
1 2 3 -4""", """1""")

    print("Simple tests OK")
    
simple_test()

Simple tests OK


### [Время путешествий](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/adventure-time?currentPage=2&pageSize=10&rowNumber=13)

In [658]:
from __future__ import annotations
import dataclasses
import io
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Temperature readings parsed from a single line."""

    temperature: tp.List[int]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Parse one line of whitespace-separated integers."""
        readings = [int(token) for token in string_io.readline().split()]
        return Input(temperature=readings)


@dataclasses.dataclass
class Output:
    """Largest temperature rise together with its begin and end indices."""

    temperature_delta: int
    begin: int
    end: int

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Parse one line of integers and pass them positionally to the constructor."""
        values = [int(token) for token in string_io.readline().split()]
        return Output(*values)

    def __str__(self) -> str:
        fields = (self.temperature_delta, self.begin, self.end)
        return " ".join(f"{field}" for field in fields)

    def __eq__(self, output_: Output) -> bool:
        """Outputs are equal when their printed forms match."""
        return str(self) == str(output_)

def solve(input_: Input) -> Output:
    """Find the largest rise ``temperature[end] - temperature[begin]`` with begin <= end.

    Among equal rises the shorter window is preferred. The result is
    ``(0, 0, 0)`` when the temperature never rises.

    NOTE(review): the shortest-window preference is taken from this function's
    own tie-break; confirm it against the problem statement.
    """
    temperature_delta = 0
    begin = 0
    end = 0
    cur_min = input_.temperature[0]
    cur_min_pos = 0
    for i, t in enumerate(input_.temperature):
        delta = t - cur_min
        is_shorter_tie = delta == temperature_delta and i - cur_min_pos < end - begin
        if delta > temperature_delta or is_shorter_tie:
            temperature_delta = delta
            begin = cur_min_pos
            end = i
        # `<=` rather than `<`: moving to the latest occurrence of the minimum
        # keeps the rise the same and can only shorten later windows, which is
        # what the tie-break above asks for (e.g. 1 1 5 -> begin 1, not 0).
        if t <= cur_min:
            cur_min = t
            cur_min_pos = i

    return Output(temperature_delta, begin, end)
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))
    

In [659]:
def simple_test():
    """Check ``solve`` on one hand-written case, then report success."""
    check(solve, """3 4 1 6""", """5 2 3""")

    print("Simple tests OK")
    
simple_test()

Simple tests OK


In [674]:
# Sum of C(19, i) for i = 0..13 divided by 2**19, i.e. P(X <= 13) for
# X ~ Binomial(19, 1/2).
sum([scipy.special.comb(19, i) for i in range(14)]) / 2**19

0.9682159423828125

In [696]:
# Two-sample z-test for proportions: 101 of 5829 versus 30 of 5807.
count = [101, 30]
nobs = [5829, 5807]
# proportions_ztest returns (statistic, p-value); only the statistic is printed.
print(f"z_test statistic {statsmodels.stats.proportion.proportions_ztest(count, nobs)[0]:.4f}")

z_test statistic 6.2168


In [698]:
# Confidence interval for the two independent proportions above at alpha = 0.01;
# reuses `count` and `nobs` from the previous cell.
statsmodels.stats.proportion.confint_proportions_2indep(count[0], nobs[0], count[1], nobs[1], alpha=0.01)

(0.007220640494741911, 0.017501614534234173)

### [Детекция сигнала](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/signal-detection?currentPage=2&pageSize=10&rowNumber=17)

In [703]:
from __future__ import annotations
import dataclasses
import heapq
import io
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Signal values plus the number of leading calibration signals."""

    n_calibration_signals: int
    signals: tp.List[int]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Lines: signal count, calibration count, then one signal per line."""
        signal_count = int(string_io.readline())
        calibration_count = int(string_io.readline())
        values = []
        for _ in range(signal_count):
            values.append(int(string_io.readline()))
        return Input(n_calibration_signals=calibration_count, signals=values)


@dataclasses.dataclass
class Output:
    """Index of the first outlier signal, as one integer."""

    first_outlier: int

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Parse the first line as an integer."""
        return Output(first_outlier=int(string_io.readline()))

    def __str__(self) -> str:
        return str(self.first_outlier)

    def __eq__(self, output_: Output) -> bool:
        """Outputs are equal when their printed forms match."""
        return str(self) == str(output_)

def solve(input_: Input) -> Output:
    """Return the index of the first post-calibration signal above the running threshold.

    Two heaps split the signals seen so far: ``less_heap`` holds the lower
    part as negated values (so its top is the largest of them) and
    ``more_heap`` holds the upper part. The threshold is the largest value in
    ``less_heap``. Returns ``Output(-1)`` when no signal exceeds it.
    """
    less_heap = []
    more_heap = []
    for i, signal in enumerate(input_.signals):
        # Signals are only tested once the calibration prefix has been consumed.
        if i >= input_.n_calibration_signals and signal > -less_heap[0]:
            return Output(i)
        
        if less_heap and -less_heap[0] < signal:
            heapq.heappush(more_heap, signal)
        else:
            heapq.heappush(less_heap, -signal)
        
        # Rebalance so that less_heap holds at least 90% of the i + 1 signals,
        # but would drop below 90% if it lost one more element.
        if len(less_heap) / (i + 1) < 0.9:
            signal = heapq.heappop(more_heap)
            heapq.heappush(less_heap, -signal)
        elif (len(less_heap) - 1) / (i + 1) >= 0.9:
            # The popped value is stored negated; negating it again restores
            # the original signal before it goes into more_heap.
            signal = heapq.heappop(less_heap)
            heapq.heappush(more_heap, -signal)
            
    return Output(-1)
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))

In [704]:
def simple_test():
    """Check ``solve`` on three hand-written cases, then report success."""
    check(solve, """12
5
1
5
2
3
4
1
3
2
7
2
3
4""", """8""")
    check(solve, """11
10
2
2
2
2
1
1
3
1
1
1
3""", """10""")
    check(solve, """11
10
2
2
2
3
1
1
3
1
1
1
3""", """-1""")

    print("Simple tests OK")
    
simple_test()

Simple tests OK


### [Волейбол](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/volleyball?currentPage=2&pageSize=10&rowNumber=18)

In [743]:
from __future__ import annotations
import dataclasses
import io
import scipy.special
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Set scores of one match, each stored as a pair of integers."""

    scores: tp.List[tp.Tuple[int, int]]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Parse one line: a leading token (ignored) followed by ``a:b`` scores."""
        tokens = string_io.readline().split()
        parsed_scores = []
        for raw_score in tokens[1:]:
            parsed_scores.append(tuple(int(part) for part in raw_score.split(":")))
        return Input(scores=parsed_scores)


@dataclasses.dataclass
class Output:
    """Number of possible rally sequences, as one integer."""

    n_combinations: int

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Parse the first line as an integer."""
        return Output(n_combinations=int(string_io.readline()))

    def __str__(self) -> str:
        return str(self.n_combinations)

    def __eq__(self, output_: Output) -> bool:
        return self.n_combinations == output_.n_combinations

def get_max_score(game_id):
    """Return the target score of a set: 15 for ``game_id == 4``, otherwise 25."""
    if game_id == 4:
        return 15
    return 25
                        
def solve(input_: Input) -> Output:
    """Multiply, over all sets, the number of rally orders giving the final score.

    A set won at exactly the target score contributes
    ``C(winner + loser - 1, loser)``. Any other set contributes
    ``C(2 * (target - 1), target - 1)`` times ``2 ** ((extra - 2) // 2)``,
    where ``extra`` is the number of points scored beyond
    ``(target - 1) : (target - 1)``.
    """
    total = 1
    for game_id, score in enumerate(input_.scores):
        target = get_max_score(game_id)
        loser, winner = sorted(score)
        if winner != target:
            tie_point = target - 1
            total *= scipy.special.comb(2 * tie_point, tie_point, exact=True)
            extra_points = loser + winner - 2 * tie_point
            total *= 2 ** ((extra_points - 2) // 2)
        else:
            total *= scipy.special.comb(winner + loser - 1, loser, exact=True)

    return Output(n_combinations=total)
    
def main():
    """Parse the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    print(solve(parsed_input))

In [744]:
def simple_test():
    """Run `solve` on the sample cases via the `check` helper defined outside this cell."""
    samples = (
        ("5 24:26 31:33 25:1 25:15 15:3", "56889691950301172383828530142994457600000000"),
        ("3 25:1 35:33 27:25", "26621643348538487941775616000000"),
        ("4 6:25 25:20 33:31 25:5", "512566584534289943779149761805072000000"),
    )
    for raw_input, expected_output in samples:
        check(solve, raw_input, expected_output)

    print("Simple tests OK")
    
simple_test()

Simple tests OK


### [Пользовательский лог](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/half-traits-users-log?currentPage=2&pageSize=10&rowNumber=20)

In [779]:
# Count, per user_id, the rows without missing values across the 20 feature
# files, then print how many user_ids reach a count of at least 10.
# NOTE(review): absolute local path — this cell only runs on the author's machine.
folder = "/Users/dimakoshman/Downloads/feature_files"
counter = collections.Counter()
for i in range(1, 21):
    # header=None: the first row is read as data; column names are supplied explicitly.
    feature = pd.read_csv(os.path.join(folder, f"feature_{i}.csv"), header=None, names=["user_id", "feature_value"])
    # Drop every row in which any column is NaN.
    feature = feature.dropna()
    # NOTE(review): a user_id repeated inside one file is counted once per row —
    # presumably ids are unique within a file; TODO confirm.
    counter.update(feature["user_id"].values)
    
counts = np.array(list(counter.values()))
# Number of distinct user_ids whose accumulated count is >= 10.
print((counts >= 10).sum())

870


### [Команда аналитиков](https://coderun.yandex.ru/seasons/first_2023/tracks/data-analytics/problem/task-paralleling?currentPage=3&pageSize=10&rowNumber=23)

In [None]:
from __future__ import annotations
import dataclasses
import heapq
import io
import scipy.special
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Parsed input of the task-scheduling problem.

    `hours[i]` and `deadlines[i]` are the first and second integer of the
    i-th task line respectively.
    """

    n_workers: int
    deadlines: tp.List[int]
    hours: tp.List[int]

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Parse ``"<n_tasks> <n_workers>"`` followed by `n_tasks` lines ``"<hours> <deadline>"``."""
        header_tokens = string_io.readline().split()
        n_tasks, n_workers = map(int, header_tokens)

        hours, deadlines = [], []
        for _ in range(n_tasks):
            # Exactly two integers are expected per task line.
            task_hours, task_deadline = (int(token) for token in string_io.readline().split())
            hours.append(task_hours)
            deadlines.append(task_deadline)

        return Input(n_workers=n_workers, deadlines=deadlines, hours=hours)


@dataclasses.dataclass
class Output:
    """Answer of the task-scheduling problem.

    Attributes:
        is_possible: whether the verdict is ``"YES"``.
        schedule: one list of integers per schedule line (the leading count
            on each line is not stored); ``None`` when there is no schedule.
    """

    is_possible: bool
    schedule: tp.List[tp.List[int]] | None = None

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Parse a ``"YES"``/``"NO"`` verdict and, for ``"YES"``, the schedule lines.

        Each schedule line has the form ``"<count> <id> <id> ..."``; the count
        is discarded and the remaining integers are kept. Lines without any
        token (for example a trailing newline) are skipped so they do not
        produce phantom empty entries.
        """
        is_possible = string_io.readline().strip() == "YES"
        schedule = None
        if is_possible:
            schedule = []
            for line in string_io:
                tokens = line.split()
                if not tokens:
                    continue
                # Split the current line (not the accumulated schedule list).
                schedule.append([int(token) for token in tokens[1:]])

        return Output(is_possible=is_possible, schedule=schedule)

    def __str__(self) -> str:
        """Render the verdict followed by one ``"<count> <ids...>"`` line per schedule entry."""
        lines = ["YES" if self.is_possible else "NO"]
        # `schedule` is None when no schedule was provided; print only the verdict then.
        for entry in self.schedule or []:
            lines.append(f"{len(entry)} " + " ".join(map(str, entry)))
        return "\n".join(lines)

    def __eq__(self, output: Output) -> bool:
        """Compare verdicts only; the schedules themselves are not compared."""
        return self.is_possible == output.is_possible

    
def solve(input_: Input) -> Output:
    """Unfinished solver: the scheduling logic has not been written yet.

    NOTE(review): as written, the loop only drains `todo`, and `Output()` is
    called without the `is_possible` field that the `Output` dataclass in this
    cell declares without a default, so that call raises TypeError.
    """
    # Sorted ascending by (deadline, hours) and then reversed, so that `pop()`
    # below yields the task with the smallest deadline first.
    todo = sorted((d, h) for d, h in zip(input_.deadlines, input_.hours))[::-1]
    # The three structures below are initialised but not read anywhere yet.
    in_progress = []
    is_available = [True] * input_.n_workers
    assigned = [[] for _ in range(input_.n_workers)]
    while todo:
        # TODO: assign the task to a worker; currently it is popped and discarded.
        next_task = todo.pop()
        
        
    output = Output()
    return output
    
    
def main():
    """Read the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    answer = solve(parsed_input)
    print(answer)

In [791]:
# Scratch check: tuples are ordered by their first element first.
sorted([(0,1), (-1,10)])

[(-1, 10), (0, 1)]

In [781]:
def simple_test():
    """Run `solve` on the sample cases via the `check` helper defined outside this cell.

    Expected outputs are written with explicit escapes so that the trailing
    spaces at the end of schedule lines are visible.
    """
    samples = (
        ("5 2\n3 3\n2 2\n3 6\n2 4\n2 6", "YES\n3 2 4 5 \n2 1 3"),
        ("2 1\n4 7\n4 7", "NO"),
        ("1 2\n1 2", "YES\n1 1 \n0 "),
    )
    for raw_input, expected_output in samples:
        check(solve, raw_input, expected_output)

    print("Simple tests OK")
    
simple_test()

ValueError: not enough values to unpack (expected 2, got 1)

In [None]:
### []()
from __future__ import annotations
import dataclasses
import io
import scipy.special
import sys
import typing as tp
import numpy as np


@dataclasses.dataclass
class Input:
    """Template for a problem's parsed input; it declares no fields yet."""

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Input:
        """Build an `Input` from a text stream (the template reads nothing from it)."""
        return Input()


@dataclasses.dataclass
class Output:
    """Template for a problem's answer; it declares no fields yet."""

    @staticmethod
    def from_string_io(string_io: io.StringIO) -> Output:
        """Build an `Output` from a text stream (the template reads nothing from it)."""
        return Output()

    def __str__(self) -> str:
        """Placeholder formatter: still returns ``...`` until real formatting is written."""
        return ...

    def __eq__(self, output: Output) -> bool:
        """Outputs are equal when their string renderings are equal."""
        return str(self) == str(output)


def solve(input_: Input) -> Output:
    """Template solver: ignores `input_` and returns a default-constructed `Output`."""
    return Output()
    
    
def main():
    """Read the problem input from stdin, solve it and print the answer."""
    parsed_input = Input.from_string_io(sys.stdin)
    answer = solve(parsed_input)
    print(answer)
    
def simple_test():
    """Template test runner: no case is active yet, so it only prints the success line."""
    # Placeholder cases: uncomment and fill in the input and expected output.
    #check(solve, """""", """""")
    #check(solve, """""", """""")
    #check(solve, """""", """""")

    print("Simple tests OK")
    
simple_test()