## Setup Code

In [2]:
import numpy as np
import math
from time import perf_counter
from collections import deque


class Node:
    """Generic binary-tree node that also records its parent and depth."""

    def __init__(self, data, left=None, right=None, parent=None, depth:int=None):
        """Store the payload and whatever links the caller supplies.

        Args:
            data: Payload carried by the node.
            left: Left child, or None.
            right: Right child, or None.
            parent: Parent node, or None.
            depth: Depth of the node, or None when not yet assigned.
        """
        self.left, self.data, self.right = left, data, right
        self.parent, self.depth = parent, depth

class TreeNode:
    """Binary-tree node holding a value and optional left/right children."""

    def __init__(self, val=0, left=None, right=None):
        self.val, self.left, self.right = val, left, right

    def __repr__(self):
        """Render as ``<val>,L<left val>,R<right val>``; a missing child prints as empty."""
        left_txt = self.left.val if self.left else ''
        right_txt = self.right.val if self.right else ''
        return f"{self.val},L{left_txt},R{right_txt}"


def _tree() -> list[TreeNode]:
    """Build the example tree below and return all of its nodes.

                0
            1_______7
          2___3______8
             4__________9
              _5_________10
            6_________11_

    Returns:
        list[TreeNode]: Nodes indexed by value (``r[i].val == i``), where r[0] is root
    """
    nodes = [TreeNode(value) for value in range(12)]

    # (parent value, side, child value), wired in the order shown.
    edges = (
        # right spine hanging off the root
        (0, "right", 7),
        (7, "right", 8),
        (8, "right", 9),
        (9, "right", 10),
        (10, "left", 11),
        # left subtree
        (0, "left", 1),
        (1, "left", 2),
        (1, "right", 3),
        (3, "left", 4),
        (4, "right", 5),
        (5, "left", 6),
    )
    for parent, side, child in edges:
        setattr(nodes[parent], side, nodes[child])
    return nodes


def _bsearch_tree() -> list[TreeNode]:
    """Build the example "binary search tree" below and return all of its nodes.

                6
             3_____7
          2___5______8
        1___4__________9
          _0_____________10

    NOTE(review): node 0 is attached as the left child of node 4, which sits in
    the right subtree of node 3, so the structure is not a strictly valid BST.
    Confirm whether that is intentional before relying on BST ordering.

    Returns:
        list[TreeNode]: Nodes indexed by value (``r[i].val == i``); the root is
        ``r[6]``, not ``r[0]``.
    """
    nodes = [TreeNode(value) for value in range(11)]

    # (parent value, side, child value), wired in the order shown.
    edges = (
        # right spine hanging off the root
        (6, "right", 7),
        (7, "right", 8),
        (8, "right", 9),
        (9, "right", 10),
        # left subtree
        (6, "left", 3),
        (3, "left", 2),
        (2, "left", 1),
        (3, "right", 5),
        (5, "left", 4),
        (4, "left", 0),
    )
    for parent, side, child in edges:
        setattr(nodes[parent], side, nodes[child])
    return nodes


def run_tests(fnc, test_cases) -> None:
    """Run ``fnc`` against every case and print a line for each mismatch.

    Args:
        fnc: Callable under test; it is invoked as ``fnc(*case["in"])``.
        test_cases: Iterable of dicts with an ``"in"`` tuple of positional
            arguments and an ``"out"`` expected value.

    Nothing is printed for passing cases, and nothing is returned.
    """
    for number, case in enumerate(test_cases, start=1):
        actual = fnc(*case["in"])
        expected = case["out"]
        if actual != expected:
            print(f"Test # {number} : Calculated {actual}, expected {expected}")

# test_cases = [
#     {"in": ([[1],[2],[3],[]],), "out": True},
#     {"in": ([[1,3],[3,0,1],[2],[0]],), "out": False},
# ]

# run_tests(my_func, test_cases)

## Problem

### Problem Statement

In [3]:



# Each case passes one square 0/1 matrix and the expected integer result.
# NOTE(review): presumably these are adjacency matrices and the result is the
# number of connected groups — confirm against the problem statement.
# NOTE(review): find_num_subgraphs is not defined in any cell shown here; this
# call raises NameError unless it is defined elsewhere in the notebook.
test_cases = [
    {"in": ([[1,1,0],[1,1,0],[0,0,1]],), "out": 2},
    {"in": ([[1,0,0],[0,1,0],[0,0,1]],), "out": 3},
]

run_tests(find_num_subgraphs, test_cases)


# Unoptimized

## Container With Most Water

You are given an integer array height of length n. There are n vertical lines drawn such that the two endpoints of the ith line are (i, 0) and (i, height[i]).

Find two lines that together with the x-axis form a container, such that the container contains the most water.

Return the maximum amount of water a container can store.

Notice that you may not slant the container.

Example 1:

- Input: height = [1,8,6,2,5,4,8,3,7]
- Output: 49
- Explanation: The above vertical lines are represented by array [1,8,6,2,5,4,8,3,7]. In this case, the max area of water (blue section) the container can contain is 49.

Example 2:

- Input: height = [1,1]
- Output: 1

### Survey
- Looking at all possible pairs is the naive approach
- Which pairs can I ignore?
  - Step through from 0 to N
    - Can ignore anything to right that is smaller than something seen on left
      - When we see a larger value on right ... can we drop old values?

So storing all previous highs that have been seen ... when can I stop considering the old ones ?


stack state -> (x, y)

In [None]:
def _area(s: tuple[int,int,int]) -> int:
    return (s[1]-s[0])*s[2]

# reverse order if we see something stupid

def max_size(heights: list[int]) -> int:
    """Return the largest container area formed by any two lines in ``heights``.

    The area between lines ``i < j`` is ``(j - i) * min(heights[i], heights[j])``.

    A two-pointer scan starts with the widest container and moves inward. At
    each step the shorter line is discarded: every other container using that
    line is narrower and no taller, so it cannot beat the one just measured.
    This takes O(n) time and O(1) extra space.

    Args:
        heights: Line heights indexed by x position. Any indexable sequence
            with ``len`` works (list, tuple, numpy array).

    Returns:
        The maximum area, or 0 when fewer than two lines are given.
    """
    left, right = 0, len(heights) - 1
    best = 0
    while left < right:
        h_left, h_right = heights[left], heights[right]
        width = right - left
        if h_left < h_right:
            best = max(best, h_left * width)
            left += 1   # the left line is the limiting side; drop it
        else:
            best = max(best, h_right * width)
            right -= 1  # the right line is limiting (or tied); drop it
    return best


# Each case passes a single list of heights and the expected maximum area.
# The second and third cases are the two examples from the problem statement.
test_cases = [
    {"in": ([1,2,3,3,1,1,1,1,1], ), "out": 8},
    {"in": ([1,8,6,2,5,4,8,3,7], ), "out": 49},
    {"in": ([1,1],), "out": 1},
]

# run_tests prints only the failing cases, so no output means every case passed.
run_tests(max_size, test_cases)


In [None]:
# Performance Tests
# The timings quoted below are the author's recorded measurements.

# TODO this needs to run in under 10 seconds ... yikes
# pyramid (heights rise 1..9999, then fall 10000..2) ... currently takes 44 seconds
max_size(list(range(1,10000)) + list(range(10000,1,-1)))


# monotonically increasing
# 8.2 seconds
# max_size(list(range(1,10000)))
# 41 seconds
# max_size(list(range(1,20000)))

# random heights: .1 second
# max_size(np.random.randint(1,10000,10000))

## Game of Two Stacks

### Problem Statement
Alexa has two stacks of non-negative integers, A and B. Alexa challenges Nick to play the following game:

- In each move, Nick can remove one integer from the top of either A or B.
- Nick keeps a running sum of the integers he removes from the two stacks.
- Nick is disqualified from the game if, at any point, his running sum becomes greater than some integer *max_sum* given at the beginning of the game.
- Nick's final score is the total number of integers he has removed from the two stacks.

Given A, B, and *max_sum* for N games, find the maximum possible score Nick can achieve.

In [None]:
def is_new(a, b, visited: set) -> bool:
    """Report whether the pair (a, b) is unseen, recording it in ``visited`` if so.

    Returns True the first time a pair is offered (and adds it to ``visited``),
    and False on every later call with the same pair.
    """
    key = (a, b)
    seen_before = key in visited
    if not seen_before:
        visited.add(key)
    return not seen_before

# can probably quit after hitting 20 branches with same result?
def _two_stacks_search(max_sum, stack_a, stack_b, depth_first):
    """Explore the (items taken from A, items taken from B) state graph.

    A state ``(rsum, a_idx, b_idx)`` means the first ``a_idx`` items of A and
    the first ``b_idx`` items of B were removed for a running sum of ``rsum``.
    The sum depends only on ``(a_idx, b_idx)``, so each pair is expanded once.

    Args:
        max_sum: Largest running sum allowed.
        stack_a: First stack, with the top at index 0.
        stack_b: Second stack, with the top at index 0.
        depth_first: Take the newest frontier state first (LIFO) when True,
            the oldest first (FIFO) when False.

    Returns:
        The largest number of items removable without exceeding ``max_sum``.
    """
    len_a, len_b = len(stack_a), len(stack_b)
    best = 0
    frontier = deque([(0, 0, 0)])
    visited = {(0, 0)}
    take = frontier.pop if depth_first else frontier.popleft

    while frontier:
        rsum, a_idx, b_idx = take()
        if rsum > max_sum:
            # The last removal broke the limit, so only the moves before it count.
            best = max(best, a_idx + b_idx - 1)
            continue
        if a_idx == len_a and b_idx == len_b:
            # Both stacks were emptied within budget; nothing can score higher.
            return a_idx + b_idx

        if a_idx < len_a and (a_idx + 1, b_idx) not in visited:
            visited.add((a_idx + 1, b_idx))
            frontier.append((rsum + stack_a[a_idx], a_idx + 1, b_idx))
        if b_idx < len_b and (a_idx, b_idx + 1) not in visited:
            visited.add((a_idx, b_idx + 1))
            frontier.append((rsum + stack_b[b_idx], a_idx, b_idx + 1))
    return best


def two_stacks_dfs(max_sum, stack_a, stack_b):
    """Return Nick's best score, exploring states depth-first.

    Args:
        max_sum: Largest running sum allowed.
        stack_a: First stack, with the top at index 0.
        stack_b: Second stack, with the top at index 0.

    Returns:
        The maximum number of integers that can be removed from the two stacks
        while the running sum stays at or below ``max_sum``.
    """
    return _two_stacks_search(max_sum, stack_a, stack_b, depth_first=True)


def two_stacks_bfs(max_sum, stack_a, stack_b):
    """Return Nick's best score, exploring states breadth-first.

    Same contract and result as ``two_stacks_dfs``; only the exploration order
    differs.
    """
    return _two_stacks_search(max_sum, stack_a, stack_b, depth_first=False)


# Each entry is ((max_sum, stack_a, stack_b), expected_score).
# The trailing comment on each row repeats the expected score.
inputs =[
    ((10, [4,2,4,6,1], [2,1,8,5]), 4),  # 4
    ((15, [10,1,1,1,1,1], [5,5,5]), 6),  # 6
    ((15, [1,1,1,8,1,1,1], [2,2,2,2,2,2]), 9),  # 9
    ((20, [1,1], [1,1]), 4),  # 4
    ((0, [1], [1]), 0) # 0
]

# Print the arguments plus expected/actual scores for every mismatch.
# No output means every case passed.
for i, o in inputs:
    r = two_stacks_dfs(*i)
    if o != r:
        print(i, f"Expected: Found: {o} : {r}")

In [None]:
# Another test case: replay a downloaded input file against its expected output.
# Input layout as parsed here: a header line (skipped), then three lines per
# game — a line whose last integer is max_sum, then stack A, then stack B.
games, game = [], None
with open("C:/Users/tom/Downloads/twostacks_input03.txt", "r") as fp:
    for i, line in enumerate(fp):
        if i == 0:
            continue  # header line
        line_items = [int(k) for k in line.strip().split()]
        if (i-1)%3==0:
            # First line of a new game: flush the previous one and start over.
            if game:
                games.append(tuple(game))
            game = [line_items[-1]]
        else:
            game.append(line_items)
if game:
    # Flush the final game, stored as a tuple like the others.
    games.append(tuple(game))

with open("C:/Users/tom/Downloads/twostacks_output03.txt", "r") as fp:
    answers = [int(s.strip()) for s in fp]

# Number the games explicitly: reusing `i` from the parsing loop would print
# the index of the last input line for every mismatch.
for game_no, (ins, out) in enumerate(zip(games, answers), start=1):
    calc = two_stacks_bfs(*ins)
    if calc != out:
        print(game_no, f"Expected: Found: {out} : {calc}")

In [None]:
# Build 10 random games for load testing; each is [max_sum, stack A, stack B].
# Note that 10e5 == 1_000_000 (not 10**5), 10e6 == 10_000_000, 10e8 == 10**9.
# NOTE(review): if the intent was 10**5 etc., these bounds are 10x too large.
games = []
for i in range(10):
    # Stack lengths, drawn from [1, 1_000_000).
    n,m = np.random.randint(1,int(10e5),2)
    # Stack contents, drawn from [1, 10_000_000); .tolist() yields plain Python ints.
    A = np.random.randint(1, int(10e6), n).tolist()
    B = np.random.randint(1, int(10e6), m).tolist()
    # Sum limit, drawn from [1, 10**9); [0] extracts the scalar from the 1-element array.
    max_ = np.random.randint(1,int(10e8),1)[0]
    games.append([max_, A, B])

In [None]:
# Run the breadth-first solver on every generated game; the results are discarded.
for g in games:
    two_stacks_bfs(*g)

## Merging Communities

### Problem Statement
People connect with each other in a social network. A connection between Person i and Person j is represented as M(i,j). When two persons belonging to different communities connect, the net effect is to merge the communities to which i and j belong.

At the beginning, there are N people representing N communities (themselves). Suppose person i and j connected and later j and k connected, then i,j, and k will belong to the same community.

There are two types of queries:

1. `M i j` — communities containing persons i and j are merged if they belong to different communities.
2. `Q i` — print the size of the community to which person i belongs.

Input Format

The first line of input contains 2 space-separated integers N and Q, the number of people and the number of queries.
The next Q lines will contain the queries.

$n < 10^5$<br>
$q < 2*10^5$

In [None]:
# Sample input: the first entry is the "<people> <queries>" header, followed by
# the queries ("M i j" or "Q i").
lines = ["3 6", "Q 1", "M 1 2", "Q 2", "M 2 3", "Q 3", "Q 2"]
# Expected answers, one per "Q" query in order.
output = [1,2,3,3]

## Cricket Team Strength (unoptimized)

### Problem Statement
A cricket match is going to be held. The field is represented by a 1D plane. A cricketer, Mr. X has N favorite shots. Each shot has a particular range, $(x_0,x_1)$. Each player on the opposite team can field only in a particular range, $(y_0,y_1)$. For a set of N shots and M players, determine the total strength of the players relative to Mr X.  The strength of a player is the number of shots covered (range of player overlaps range of shot) by that player, and the strength of a team is the sum of all player strengths

$1 \leq N, M \leq 10^5 $ <br>
$1 \leq x,y \leq 10^8 $

In [None]:
# region  Brute Force

def can_cover(s0, s1, p0, p1) -> bool:
    """Return True when shot range [s0, s1] overlaps player range [p0, p1].

    Both ranges are inclusive, so ranges that only touch at an endpoint count
    as overlapping. Always returns a real bool, never None.
    """
    return bool(s0 <= p1 and s1 >= p0)


def team_strength_v0(shots, players) -> int:
    """Brute force: count every (shot, player) pair whose ranges overlap.

    Args:
        shots: Iterable of (x0, x1) shot ranges.
        players: Iterable of (y0, y1) fielding ranges.

    Returns:
        Total team strength, i.e. the number of overlapping pairs.
    """
    return sum(
        1
        for shot in shots
        for player in players
        if can_cover(*shot, *player)
    )

#endregion Brute

# region 2
from operator import itemgetter

def team_strength_v1(shots, players) -> int:
    """Count overlapping (shot, player) pairs in O((N + M) log M).

    A player overlaps shot [s0, s1] unless the player starts after the shot
    ends or ends before the shot starts. With sorted start and end points:

        overlaps = #(players with p0 <= s1) - #(players with p1 < s0)

    Both counts are binary searches. The subtraction is valid because every
    player that ends before ``s0`` also starts no later than ``s1``. This
    assumes well-formed ranges (``p0 <= p1`` and ``s0 <= s1``).

    Unlike the previous queue-popping version, this does not raise IndexError
    when ``players`` is empty or when every player lies entirely before a shot.

    Args:
        shots: Iterable of (x0, x1) inclusive shot ranges.
        players: Iterable of (y0, y1) inclusive fielding ranges.

    Returns:
        Total team strength (0 when either collection is empty).
    """
    from bisect import bisect_left, bisect_right

    players = list(players)
    starts = sorted(p0 for p0, _ in players)
    ends = sorted(p1 for _, p1 in players)

    strength = 0
    for s0, s1 in shots:
        started_in_time = bisect_right(starts, s1)  # players with p0 <= s1
        ended_too_early = bisect_left(ends, s0)     # players with p1 < s0
        strength += started_in_time - ended_too_early
    return int(strength)

# endregion

In [None]:
def gen_inputs(max__num: int, max__rng: int) -> tuple:
    """Generate random shot and player ranges for load testing.

    Args:
        max__num: Exclusive upper bound on the number of shots and of players;
            each count is drawn from [1, max__num).
        max__rng: Exclusive upper bound on coordinates. Every range starts in
            the lower half [1, max__rng/2) and ends in the upper half
            [max__rng/2, max__rng).

    Returns:
        ``(shots, players)``: two lists of (start, end) tuples.
    """
    randint = np.random.randint
    midpoint = max__rng/2

    # Draw order matters for reproducibility under a fixed seed:
    # both counts first, then shot ranges, then player ranges.
    num_shots = randint(1, max__num, 1)[0]
    num_players = randint(1, max__num, 1)[0]

    def _ranges(count):
        lows = randint(1, midpoint, count)
        highs = randint(midpoint, max__rng, count)
        return list(zip(lows, highs))

    shots = _ranges(num_shots)
    players = _ranges(num_players)
    return (shots, players)


# Case 0 is a small hand-checked example; the remaining cases are random inputs
# for timing, with no expected output recorded.
# Note that int(10e2) == 1000, int(10e4) == 100_000, and so on (10eN is 10 * 10**N).
# NOTE(review): passing the "out": None cases to run_tests would report a
# mismatch for any function that returns a number.
test_cases = [
    {"in": ([(1, 2), (2, 3), (4, 5), (6, 7)], [(1, 5), (2, 3), (4, 7), (5, 7)]), "out": 9},
    {"in": gen_inputs(int(10e2), int(10e4)), "out": None},
    {"in": gen_inputs(int(10e3), int(10e5)), "out": None},
    # {"in": gen_inputs(int(10e4), int(10e6)), "out": None},
]

In [None]:
# Time the brute-force implementation on test_cases[1]; prints elapsed seconds.
t0 = perf_counter()
team_strength_v0(*test_cases[1]["in"])
t1 = perf_counter()
print(t1-t0)

In [None]:
# NOTE(review): this cell times team_strength_v0 on test_cases[1] again, exactly
# like the previous cell. Presumably it was meant to time team_strength_v1 for
# comparison — confirm.
t0 = perf_counter()
team_strength_v0(*test_cases[1]["in"])
t1 = perf_counter()
print(t1-t0)

In [None]:
# Difference-array coverage. An earlier version ran the prefix sum as a Python
# loop over every coordinate and was recorded as slower than brute force;
# the accumulation is now done inside numpy.
def coverage_of(scores: np.ndarray, max__rng: int) -> np.ndarray:
    """Return how many ranges cover each integer coordinate in [0, max__rng].

    Each inclusive range (x0, x1) adds +1 at x0 and -1 at x1 + 1 in a
    difference array; its running sum is the coverage count per coordinate.

    Args:
        scores: Iterable of (x0, x1) inclusive integer ranges with
            0 <= x0 <= x1 <= max__rng.
        max__rng: Largest coordinate that may appear.

    Returns:
        Integer array of length ``max__rng + 1`` where entry ``k`` is the
        number of ranges containing ``k``.
    """
    # One extra slot so a range ending exactly at max__rng can still record
    # its -1 at x1 + 1 without indexing out of bounds.
    deltas = np.zeros(max__rng + 2, dtype=int)

    ranges = np.asarray(list(scores), dtype=int).reshape(-1, 2)
    # ufunc.at accumulates correctly when the same index appears repeatedly,
    # which plain fancy-index `+=` does not.
    np.add.at(deltas, ranges[:, 0], 1)
    np.add.at(deltas, ranges[:, 1] + 1, -1)

    # Drop the extra slot before the prefix sum so the result keeps its length.
    return np.cumsum(deltas[:-1], dtype=int)

In [None]:
# Build per-coordinate coverage counts for the small hand-checked example.
shots, players = test_cases[0]["in"]
# Upper coordinate bound for the example; its largest endpoint is 7.
max__rng = int(10)
scoverage = coverage_of(shots, max__rng)
pcoverage = coverage_of(players, max__rng)

In [None]:
# Per-coordinate difference between player coverage and shot coverage
# (displayed as the cell output).
pcoverage - scoverage

In [None]:
# approach 1 ... max is slow
strength = 0
for s0,s1 in shots:
    strength += max(pcoverage[s0:s1+1])

strength