# Evaluate Sum of Last K

You are given a list of data points, where each data point consists of: a list of tags, a list of timestamps (representing when the values were recorded), and a list of values (corresponding to each timestamp).


Your task is to:

1. Filter the data points based on a given tag.

2. Sort the filtered data points in ascending order by their timestamps.

3. Calculate the sum of the last K values from this sorted list.



In [2]:
from typing import Dict, List

def evaluate_sum_of_last_k(data, k, tag):
    """Sum the values of the ``k`` most recent samples of a tagged data point.

    Args:
        data: Mapping with the keys ``"tag"`` (list of tags), ``"timestamp"``
            and ``"value"`` (parallel lists: ``value[i]`` was recorded at
            ``timestamp[i]``).
        k: How many of the most recent samples to add up. A ``k`` larger than
            the number of samples selects all of them.
        tag: Tag the data point must carry to be taken into account.

    Returns:
        The sum of the values that belong to the ``k`` largest timestamps, or
        ``0`` when the tag is absent or ``k`` is not positive.
    """
    tags = data["tag"]
    values = data["value"]
    timestamps = data["timestamp"]

    # Guard k <= 0 explicitly: ``pairs[-0:]`` is the whole list, and a negative
    # k would slice from the front instead of selecting nothing.
    if k <= 0 or tag not in tags:
        return 0

    # Sort (timestamp, value) pairs chronologically; the sort is stable, so
    # samples sharing a timestamp keep their input order.
    pairs = sorted(zip(timestamps, values), key=lambda pair: pair[0])
    return sum(value for _, value in pairs[-k:])

# Example input
data = {
    "tag": ["env:prod", "prod1"],
    "timestamp": [1, 10, 3, 100, 2],
    "value": [-1, 10, -10, 100, 2]
}

k = 2
tag = "prod1"

# Call the function with the provided input
result = evaluate_sum_of_last_k(data, k, tag)
print(result)  # Output: 110

110


# 2 Coin Change

You are given an integer array of available coin denominations (coins) and an integer amount (amount). Your goal is to determine the minimum number of coins needed to make up that amount. If it's impossible to make the amount with the given coins, return -1.

Follow-up: LC #518

In [6]:
class CoinCounts:
    """Find a smallest collection of coins that adds up to a target value.

    Three strategies are provided under distinct names so that none of them
    overwrites another:

    * ``getCoinsDP``     - bottom-up dynamic programming (always optimal).
    * ``getCoinsGreedy`` - largest coin first (fast, not always optimal and
      may miss solutions for arbitrary denominations).
    * ``getCoins``       - depth-first search with pruning (always optimal).

    Every strategy returns a list ``counts`` in which ``counts[i]`` is the
    number of coins of denomination ``coinTypes[i]`` that are used, or ``[]``
    when no combination was found. The caller's ``coinTypes`` list is never
    modified, so the counts always line up with the list that was passed in.
    """

    @staticmethod
    def _check_denominations(coinTypes):
        """Raise ``ValueError`` if any denomination is zero or negative."""
        for coin in coinTypes:
            if coin <= 0:
                raise ValueError(f"coin denominations must be positive, got {coin!r}")

    @staticmethod
    def _largest_first(coinTypes):
        """Return the indices of ``coinTypes`` ordered by descending denomination."""
        return sorted(range(len(coinTypes)), key=coinTypes.__getitem__, reverse=True)

    # Approach 1: DP
    def getCoinsDP(self, coinTypes, value):
        """Return per-denomination counts of a minimum-coin solution via DP.

        Args:
            coinTypes: Positive coin denominations.
            value: Amount to build.

        Returns:
            Counts aligned with ``coinTypes``, or ``[]`` if ``value`` cannot
            be built (including a negative ``value``).

        Raises:
            ValueError: If a denomination is not positive.
        """
        self._check_denominations(coinTypes)
        if value < 0:
            return []

        unreachable = float('inf')
        # fewest[a] is the minimum number of coins that add up to amount a.
        fewest = [0] + [unreachable] * value
        # last_coin[a] is the index of the coin added last in that optimum;
        # following it backwards reconstructs the counts without copying a
        # full count vector for every amount.
        last_coin = [-1] * (value + 1)

        for amount in range(1, value + 1):
            for idx, coin in enumerate(coinTypes):
                # inf + 1 is still inf, so unreachable sub-amounts never win.
                if coin <= amount and fewest[amount - coin] + 1 < fewest[amount]:
                    fewest[amount] = fewest[amount - coin] + 1
                    last_coin[amount] = idx

        if fewest[value] == unreachable:
            return []

        counts = [0] * len(coinTypes)
        amount = value
        while amount > 0:
            idx = last_coin[amount]
            counts[idx] += 1
            amount -= coinTypes[idx]
        return counts

    # Approach 2: Greedy
    def getCoinsGreedy(self, coinTypes, value):
        """Return counts obtained by always taking the largest coin that fits.

        Args:
            coinTypes: Positive coin denominations.
            value: Amount to build.

        Returns:
            Counts aligned with ``coinTypes``, or ``[]`` if the greedy walk
            leaves a remainder (which does not prove that no solution exists).

        Raises:
            ValueError: If a denomination is not positive.
        """
        self._check_denominations(coinTypes)
        if value < 0:
            return []

        counts = [0] * len(coinTypes)
        for idx in self._largest_first(coinTypes):
            if value == 0:
                break  # Nothing left to cover
            counts[idx], value = divmod(value, coinTypes[idx])

        # A leftover remainder means the greedy method failed to find a solution.
        return counts if value == 0 else []

    # Approach 3: DFS
    def getCoins(self, coinTypes, value):
        """Return per-denomination counts of a minimum-coin solution via DFS.

        Denominations are explored from largest to smallest, trying the
        highest possible count of each first; the first combination reaching
        the minimum total is the one returned.

        Args:
            coinTypes: Positive coin denominations.
            value: Amount to build.

        Returns:
            Counts aligned with ``coinTypes``, or ``[]`` if ``value`` cannot
            be built (including a negative ``value``).

        Raises:
            ValueError: If a denomination is not positive.
        """
        self._check_denominations(coinTypes)
        if value < 0:
            return []

        order = self._largest_first(coinTypes)
        counts = [0] * len(coinTypes)
        best_total = float('inf')
        best_counts = None

        def dfs(remaining, pos, used):
            nonlocal best_total, best_counts

            if remaining == 0:
                if used < best_total:
                    best_total = used
                    best_counts = counts[:]
                return
            if pos == len(order):
                return

            idx = order[pos]
            coin = coinTypes[idx]
            # ``coin`` is the largest denomination still available, so at
            # least ceil(remaining / coin) more coins are needed. Give up on
            # this branch when even that cannot beat the best total so far.
            if used - (-remaining // coin) >= best_total:
                return

            for cnt in range(remaining // coin, -1, -1):
                counts[idx] = cnt
                dfs(remaining - cnt * coin, pos + 1, used + cnt)
            counts[idx] = 0  # Backtrack

        dfs(value, 0, 0)
        return best_counts if best_counts is not None else []

3 2


# 3 Calculate the File Sizes in a Folder

You are given a directory (folder) containing files and subfolders. Your task is to traverse the directory recursively, identify every file (including those in nested subfolders), and return the total size of all files.

In [12]:
import os

def calculate_folder_size(folder_path):
    """Return the total size in bytes of all files below ``folder_path``.

    Sub-folders are walked recursively. Symbolic links that point at
    directories are not followed, so a link cycle cannot cause endless
    recursion.

    Args:
        folder_path: Path of the directory to measure.

    Returns:
        Sum of ``os.path.getsize`` over every file found in the tree.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.path.getsize`` (for
            example when ``folder_path`` does not exist or is unreadable).
    """
    total_size = 0

    # Loop over all the files and subfolders in the current folder
    for item in os.listdir(folder_path):
        item_path = os.path.join(folder_path, item)

        # If it's a file, add its size
        if os.path.isfile(item_path):
            total_size += os.path.getsize(item_path)

        # If it's a real folder, recursively add the size of its contents
        elif os.path.isdir(item_path) and not os.path.islink(item_path):
            total_size += calculate_folder_size(item_path)

    return total_size

# Example usage
folder_path = "/Users/cathzzr2/Desktop/SRI2024"
total_size = calculate_folder_size(folder_path)
print(str(total_size) + " bytes")


6001904 bytes


In [None]:
# File/Directory Interface is given
# Use isinstance() function
class File:
    """Leaf node of the file tree: a named entry with a fixed size."""

    def __init__(self, name, size):
        self.name, self.size = name, size

    def get_size(self):
        """Return the size this file was created with."""
        return self.size

class Directory:
    """Inner node of the file tree: a named container of child entries."""

    def __init__(self, name):
        self.children = []
        self.name = name

    def add_child(self, child):
        """Attach ``child`` (anything exposing ``get_size()``) to this directory."""
        self.children.append(child)

    def get_size(self):
        """Return the combined ``get_size()`` of every child entry."""
        return sum(entry.get_size() for entry in self.children)

class Solution:
    """Computes the total file size below a directory using ``isinstance`` checks."""

    def calc_size_sum(self, directory) -> int:
        """Return the summed size of every file in ``directory`` and its sub-directories.

        Args:
            directory: Node whose ``children`` list holds ``File`` and
                ``Directory`` instances.

        Returns:
            Total of ``get_size()`` over all ``File`` entries in the subtree.
            Children of any other type are ignored.
        """
        size = 0
        for item in directory.children:
            if isinstance(item, File):
                size += item.get_size()
            elif isinstance(item, Directory):
                # Add the nested total; dropping this return value would
                # silently ignore every file below the first level.
                size += self.calc_size_sum(item)
        return size


In [None]:
# If the input is an absolute path

class File:
    """Leaf node of the file tree: a named entry with a fixed size."""

    def __init__(self, name, size):
        self.name, self.size = name, size

    def get_size(self):
        """Return the size this file was created with."""
        return self.size


class Directory:
    """Inner node of the file tree: a named container of child entries."""

    def __init__(self, name):
        self.children = []
        self.name = name

    def add_child(self, child):
        """Attach ``child`` to this directory."""
        self.children.append(child)

    def get_child(self, name):
        """Return the first direct child called ``name``, or ``None`` if there is none."""
        return next((entry for entry in self.children if entry.name == name), None)

    def get_size(self):
        """Return the combined ``get_size()`` of every child entry."""
        return sum(entry.get_size() for entry in self.children)


class Solution:
    """Resolves an absolute path inside a ``Directory`` tree and reports its size."""

    def calc_size_sum(self, root, path: str) -> int:
        """Return the size of the file or directory that ``path`` points to.

        ``path`` is split on "/" and empty segments are ignored, so "/" (or an
        empty string) refers to ``root`` itself.

        Returns:
            ``get_size()`` of the resolved node, or ``0`` when a segment is
            missing, a file appears in the middle of the path, or the resolved
            node is neither a ``File`` nor a ``Directory``.
        """
        node = root

        # Walk down one path segment at a time.
        for segment in filter(None, path.split("/")):
            if not isinstance(node, Directory):
                return 0  # A file cannot have children
            node = node.get_child(segment)
            if node is None:
                return 0  # Segment not found

        if isinstance(node, (File, Directory)):
            return node.get_size()
        return 0


# Example usage:
root = Directory("root")
subdir1 = Directory("subdir1")
subdir2 = Directory("subdir2")
file1 = File("file1.txt", 100)
file2 = File("file2.txt", 200)

root.add_child(subdir1)
root.add_child(file1)
subdir1.add_child(subdir2)
subdir2.add_child(file2)

solution = Solution()

# Example 1: Calculate size for an absolute path to a file
print(solution.calc_size_sum(root, "/subdir1/subdir2/file2.txt"))  # Output: 200

# Example 2: Calculate size for an absolute path to a directory
print(solution.calc_size_sum(root, "/subdir1/subdir2"))  # Output: 200 (total size of files in subdir2)

# Example 3: Calculate size for an invalid path
print(solution.calc_size_sum(root, "/nonexistent"))  # Output: 0


# 4 String Match

LC: #408


You are given two strings: a regular string and a compressed string. Your task is to determine if the compressed string matches the regular string. The compressed string may contain numbers; each number stands for exactly that many consecutive letters of the regular string, which are skipped during the comparison.


Example 1:

datadog and d3dog -> true

Explanation: The 3 in d3dog means that three letters ("ata") in "datadog" are skipped, making the strings match.

Example 2:

datadog and d2dog -> false

Explanation: The 2 in d2dog skips two letters, but this does not allow the strings to match.

In [7]:
class Solution:
    """Checks a string against a compressed form that uses numbers as skips."""

    def is_match(self, regular: str, compressed: str) -> bool:
        """Return True if ``compressed`` describes exactly ``regular``.

        Letters in ``compressed`` must equal the letter at the current
        position of ``regular``; a run of digits skips that many letters.
        Both strings must be consumed completely for a match.

        Args:
            regular: The uncompressed string.
            compressed: Letters mixed with decimal skip counts, e.g. "d3dog".

        Returns:
            True on a full match, False otherwise (including a skip that runs
            past the end of ``regular``).
        """
        regular_len, compressed_len = len(regular), len(compressed)
        regular_idx = compressed_idx = 0

        # Drive the loop by the compressed string so a trailing number or
        # letter is still processed after ``regular`` has been used up.
        while compressed_idx < compressed_len:
            if compressed[compressed_idx].isdigit():
                skip_num = 0
                # Bounds check first: the number may be the last token.
                while compressed_idx < compressed_len and compressed[compressed_idx].isdigit():
                    skip_num = skip_num * 10 + int(compressed[compressed_idx])
                    compressed_idx += 1
                regular_idx += skip_num
                if regular_idx > regular_len:
                    return False  # Skipped more letters than exist
            else:
                if regular_idx >= regular_len or regular[regular_idx] != compressed[compressed_idx]:
                    return False
                regular_idx += 1
                compressed_idx += 1

        # ``compressed`` is exhausted; it matches only if ``regular`` is too.
        return regular_idx == regular_len

# test
solution = Solution()
print(solution.is_match("dataaaaaaaaaadog", "d12dog"))  

True


# 5 String Match Follow-up

Based on #4, add a new condition: the compressed string can contain range patterns like {n,m}, meaning the string can skip between n and m letters. For example, d{1,3}dog would match d1dog, d2dog, or d3dog. Also, special characters like ^ might be introduced to represent a skip pattern, such as d^4dog, which can be interpreted the same way as d4dog.

In [9]:
class Solution:
    """Matches a string against a compressed form with skips, ranges and '^'."""

    def is_match(self, regular: str, compressed: str) -> bool:
        """Return True if ``compressed`` can describe exactly ``regular``.

        Supported tokens in ``compressed``:

        * a letter      - must equal the current letter of ``regular``;
        * digits ``n``  - skip exactly ``n`` letters;
        * ``^``         - marker with no effect of its own ("d^4dog" == "d4dog");
        * ``{n,m}``     - skip any number of letters between ``n`` and ``m``
          (inclusive). Parsing is lenient: a missing ",", upper bound or "}"
          is tolerated, and a missing bound counts as 0.

        Args:
            regular: The uncompressed string.
            compressed: The pattern to test against ``regular``.

        Returns:
            True if some choice of skip counts consumes both strings fully.
        """
        # Local import keeps this notebook cell self-contained.
        from functools import lru_cache

        regular_len, compressed_len = len(regular), len(compressed)

        def read_number(pos):
            """Parse the digit run starting at ``pos``; return (value, next position)."""
            value = 0
            while pos < compressed_len and compressed[pos].isdigit():
                value = value * 10 + int(compressed[pos])
                pos += 1
            return value, pos

        # Memoise on (position in regular, position in compressed) so that
        # several range patterns do not re-explore the same suffix repeatedly.
        @lru_cache(maxsize=None)
        def match_from(regular_idx, compressed_idx):
            # Drive the loop by the pattern so trailing tokens such as "{0,2}"
            # are still evaluated once ``regular`` has been used up.
            while compressed_idx < compressed_len:
                token = compressed[compressed_idx]

                if token.isdigit():
                    skip_num, compressed_idx = read_number(compressed_idx)
                    regular_idx += skip_num
                    if regular_idx > regular_len:
                        return False  # Skipped more letters than exist

                elif token == '^':
                    compressed_idx += 1  # skip '^'

                elif token == '{':
                    range_start, compressed_idx = read_number(compressed_idx + 1)
                    if compressed_idx < compressed_len and compressed[compressed_idx] == ',':
                        compressed_idx += 1  # skip ','
                    range_end, compressed_idx = read_number(compressed_idx)
                    if compressed_idx < compressed_len and compressed[compressed_idx] == '}':
                        compressed_idx += 1  # skip '}'

                    # Never skip beyond the end of ``regular``.
                    range_end = min(range_end, regular_len - regular_idx)
                    # The recursive call validates the whole remaining suffix,
                    # so its answer is final for this branch.
                    return any(
                        match_from(regular_idx + skip_count, compressed_idx)
                        for skip_count in range(range_start, range_end + 1)
                    )

                else:
                    # Plain letter: must equal the current letter of ``regular``.
                    if regular_idx >= regular_len or regular[regular_idx] != token:
                        return False
                    regular_idx += 1
                    compressed_idx += 1

            # Pattern exhausted: it matches only if ``regular`` is exhausted too.
            return regular_idx == regular_len

        return match_from(0, 0)

# Test cases
solution = Solution()
print(solution.is_match("dataaaaaaaaaadog", "d12dog"))  # True
print(solution.is_match("datadog", "d{1,3}dog"))  # True
print(solution.is_match("datadog", "d2dog"))  # False
print(solution.is_match("datadog", "d3^dog"))  # True

True
True
False
True


# 6 Query and Log

You are given a list of strings. The strings can either be queries (starting with Q:) or logs (starting with L:). Your task is to:


- For queries: Register the query, assign it a unique query ID (e.g., q1, q2, etc.), and store the query's words.


- For logs: For each log, check if the words in any registered query are a subset of the log's words. If a match is found, print the corresponding query IDs. If no match is found, print "no match."


A "match" means that all the words in a query must be present in the log, but the log can contain additional words.

In [None]:
# Approach 1: Brute Force

def process_logs_and_queries(inputs):
    """Register queries and match each log line against them (brute force).

    Lines starting with "Q:" register a query and receive the ids q1, q2, ...
    in order of appearance. Lines starting with "L:" are logs; a query matches
    a log when every word of the query occurs in the log. Any other line is
    ignored. Words are separated by whitespace, and the space after the
    "Q:" / "L:" prefix is optional.

    Args:
        inputs: Iterable of input lines.

    Returns:
        One message per processed line: "Registered <id>" for a query,
        "Log <id>, <id>, ..." (ids in registration order) for a log with
        matches, and "Log no match" otherwise.
    """
    queries = []  # (query_id, set of words) in registration order
    log_results = []

    for line in inputs:
        # Handling queries
        if line.startswith("Q:"):
            query_id = f"q{len(queries) + 1}"
            # Slice off only the two-character prefix; split() discards the
            # optional blank that follows it.
            queries.append((query_id, set(line[2:].split())))
            log_results.append(f"Registered {query_id}")

        # Handling logs
        elif line.startswith("L:"):
            log_words = set(line[2:].split())
            matching_queries = [
                query_id
                for query_id, query_words in queries
                if query_words.issubset(log_words)
            ]

            if matching_queries:
                log_results.append(f"Log {', '.join(matching_queries)}")
            else:
                log_results.append("Log no match")

    return log_results

# Example input
input_data = [
    "Q: hello world",
    "Q: data failure",
    "Q: world hello",
    "L: hello world we have a data failure",
    "L: oh no hello system error",
    "Q: system error",
    "L: oh no hello system error again",
    "L: oh no hello world system error again"
]

# Call the function and print the results
output = process_logs_and_queries(input_data)
for line in output:
    print(line)

In [None]:
from collections import defaultdict

# Approach 2: Inverted Index

def process_logs_and_queries_reverted_index(inputs):
    """Register queries and match each log line against them via an inverted index.

    Lines starting with "Q:" register a query and receive the ids q1, q2, ...
    in order of appearance. Lines starting with "L:" are logs; a query matches
    a log when every word of the query occurs in the log. Any other line is
    ignored. The space after the "Q:" / "L:" prefix is optional.

    Only queries sharing at least one word with the log are examined, which
    is the point of the word -> queries index.

    Args:
        inputs: Iterable of input lines.

    Returns:
        One message per processed line: "Registered <id>" for a query,
        "Log <id>, <id>, ..." (ids in registration order) for a log with
        matches, and "Log no match" otherwise.
    """
    query_words_by_number = {}          # query number -> set of its words
    inverted_index = defaultdict(set)   # word -> numbers of queries containing it
    log_results = []

    for line in inputs:
        # Handling queries
        if line.startswith("Q:"):
            number = len(query_words_by_number) + 1
            query_words = set(line[2:].split())
            query_words_by_number[number] = query_words
            log_results.append(f"Registered q{number}")

            # Add words from the query to the inverted index
            for word in query_words:
                inverted_index[word].add(number)

        # Handling logs
        elif line.startswith("L:"):
            log_words = set(line[2:].split())

            # Candidate queries share at least one word with the log.
            # ``.get`` avoids creating index entries for unknown log words.
            candidates = set()
            for word in log_words:
                candidates.update(inverted_index.get(word, ()))

            # Confirm every word of a candidate is present. Sorting the
            # numbers gives a stable registration-order output instead of
            # relying on set iteration order.
            final_matches = [
                f"q{number}"
                for number in sorted(candidates)
                if query_words_by_number[number].issubset(log_words)
            ]

            if final_matches:
                log_results.append(f"Log {', '.join(final_matches)}")
            else:
                log_results.append("Log no match")

    return log_results

# Example input
input_data = [
    "Q: hello world",
    "Q: data failure",
    "Q: world hello",
    "L: hello world we have a data failure",
    "L: oh no hello system error",
    "Q: system error",
    "L: oh no hello system error again",
    "L: oh no hello world system error again"
]

# Call the function and print the results
output = process_logs_and_queries_reverted_index(input_data)
for line in output:
    print(line)


# 7 Design a Circular Buffer/Queue

You need to design a Circular Buffer or Circular Queue that supports the following operations:


- push(): Insert an element at the rear of the buffer.


- pop(): Remove an element from the front of the buffer.


- peek(): Retrieve the element at the front without removing it.


- size(): Return the number of elements currently in the buffer.

In [3]:
class CircularQueue:
    """Fixed-capacity FIFO queue kept in a preallocated ring of slots.

    ``front`` is the slot of the oldest element, ``rear`` the slot the next
    pushed element goes into, and ``count`` the number of stored elements.
    """

    def __init__(self, capacity: int):
        self.queue = [None] * capacity
        self.capacity = capacity
        self.front = self.rear = self.count = 0

    def _next_slot(self, slot: int) -> int:
        """Return the slot after ``slot``, wrapping around at ``capacity``."""
        return (slot + 1) % self.capacity

    def push(self, value: int) -> bool:
        """Append ``value`` at the rear; return False if the queue is full."""
        if not self.is_full():
            self.queue[self.rear] = value
            self.rear = self._next_slot(self.rear)
            self.count += 1
            return True
        return False

    def pop(self) -> bool:
        """Drop the front element; return False if the queue is empty."""
        if not self.is_empty():
            self.queue[self.front] = None  # release the reference
            self.front = self._next_slot(self.front)
            self.count -= 1
            return True
        return False

    def peek(self) -> int:
        """Return the front element without removing it, or -1 if empty."""
        return -1 if self.is_empty() else self.queue[self.front]

    def size(self) -> int:
        """Return the number of stored elements."""
        return self.count

    def is_full(self) -> bool:
        """Return True when no free slot is left."""
        return self.capacity == self.count

    def is_empty(self) -> bool:
        """Return True when nothing is stored."""
        return not self.count


# Example usage:
queue = CircularQueue(3)
print(queue.push(1))  # True
print(queue.push(2))  # True
print(queue.push(3))  # True
print(queue.push(4))  # False (Queue is full)

print(queue.peek())   # 1
print(queue.pop())    # True
print(queue.peek())   # 2

print(queue.push(4))  # True (Inserted after pop)
print(queue.peek())   # 2
print(queue.size())   # 3

True
True
True
False
1
True
2
True
2
3


# 8 Buffer File


You are tasked with implementing a Buffered File class. This class is responsible for writing data to a disk using a buffer. The buffer temporarily holds the data until it reaches a specific threshold, at which point all data in the buffer is flushed (written) to the disk in one operation.


Key operations:


- write(content): Adds data to the buffer. If the buffer reaches its maximum capacity (threshold), it automatically flushes the data to the disk.


- flush(): Writes all the current buffered data to the disk and clears the buffer.


Requirements:


- Buffered Write: You are given a maximum buffer size. As you write data, the content is first stored in a buffer. When the buffer is full (i.e., reaches the maximum size), all of its content is flushed to the disk in one operation.


- Flush: When the buffer is flushed, the data is written to the disk and the buffer is cleared.


In [None]:
class BufferedFile:
    """Collects written items in memory and moves them to "disk" in batches.

    Attributes (plain lists simulate the storage layers):
        maxBufferedSize: Buffer length at which an automatic flush happens.
        diskStorage: Everything that has been flushed so far, in write order.
        buffer: Items written since the last flush.
    """

    def __init__(self, maxBufferedSize):
        # Maximum size the buffer can hold before flushing to disk
        self.maxBufferedSize = maxBufferedSize
        # Simulate disk storage
        self.diskStorage = []
        # Buffer to hold the data temporarily
        self.buffer = []

    def write(self, content):
        """Buffer each item of ``content``, flushing whenever the buffer is full.

        Args:
            content: Iterable of items to write (e.g. a string, written
                character by character).
        """
        for ch in content:
            self.buffer.append(ch)
            # ``>=`` rather than ``==``: with a non-positive threshold, or a
            # buffer already past the threshold, equality would never be hit
            # and the buffer would grow without ever reaching the disk.
            if len(self.buffer) >= self.maxBufferedSize:
                self.flush()

    def flush(self):
        """Move all buffered items to ``diskStorage`` and start a fresh buffer."""
        self.diskStorage.extend(self.buffer)
        self.buffer = []

# Example usage:
file = BufferedFile(5)
file.write("hello")
file.write("world")

# Check what is written to disk
print(file.diskStorage)  # Output: ['h', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'l', 'd']

# Manually flush any remaining buffer content
file.flush()
print(file.diskStorage)  # Output: ['h', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'l', 'd']


# 9 Delete Directories in a File System


You are tasked with implementing a file system class (FileSystem) that supports the following operations:


- findList(path): Returns a list of all files and directories directly under the given path.


- delete(path): Deletes a file or directory at the given path. If the path is a directory, it should only be deleted after all its children (files or directories) have been deleted.


- isDir(path): Checks whether the given path is a directory or a file.


You need to implement a function deleteDirs(path, fs) that recursively deletes a directory and all of its subdirectories and files.

In [None]:
class FileSystem:
    """Minimal in-memory file system used to exercise ``deleteDirs``.

    ``self.fs`` maps the absolute path of every directory to the list of names
    of its direct children. Files have no entry of their own; they exist only
    as a name in their parent's list.
    """

    def __init__(self):
        # Initialize the file system: directory path -> list of child names
        self.fs = {}

    def findList(self, path):
        """Return the child names of the directory at ``path`` ([] if unknown)."""
        return self.fs.get(path, [])

    def delete(self, path):
        """Remove the file or directory at ``path``.

        The directory entry (if any) is dropped and the name is also removed
        from the parent's listing, so no dangling reference remains and plain
        files can be deleted too. Unknown paths are ignored.
        """
        if path in self.fs:
            del self.fs[path]

        # Unlink from the parent directory. A path without "/" has no
        # resolvable parent, and "/" itself has an empty name: both skip this.
        parent, separator, name = path.rpartition("/")
        if separator and name:
            siblings = self.fs.get(parent or "/")
            if isinstance(siblings, list) and name in siblings:
                siblings.remove(name)

    def isDir(self, path):
        """Return True if ``path`` is a known directory (has a child list)."""
        return path in self.fs and isinstance(self.fs[path], list)

# Recursive function to delete directories and their contents
def deleteDirs(path, fs):
    """Delete ``path`` and, if it is a directory, everything below it.

    Children are removed depth-first, so a directory is only deleted after
    all of its contents are gone.

    Args:
        path: Absolute path of the file or directory to delete.
        fs: Object providing ``isDir(path)``, ``findList(path)`` and
            ``delete(path)``. ``findList`` may return either child names
            relative to ``path`` or absolute child paths.
    """
    if fs.isDir(path):
        base = path.rstrip("/")  # "/" becomes "" so children resolve to "/name"
        # Iterate over a copy: ``delete`` may modify the listing being walked.
        for child in list(fs.findList(path)):
            # Child names are relative to ``path``; build the full path so the
            # recursive call looks at the right entry.
            child_path = child if child.startswith("/") else f"{base}/{child}"
            deleteDirs(child_path, fs)

    # Files are deleted directly; directories once all children are gone.
    fs.delete(path)

# Example usage:
fs = FileSystem()
fs.fs = {
    "/": ["dir1", "dir2", "file1"],
    "/dir1": ["subdir1", "file2"],
    "/dir1/subdir1": ["file3"],
    "/dir2": [],
}

# Deleting directory /dir1 and its contents
deleteDirs("/dir1", fs)
print(fs.fs)  # Output: {'/': ['dir2', 'file1'], '/dir2': []} 

# 10 High-Performance Filter with Tags


You have a stream of data in which each entry contains multiple tags (e.g., ['apple facebook google', 'banana facebook', 'facebook google tesla', 'intuit google facebook']). You also have a filter list of tags. Your goal is to find every entry that contains all the tags in the filter list and to return the remaining tags of those entries, i.e. the tags that are not in the filter list.


For example:


- Input Tags: ['apple facebook google']


- Filter List: ['apple']


- Result: ['facebook', 'google'] (Only the first entry contains apple, and we return the other tags in that entry)

In [None]:
from collections import defaultdict

class HighPerformanceFilter:
    """Stores tag entries and answers "which other tags co-occur with these?".

    Attributes:
        tagsList: entry id -> list of the entry's tags, in input order.
        revertedIdx: tag -> list of ids of the entries containing that tag
            (an inverted index).
        id: id that the next added entry will receive.
    """

    def __init__(self):
        self.tagsList = {}  # Store the list of tags for each entry (id -> tags)
        self.revertedIdx = defaultdict(list)  # Inverted index: tag -> entry ids
        self.id = 1  # Unique id for each entry added

    # Add a new set of tags to the system
    def addTags(self, tagsStr):
        """Register one entry given as a whitespace-separated tag string."""
        # split() without an argument collapses repeated blanks, so no empty
        # "tags" are produced by double spaces or leading/trailing spaces.
        tags = tagsStr.split()

        # Store the tags in the main list with the current id
        self.tagsList[self.id] = tags

        # Index each distinct tag once, even if the entry repeats it.
        for t in dict.fromkeys(tags):
            self.revertedIdx[t].append(self.id)

        # Increment the id for the next set of tags
        self.id += 1

    # Filter out entries based on the inputTags list
    def filter(self, inputTags):
        """Return the remaining tags of every entry that contains all ``inputTags``.

        Args:
            inputTags: Tags an entry must contain; duplicates are ignored.

        Returns:
            For each matching entry (in ascending id order) its tags that are
            not in ``inputTags``, in the order they were added and without
            repeats inside one entry. ``[]`` when ``inputTags`` is empty or
            nothing matches.
        """
        wanted = set(inputTags)
        if not wanted:
            return []

        # Collect one posting list per wanted tag; an unknown tag means that
        # no entry can contain all of them. ``.get`` avoids inserting keys
        # into the defaultdict.
        postings = []
        for tag in wanted:
            entry_ids = self.revertedIdx.get(tag)
            if not entry_ids:
                return []
            postings.append(entry_ids)

        # Intersect starting from the shortest list to keep the work small.
        postings.sort(key=len)
        matching_ids = set(postings[0]).intersection(*postings[1:])

        res = []  # Result list to store the output tags
        for idx in sorted(matching_ids):
            res.extend(
                tag for tag in dict.fromkeys(self.tagsList[idx]) if tag not in wanted
            )
        return res

# Example usage:
fs = HighPerformanceFilter()

# Add some sample tag entries
fs.addTags('apple facebook google')
fs.addTags('banana facebook')
fs.addTags('facebook google tesla')
fs.addTags('intuit google facebook')

# Test the filter function
print(fs.filter(['apple']))  # Output: ['facebook', 'google']
print(fs.filter(['facebook', 'google']))  # Output: ['apple', 'tesla', 'intuit']


In [None]:
# With more complicated input
# stream of tags (possibly with special characters or delimiters).

import re

def high_performance_filter(stream, filter_list):
    """Return the tags of ``stream`` that are not listed in ``filter_list``.

    ``stream`` is one string whose tags are separated by any mix of commas
    and whitespace. Tags are returned in order of appearance; repeats of a
    kept tag are preserved.
    """
    # Set membership keeps each lookup O(1).
    blocked = set(filter_list)

    kept = []
    # Break the stream on runs of commas and/or whitespace.
    for piece in re.split(r'[,\s]+', stream):
        token = piece.strip()
        # Leading/trailing delimiters produce empty pieces; drop those along
        # with every filtered tag.
        if token and token not in blocked:
            kept.append(token)
    return kept

# Example usage:
incoming_stream = "apple, facebook  google  , tesla,    intuit,  google"
filter_list = ["facebook", "google"]

# Call the function and get the result
result = high_performance_filter(incoming_stream, filter_list)
print(result)  # Expected output: ['apple', 'tesla', 'intuit']


# 11 Flight Vacations


LC #568


You are given a grid days where: days[i][j] represents the number of vacation days you can spend in city j during week i.


Your task is to maximize the total number of vacation days you can get, assuming you can travel between any cities at the beginning of each week. In the follow-up of this problem, if there are multiple ways to get the same number of vacation days, you should choose the solution that minimizes the number of flights taken between cities.


In [None]:
class Solution:
    """Plan weekly city stays to maximize vacation days with the fewest flights.

    ``days[i][j]`` is the number of vacation days available in city ``j``
    during week ``i``. Any city can be reached from any other city at the
    start of a week, and every change of city costs one flight.
    """

    # Approach 1: DP
    def maxVacationDays2(self, days: list[list[int]]) -> tuple[int, int]:
        """Return ``(vacation_days, flights)`` for the best plan starting in city 0.

        Spending week 0 in any city other than city 0 counts as one flight.
        Among plans with the same number of vacation days, the one with the
        fewest flights is returned. Returns ``(0, 0)`` when there are no weeks
        or no cities.
        """
        return self._best_plan(days, home_city=0)

    # Approach 2: Greedy
    def maxVacationDaysGreedy(self, days: list[list[int]]) -> tuple[int, int]:
        """Return ``(vacation_days, flights)`` when the first city is free to choose.

        Unlike ``maxVacationDays2``, the city used in week 0 costs no flight,
        so the flight count can be lower for the same number of vacation days.
        Despite the name, this evaluates the same week-by-week table as
        approach 1. Returns ``(0, 0)`` when there are no weeks or no cities.
        """
        return self._best_plan(days, home_city=None)

    def _best_plan(self, days, home_city):
        """Run the week-by-week DP shared by both public approaches.

        ``home_city`` is the city that costs no flight in week 0; ``None``
        means every city is free in week 0.
        """
        # Check emptiness before touching days[0], which does not exist for []
        if not days or not days[0]:
            return (0, 0)

        city_count = len(days[0])

        def rank(plan):
            # More vacation days wins; on a tie, fewer flights wins
            return (plan[0], -plan[1])

        # plans[c] = best (vacation_days, flights) for a schedule ending in city c
        plans = [
            (days[0][city], 0 if home_city is None or city == home_city else 1)
            for city in range(city_count)
        ]

        for week in days[1:]:
            next_plans = []
            for dest in range(city_count):
                # Every city of the previous week is a possible origin;
                # staying put (origin == dest) needs no flight.
                candidates = (
                    (total + week[dest], flights + (0 if origin == dest else 1))
                    for origin, (total, flights) in enumerate(plans)
                )
                next_plans.append(max(candidates, key=rank))
            plans = next_plans

        return max(plans, key=rank)

# Example usage:
days = [
    [1, 3, 1],
    [6, 0, 3],
    [3, 3, 3]
]

solution = Solution()
result = solution.maxVacationDays2(days)
# Best plan: fly to city 1 (3 days), fly to city 0 (6 days), stay in city 0 (3 days)
print(result)  # Output: (12, 2) -> 12 vacation days with 2 flights


# 12 Percentile Calculation


This problem is about percentile calculations and consists of two parts:


- Part 1: Given a percentile (e.g., 90th percentile), calculate the boundary value at that percentile in a dataset.


- Part 2: Given a specific value, determine what percentile this value corresponds to within the dataset.

In [None]:
class PercentileCalculator:
    """Answer percentile queries over a fixed dataset.

    The data is sorted once at construction, and both queries rely on that order.
    """

    def __init__(self, data):
        # Sort the data when initializing
        self.data = sorted(data)

    # Part 1: Calculate the boundary value for a given percentile without numpy
    def calculate_percentile_boundary(self, percentile):
        """Return the value at `percentile`, interpolating linearly between ranks.

        The percentile maps to the fractional index ``percentile * (n - 1) / 100``
        of the sorted data. When that index is a whole number the stored value
        is returned unchanged; otherwise the result is interpolated between the
        two neighbouring values.

        Raises:
            ValueError: if `percentile` is outside [0, 100] or the dataset is empty.
        """
        if percentile < 0 or percentile > 100:
            raise ValueError("Percentile must be between 0 and 100")

        n = len(self.data)
        if n == 0:
            raise ValueError("Cannot compute a percentile of an empty dataset")

        # Multiply before dividing: for integer percentiles this keeps whole
        # ranks exact (e.g. 7 * 100 / 100 == 7.0, whereas 0.07 * 100 is not 7.0)
        rank = percentile * (n - 1) / 100
        lower_idx = int(rank)
        fraction = rank - lower_idx

        # A whole-number rank points directly at a stored value
        if fraction == 0:
            return self.data[lower_idx]

        # Otherwise interpolate between the two nearest ranks; rank < n - 1
        # here, so lower_idx + 1 is always a valid index
        lower_value = self.data[lower_idx]
        upper_value = self.data[lower_idx + 1]
        return lower_value + (upper_value - lower_value) * fraction

    # Part 2: Find the percentile that a specific value falls into
    def find_percentile_of_value(self, value):
        """Return the percentage (0-100) of data points strictly less than `value`.

        Raises:
            ValueError: if the dataset is empty.
        """
        from bisect import bisect_left

        if not self.data:
            raise ValueError("Cannot compute a percentile of an empty dataset")

        # In sorted data, the leftmost insertion point of `value` equals the
        # number of elements strictly smaller than it
        count_less_than_value = bisect_left(self.data, value)
        return (count_less_than_value / len(self.data)) * 100


# Example usage:
data = [15, 20, 35, 40, 50, 55, 60, 65, 70, 85, 90]
calculator = PercentileCalculator(data)

# Part 1: Calculate the boundary value for the 90th percentile
# With 11 values the 90th percentile lands exactly on index 9 of the sorted data
boundary_90th = calculator.calculate_percentile_boundary(90)
print(f"90th percentile boundary value: {boundary_90th}")  # Expected output: 85

# Part 2: Find the percentile for a specific value
value = 60
percentile_of_value = calculator.find_percentile_of_value(value)
print(f"The value {value} falls in the {percentile_of_value} percentile")  # Expected output: about 54.5 (6 of the 11 values are below 60)


# 13 List of Buckets


Input:


- A list of integers.


- Number of buckets.


- Bucket width.


Output:


- The count of integers in each bucket.


Example:


For the list [1, 2, 11, 20, 100], 3 buckets, and a bucket width of 10:


- Bucket 0-9: Contains 1, 2, so the count is 2.


- Bucket 10-19: Contains 11, so the count is 1.


- Bucket 20+: Contains 20, 100, so the count is 2.

In [None]:
def bucketize(nums, num_buckets, bucket_width):
    """Count how many numbers fall into each fixed-width bucket and print the counts.

    Bucket ``i`` covers ``i * bucket_width`` through ``(i + 1) * bucket_width - 1``.
    The last bucket is open-ended and also absorbs every larger value. One line
    is printed per bucket (``"start-end: count"``, and ``"start+: count"`` for
    the last bucket). Nothing is returned.

    Raises:
        ValueError: if `num_buckets` or `bucket_width` is not positive, or if
            `nums` contains a negative value (no bucket covers it).
    """
    if num_buckets <= 0 or bucket_width <= 0:
        raise ValueError("num_buckets and bucket_width must be positive")

    # Create a list to store the counts of each bucket
    buckets = [0] * num_buckets
    overflow_index = num_buckets - 1

    # Count everything before printing so invalid input produces no partial output
    for num in nums:
        if num < 0:
            # A negative quotient would silently index the list from the end
            raise ValueError(f"Cannot bucketize negative value: {num}")
        buckets[min(num // bucket_width, overflow_index)] += 1

    # Output the bounded buckets in the required format
    for i, count in enumerate(buckets[:-1]):
        start = i * bucket_width
        end = start + bucket_width - 1
        print(f"{start}-{end}: {count}")

    # Handle the last, open-ended bucket
    start = overflow_index * bucket_width
    print(f"{start}+: {buckets[overflow_index]}")

# Example usage:
nums = [1, 2, 11, 20, 100]
num_buckets = 3
bucket_width = 10

# Prints:
#   0-9: 2
#   10-19: 1
#   20+: 2
bucketize(nums, num_buckets, bucket_width)


# 14 Complete the Missing Points


You are given a list of coordinates, and the points between each pair of consecutive points are missing. The missing points need to be interpolated at an x-coordinate interval, ensuring that the new points lie on a straight line between the given points.


For example, given an interval of 5, interpolate the missing points between consecutive points at increments of 5 along the x-axis, while keeping the points on the straight line between them.


Example:


- Input: [(0, 10), (10, 10), (20, -10)]


- Interval: 5


- Output: [(0, 10), (5, 10), (10, 10), (15, 0), (20, -10)]

In [None]:
def interpolate_points(points, interval):
    """Fill in points at every `interval` along x between consecutive points.

    Each interpolated point lies on the straight line between its two
    neighbouring input points, with its y value rounded to the nearest integer.
    The input points themselves are kept as given. A segment whose end x is not
    greater than its start x gets no interpolated points.

    Returns:
        A list with the original and interpolated points in order; an empty
        list when `points` is empty.

    Raises:
        ValueError: if `interval` is not positive (the stepping would never end).
    """
    if interval <= 0:
        raise ValueError("interval must be positive")
    if len(points) == 0:
        return []

    result = []

    # Iterate over each consecutive pair of points
    for (x1, y1), (x2, y2) in zip(points, points[1:]):
        # Add the starting point
        result.append((x1, y1))

        # Nothing lies strictly between the two x values; this also avoids
        # dividing by zero when both points share the same x
        if x2 <= x1:
            continue

        # Calculate the slope (m) between two points
        slope = (y2 - y1) / (x2 - x1)

        # Interpolate points between x1 and x2 with step = interval
        x = x1 + interval
        while x < x2:
            y = y1 + slope * (x - x1)  # Calculate the y value at x
            result.append((x, round(y)))  # Append the interpolated point
            x += interval

    # Add the last point
    result.append(points[-1])

    return result

# Example usage:
points = [(0, 10), (10, 10), (20, -10)]
interval = 5
output = interpolate_points(points, interval)
print(output)  # Output: [(0, 10), (5, 10), (10, 10), (15, 0), (20, -10)]


# 15 List and Delete All Files

You are given a file system API, and the task is to implement an API that lists and deletes all files under a given path. After that, you are also asked to consider how to reduce resource usage.

In [None]:
class FileSystem:
    """In-memory stand-in for a file system API.

    `fs` maps a full path to either a list of child names (a directory) or any
    other value (a file).
    """

    def __init__(self):
        self.fs = {}  # Mock file system representation

    def findList(self, path):
        """Return the child names stored for a directory, or [] for anything else"""
        entry = self.fs.get(path)
        return entry if isinstance(entry, list) else []

    def delete(self, path):
        """Remove the entry for `path` and report it; unknown paths are ignored"""
        if path not in self.fs:
            return
        # Only this entry is removed; listings that mention it are left untouched
        del self.fs[path]
        print(f"Deleted: {path}")

    def isDir(self, path):
        """Return True when `path` exists and its entry is a list of children"""
        return isinstance(self.fs.get(path), list)

class Solution:
    def delete_all_under_path(self, path, fs):
        """List and delete everything under the given path, then the path itself.

        `fs` must provide `isDir(path)`, `findList(path)` and `delete(path)`.
        The contents of a directory are always deleted before the directory.
        The traversal uses an explicit stack instead of recursion, so deeply
        nested directories cannot exceed Python's recursion limit.
        """
        # Each entry is (path, children_done); a directory is pushed back with
        # children_done=True so it is deleted only after everything below it
        pending = [(path, False)]
        while pending:
            current, children_done = pending.pop()
            if children_done or not fs.isDir(current):
                # A file, or a directory whose contents are already gone
                fs.delete(current)
                continue

            pending.append((current, True))
            # Push in reverse so children are handled in listing order
            children = list(fs.findList(current))
            for child in reversed(children):
                pending.append((f"{current}/{child}", False))

# Example Usage:
fs = FileSystem()

# Example mock file system
# A list value marks a directory (its child names); any other value is a file
fs.fs = {
    "/root": ["file1.txt", "dir1"],
    "/root/file1.txt": "content of file1",
    "/root/dir1": ["file2.txt"],
    "/root/dir1/file2.txt": "content of file2"
}

solution = Solution()

# Delete all files under /root
# Prints:
#   Deleted: /root/file1.txt
#   Deleted: /root/dir1/file2.txt
#   Deleted: /root/dir1
#   Deleted: /root
solution.delete_all_under_path("/root", fs)

# 16 Max Path Sum from Root to Leaf

You are tasked with finding the maximum path sum from the root to a leaf in a binary tree or an n-ary tree. The path sum is the sum of the values of the nodes along the path from the root to a leaf node.


- In a binary tree, each node has at most two children.


- In an n-ary tree, each node can have up to n children.


- Follow-up: the tree becomes a directed acyclic graph (DAG), so a node may be reachable along more than one path.

In [None]:
class TreeNode:
    """Binary-tree node holding a value and optional left and right children."""

    def __init__(self, val=0, left=None, right=None):
        self.val, self.left, self.right = val, left, right

class Solution:
    def maxPathSum(self, root: TreeNode) -> int:
        """Return the largest root-to-leaf sum of node values; 0 for an empty tree."""
        if not root:
            return 0

        # Only follow children that actually exist, left before right
        branch_sums = [
            self.maxPathSum(child)
            for child in (root.left, root.right)
            if child
        ]

        # A node without children is a leaf: the path ends here
        if not branch_sums:
            return root.val

        # Extend the better of the available child paths with this node
        return root.val + max(branch_sums)

# Example usage:
# Constructing the binary tree:
#        5
#      /   \
#     4     8
#    /     / \
#   11    13  4
#  /  \        \
# 7    2        1

root = TreeNode(5)
root.left = TreeNode(4)
root.right = TreeNode(8)
root.left.left = TreeNode(11)
root.left.left.left = TreeNode(7)
root.left.left.right = TreeNode(2)
root.right.left = TreeNode(13)
root.right.right = TreeNode(4)
root.right.right.right = TreeNode(1)

solution = Solution()
# Root-to-leaf sums: 5+4+11+7 = 27, 5+4+11+2 = 22, 5+8+13 = 26, 5+8+4+1 = 18
print(solution.maxPathSum(root))  # Output: 27 (5 -> 4 -> 11 -> 7)

In [None]:
class Node:
    """N-ary tree node holding a value and a list of child nodes."""

    def __init__(self, val=0, children=None):
        self.val = val
        # Give every node its own list when no children are supplied
        self.children = [] if children is None else children

class Solution:
    def maxPathSum(self, root: Node) -> int:
        """Return the largest root-to-leaf sum of node values; 0 for an empty tree."""
        if not root:
            return 0

        kids = root.children
        # A node without children is a leaf: the path ends here
        if not kids:
            return root.val

        # Extend the best child path with the current node's value
        return root.val + max(self.maxPathSum(kid) for kid in kids)

# Example usage:
# Constructing the N-ary tree:
#        1
#      / | \
#     3  2  4
#    / \
#   5   6

root = Node(1)
root.children = [Node(3, [Node(5), Node(6)]), Node(2), Node(4)]

solution = Solution()
# Root-to-leaf sums: 1+3+5 = 9, 1+3+6 = 10, 1+2 = 3, 1+4 = 5
print(solution.maxPathSum(root))  # Output: 10 (1 -> 3 -> 6)

In [None]:
class GraphNode:
    """Graph node holding a value and a list of outgoing neighbors."""

    def __init__(self, value):
        # Neighbors start empty and are filled in by the caller
        self.neighbors = []
        self.value = value

class Solution:
    def maxPathSum(self, node, visited=None, memo=None):
        """Return the largest sum of values along a path from `node` to a terminal node.

        A terminal node is one with no neighbors. `visited` holds the nodes on
        the path currently being explored and `memo` caches the result per
        node; callers normally leave both unset. Returns 0 when `node` is
        None, matching the tree variants of this problem.

        NOTE: the graph is expected to be acyclic. If every neighbor of a node
        is already on the current path, the result for that node is -inf.
        """
        # An empty graph has no path
        if node is None:
            return 0

        if visited is None:
            visited = set()
        if memo is None:
            memo = {}

        # If we have already computed the maximum path sum from this node, return it
        if node in memo:
            return memo[node]

        # Base case: If the node has no neighbors, it's a terminal node
        if not node.neighbors:
            memo[node] = node.value
            return node.value

        # Mark this node as being on the current path while exploring below it
        visited.add(node)

        # Recursively compute the maximum path sum for each neighbor
        max_sum = float('-inf')
        for neighbor in node.neighbors:
            if neighbor not in visited:
                max_sum = max(max_sum, self.maxPathSum(neighbor, visited, memo))

        visited.remove(node)

        # The maximum path sum starting from this node is its value plus the max of its neighbors
        memo[node] = node.value + max_sum
        return memo[node]

# Example usage:
# Create graph nodes
nodeA = GraphNode(5)
nodeB = GraphNode(3)
nodeC = GraphNode(8)
nodeD = GraphNode(7)
nodeE = GraphNode(2)

# Construct graph (directed edges)
nodeA.neighbors = [nodeB, nodeC]  # A -> B, A -> C
nodeB.neighbors = [nodeD]         # B -> D
nodeC.neighbors = [nodeD, nodeE]  # C -> D, C -> E
nodeD.neighbors = []              # D is a terminal node
nodeE.neighbors = []              # E is a terminal node

# Find the maximum path sum starting from nodeA
# Path sums: A -> C -> D = 20, A -> B -> D = 15, A -> C -> E = 15
solution = Solution()
print(solution.maxPathSum(nodeA))  # Output: 20 (5 -> 8 -> 7)

# 17 Children Jobs


You are given a list of jobs, where each job has a duration and a list of dependent jobs (children jobs). Starting from a given job id, you need to calculate the total time required to finish the job and all its dependent jobs.

In [None]:
from collections import defaultdict

class Solution:
    """Total the durations of a job and every job reachable through its children."""

    def __init__(self):
        # Graph to store job dependencies
        self.graph = defaultdict(list)
        # Dictionary to store job durations
        self.durations = {}

    def build_graph(self, jobs):
        """ Build the graph and duration dictionary from the input job list

        Each entry of `jobs` is a `(job_id, duration, children)` tuple.
        """
        for job_id, duration, children in jobs:
            self.durations[job_id] = duration
            if children:
                self.graph[job_id].extend(children)

    def total_duration(self, job_id):
        """ Calculate the total duration for the job and its dependencies

        Every reachable job is counted exactly once, even when several jobs
        share it as a child or the dependencies form a cycle.

        Raises:
            KeyError: if `job_id` or any reachable child has no recorded duration.
        """
        total_time = 0
        seen = {job_id}
        # Explicit stack instead of recursion, so long chains cannot overflow
        pending = [job_id]
        while pending:
            job = pending.pop()
            total_time += self.durations[job]
            # .get() avoids creating empty defaultdict entries while reading
            for child in self.graph.get(job, ()):
                if child not in seen:
                    seen.add(child)
                    pending.append(child)
        return total_time

# Example usage: each entry is (job_id, duration, children)
jobs = [
    (1, 5, [2, 3]),    # Job 1 takes 5 time units and has children jobs 2 and 3
    (2, 3, []),        # Job 2 takes 3 time units and has no children
    (3, 2, [4]),       # Job 3 takes 2 time units and has a child job 4
    (4, 4, [])         # Job 4 takes 4 time units and has no children
]

# Create solution instance
solution = Solution()
solution.build_graph(jobs)

# Calculate total duration starting from job 1
# Job 1 reaches jobs 2 and 3 directly and job 4 through job 3
print(solution.total_duration(1))  # Output: 14 (5 + 3 + 2 + 4)


# 18 Interval List Intersections (LC 986)

You are given two lists of closed intervals, firstList and secondList, where firstList[i] = [starti, endi] and secondList[j] = [startj, endj]. Each list of intervals is pairwise disjoint and in sorted order.


Return the intersection of these two interval lists.


A closed interval [a, b] (with a <= b) denotes the set of real numbers x with a <= x <= b.


The intersection of two closed intervals is a set of real numbers that are either empty or represented as a closed interval. For example, the intersection of [1, 3] and [2, 4] is [2, 3].

In [None]:
class Solution:
    def intervalIntersection(self, firstList: List[List[int]], secondList: List[List[int]]) -> List[List[int]]:
        """Return the intersections of two sorted, pairwise-disjoint interval lists.

        Walks both lists with one cursor each, so every interval is visited once.
        """
        overlaps = []
        a = b = 0
        while a < len(firstList) and b < len(secondList):
            first, second = firstList[a], secondList[b]
            # The overlap runs from the later start to the earlier end
            lo = max(first[0], second[0])
            hi = min(first[1], second[1])
            # Closed intervals: touching at a single point still intersects
            if lo <= hi:
                overlaps.append([lo, hi])
            # The interval that ends first cannot overlap anything further
            if first[1] < second[1]:
                a += 1
            else:
                b += 1
        return overlaps
        

# 19 Find Mutual Streams


You are given two parameters:


- An array of streams: Each stream contains keywords separated by |.


- An array of keywords: You need to find the streams where all the provided keywords are present.

In [None]:
def find_mutual_streams(streams, keywords):
    """Return the streams that contain every one of the given keywords.

    Each stream is a string of keywords separated by '|'. Matching streams are
    returned unchanged and in their original order.
    """
    # Build the required set once so each stream needs a single subset test
    required = frozenset(keywords)
    return [
        entry
        for entry in streams
        if required <= set(entry.split('|'))
    ]

# Example usage:
streams = [
    "apple|banana|cherry",
    "banana|cherry|date",
    "apple|cherry|date",
    "apple|banana|date"
]

keywords = ["apple", "cherry"]

# Find streams containing all the keywords
mutual_streams = find_mutual_streams(streams, keywords)
print(mutual_streams)  # Output: ['apple|banana|cherry', 'apple|cherry|date']
