# Strings

In [2]:
# Python testing module
import ipytest
ipytest.autoconfig()
import pytest

## [Longest Substring Without Repeating Characters](https://leetcode.com/problems/longest-substring-without-repeating-characters/)

In [3]:
# Solution
# We are gonna need a dict to store what all elements we have seen till now and at what position
# We are also gonna need a start position to get the start position of the current string and max_len to keep
# record of the length of the longest substring yet. 
# If we find an element again we are gonna calculate the length of current substring and compare with max_ len and set
# the max of both as max_len. If not we update the dict with the current element found and value as its position and
# keep going till the end

In [21]:
# Implementation
def lengthOfLongestSubstring(s):
    max_len = start = 0
    used = {}
    for i, c in enumerate(s):
        if c in used and start <= used[c]:
            start = used[c] + 1
        else:
            max_len = max(max_len, i - start + 1)
        used[c] = i
    return max_len

In [22]:
%%ipytest 
def test_lengthOfLongestSubstring():
    assert lengthOfLongestSubstring("abcabcbb") == 3
    assert lengthOfLongestSubstring("bbbbb") == 1
    assert lengthOfLongestSubstring("pwwkew") == 3

[32m.[0m[32m                                                                                            [100%][0m
[32m[32m[1m1 passed[0m[32m in 0.01s[0m[0m


## [Longest Repeating Character Replacement](https://leetcode.com/problems/longest-repeating-character-replacement/)

In [25]:
# Solution
# Another tricky question which can be done in O(n) time. First we need two pointers left and right. We start from the
# start of the array and keep track of the count of each chars in the interval. If the count of most occuring element
# in a particular interval is at max 'k' less than the length of that interval then that interval can be updated k 
# times to get longest repeating character with k replacements. We keep track of the max length of such interval
# while iterating over the string and we will have another TADA at the end.

In [30]:
# Implementation
def characterReplacement(s, k):
    # counter will contain the counts of character in the interval in consideration
    c_dict = {}
    max_len = left = 0
    
    # moving the right pointer
    for right, c in enumerate(s):
        
        # add to dict if not present
        if c not in c_dict:
            c_dict[c] = 0
        # update the counter by 1
        c_dict[c] += 1
        # No of items in the current interval
        cell_count = right - left + 1
        if cell_count - max(c_dict.values()) <= k:
            max_len = max(max_len, cell_count)
        else:
            # We need to reduce the interval from left
            c_dict[s[left]] -= 1
            # reduce the count from the char counter
            if c_dict[s[left]] == 0:
                c_dict.pop(s[left])
            left +=  1
    return max_len
            

In [31]:
%%ipytest 
def test_characterReplacement():
    assert characterReplacement(s = "ABAB", k = 2) == 4
    assert characterReplacement(s = "AABABBA", k = 1) == 4


[32m.[0m[32m                                                                                            [100%][0m
[32m[32m[1m1 passed[0m[32m in 0.01s[0m[0m


## [Minimum Window Substring](https://leetcode.com/problems/minimum-window-substring/)

In [55]:
# Solution
# This is hard. We need to return the substring itself so we need to keep track of the start and end pointer to the
# result substring. Initially it will be both zeroes. We also need a counter to count each character in the t variable
# We will store the length of t in the 'missing' variable. We will loop through the string using a window which will 
# use another start and end string, lets say i, j. When we see an element, check if it's in the counter and decrement
# the count by one and also update the number of missing elements by subtracting one from it. Once we have the missing
# variable as zero, that means we have all the variables we need and more. So we can start trimming from the start to 
# arrive at the actual start my eliminating unnecessary elements. You might thing that's all. Nope. We will update the
# start and end pointer and then remove the first element from the counter and decrease the missing count by one.
# Why you ask, so that we can find even smaller substring further in the array if it's there

In [76]:
from collections import Counter
def minWindow(s, t):
    need = Counter(t)
    missing = len(t)
    start, end = 0 , 0
    i = 0
    for j, c in enumerate(s, 1):
        if need[c] > 0:
            missing -= 1
        need[c] -= 1
        if missing == 0:
            while i < j and need[s[i]] < 0:
                need[s[i]] += 1
                i += 1
            if end == 0 or j - i < end - start:
                start, end = i, j
            need[s[i]] += 1
            missing += 1
            i += 1
    return s[start:end]

In [75]:
%%ipytest 
def test_minWindow():
    assert minWindow(s = "ADOBECODEBANC", t = "ABC") == "BANC"
    assert minWindow(s = "a", t = "a") == "a"
    assert minWindow(s = "a", t = "aa") == ""


[32m.[0m[32m                                                                                            [100%][0m
[32m[32m[1m1 passed[0m[32m in 0.01s[0m[0m


In [77]:
minWindow(s = "ADOBECODEBANC", t = "ABC")

need : Counter({'A': 1, 'B': 1, 'C': 1}), c : A
need : Counter({'B': 1, 'C': 1, 'A': 0}), c : D
need : Counter({'B': 1, 'C': 1, 'A': 0, 'D': -1}), c : O
need : Counter({'B': 1, 'C': 1, 'A': 0, 'D': -1, 'O': -1}), c : B
need : Counter({'C': 1, 'A': 0, 'B': 0, 'D': -1, 'O': -1}), c : E
need : Counter({'C': 1, 'A': 0, 'B': 0, 'D': -1, 'O': -1, 'E': -1}), c : C
need : Counter({'A': 1, 'B': 0, 'C': 0, 'D': -1, 'O': -1, 'E': -1}), c : O
need : Counter({'A': 1, 'B': 0, 'C': 0, 'D': -1, 'E': -1, 'O': -2}), c : D
need : Counter({'A': 1, 'B': 0, 'C': 0, 'E': -1, 'D': -2, 'O': -2}), c : E
need : Counter({'A': 1, 'B': 0, 'C': 0, 'D': -2, 'O': -2, 'E': -2}), c : B
need : Counter({'A': 1, 'C': 0, 'B': -1, 'D': -2, 'O': -2, 'E': -2}), c : A
need : Counter({'C': 1, 'A': 0, 'B': 0, 'D': -1, 'O': -1, 'E': -1}), c : N
need : Counter({'C': 1, 'A': 0, 'B': 0, 'D': -1, 'O': -1, 'E': -1, 'N': -1}), c : C


'BANC'

In [80]:
tuple(sorted("bad"))

('a', 'b', 'd')

## [Group Anagrams](https://leetcode.com/problems/group-anagrams/)

In [81]:
# Solution
# We can use a dictionary with sorted string as the key and append the string to its value which is a list.
# We will have our solution after a for loop in dict.values()

In [85]:
# Implementation
def groupAnagrams(strs):
    groups = {}
    for item in strs:
        s_item = tuple(sorted(item))
        groups[s_item] = groups.get(s_item, []) + [item]
    return groups.values()

In [86]:
%%ipytest 
def test_groupAnagrams():
    assert groupAnagrams(["eat","tea","tan","ate","nat","bat"]) == [["bat"],["nat","tan"],["ate","eat","tea"]]
    assert groupAnagrams([""]) == [[""]]
    assert groupAnagrams(["a"]) == [["a"]]


[31mF[0m[31m                                                                                            [100%][0m
[31m[1m________________________________________ test_groupAnagrams ________________________________________[0m

    [94mdef[39;49;00m [92mtest_groupAnagrams[39;49;00m():
>       [94massert[39;49;00m groupAnagrams([[33m"[39;49;00m[33meat[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33mtea[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33mtan[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33mate[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33mnat[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33mbat[39;49;00m[33m"[39;49;00m]) == [[[33m"[39;49;00m[33mbat[39;49;00m[33m"[39;49;00m],[[33m"[39;49;00m[33mnat[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33mtan[39;49;00m[33m"[39;49;00m],[[33m"[39;49;00m[33mate[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33meat[39;49;00m[33m"[39;49;00m,[33m"[39;49;00m[33mtea[39;49;00m[33m"[39;49;00m]

In [87]:
groupAnagrams(["eat","tea","tan","ate","nat","bat"])

dict_values([['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']])

In [88]:
# I don't know how to test this. But it works

## [Largest Number](https://leetcode.com/problems/largest-number/)

In [94]:
# Implementation
from functools import cmp_to_key
def largestNumber(nums):
    for i, num in enumerate(nums):
        nums[i] = str(num)
    
    def compare(a, b):
        if a + b > b + a:
            return -1
        else:
            return 1
    nums = sorted(nums, key = cmp_to_key(compare))
    num = "".join(nums)
    return str(int(num))

In [95]:
%%ipytest 
def test_largestNumber():
    assert largestNumber([10,2]) == '210'
    assert largestNumber([3,30,34,5,9]) == "9534330"


[32m.[0m[32m                                                                                            [100%][0m
[32m[32m[1m1 passed[0m[32m in 0.01s[0m[0m


## [Valid Parentheses](https://leetcode.com/problems/valid-parentheses/)