- While Python doesn't have a single "framework" like Java Collections or C++ STL, its **built-in** types and the **collections** module are highly versatile and provide all the tools you need for efficient data manipulation
- Python has the **collections module** and **several built-in** data structures that serve a similar purpose to the C++ STL and Java Collections Framework.

## **List DS: (equivalent of the ArrayList in Java)**

### Initilization

In [1]:
my_list = []           # Empty list
my_list = [1, 2, 3, 4] # List with initial values 

my_list

[1, 2, 3, 4]

### Iterating over a list

In [2]:
my_list = [4, 3, 2, 1] # List with initial values 
n = len(my_list)

# iterate on the indices of a list
for index in range( n ):
    print( my_list[index], end = " ")

print()

# iterating directly over the list
for value in my_list:
    print( value , end = " ")
    
print()

# iterate on the indices and value together
for index, value in enumerate(my_list):
    print( (index, value) , end = " ")

4 3 2 1 
4 3 2 1 
(0, 4) (1, 3) (2, 2) (3, 1) 

### Iterating over a reversed list

In [3]:
# 1. Using reversed()

for value in reversed(my_list):
    print(value, end = ", ")
    
# Time:  O(n) each element is yielded atleast once
# Space: O(1) returns an iterator, no new list is created
# does not modify the original list
# memory-efficient reverse iteration when you don't need a reversed copy
print()

# 2. Using List Slicing my_list[::-1]
for value in my_list[::-1]:
    print(value, end = ", ")
    
# Time:  O(n) slicing creates a new list by copying all n elements
# Space: O(n) extra space since a NEW reverse list is created
# does not modify the original list

print()

# 3. using list.reverse() method

my_list.reverse()
for value in my_list:
    print(value, end = ", ")
    
# Time: O(n) for in-place reversal, plus O(n) to iterate
# Space: O(1) extra space
# modifes the orignal array
print()

# 4. Using a Reverse Index Loop
for i in range(len(my_list)-1, -1, -1):
    print( my_list[i] , end = ", ")
    
# Time: O(n)
# Space: O(1)
# Does NOT modify the original list
# more verbose than reversed() but equally efficeint

1, 2, 3, 4, 
1, 2, 3, 4, 
1, 2, 3, 4, 
4, 3, 2, 1, 

### Various list methods

In [4]:
def custom_print():
    print(f"current list: {my_list} Returned Element: {my_return}")

my_list = []

my_return = my_list.append(5)          # Add an element to the end
custom_print()

my_return = my_list.extend([1210, 77]) # Add multiple elements=
custom_print()

my_return = my_list.insert(0, 10000)   # Insert at a specific position
custom_print()

my_return = my_list.pop()              # Remove and return the last element
custom_print()

my_return = my_list.sort()             # in-place sorting the list
custom_print()

my_return = my_list.reverse()          # in-place reversing the list
custom_print()

current list: [5] Returned Element: None
current list: [5, 1210, 77] Returned Element: None
current list: [10000, 5, 1210, 77] Returned Element: None
current list: [10000, 5, 1210] Returned Element: 77
current list: [5, 1210, 10000] Returned Element: None
current list: [10000, 1210, 5] Returned Element: None


### Sorting a List based on (Multiple) Criteria

In [5]:
data = [(2, 'cherry'), (1, 'date'), (3, 'apple'), (1, 'banana'), (3, 'grape'), (2, "berry")]

# increasing order
sorted_by_first  = sorted(data, key = lambda item : item[0])
sorted_by_second = sorted(data, key = lambda item : item[1])
sorted_by_first_then_second = sorted(data, key = lambda item : (item[0], item[1]) )
sorted_by_second_then_first = sorted(data, key = lambda item : (item[1], item[0]) )

print(f"sorted_by_first: {sorted_by_first}")
print(f"sorted_by_second: {sorted_by_second}")
print(f"sorted_by_first_then_second: {sorted_by_first_then_second}")
print(f"sorted_by_second_then_first: {sorted_by_second_then_first}")

print()

# decreasing order
rev1_sorted_by_first  = sorted( data, key = lambda item : -item[0])
rev2_sorted_by_first  = sorted( data, key = lambda item : item[0], reverse = True )

rev1_sorted_by_second = sorted( data, key = lambda item : -ord(item[1][0]) ) # item[1] works but -item[1] does not
rev2_sorted_by_second = sorted( data, key = lambda item : item[1], reverse=True ) # item[1] works but -item[1] does not

rev1_sorted_by_first_second = sorted( data, key = lambda item : (-item[0], -ord(item[1][0])) )
rev2_sorted_by_first_second = sorted( data, key = lambda item : (item[0], item[1]), reverse = True )

rev1_sorted_by_second_first = sorted( data, key = lambda item : (-ord(item[1][0]), -item[0]) )
rev2_sorted_by_second_first = sorted( data, key = lambda item : (item[1], item[0]), reverse=True )

print(f"rev1_sorted_by_first: {rev1_sorted_by_first}")
print(f"rev2_sorted_by_first: {rev2_sorted_by_first}")

print(f"rev1_sorted_by_second: {rev1_sorted_by_second}")
print(f"rev2_sorted_by_second: {rev2_sorted_by_second}")

print(f"rev1_sorted_by_first_second: {rev1_sorted_by_first_second}")
print(f"rev2_sorted_by_first_second: {rev2_sorted_by_first_second}")

print(f"rev1_sorted_by_second_first: {rev1_sorted_by_second_first}")
print(f"rev2_sorted_by_second_first: {rev2_sorted_by_second_first}")

print()

# all the above functions are also applicable when sorting in place. 
# Just use 
data.sort(key = lambda item: (item[0], -ord(item[1][0]) )) # increasing in first and decreasing in second!
print(f"sorting data in place: {data}")

sorted_by_first: [(1, 'date'), (1, 'banana'), (2, 'cherry'), (2, 'berry'), (3, 'apple'), (3, 'grape')]
sorted_by_second: [(3, 'apple'), (1, 'banana'), (2, 'berry'), (2, 'cherry'), (1, 'date'), (3, 'grape')]
sorted_by_first_then_second: [(1, 'banana'), (1, 'date'), (2, 'berry'), (2, 'cherry'), (3, 'apple'), (3, 'grape')]
sorted_by_second_then_first: [(3, 'apple'), (1, 'banana'), (2, 'berry'), (2, 'cherry'), (1, 'date'), (3, 'grape')]

rev1_sorted_by_first: [(3, 'apple'), (3, 'grape'), (2, 'cherry'), (2, 'berry'), (1, 'date'), (1, 'banana')]
rev2_sorted_by_first: [(3, 'apple'), (3, 'grape'), (2, 'cherry'), (2, 'berry'), (1, 'date'), (1, 'banana')]
rev1_sorted_by_second: [(3, 'grape'), (1, 'date'), (2, 'cherry'), (1, 'banana'), (2, 'berry'), (3, 'apple')]
rev2_sorted_by_second: [(3, 'grape'), (1, 'date'), (2, 'cherry'), (2, 'berry'), (1, 'banana'), (3, 'apple')]
rev1_sorted_by_first_second: [(3, 'grape'), (3, 'apple'), (2, 'cherry'), (2, 'berry'), (1, 'date'), (1, 'banana')]
rev2_sorted_b

## **String DS:**

In [6]:
my_string = "garrry and john"
n = len(my_string)

### Iterating over a string

In [7]:
for char in my_string:
    print(char, end = ", ")

print()

for index in range(n):
    print(index, my_string[index], end = ", ")
    
print()

for index, char in enumerate(my_string):
    print(index, char, end = ", ")

g, a, r, r, r, y,  , a, n, d,  , j, o, h, n, 
0 g, 1 a, 2 r, 3 r, 4 r, 5 y, 6  , 7 a, 8 n, 9 d, 10  , 11 j, 12 o, 13 h, 14 n, 
0 g, 1 a, 2 r, 3 r, 4 r, 5 y, 6  , 7 a, 8 n, 9 d, 10  , 11 j, 12 o, 13 h, 14 n, 

### Iterating over a reversed string

In [8]:
for char in reversed(my_string):
    print(char, end = ", ")
print()

for char in my_string[::-1]:
    print(char, end = ", ")
print()

for index in range(n-1, -1, -1):
    print(my_string[index], end = ", ")

n, h, o, j,  , d, n, a,  , y, r, r, r, a, g, 
n, h, o, j,  , d, n, a,  , y, r, r, r, a, g, 
n, h, o, j,  , d, n, a,  , y, r, r, r, a, g, 

### Various string methods

In [9]:
# Basic Methods

# Slices the string from start (inclusive) to end (exclusive). 
# Omitting start or end implies the beginning or end of the string, respectively.
start, end = 1, 5
print( my_string[start : end] ) 

# Concatenates strings
print( "hello" + " " + "world" )

# repeat strings
print( "hello_" * 5)


# Case Manipulation
print( my_string.lower() )
print( my_string.upper() ) 
print( my_string.title() ) 


# finding and replacing

# return the index of the first occurrence of substring
# return the index of the last occurrence of substring
# replace all occrrences of old with new (make sure you can do this since very important)


# Splitting and Joining

# This method splits a string into a list based on the given separator. 
# The maxsplit parameter limits the number of splits.
# string.split(separator, maxsplit)

string = "Hello world this is Python"
chunks = string.split() # Since no separator is given, it splits on whitespace.
print(chunks)

string = "apple,banana,grape,orange"
fruits = string.split(",")
print(fruits) # Splits the string wherever "," appears

string = "Python is awesome and powerful"
split_twice = string.split(" ", 2)
print(split_twice) # Only 2 splits are performed


# This method joins elements of an iterable (like a list) into a string, using separator between them.

words = ['Hello', 'world', 'this', 'is', 'Python']
string = " ".join(words)
print(string)

fruits = ['apple', 'banana', 'grape', 'orange']
csv_string = ",".join(fruits)
print(csv_string)

letters = ['A', 'B', 'C', 'D']
joined_string = "-".join(letters)
print(joined_string)

numbers = [1, 2, 3, 4, 5]
number_string = " | ".join(map(str, numbers))  # Convert numbers to strings
print(number_string, type(number_string))


arrr
hello world
hello_hello_hello_hello_hello_
garrry and john
GARRRY AND JOHN
Garrry And John
['Hello', 'world', 'this', 'is', 'Python']
['apple', 'banana', 'grape', 'orange']
['Python', 'is', 'awesome and powerful']
Hello world this is Python
apple,banana,grape,orange
A-B-C-D
1 | 2 | 3 | 4 | 5 <class 'str'>


## MAP function in python

- map(function, iterable)

- function → A function that will be applied to each element in iterable.
- iterable → The collection of elements that the function will be applied to.

Pros:
- Faster for large data
- Uses an iterator (lazy evaluation)
- Supports multiple iterables

In [10]:
numbers = [1, 2, 3, 4, 5]
squared = map(lambda x: x ** 2, numbers)
print( list(squared) )

[1, 4, 9, 16, 25]


In [11]:
words = ["hello", "world", "python"]
uppercased = map(str.upper, words) # mind you thin is not str.upper(). mind the () since map only needs a function!
print(list(uppercased))

['HELLO', 'WORLD', 'PYTHON']


In [12]:
numbers = [1, 2, 3, 4, 5]
string_numbers = map(str, numbers)
print(list(string_numbers))

['1', '2', '3', '4', '5']


In [13]:
list1 = [1, 2, 3]
list2 = [4, 5, 6]
sum_list = map(lambda x, y: x + y, list1, list2)
print(list(sum_list))

[5, 7, 9]


In [14]:
names = ["Alice", "Bob", "Charlie"]
formatted = map(lambda name: f"Hello, {name}!", names)
print(list(formatted))

['Hello, Alice!', 'Hello, Bob!', 'Hello, Charlie!']


In [15]:
numbers = [10, 20, 30]
number_string = " - ".join(map(str, numbers))
print(number_string)

10 - 20 - 30


## **Matrix DS as a List of Lists**

In [16]:
nrows = 3
ncols = 4

matrix = [[0 for _ in range(ncols)] for _ in range(nrows)]

## **Set DS: (equivalent of the HashSet in Java)**

- Sets are **unordered**: Elements in a set don't have a specific order.
- Sets contain **unique** elements: **Duplicate** values are automatically **removed**.
- Sets are **mutable**: You can add or remove elements after a set is created.
- Set operations are **very efficient**: Python's set implementations are highly optimized, making them a good choice for tasks involving:
    - membership testing
    - unions
    - intersections
    
- The computational time for different operations in a hash set includes:
    - Lookup: 
        - O(1) average, 
        - O(n) worst-case.

    - Insertion: 
        - O(1) average, 
        - O(n) worst-case.

    - Deletion: 
        - O(1) average, 
        - O(n) worst-case.

    - The space complexity for hash sets is O(n).

    - The **worst-case** scenario arises when the hash function does **not** optimally disperse values, resulting in **too many collisions**. However, these circumstances are rare. Most of the time, hash sets are fast and efficient.
    
- Some commmon problem statements that make using sets efficient are:
    - Array intersection
    - Non-repeating elements
    - Unique elements given two lists (elements in only one of the sets)
    - Unique String in the List
        - Our first problem revolves around identifying the first unique string from a list. Imagine you're working on a text analyzing tool that needs to identify the first unique word in a piece of text. This problem simulates such a real-world scenario.
    - Anagram Pairs in Two Lists

In [17]:
my_set = set()         # Empty set
my_set = {1, 2, 3, 4}  # Set with initial values

# adding elements
my_set.add(6)      # Adds 6 to the set 
my_set.update( [ i * 100 for i in range(1, 9)] )
print(f"my_set after update: {my_set}")

# rmeoving elements
my_set.remove(3)   # Removes 3 from the set (raises KeyError if not present)
my_set.discard(7)  # Removes 7 if present, but does nothing if not **(no error)**
my_set.clear()     # Removes all elements from the set


# set operations
set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}


# Union (elements in either set)
union_set = set1 | set2  # {1, 2, 3, 4, 5, 6}
union_set = set1.union(set2) # Another way to find the union


# Intersection (elements in both sets)
# efficient since O( min(n, m) )
intersection_set = set1 & set2  # {3, 4}
intersection_set = set1.intersection(set2) # Another way to find the intersection


# Difference (elements in set1 but not in set2)
difference_set = set1 - set2  # {1, 2}
difference_set = set1.difference(set2) # Another way to find the difference


# Symmetric Difference (elements in either set, but not both)
symmetric_difference_set = set1 ^ set2  # {1, 2, 5, 6}
symmetric_difference_set = set1.symmetric_difference(set2) # Another way to find the symmetric difference



# Subset (set1 is a subset of set2)
is_subset = set1 <= set2  # False
is_subset = set1.issubset(set2) # Another way to check if set1 is a subset of set2.

# proper subset
is_subset = set1 < set2  
is_subset = set1.issubset(set2) 


# Superset (set1 is a superset of set2)
is_superset = set1 >= set2  # False
is_superset = set1.issuperset(set2) # Another way to check if set1 is a superset of set2.


# Disjoint (sets have no elements in common)
is_disjoint = set1.isdisjoint(set2)  # False

my_set after update: {800, 1, 2, 3, 4, 100, 6, 200, 300, 400, 500, 600, 700}


- The below code demonstrates how you can entirely clear a set or make a copy of it. Note that removing an item from the copied set does not affect the original set.

In [18]:
# Create a new list and make a copy of it
new_list = set(['Eggs', 'Jam', 'Ham'])
copied_list = new_list.copy()

print(new_list)  # Outputs: {'Eggs', 'Ham', 'Jam'}
print(copied_list)  # Outputs: {'Eggs', 'Ham', 'Jam'}

# Modifying the copied list won't affect the original list
copied_list.remove('Ham')
print(new_list)  # Outputs: {'Eggs', 'Ham', 'Jam'}
print(copied_list)  # Outputs: {'Eggs', 'Jam'}

{'Ham', 'Eggs', 'Jam'}
{'Ham', 'Eggs', 'Jam'}
{'Ham', 'Eggs', 'Jam'}
{'Eggs', 'Jam'}


In [19]:
my_set

set()

## **Dict DS: (equivalent of the HashMap in Java)**

- As we delve into the world of hash tables, let's start by understanding their underlying structure. A **hash table** consists of an **array** (the actual table where data is stored), coupled with a **hash function**. The hash function plays a crucial role - it takes the keys as input and generates an index, mapping **keys** to different **slots** or **indices** in the table.

- Each **index** of the array holds a bucket that ultimately contains the key-value pair. The pairing of keys with values enhances the data retrieval process. The efficiency of retrieving values depends on the hash function's ability to distribute data across the array uniformly.

- You can also think of **hash tables** as **hash sets** storing tuples of (key, value), but this particular interface makes it less easy to use, so Python has a concept of dictionaries we will cover below.

- Let's visualize this with a Python dictionary, which operates on the same principle. Suppose we have a dictionary containing student names as keys and their corresponding scores as values: 

In [20]:
# A simple dictionary illustrating the principle of hashing
student_scores = { 'Tom': 85, 'Serena': 92, 'Alex': 78, 'Nina': 88 }

# printing the scores
for student, score in student_scores.items():
    print(f"{student}: {score}")

# Outputs:
# Tom: 85
# Serena: 92
# Alex: 78
# Nina: 88 

Tom: 85
Serena: 92
Alex: 78
Nina: 88


- In this example, 'Tom', 'Serena', 'Alex', and 'Nina' are keys, while 85, 92, 78, and 88 are their associated values. Under the hood, the Python interpreter uses a hash function to assign each key-value pair to a unique address in memory.

## Collision Handling in Hash Tables

- There are instances when two different keys produce the same index after being processed through the hash function. This situation is known as a collision. When a collision occurs, we are faced with a dilemma - where do we store the new key-value pair since that index is already occupied?

- Here are two common strategies to handle such scenarios:

    - Chaining: In this method, each index (or bucket) in the array hosts a linked list of all key-value pairs that hash to the same index. When a collision occurs, we simply go to the collided index and append the new key-value pair to the existing linked list.

    - Open Addressing: Upon encountering a collision, the hash table searches for another free slot or index in the table (possibly the next available empty slot) and assigns that location to the new key-value pair. This approach requires a suitable probing strategy to ensure efficient use of table space.

- The image below provides a visual example of Chaining collision resolving method - John Smith and Sandra Dee have the same hash function result, so their entries are organized in a linked list in the corresponding bucket.

## Time and Space Complexity Analysis for Hash Tables

- Hash tables are renowned for their efficiency and speed when it comes to data storage and retrieval. They boast constant time complexity **O(1)** for the operations on key-value pairs: **insertion, deletion, and retrieval**. 

- This efficiency comes from a **good hash function**, which allows for keys to be uniformly distributed across the table and accessed directly via their indices, eliminating the need to scan through unnecessary slots.

- Although hash tables generally perform robustly, situations may arise where frequent collisions occur. Such situations could deteriorate the table's efficiency and extend the time complexity to a worst-case scenario of **O(n)**, where n is the number of keys hashing to the same index.

## Some Common Problems:
- Frequent Words Finder
- Password Strength Counter
- Bonus Calculator
- Majority Vote Problem with Dictionaries
- Implement a Keyword Index


## Working with Hash Tables in Python – Dictionaries

- Python provides a built-in implementation of hash tables, known as **dictionaries**. Dictionaries in Python work similarly to hash tables. They allow the use of arbitrary keys to access values and handle collisions seamlessly behind the scenes, ensuring consistent and quick access to stored data.

- You can create a dictionary with key-value pairs, access values using keys, and perform various operations such as adding new key-value pairs and deleting them, as demonstrated below:

In [21]:
# Create a Python dictionary similar to a Hash Table
book_ratings = {"Moby-Dick": 8, "The Great Gatsby": 9, "War and Peace": 10, "The Catcher in the Rye": 8}

# Access a value with its key. This happens in O(1) time
print(book_ratings["Moby-Dick"])   # Outputs: 8
# Another way to access a value with its key is by providing the default value if the key is not there. Complexity is also O(1).
print(book_ratings.get("Moby-Dick", 0)) # Outputs: 8
print(book_ratings.get("Moby Dick", 0)) # Outputs: 0

# Add a new key-value pair. The addition operation is also O(1)
book_ratings["To Kill a Mockingbird"] = 9
book_ratings["The Great Gatsby"] = 8
print(book_ratings)
# Outputs: {"Moby-Dick": 8, "The Great Gatsby": 8, "War and Peace": 10, "The Catcher in the Rye": 8, "To Kill a Mockingbird": 9}

# Remove a key-value pair. Deletion is also a constant time operation
del book_ratings["War and Peace"]
print(book_ratings)
# Outputs: {"Moby-Dick": 8, "The Great Gatsby": 9, "The Catcher in the Rye": 8, "To Kill a Mockingbird": 9}

8
8
0
{'Moby-Dick': 8, 'The Great Gatsby': 8, 'War and Peace': 10, 'The Catcher in the Rye': 8, 'To Kill a Mockingbird': 9}
{'Moby-Dick': 8, 'The Great Gatsby': 8, 'The Catcher in the Rye': 8, 'To Kill a Mockingbird': 9}


## **Tuple DS:**

In [22]:
my_tuple = ()

my_tuple = (1, 2, 3, 4)

## **Deque DS:**

In [23]:
from collections import deque

# Empty deque
my_deque = deque()

# Deque with initial values
my_deque = deque([1, 2, 3])

## Other useful libraries for interviews:
- heaps from the `heapq` module
- Counter from the _ module
- SortedList (BSTs) from the `sortedcontainers` module

Bisect Module:
----
- The `bisect` functions help you efficiently find the correct position to insert a new distance marker into your already sorted route so that the route remains sorted. They don't actually insert, they just tell you the index where the insertion should happen.

----
- `bisect_left` gives me the index where I can insert an element so it goes **before** or at the position of any existing elements that are equal to it, maintaining the sorted order.
    - `bisect_left` always returns a non-negative integer index (think about it why?)
        
----
- `bisect_right` gives me the index where I can insert an element so it goes **after** any existing elements that are equal to it (or at the end of the section of equal elements), maintaining the sorted order.

In [24]:
import bisect
route = [1, 2, 2, 3, 5, 5, 5, 8]

index_left_2 = bisect.bisect_left(route, 2)
print(f"bisect_left(route, 2) = {index_left_2}")  # Output: 1 (insert at index 1 or later)

index_right_2 = bisect.bisect_right(route, 2)
print(f"bisect_right(route, 2) = {index_right_2}") # Output: 3 (insert at index 3 or later)

print()

index_left_4 = bisect.bisect_left(route, 4)
print(f"bisect_left(route, 4) = {index_left_4}")  # Output: 4 (insert at index 4 or later)

index_right_4 = bisect.bisect_right(route, 4)
print(f"bisect_right(route, 4) = {index_right_4}") # Output: 4 (insert at index 4 or later, same as left here as no 4s exist)

print()

index_left_5 = bisect.bisect_left(route, 5)
print(f"bisect_left(route, 5) = {index_left_5}")  # Output: 4 (insert at index 4 or later)

index_right_5 = bisect.bisect_right(route, 5)
print(f"bisect_right(route, 5) = {index_right_5}") # Output: 7 (insert at index 7 or later)

print()

index_right_10 = bisect.bisect_right(route, 10)
print(f"bisect_right(route, 10) = {index_right_10}") 

bisect_left(route, 2) = 1
bisect_right(route, 2) = 3

bisect_left(route, 4) = 4
bisect_right(route, 4) = 4

bisect_left(route, 5) = 4
bisect_right(route, 5) = 7

bisect_right(route, 10) = 8
