## Data Structures and Algorithms

In [1]:
# Any sequence (or iterable) can be unpacked into variables using a simple assignment
# operation. The only requirement is that the number of variables and structure match
# the sequence.

x, y = (1, 2)

x, y

(1, 2)

In [3]:
# For an iterable of arbitrary length, use star notation to collect the extra items:

*first_N, last_element = [1,2,3,4,5,6,7]
first_N, last_element

([1, 2, 3, 4, 5, 6], 7)

In [4]:
# The starred name also works in the middle (or at the end) of the targets

first_element, *middle_elements, last_element = [1,2,3,4,5]
first_element, middle_elements, last_element

(1, [2, 3, 4], 5)

In [5]:
# Deque (double-ended queue) is provided in Python by the "collections" module.
#  A deque is preferred over a list when we need fast append and pop operations
#  at both ends of the container: a deque does them in O(1) at either end, whereas a
#  list is only (amortized) O(1) at the right end and O(n) at the left end
#  (insert(0, x) / pop(0)).

In [3]:
from collections import deque

queue = deque([1,2,3,4,5,6])

In [4]:
queue.append(7)
queue

deque([1, 2, 3, 4, 5, 6, 7])

In [5]:
queue.appendleft(0)
queue

deque([0, 1, 2, 3, 4, 5, 6, 7])

In [6]:
queue.popleft()
queue

deque([1, 2, 3, 4, 5, 6, 7])

In [7]:
# Set maxlen to limit the size; once full, appending at one end discards an item from the other end
limited_queue = deque([1,2,3], maxlen = 3)
limited_queue

deque([1, 2, 3], maxlen=3)

In [8]:
limited_queue.append(4)
limited_queue

deque([2, 3, 4], maxlen=3)

In [14]:
# Generators: any function containing a yield statement is a generator function
# Its body only runs in response to iteration
def frange(start, stop, increment):
    """Yield evenly spaced values from ``start`` up to, but excluding, ``stop``.

    A float-friendly counterpart of ``range``. Each value is computed as
    ``start + k * increment`` instead of by repeated addition, so rounding
    error does not accumulate from one step to the next (with repeated
    addition ``frange(0, 1, 0.1)`` emits an 11th value, 0.9999999999999999).

    Args:
        start: First value produced (yielded unchanged).
        stop: Exclusive upper bound.
        increment: Distance between consecutive values.

    Yields:
        ``start``, ``start + increment``, ``start + 2 * increment``, ...
        for as long as the value is below ``stop``.

    Raises:
        ValueError: If ``increment`` is zero or negative while
            ``start < stop``, because the sequence could never reach
            ``stop``. Raised on the first iteration, since a generator body
            only runs once iteration begins.
    """
    if start < stop and increment <= 0:
        raise ValueError('increment must be positive when start < stop')
    x = start
    step_count = 0
    while x < stop:
        yield x
        step_count += 1
        # Recompute from start rather than doing `x += increment`, so the
        # rounding error of earlier steps is not carried forward.
        x = start + step_count * increment


In [16]:
# Example: iterating over it with a for loop
for i in frange(0, 4, 0.5):
    print(i)

0
0.5
1.0
1.5
2.0
2.5
3.0
3.5


In [17]:
# If we call it without iterating, we only get the generator object (no body code runs yet):
frange(0, 4, 0.5)

<generator object frange at 0x7fd16e736270>

In [18]:
# Another way is to advance the generator manually with next(iterator):

def countdown(n):
    """Yield n, n - 1, ... down to the last value greater than zero.

    A start message is printed when iteration begins (not when the generator
    object is created) and 'Finished!' is printed once the values run out,
    just before StopIteration is raised to the caller.
    """
    print('Starting the countdown at position', n)
    remaining = n
    while True:
        # Guard clause: stop as soon as the counter is no longer positive.
        if not remaining > 0:
            break
        yield remaining
        remaining -= 1
    print('Finished!')

In [20]:
countdown_object = countdown(4)

In [22]:
next(countdown_object)

Starting the countdown at position 4


4

In [23]:
next(countdown_object)

3

In [24]:
next(countdown_object)

2

In [25]:
next(countdown_object)

1

In [26]:
next(countdown_object)

Finished!


StopIteration: 

In [27]:
# Find the Largest or Smallest N items:
# With the help of heapq module, we can solve it in an easy way

import heapq

In [28]:
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums)) # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums)) # Prints [-4, 1, 2]

[42, 37, 23]
[-4, 1, 2]


In [29]:
# It can also work with other more complex objects like
# dictionaries by the keyword 'key':

portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]

the_cheapests = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
the_most_expensives = heapq.nlargest(3, portfolio, key=lambda s: s['price'])

the_cheapests, the_most_expensives

([{'name': 'YHOO', 'shares': 45, 'price': 16.35},
  {'name': 'FB', 'shares': 200, 'price': 21.09},
  {'name': 'HPQ', 'shares': 35, 'price': 31.75}],
 [{'name': 'AAPL', 'shares': 50, 'price': 543.22},
  {'name': 'ACME', 'shares': 75, 'price': 115.65},
  {'name': 'IBM', 'shares': 100, 'price': 91.1}])

In [30]:
# Multidict: a dictionary that supports more than one element for each key.
# It is as easy as using lists or sets as the values. defaultdict can be used to make
# initialisation easier:

from collections import defaultdict

In [31]:
my_list_dict = defaultdict(list)
my_list_dict['a'].append(1)
my_list_dict['b'].append(1)
my_list_dict['c'].append(1)
my_list_dict

defaultdict(list, {'a': [1], 'b': [1], 'c': [1]})

In [33]:
# We can then add more items to the key we want:

my_list_dict['a'].append(2)
my_list_dict

defaultdict(list, {'a': [1, 2], 'b': [1], 'c': [1]})

In [34]:
# Preserving insertion order in dictionaries when iterating over them:
# OrderedDict from collections guarantees it (since Python 3.7 a plain dict does too):

from collections import OrderedDict

In [35]:
my_order_dict = OrderedDict()
my_order_dict['foo'] = 1
my_order_dict['bar'] = 2
my_order_dict['spam'] = 3
my_order_dict['grok'] = 4
# Outputs "foo 1", "bar 2", "spam 3", "grok 4"
for key in my_order_dict:
    print(key, my_order_dict[key])

foo 1
bar 2
spam 3
grok 4


In [38]:
# Getting max value from a dictionary
# In case we don't need to know the key (unlikely):

aux_dict = {'a': 5, 'b': 2, 'c': 6, 'd': 1}
max(aux_dict.values())

6

In [42]:
# In case we do need the key, best option is using keyword 'key'
max(aux_dict, key = lambda a: aux_dict[a])

'c'

In [44]:
# In case we need both, scan the (key, value) pairs once and compare on the value
# (avoids running max() over the dictionary twice):
max(aux_dict.items(), key=lambda item: item[1])

('c', 6)

In [46]:
zip(aux_dict.items())

<zip at 0x7fd16e55f300>

In [47]:
# A more efficient option is to zip the values with the keys (values first), so that max compares by value:

max(zip(aux_dict.values(), aux_dict.keys()))

(6, 'c')

In [48]:
# IMPORTANT: zip creates an iterator that can only be consumed once
my_zip = zip(aux_dict.values(), aux_dict.keys())

In [49]:
max(my_zip)

(6, 'c')

In [50]:
min(my_zip)

ValueError: min() arg is an empty sequence

In [51]:
# In order to find commonalities between two dictionaries, it is useful to apply
# set operations to their keys (key views act like sets)

dict_a = {'x': 3, 'y': 5}
dict_b = {'y': 2, 'z': 9}

dict_a.keys() & dict_b.keys()

{'y'}

In [52]:
# Get the frequency of appearance of each item in a list: the best option is to use a Counter:


from collections import Counter
words = [
'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
'my', 'eyes', "you're", 'under'
]

In [53]:
counter = Counter(words)

In [54]:
# Is a dictionary containing frequency of every item in the list
counter

Counter({'look': 4,
         'into': 3,
         'my': 3,
         'eyes': 8,
         'the': 5,
         'not': 1,
         'around': 2,
         "don't": 1,
         "you're": 1,
         'under': 1})

In [56]:
# In case more words appear in the scene, it is also possible to perform operations
# between counters, for example:

morewords = ['why','are','you','not','looking','in','my','eyes']
new_counter = Counter(morewords)

In [59]:
new_counter 

Counter({'why': 1,
         'are': 1,
         'you': 1,
         'not': 1,
         'looking': 1,
         'in': 1,
         'my': 1,
         'eyes': 1})

In [61]:
counter + new_counter

Counter({'look': 4,
         'into': 3,
         'my': 4,
         'eyes': 9,
         'the': 5,
         'not': 2,
         'around': 2,
         "don't": 1,
         "you're": 1,
         'under': 1,
         'why': 1,
         'are': 1,
         'you': 1,
         'looking': 1,
         'in': 1})

In [65]:
# Sorting objects that define no ordering methods (such as __lt__), for example class User:
from dataclasses import dataclass
@dataclass
class User:
    """Minimal record type used to demonstrate sorting with an explicit key.

    The plain ``@dataclass`` decorator is applied without ``order=True``, so
    no ordering methods (``__lt__`` etc.) are generated for this class.
    """
    # Numeric identifier; the only field of the record.
    id: int

In [67]:
# If we want to sort users based on their id values:
# sorted(), like max(), min(), etc., also accepts a key parameter.
# The solution consists of naming the attribute of the object
# we want to compare by, using attrgetter()

from operator import attrgetter
users_list = [User(1), User(9), User(2)]
sorted(users_list, key = attrgetter('id'))

[User(id=1), User(id=2), User(id=9)]

In [68]:
# Most of these operations are easily handled with pandas.
# For example, given a list of dictionaries that we want to convert to a pd.DataFrame:
import random

# Seed the generator so that re-running the notebook rebuilds the same sample data.
random.seed(42)
item_name_choices = ['item_1', 'item_2', 'item_3']
created_list_of_dicts = [
    {'item_name': random.choice(item_name_choices), 'n_sold': random.randint(0, 100)}
    for _ in range(1000)
]

In [70]:
import pandas as pd

In [71]:
my_df = pd.DataFrame(created_list_of_dicts)

In [74]:
# Now all operations are done with pandas which is a library meant to solve 
# this type of math problems
# we can group by item_name and get the total n_sold, or avg

my_df_agr = my_df.groupby('item_name').agg({'n_sold': ['sum', 'mean']})

In [75]:
my_df_agr

Unnamed: 0_level_0,n_sold,n_sold
Unnamed: 0_level_1,sum,mean
item_name,Unnamed: 1_level_2,Unnamed: 2_level_2
item_1,16227,50.39441
item_2,17123,49.063037
item_3,17316,52.632219


In [87]:
# Extracting a subset of a dictionary:
# We can use dict comprehension:

prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}

In [90]:
# Make a dictionary of all prices over 200
p1 = { key:value for key, value in prices.items() if value > 200 }
p1

{'AAPL': 612.78, 'IBM': 205.55}

In [92]:
tech_names = { 'AAPL', 'IBM', 'HPQ', 'MSFT' }
p2 = { key:value for key,value in prices.items() if key in tech_names }
p2

{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}

In [80]:
# In order to merge dictionaries:
dict_a = {'x': 3, 'y': 5}
dict_b = {'y': 2, 'z': 9}


In [81]:
# If we want to update dict_a in place:
dict_a.update(dict_b)

In [82]:
dict_a

{'x': 3, 'y': 2, 'z': 9}

In [85]:
# To build a merged dict while leaving the originals untouched, unpack both
# into a new literal; on duplicate keys the right-most mapping wins:
dict_a = {'x': 3, 'y': 5}
dict_b = {'y': 2, 'z': 9}
dict_merged = {**dict_a, **dict_b}

In [86]:
dict_merged

{'x': 3, 'y': 2, 'z': 9}