# Data Structures and Algorithms

### Determining the Most Frequently Occurring Items in a Sequence

In [1]:
# Small three-entry mapping used to demonstrate dict views in the next cell.
a = dict(x=1, y=2, z=3)

In [2]:
# Display the values view of the dict `a`; values appear in insertion order.
a.values()

dict_values([1, 2, 3])

In [3]:
# Rebind `a` to a flat list of ints that contains repeated values.
a = [1, 2, 3, 4, 2, 5, 1, 5, 7]

In [4]:
from collections import Counter

# Corpus of words to tally; the repeated entries are intentional.
words = [
    "look", "into", "my", "eyes",
    "look", "into", "my", "eyes",
    "the", "eyes", "the", "eyes", "the", "eyes",
    "not", "around", "the", "eyes",
    "don't", "look", "around", "the", "eyes",
    "look", "into", "my", "eyes",
    "you're", "under",
]

# Tally every word, then pull out the three highest counts.
word_counts = Counter()
word_counts.update(words)
top_three = word_counts.most_common(3)
print(top_three)

[('eyes', 8), ('the', 5), ('look', 4)]


In [5]:
# Second, smaller word list whose tally is combined with the first one below.
more_words = [
    "why", "are", "you", "not",
    "looking", "in", "my", "eyes",
]


In [6]:
# One tally per word list; `a` is echoed as the cell's displayed result.
a, b = Counter(words), Counter(more_words)

a

Counter({'look': 4,
         'into': 3,
         'my': 3,
         'eyes': 8,
         'the': 5,
         'not': 1,
         'around': 2,
         "don't": 1,
         "you're": 1,
         'under': 1})

In [7]:
# Counter addition: counts for words present in both tallies are summed
# (e.g. 'eyes' 8 + 1 -> 9); words present in only one are carried over.
a + b

Counter({'look': 4,
         'into': 3,
         'my': 4,
         'eyes': 9,
         'the': 5,
         'not': 2,
         'around': 2,
         "don't": 1,
         "you're": 1,
         'under': 1,
         'why': 1,
         'are': 1,
         'you': 1,
         'looking': 1,
         'in': 1})

In [8]:
# Most frequent entry as a (count, word) pair; the count comes first so that
# max() ranks the pairs by count before falling back to the word.
max((count, word) for word, count in a.items())

(8, 'eyes')

### Sorting a List of Dictionaries by a Common Key

In [9]:
from operator import itemgetter

# Sample records that all share the keys 'fname', 'lname' and 'uid'.
rows = [
    dict(fname='Brian', lname='Jones', uid=1003),
    dict(fname='David', lname='Beazley', uid=1002),
    dict(fname='John', lname='Cleese', uid=1001),
    dict(fname='Big', lname='Jones', uid=1004),
]

# One sorted copy per field; `rows` itself is left in its original order.
rows_by_fname, rows_by_uid = (
    sorted(rows, key=itemgetter(field)) for field in ('fname', 'uid')
)

print(rows_by_fname, rows_by_uid, sep="\n")

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]


In [10]:
# Order by last name, breaking ties on first name.
rows_by_lfname = sorted(rows, key=lambda row: (row['lname'], row['fname']))
print(rows_by_lfname)

[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]


### Sorting Objects Without Native Comparison Support

In [11]:
class User:
    """Minimal record type that stores only an ``id``.

    The class defines no comparison methods, so instances are ordered
    through an explicit key function rather than compared directly.
    """

    def __init__(self, id):
        self.id = id

    def __repr__(self):
        # Rendered as "User" immediately followed by the id, e.g. User15.
        return "User{}".format(self.id)

In [12]:
from operator import attrgetter

# Three users created deliberately out of id order.
user1, user2, user3 = (User(uid) for uid in (15, 10, 20))
users = [user1, user2, user3]

# Sort through a key function that reads each user's `id` attribute.
sorted(users, key=attrgetter("id"))

[User10, User15, User20]

In [13]:
# The user with the largest id.
max(users, key=lambda user: user.id)

User20

### Grouping Records Together Based on a Field

In [14]:
# (address, date) pairs expanded into one dict per record.
rows = [
    {'address': address, 'date': date}
    for address, date in (
        ('5412 N CLARK', '07/01/2012'),
        ('5148 N CLARK', '07/04/2012'),
        ('5800 E 58TH', '07/02/2012'),
        ('2122 N CLARK', '07/03/2012'),
        ('5645 N RAVENSWOOD', '07/02/2012'),
        ('1060 W ADDISON', '07/02/2012'),
        ('4801 N BROADWAY', '07/01/2012'),
        ('1039 W GRANVILLE', '07/04/2012'),
    )
]

In [15]:
from operator import itemgetter
from itertools import groupby

# Order the rows by date (in place) so that equal dates sit next to each
# other before they are grouped.
by_date = itemgetter("date")
rows.sort(key=by_date)

for date, items in groupby(rows, key=by_date):
    # Indented date header, then every record belonging to that date.
    print("                 " + date)
    print(*items, sep="\n")

                 07/01/2012
{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}
                 07/02/2012
{'address': '5800 E 58TH', 'date': '07/02/2012'}
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
{'address': '1060 W ADDISON', 'date': '07/02/2012'}
                 07/03/2012
{'address': '2122 N CLARK', 'date': '07/03/2012'}
                 07/04/2012
{'address': '5148 N CLARK', 'date': '07/04/2012'}
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


### Filtering Sequence Elements

In [16]:
values = ['1', '2', '-3', '-', '4', 'N/A', '5']

def is_int(val):
    """Return True if ``int(val)`` succeeds, False otherwise.

    Inputs that int() rejects with ValueError (e.g. '-' or 'N/A') and inputs
    it rejects with TypeError (e.g. None) are both reported as False, so the
    function can be used as a filter predicate over mixed data without
    raising.
    """
    try:
        # Only success or failure of the conversion matters; the converted
        # number itself is not needed.
        int(val)
    except (ValueError, TypeError):
        return False
    return True

# Keep only the entries that can be converted; the strings themselves are
# kept unchanged, not their integer values.
ivals = list(filter(is_int, values))
ivals

['1', '2', '-3', '4', '5']

In [17]:
# One address per element. Every entry needs its own trailing comma:
# adjacent string literals with no comma between them are concatenated
# into a single element.
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]

counts = [ 0, 3, 10, 4, 1, 7, 6, 1]

# compress() pairs items positionally and stops at the shorter input, so the
# two lists must line up one-to-one for the selection to be meaningful.
assert len(addresses) == len(counts)

# Boolean selector: True where the matching count exceeds 5.
more5 = [n > 5 for n in counts]
print(more5)

from itertools import compress
list(compress(addresses, more5))

[False, False, True, False, False, True, True, False]


['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']

### Dictionary Comprehension

In [18]:
# Ticker symbol -> price.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}

# Entries priced above 200.
p1 = dict(filter(lambda item: item[1] > 200, prices.items()))

# Entries whose ticker is one of the tech names, in the order of `prices`.
p2 = {ticker: prices[ticker] for ticker in prices if ticker in tech_names}

print(f"Prices over 200 {p1}")
print(f"Tech companies' stock {p2}")

Prices over 200 {'AAPL': 612.78, 'IBM': 205.55}
Tech companies' stock {'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}


### Mapping Names to Sequence Elements

In [19]:
from collections import namedtuple

# Lightweight record type whose fields can be read by name.
Subscriber = namedtuple("Subscriber", "addr joined")
sub = Subscriber(addr="hasan@gmail.com", joined="04-01-1999")

# The whole record first, then each field on its own line.
print(sub, sub.addr, sub.joined, sep="\n")

Subscriber(addr='hasan@gmail.com', joined='04-01-1999')
hasan@gmail.com
04-01-1999


In [20]:
from collections import namedtuple

Stock = namedtuple('Stock', 'name shares price date time')

# Template instance that carries the fallback value for every field.
stock_prototype = Stock(name='', shares=0, price=0.0, date=None, time=None)


def dict_to_stock(s):
    """Build a Stock from the mapping ``s``.

    The entries of ``s`` are applied as keyword overrides on
    ``stock_prototype``; fields that ``s`` does not mention keep the
    prototype's value.
    """
    return stock_prototype._replace(**s)


a = dict(name='ACME', shares=100, price=123.45)
dict_to_stock(a)

Stock(name='ACME', shares=100, price=123.45, date=None, time=None)

In [21]:
# Same record with a date supplied as well; 'time' is the only field left out.
b = dict(name='ACME', shares=100, price=123.45, date='12/17/2012')
dict_to_stock(b)

Stock(name='ACME', shares=100, price=123.45, date='12/17/2012', time=None)

### Combining Multiple Mappings into a Single Mapping

In [22]:
from collections import ChainMap

a = dict(x=1, z=3)
b = dict(y=2, z=4)

# Single lookup view over both dicts, with `a` listed ahead of `b`.
c = ChainMap(a, b)

# 'x' -> 1 (from a), 'y' -> 2 (from b), 'z' -> 3 (from a, which also has 'z').
print(c['x'], c['y'], c['z'], sep="\n")

1
2
3


# Strings and Text

#### Splitting Strings on Any of Multiple Delimiters

In [24]:
import re

line = 'asdf fjdk; afed, fjek,asdf,       foo'

# A delimiter is one semicolon, comma or whitespace character, followed by
# any amount of further whitespace.
delimiters = re.compile(r'[;,\s]\s*')
delimiters.split(line)

['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

#### Matching Text at the Start or End of a String

In [25]:
filenames = ['Makefile', 'foo.c', 'bar.py', 'spam.c', 'spam.h']

# Keep the names ending in either suffix; endswith() is given both suffixes
# at once as a tuple.
c_suffixes = ('.c', '.h')
list(filter(lambda name: name.endswith(c_suffixes), filenames))

['foo.c', 'spam.c', 'spam.h']