## Just Like that

In [4]:
employees = {'Alice' : 100000,
             'Bob' : 99817,
             'Carol' : 122908,
             'Frank' : 88123,
             'Eve' : 93121}


top_earners = [ (k,v) for k,v in employees.items() if v >= 100000 ]

top_earners

[('Alice', 100000), ('Carol', 122908)]

In [11]:

text = '''
Call me Ishmael. Some years ago - never mind how long precisely - having
little or no money in my purse, and nothing particular to interest me
on shore, I thought I would sail about a little and see the watery part
of the world. It is a way I have of driving off the spleen, and regulating
the circulation. - Moby Dick'''


print( [ line for line in text.split('\n')]  )

print()

w = [ [ x for x in line.split() if len(x) > 3]  for line in text.split('\n') ]

print(w)

['', 'Call me Ishmael. Some years ago - never mind how long precisely - having', 'little or no money in my purse, and nothing particular to interest me', 'on shore, I thought I would sail about a little and see the watery part', 'of the world. It is a way I have of driving off the spleen, and regulating', 'the circulation. - Moby Dick']

[[], ['Call', 'Ishmael.', 'Some', 'years', 'never', 'mind', 'long', 'precisely', 'having'], ['little', 'money', 'purse,', 'nothing', 'particular', 'interest'], ['shore,', 'thought', 'would', 'sail', 'about', 'little', 'watery', 'part'], ['world.', 'have', 'driving', 'spleen,', 'regulating'], ['circulation.', 'Moby', 'Dick']]


In [27]:

companies = {
    'CoolCompany' : {'Alice' : 33, 'Bob' : 28, 'Frank' : 29},
    'CheapCompany' : {'Ann' : 4, 'Lee' : 9, 'Chrisi' : 7},
    'SosoCompany' : {'Esther' : 38, 'Cole' : 8, 'Paris' : 18}}


print( [ x for x in companies  ] )

print( [ y for y in companies['CoolCompany'].values() ] )

print()

illegal =  [ x for x in companies if any( y < 9 for y in companies[x].values() )]
print(illegal)

['CoolCompany', 'CheapCompany', 'SosoCompany']
[33, 28, 29]

['CheapCompany', 'SosoCompany']


In [30]:
# zip 

lst_1 = [1, 2, 3]
lst_2 = [4, 5, 6]

# Zip two lists together
zipped = list(zip(lst_1, lst_2))
print(zipped)
print()
# Unzip to lists again

lst_1_new, lst_2_new = zip( *zipped )
print( lst_1_new )
print( lst_2_new )

[(1, 4), (2, 5), (3, 6)]

(1, 2, 3)
(4, 5, 6)


In [33]:

column_names = ['name', 'salary', 'job']
db_rows = [('Alice', 180000, 'data scientist'),
           ('Bob', 99000, 'mid-level manager'),
           ('Frank', 87000, 'CEO')]

db = [ dict( zip(column_names, row )) for row in db_rows ]
db


[{'name': 'Alice', 'salary': 180000, 'job': 'data scientist'},
 {'name': 'Bob', 'salary': 99000, 'job': 'mid-level manager'},
 {'name': 'Frank', 'salary': 87000, 'job': 'CEO'}]

## 2. Data Manipulations

In [34]:

import numpy as np


In [36]:
a = np.array([[1, 0, 0],
              [1, 1, 1],
              [2, 0, 0]])

print( np.max(a) )
print( np.min(a) )
print( np.average(a) )

2
0
0.6666666666666666


In [39]:
alice = [99, 101, 103]
bob = [110, 108, 105]
tim = [90, 88, 85]

salaries = np.array([ alice, bob, tim ] )
taxation = np.array([[0.2, 0.25, 0.22],
                     [0.4, 0.5, 0.5],
                     [0.1, 0.2, 0.1]])

# one-liner
max_income = np.max( salaries - salaries * taxation )
max_income

81.0

In [43]:
# slicing, broadcasting and array types

a = np.array([[0, 1, 2, 3],
              [4, 5, 6, 7],
              [8, 9, 10, 11],
              [12, 13, 14, 15]])

# 3rd col
print(a[:, 2])

# 2nd row
print( a[1, :] )

# All columns except last:
print(a[:, :-1])

[ 2  6 10 14]
[4 5 6 7]
[[ 0  1  2]
 [ 4  5  6]
 [ 8  9 10]
 [12 13 14]]


In [44]:


a = np.array([1, 2, 3, 4], dtype=np.int16)
print(a) # [1 2 3 4]
print(a.dtype) # int16

b = np.array([1, 2, 3, 4], dtype=np.float64)
print(b) # [1. 2. 3. 4.]
print(b.dtype) # float64

[1 2 3 4]
int16
[1. 2. 3. 4.]
float64


In [45]:
x = np.array([[8, 9, 11, 12],
              [1, 2, 2, 1], 
              [2, 8, 9, 9],
              [9, 6, 6, 3],
              [3, 3, 3, 3]])


## One-liner
avg, var, std = np.average(x, axis=1), np.var(x, axis=1), np.std(x, axis=1)


## REGULAR EXPRESSIONS

In [46]:
import re

In [50]:
text = '''A blockchain, originally block chain,
is a growing list of records, called blocks,
which are linked using cryptography.
'''

print(re.findall('b...k', text))  # dot regex

['block', 'block', 'block']


In [51]:
print(re.findall('y.*y', text))

['yptography']


In [52]:
# zero or regex
print(re.findall('blocks?', text))

['block', 'block', 'blocks']


In [53]:
txt = '<div>hello world</div>'

print(re.findall('<.*?>', txt))

['<div>', '</div>']


In [55]:
text = 'peter piper picked a peck of pickled peppers'

result = re.findall('p.*?e.*?r', text)

print(result)

['peter', 'piper', 'picked a peck of pickled pepper']


### web scraping with re

In [56]:
import urllib.request

In [57]:
search_phrase = 'crypto'

with urllib.request.urlopen('https://www.wired.com/') as response:
    html = response.read().decode('utf8') # convert to string
    first_pos = html.find(search_phrase) 
    print(html[first_pos-10:first_pos+10])
    

,r=window.crypto||wi


In [58]:
text_1 = 'crypto-bot that is trading Bitcoin and other currencies'
text_2 = 'cryptographic encryption methods that can be cracked easily with quantum computers'

pattern = re.compile('crypto(.{1,30})coin') 
print(pattern.match(text_1))
print(pattern.match(text_2))

<re.Match object; span=(0, 34), match='crypto-bot that is trading Bitcoin'>
None


() matches whatever regex is inside.<br>
.  matches an arbitrary character.<br>
{1,30} matches between 1 and 30 occurrences of the previous regex.<br>
(.{1,30}) matches between 1 and 30 arbitrary characters.<br>
crypto(.{1,30})coin matches the regex consisting of three parts: the word 'crypto', an arbitrary sequence with 1 to 30 chars, followed by the word 'coin'.<br>



In [59]:
print(pattern.match(text_1))

<re.Match object; span=(0, 34), match='crypto-bot that is trading Bitcoin'>


In [60]:
print(pattern.match(text_2))

None


In [62]:
text = '''
'One can never have enough socks', said Dumbledore.
'Another Christmas has come and gone and I didn't
get a single pair. People will insist on giving me books.'
Christmas Quote
'''

regex = 'Christ.*'

print(re.match(regex, text))

print(re.search(regex, text))

print(re.findall(regex, text))

None
<re.Match object; span=(62, 102), match="Christmas has come and gone and I didn't">
["Christmas has come and gone and I didn't", 'Christmas Quote']


In [63]:

page = '''
<!DOCTYPE html>
<html>
<body>

<h1>My Programming Links</h1>
<a href='https://app.finxter.com/'>test your Python skills</a>
<a href='https://blog.finxter.com/recursion/'>Learn recursion</a>
<a href='https://nostarch.com/'>Great books from NoStarchPress</a>
<a href='http://finxter.com/'>Solve more Python puzzles</a>

</body>
</html>
'''

practice_tests = re.findall('(<a.*?finxter.*?(test|puzzle).*?>)', page)

print(practice_tests)

[("<a href='https://app.finxter.com/'>test your Python skills</a>", 'test'), ("<a href='http://finxter.com/'>Solve more Python puzzles</a>", 'puzzle')]


In [64]:
# EXTRACTING DOLLARS FROM A STRING

string = 'helloworld'

regex_1 = 'hello(world)'
regex_2 = '(hello(world))'

res_1 = re.findall(regex_1, string)
res_2 = re.findall(regex_2, string)

print(res_1)
print(res_2)


['world']
[('helloworld', 'world')]


In [65]:
report = '''
If you invested $1 in the year 1801, you would have $18087791.41 today.
This is a 7.967% return on investment. 
But if you invested only $0.25 in 1801, you would end up with $4521947.8525.
'''

dollars = [x[0] for x in re.findall('(\$[0-9]+(\.[0-9]*)?)', report)]

print(dollars)

['$1', '$18087791.41', '$0.25', '$4521947.8525']


In [66]:
# FINDING NONSECURE HTTP URLS

article = '''
The algorithm has important practical applications
http://blog.finxter.com/applications/
in many basic data structures such as sets, trees,
dictionaries, bags, bag trees, bag dictionaries,
hash sets, https://blog.finxter.com/sets-in-python/
hash tables, maps, and arrays. http://blog.finxter.com/
http://not-a-valid-url
http:/bla.ba.com
http://bo.bo.bo.bo.bo.bo/
http://bo.bo.bo.bo.bo.bo/333483--33343-/
https://bo.bo.bo.bo.bo.bo.bo/333483--33343-/
'''

stale_links = re.findall('http://[a-z0-9_\-.]+\.[a-z0-9_\-/]+', article)

print(stale_links)



['http://blog.finxter.com/applications/', 'http://blog.finxter.com/', 'http://bo.bo.bo.bo.bo.bo/', 'http://bo.bo.bo.bo.bo.bo/333483--33343-/']


### VALIDATING THE TIME FORMAT OF USER INPUT



In [69]:
print(re.findall('x{3,5}y', 'xy'))

print(re.findall('x{3,5}y', 'xxxy'))

print(re.findall('x{3,5}y', 'xxxxxy'))

print(re.findall('x{3,5}y', 'xxxxxxy'))

print(re.findall('x{3,5}y', 'xxxxxxxxy')) # selects only ( 3 to 5 characters)

[]
['xxxy']
['xxxxxy']
['xxxxxy']
['xxxxxy']


In [71]:
# check the time format XX:XX

inputs = ['18:29', '23:55', '123', 'ab:de', '18:299', '99:99']

input_ok = lambda x: re.fullmatch('[0-9]{2}:[0-9]{2}', x) != None

for x in inputs:
    print(input_ok(x))

True
True
False
False
False
True


In [73]:
# match 00:00 to 23:59 format

inputs = ['18:29', '23:55', '123', 'ab:de', '18:299', '99:99']

input_ok = lambda x: re.fullmatch('([01][0-9]|2[0-3]):[0-5][0-9]', x) != None

for x in inputs:
    print(input_ok(x))

True
True
False
False
False
False


### DUPLICATE DETECTION IN STRINGS

In [80]:
text = '''
It was a bright cold day in April, and the clocks were
striking thirteen. Winston Smith, his chin nuzzled into
his breast in an effort to escape the vile wind, slipped
quickly through the glass doors of Victory Mansions,
though not quickly enough to prevent a swirl of gritty
dust from entering along with him.
-- George Orwell, 1984
'''

duplicates = re.findall('([^\s]*(?P<x>[^\s])(?P=x)[^\s]*)', text)

print( duplicates )


[('thirteen.', 'e'), ('nuzzled', 'z'), ('effort', 'f'), ('slipped', 'p'), ('glass', 's'), ('doors', 'o'), ('gritty', 't'), ('--', '-'), ('Orwell,', 'l')]


### DETECTING WORD REPETITIONS

In [81]:
text = 'if you use words too often words become used'

style_problems = re.search('\s(?P<x>[a-z]+)\s+([a-z]+\s+){0,10}(?P=x)\s', ' ' + text + ' ')

print(style_problems)

<re.Match object; span=(11, 34), match=' words too often words '>


### REPLACE

In [84]:
text = '''
Alice Wonderland married John Doe.
The new name of former 'Alice Wonderland' is Alice Doe.
Alice Wonderland replaces her old name 'Wonderland' with her new name 'Doe'.
Alice's sister Jane Wonderland still keeps her old name.
'''

updated_text = re.sub("Alice Wonderland(?!')", "Alice Doe", text)

print(updated_text)



Alice Doe married John Doe.
The new name of former 'Alice Wonderland' is Alice Doe.
Alice Doe replaces her old name 'Wonderland' with her new name 'Doe'.
Alice's sister Jane Wonderland still keeps her old name.



## Algorithms

FINDING ANAGRAMS: Two strings are anagrams if they have the same sorted character sequence; <br>

“listen” → “silent” <br>

“funeral ” → “real fun” <br>

“elvis” → “lives”

In [90]:
is_anagram = lambda x1, x2: sorted(x1) == sorted(x2)

print(is_anagram('elvis', 'lives'))
print(is_anagram('elvise', 'livees'))
print(is_anagram('elvis', 'dead'))

True
True
False


## PALINDROMES 

In [91]:
is_palindrome = lambda phrase: phrase == phrase[::-1]

print(is_palindrome('anna'))
print(is_palindrome('kdljfasjf'))
print(is_palindrome('rats live on no evil star'))

True
False
True


In [92]:
# COUNTING PERMUTATIONS WITH RECURSIVE FACTORIAL FUNCTIONS

n = 5
factorial = lambda n: n * factorial(n-1) if n > 1 else 1

print(factorial(n))


120


### CALCULATING THE POWERSET <br>

Given set: s = {1, 2} <br>
Powerset: P = {{},{1},{2},{1,2}}  <br>

In [97]:
from itertools import chain, combinations


In [105]:

def power_set( iterable ):
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

list( power_set('abc')) # 2^3


[(),
 ('a',),
 ('b',),
 ('c',),
 ('a', 'b'),
 ('a', 'c'),
 ('b', 'c'),
 ('a', 'b', 'c')]

In [117]:
# method 2

s = "abc"
x = len(s)

masks = [ 1 << i for i in range(x) ]

print( masks)

print("-----------")

for i in range( 1 << x ):
    print(i)
    print( [ ss for mask, ss in zip(masks, s) if i & mask  ])

[1, 2, 4]
-----------
0
[]
1
['a']
2
['b']
3
['a', 'b']
4
['c']
5
['a', 'c']
6
['b', 'c']
7
['a', 'b', 'c']


### CAESAR’S CIPHER ENCRYPTION

In [119]:
abc = 'abcdefghijklmnopqrstuvwxyz'
s = 'xthexrussiansxarexcoming'

rt13 = lambda x: ''.join([abc[(abc.find(c) + 13) % 26] for c in x])

print(rt13)
print(rt13(rt13(s)))

<function <lambda> at 0x000002A3F7166E18>
xthexrussiansxarexcoming


### PRIME NUMBER

In [121]:

def prime(n):
    for i in range(2,n):
        if n % i == 0:
            return False
        return True
    
    
m = 20
primes = [ n for n in range(2, m+1) if prime(n) ]
print( primes )

[3, 5, 7, 9, 11, 13, 15, 17, 19]
