<a href="https://colab.research.google.com/github/bharathulaprasad/webcrawling/blob/main/Python_features_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Lambda, map, filter, reduce

In [22]:
def square_fn(x):
    return x * x

square_ld = lambda x: x * x

for i in range(10):
    assert (square_fn(i) == square_ld(i)) 
    

In [23]:
nums = [1/3, 333/7, 2323/2230, 40/34, 2/3]
nums_squared = [num * num for num in nums]
print(nums_squared)

[0.1111111111111111, 2263.0408163265306, 1.0851472983570953, 1.384083044982699, 0.4444444444444444]


In [24]:
nums_squared_1 = map(square_fn, nums)
nums_squared_2 = map(lambda x: x * x, nums)
print(list(nums_squared_1))

[0.1111111111111111, 2263.0408163265306, 1.0851472983570953, 1.384083044982699, 0.4444444444444444]


 calculate the mean squared error of a simple linear function f(x) = ax + b

In [25]:
a, b = 3, -0.5
xs = [2, 3, 4, 5]
labels = [6.4, 8.9, 10.9, 15.3]

# Method 1: using a loop
errors = []
for i, x in enumerate(xs):
    errors.append((a * x + b - labels[i]) ** 2)
result1 = sum(errors) ** 0.5 / len(xs)

# Method 2: using map
diffs = map(lambda x, y: (a * x + b - y) ** 2, xs, labels)
result2 = sum(diffs) ** 0.5 / len(xs)

print(result1, result2)

0.35089172119045514 0.35089172119045514


In [26]:
bad_preds = filter(lambda x: x > 0.5, errors)
print(errors)
print(list(bad_preds))

[0.8100000000000006, 0.16000000000000028, 0.3599999999999996, 0.6400000000000011]
[0.8100000000000006, 0.6400000000000011]


# Reduce example

In [27]:
product = 1
for num in nums:
    product *= num
print(product)

12.95564683272412


In [28]:
from functools import reduce
product = reduce(lambda x, y: x * y, nums)
print(product)

12.95564683272412


In [29]:
elems = [1, 2, 3, 4]
a, b, c, d = elems
print(a, b, c, d)

1 2 3 4


#Unpack a list

In [30]:
a, *new_elems, d = elems
print(a)
print(new_elems)
print(d)

1
[2, 3]
4


# Slicing

In [31]:
elems = list(range(10))
print(elems)
print(elems[::-1])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


In [32]:
evens = elems[::2]
print(evens)

reversed_evens = elems[-2::-2]
print(reversed_evens)

[0, 2, 4, 6, 8]
[8, 6, 4, 2, 0]


In [33]:
del elems[::2]
print(elems)

[1, 3, 5, 7, 9]


# Insertion

In [35]:
elems = list(range(10))
elems[1] = 22
print(elems)

[0, 22, 2, 3, 4, 5, 6, 7, 8, 9]


#Replace

In [36]:
elems = list(range(10))
elems[1:2] = [20, 30, 40]
print(elems)

[0, 20, 30, 40, 2, 3, 4, 5, 6, 7, 8, 9]


In [37]:
elems = list(range(10))
elems[1:1] = [0.2, 0.3, 0.5]
print(elems)

[0, 0.2, 0.3, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9]


#Flattening

In [38]:
list_of_lists = [[1], [2, 3], [4, 5, 6]]
sum(list_of_lists, [])

[1, 2, 3, 4, 5, 6]

In [39]:
nested_lists = [[1, 2], [[3, 4], [5, 6], [[7, 8], [9, 10], [[11, [12, 13]]]]]]
flatten = lambda x: [y for l in x for y in flatten(l)] if type(x) is list else [x]
flatten(nested_lists)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

#List vs Generator

This one takes O(mn)

In [42]:
tokens = ['i', 'want', 'to', 'go', 'to', 'school']

def ngrams(tokens, n):
    length = len(tokens)
    grams = []
    for i in range(length - n + 1):
        grams.append(tokens[i:i+n])
    return grams

ngrams(tokens, 3)

[['i', 'want', 'to'],
 ['want', 'to', 'go'],
 ['to', 'go', 'to'],
 ['go', 'to', 'school']]

# Using Generator

# This one takes O(m+n)

In [43]:
def ngrams(tokens, n):
    length = len(tokens)
    for i in range(length - n + 1):
        yield tokens[i:i+n]

ngrams_generator = ngrams(tokens, 3)
print(ngrams_generator)
for ngram in ngrams_generator:
    print(ngram)

<generator object ngrams at 0x7fd96c935ad0>
['i', 'want', 'to']
['want', 'to', 'go']
['to', 'go', 'to']
['go', 'to', 'school']


# Another way to create ngrams is slice ad ZIP

In [44]:
def ngrams(tokens, n):
    length = len(tokens)
    slices = (tokens[i:length-n+i+1] for i in range(n))
    return zip(*slices)

ngrams_generator = ngrams(tokens, 3)
print(ngrams_generator) # zip objects are generators
for ngram in ngrams_generator:
    print(ngram)

<zip object at 0x7fd96c947be0>
('i', 'want', 'to')
('want', 'to', 'go')
('to', 'go', 'to')
('go', 'to', 'school')


# Classes and some magic numbers

In [45]:
# In Python, magic methods are prefixed and suffixed with the double underscore __, also known as dunder. The most wellknown magic method is probably __init__.
class Node:
    """ A struct to denote the node of a binary tree.
    It contains a value and pointers to left and right children.
    """
    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right
        
    def __repr__(self):    
        strings = [f'value: {self.value}']
        strings.append(f'left: {self.left.value}' if self.left else 'left: None')
        strings.append(f'right: {self.right.value}' if self.right else 'right: None')
        return ', '.join(strings)

left = Node(4)
root = Node(5, left)
print(root)

value: 5, left: 4, right: None


In [47]:
class Node:
    """ A struct to denote the node of a binary tree.
    It contains a value and pointers to left and right children.
    """
    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right
    
    def __eq__(self, other):
        return self.value == other.value
    
    def __lt__(self, other):
        return self.value < other.value
    
    def __ge__(self, other):
        return self.value >= other.value


left = Node(4)
root = Node(5, left)
print(left == root)
print(left < root)
print(left >= root)

False
True
False


#Objects attributes and locals

In [48]:
class Model1:
    def __init__(self, hidden_size=100, num_layers=3, learning_rate=3e-4):
        print(locals())
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.learning_rate = learning_rate

model1 = Model1()
model1.__dict__

{'self': <__main__.Model1 object at 0x7fd96c901450>, 'hidden_size': 100, 'num_layers': 3, 'learning_rate': 0.0003}


{'hidden_size': 100, 'learning_rate': 0.0003, 'num_layers': 3}

In [51]:
# Alternative way of construct using __dict__ for all params
class Model2:
    def __init__(self, hidden_size=100, num_layers=3, learning_rate=3e-4):
        params = locals()
        del params['self']
        self.__dict__ = params

model2 = Model2()
model2.__dict__

{'hidden_size': 100, 'learning_rate': 0.0003, 'num_layers': 3}

# Variable arguments

In [52]:
class Model3:
    def __init__(self, **kwargs):
        self.__dict__ = kwargs

model3 = Model3(hidden_size=100, num_layers=3, learning_rate=3e-4)
model3.__dict__

{'hidden_size': 100, 'learning_rate': 0.0003, 'num_layers': 3}

#memoization

In [53]:
def fib_helper(n):
    if n < 2:
        return n
    return fib_helper(n - 1) + fib_helper(n - 2)

def fib(n):
    """ fib is a wrapper function so that later we can change its behavior
    at the top level without affecting the behavior at every recursion step.
    """
    return fib_helper(n)

def fib_m_helper(n, computed):
    if n in computed:
        return computed[n]
    computed[n] = fib_m_helper(n - 1, computed) + fib_m_helper(n - 2, computed)
    return computed[n]

def fib_m(n):
    return fib_m_helper(n, {0: 0, 1: 1})

In [54]:
for n in range(20):
    assert fib(n) == fib_m(n)

In [55]:
import time

start = time.time()
fib(30)
print(f'Without memoization, it takes {time.time() - start:7f} seconds.')

start = time.time()
fib_m(30)
print(f'With memoization, it takes {time.time() - start:.7f} seconds.')

Without memoization, it takes 0.377735 seconds.
With memoization, it takes 0.0001135 seconds.


#Decorators

In [56]:
def timeit(fn): 
    # *args and **kwargs are to support positional and named arguments of fn
    def get_time(*args, **kwargs): 
        start = time.time() 
        output = fn(*args, **kwargs)
        print(f"Time taken in {fn.__name__}: {time.time() - start:.7f}")
        return output  # make sure that the decorator returns the output of fn
    return get_time 

In [60]:
@timeit
def fib(n):
    return fib_helper(n)

@timeit
def fib_m(n):
    return fib_m_helper(n, {0: 0, 1: 1})

In [64]:
fib(30)
fib_m(30)

Time taken in fib: 0.3781400
Time taken in fib_m: 0.0000236


832040

# Caching

In [66]:
import functools

@functools.lru_cache()
def fib_helper(n):
    if n < 2:
        return n
    return fib_helper(n - 1) + fib_helper(n - 2)

@timeit
def fib(n):
    """ fib is a wrapper function so that later we can change its behavior
    at the top level without affecting the behavior at every recursion step.
    """
    return fib_helper(n)

In [83]:
fib(5)
fib_m(5)

Time taken in fib: 0.0000074
Time taken in fib_m: 0.0000110


5