In [54]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


In [1]:
import collections

collections?

[0;31mType:[0m        module
[0;31mString form:[0m <module 'collections' from '/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/collections/__init__.py'>
[0;31mFile:[0m        /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/collections/__init__.py
[0;31mDocstring:[0m  
This module implements specialized container datatypes providing
alternatives to Python's general purpose built-in containers, dict,
list, set, and tuple.

* namedtuple   factory function for creating tuple subclasses with named fields
* deque        list-like container with fast appends and pops on either end
* ChainMap     dict-like class for creating a single view of multiple mappings
* Counter      dict subclass for counting hashable objects
* OrderedDict  dict subclass that remembers the order entries were added
* defaultdict  dict subclass that calls a factory function to supply missing values
* UserDict     wrapper around dictionary objects for easier dict subclassing
* U

In [2]:
divmod(12,5)

(2, 2)

## NamedTuple in Python

In [6]:
from collections import namedtuple


def custom_divmod(x, y):
    DivMod = namedtuple('Divmod', ['quotient', 'remainder'])
    print(DivMod)
    return DivMod(*divmod(x,y))

result = custom_divmod(12, 5)
print(result)

print("===========")
print(result.quotient, result.remainder)

<class '__main__.Divmod'>
Divmod(quotient=2, remainder=2)
2 2


### Ways to declare fields

In [9]:
## Ways to describe fields in python in namedtuples

# Way 1
Point1 = namedtuple("Point1", ['x','y'])

# Way 2
Point2 = namedtuple("Point2","x, y")

# Way 3
Point3 = namedtuple("Point3", 'x y')

p1 = Point1(2,3)
p2 = Point2(10,12)
p3 = Point3(100, 200)

print("Point 1 : ",p1, p1.x, p1.y, p1[0], p1[1])
print("Point2 : ",p2)
print("Point3 : ", p3)

Point 1 :  Point1(x=2, y=3) 2 3 2 3
Point2 :  Point2(x=10, y=12)
Point3 :  Point3(x=100, y=200)


### Defaults

In [12]:
Person = namedtuple("Person", "name job", defaults=['Software Engineer'])

person = Person("Hina Khadim")
print(person)

print(person._asdict())

print(person._replace(job="Web Developer"))  # returns new tuple
print(person)


Person(name='Hina Khadim', job='Software Engineer')
{'name': 'Hina Khadim', 'job': 'Software Engineer'}
Person(name='Hina Khadim', job='Web Developer')
Person(name='Hina Khadim', job='Software Engineer')


## Deque

In [28]:
from collections import deque
from numbers import Number

class Queue:
    def __init__(self) -> None:
        self.queue = deque()

    def enque(self, item) -> None:
        self.queue.appendleft(item)

    def deque(self) -> any:
        return self.queue.pop()

    def length(self) -> Number:
        return self.queue.__len__()


fruits = ['apple', 'mango', 'banana', 'orange', 'grapes']
queue = Queue()

for fruit in fruits:
    queue.enque(fruit)

print(queue.queue)

print("Pop first element " ,queue.deque())
print("Queue length : ", queue.length())

deque(['grapes', 'orange', 'banana', 'mango', 'apple'])
Pop first element  apple
Queue length :  4


> Note : We can also provide maxlen to deque in that case, the entry at opposit end will be discarded

## default dict

In [29]:
fruits = {'apple' : 5, 'bananas' : 10, 'oranges' : 12}

print(fruits['grapes'])


KeyError: 'grapes'

In [34]:
print(fruits.setdefault('grapes', 1))
print(fruits)

print(fruits.get("tomatoes", 'Default value to return'))
print(fruits)

print(fruits.get("mango"))

1
{'apple': 5, 'bananas': 10, 'oranges': 12, 'grapes': 1}
Default value to return
{'apple': 5, 'bananas': 10, 'oranges': 12, 'grapes': 1}
None


In [35]:
from collections import defaultdict

pets = [
    ("dog", "Affenpinscher"),
    ("dog", "Terrier"),
    ("dog", "Boxer"),
    ("cat", "Abyssinian"),
    ("cat", "Birman"),
]

group_pets = defaultdict(list)

for pet, breed in pets:
    group_pets[pet].append(breed)

print(group_pets)

defaultdict(<class 'list'>, {'dog': ['Affenpinscher', 'Terrier', 'Boxer'], 'cat': ['Abyssinian', 'Birman']})


## OrderedDict

In [38]:
from collections import OrderedDict


normal_dict = {'first' : 1, 'second' : 2, 'third' : 3, 'fourth' : 4}
print(normal_dict)

normal_dict['beta'] = 5
normal_dict['alpha'] = 6

print(normal_dict)


order_dict = OrderedDict()
order_dict['school'] = 1
order_dict['college'] = 2
order_dict['university'] = 3
order_dict['job'] = 4

print("Ordered _ Dict : ",order_dict)

{'first': 1, 'second': 2, 'third': 3, 'fourth': 4}
{'first': 1, 'second': 2, 'third': 3, 'fourth': 4, 'beta': 5, 'alpha': 6}
Ordered _ Dict :  OrderedDict([('school', 1), ('college', 2), ('university', 3), ('job', 4)])


> Normal Dict compares only content while Ordered Dict compares content and order

## Counter

In [41]:
from collections import Counter

counter = Counter("Missisipi")
print(counter)

Counter({'i': 4, 's': 3, 'M': 1, 'p': 1})


## Chaining Dictionaries

In [51]:
from collections import ChainMap

cmd_proxy = {}
local_proxy = {'proxy' : "local.com"}
global_proxy = {'proxy' : "global.com"}

config = ChainMap(cmd_proxy, local_proxy, global_proxy)

print(config)
print("Proxy : ",config['proxy'])
print("Config maps : ",config.maps)

config.new_child({'proxy' : "Special proxy"})

print("Config Parent : ",config.parents)
print("Config ",config)


config['proxy'] = "updating proxy..."
print("Updated Config : ", config)

config.pop("proxy")
print("After popping proxy : ",config)



ChainMap({}, {'proxy': 'local.com'}, {'proxy': 'global.com'})
Proxy :  local.com
Config maps :  [{}, {'proxy': 'local.com'}, {'proxy': 'global.com'}]
Config Parent :  ChainMap({'proxy': 'local.com'}, {'proxy': 'global.com'})
Config  ChainMap({}, {'proxy': 'local.com'}, {'proxy': 'global.com'})
Updated Config :  ChainMap({'proxy': 'updating proxy...'}, {'proxy': 'local.com'}, {'proxy': 'global.com'})
After popping proxy :  ChainMap({}, {'proxy': 'local.com'}, {'proxy': 'global.com'})


### Customized Built-ins

### UserString

Used to modify string functionalities. such as allow adding characters one by one. means customize the strings

## Learn python collections from :

- https://realpython.com/python-collections-module/
- https://www.geeksforgeeks.org/python-collections-module/#userdict

## Learn Code cleaning from :

- https://testdriven.io/blog/clean-code-python/#naming-conventions
- https://github.com/zedr/clean-code-python

## Scrapy

In [None]:
# learning now

In [19]:
def print_even(test_list):
    for i in test_list:
        print(i)
        yield i
 
# initializing list
test_list = [1, 4, 5, 6, 7]
 
# printing initial list
print("The original list is : " + str(test_list))
 
# printing even numbers
print("The even numbers in list are : ", end=" ")


The original list is : [1, 4, 5, 6, 7]
The even numbers in list are :  

In [20]:
gen = print_even(test_list)

In [23]:
next(gen)

5


5

In [24]:
/html/body/div/div[2]/div[1]/div[1]/span[1],
body > div > div:nth-child(2) > div.col-md-8 > div:nth-child(1) > span:nth-child(2) > a
/html/body/div/div[2]/div[1]/div[1]/span[2]/a


NameError: name 'html' is not defined

In [None]:
print("hello")