# Beautiful Soup

Beautiful Soup is a Python library for pulling data out of HTML and XML files.

https://www.crummy.com/software/BeautifulSoup/bs4/doc/

```python
# doc is a given html document.
from bs4 import BeautifulSoup
soup = BeautifulSoup(doc, 'html.parser')      # parse doc with Python's built-in HTML parser

soup.title, soup.title.name, soup.title.string, soup.title.parent.name
soup.p, soup.p.append("...")
soup.a
soup.find_all('a')                # find all anchors
soup.original_encoding, soup.encode(formatter="html")
```

# Bisect

```python
import bisect
c = [1, 3, 3, 5, 9, 10]   # c must already be sorted

bisect.bisect(c, 4)       # 3; the location where 4 should be inserted to keep it sorted

bisect.insort(c, 8)       # inserts 8 into the list to keep it sorted
c                         # [1, 3, 3, 5, 8, 9, 10]
```

# Calendar

```python
import calendar

# Create Calendar objects:
calendar.Calendar()
calendar.TextCalendar()
calendar.HTMLCalendar()

# General calendar attributes:
calendar.firstweekday()
calendar.isleap(year)
calendar.weekday(year, month, day)
calendar.month_name
calendar.day_name
```

# Collections

## defaultdict

```python
from collections import defaultdict
words = ['apple','banana','altitude','brown','cat','bowl']
dic = defaultdict(list)
for w in words: dic[w[0]].append(w)
    
dic
defaultdict(list,
            {'a': ['apple', 'altitude'],
             'b': ['banana', 'brown', 'bowl'],
             'c': ['cat']})
```


## Counter

```python
from collections import Counter

c = Counter('cbcdcdaa')
c                      # Counter({'c': 3, 'd': 2, 'a': 2, 'b': 1})
c.most_common(1)       # [('c', 3)]
list(c.elements())     # ['c', 'c', 'c', 'b', 'd', 'd', 'a', 'a'] 
```

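## OrderedDict

OrderedDict is a dict subclass that remembers insertion order and adds order-aware methods such as move_to_end() and popitem(last=False).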

# glob

glob.glob() returns a list of paths matching a pathname pattern.

```python
import glob

import pandas as pd

data_list = [] 

for filename in glob.glob('data/*.csv'): 
	data_list.append(pd.read_csv(filename)) 
```

# Functools

## partial
```python

from functools import partial

def my_func(x, y): 
    ...

g = partial(my_func, 2)        # g(y) is my_func(2, y)
```

# itertools

## groupby(), combinations(), permutations(), product()

groupby(iterable, key) groups consecutive elements in iterable by key.

```python
import itertools

words = ['apple','altitude','banana','brown','cat','bowl']

for letter, values in itertools.groupby(words, lambda x: x[0]):
	print(letter, list(values))
    
a ['apple', 'altitude']
b ['banana', 'brown']
c ['cat']
b ['bowl']


for pair in itertools.combinations(words, 2):
    print(pair)

('apple', 'altitude')
('apple', 'banana')
('apple', 'brown')
...
('brown', 'cat')
('brown', 'bowl')
('cat', 'bowl')

# itertools.combinations(words, 2) has 15 pairs.
# itertools.permutations(words, 2) has 30 pairs.

for pair in itertools.product(words, [1,2,3]):       # 18 pairs
    print(pair)
('apple', 1)
('apple', 2)
('apple', 3)
('altitude', 1)
...
```


# Pickle

pickle creates portable serialized representations of Python objects.

```python
import pickle

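# dump/load take an open binary file object, not a file name.
with open('an_object.pkl', 'wb') as f:
    pickle.dump(an_object, f)

with open('an_object.pkl', 'rb') as f:
    an_object = pickle.load(f)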
```

# urlextract

```python
import urlextract

url_extractor = urlextract.URLExtract()
url_extractor.find_urls("Go to https://www.wikipedia.org/ or https://www.encyclopedia.com/ for more info.")

['https://www.wikipedia.org/', 'https://www.encyclopedia.com/']
```