In [1]:
import re

# Intro to Class: (if we really want to use a class for Book)

The following code is only a sketch of how I believe the regex and the class would behave. Applying this class to real text would require a more sophisticated discussion.

In [2]:
text = '''
<P>異物志一卷（注：後漢議郎楊孚撰。）

<P>南州異物志一卷（注：吳丹陽太守萬震撰。）

<P>蜀志一卷（注：東京武平太守常寬撰。）

<P>發蒙記一卷（注：束皙撰。載物產之異。）

<P>地理書一百四十九卷錄一卷。陸澄合山海經已來一百六十家，以為此書。澄本之外，其舊事並多零失。見存別部自行者，唯四十二家，今列之於上。

<P>三輔故事二卷（注：晉世撰）

<P>湘州記二卷（注：庾仲雍撰。）
'''

`([^，。<>〔〕a-zA-Z0-9]+?)([一二三四五六七八九十百千]+?)卷(?:(?:[^，。<>〔〕a-zA-Z0-9]+?)?錄([一二三四五六七八九十]+?)卷)?`

|group 1| group 2| group 3|
| ---   | ---    | ----   |
| book title | number of 卷 | number of 錄 |

There are still some problems with this regex, so do not take it too seriously.

In [3]:
# Group 1 lazily captures a run of characters other than ，。<>〔〕 and ASCII
# letters/digits (the title); group 2 lazily captures the Chinese numerals
# right before the first 卷; group 3 captures the numerals between 錄 and 卷
# in the optional trailing part.
# NOTE(review): group 3's numeral class omits 百 and 千, unlike group 2 --
# confirm whether that is intended.
pattern = '([^，。<>〔〕a-zA-Z0-9]+?)([一二三四五六七八九十百千]+?)卷(?:(?:[^，。<>〔〕a-zA-Z0-9]+?)?錄([一二三四五六七八九十]+?)卷)?'
pattern_object = re.compile(pattern)
# findall returns one (group 1, group 2, group 3) tuple per match; a group
# that did not take part in the match is reported as ''.
pattern_object.findall(text)

[('異物志', '一', ''),
 ('南州異物志', '一', ''),
 ('蜀志', '一', ''),
 ('發蒙記', '一', ''),
 ('地理書', '一百四十九', '一'),
 ('三輔故事', '二', ''),
 ('湘州記', '二', '')]

In [4]:
class Book:
    '''A catalogue entry: a title, its 卷 count and an optional 錄 count.

    Attributes (stored exactly as they are passed in):
        name: the book title.
        juan: the number of 卷.
        lu:   the number of 錄 卷; None or '' means the entry has no 錄 part.
    '''

    def __init__(self, name, juan, lu=None):
        self.name = name
        self.juan = juan  # instance attributes
        self.lu = lu

    def __repr__(self):
        '''The string representation for the Book class.

        Returns '《name》juan卷' when there is no 錄 part,
        otherwise '《name》juan卷～錄lu卷'.
        '''
        # re.findall reports an unmatched optional group as '' while
        # match.group() reports it as None; both mean "no 錄 part" here.
        if self.lu is None or self.lu == '':
            return '《{}》{}卷'.format(self.name, self.juan)
        return '《{}》{}卷～錄{}卷'.format(
            self.name, self.juan, self.lu)

In [5]:
book = Book('異物志', '一', None)
book.name, book.juan, book.lu

('異物志', '一', None)

In [6]:
# Build one Book per regex match; groups 1, 2 and 3 are passed on as
# name, juan and lu in that order.
Book_collect = [
    Book(*found.group(1, 2, 3))
    for found in pattern_object.finditer(text)
]

Book_collect

[《異物志》一卷, 《南州異物志》一卷, 《蜀志》一卷, 《發蒙記》一卷, 《地理書》一百四十九卷～錄一卷, 《三輔故事》二卷, 《湘州記》二卷]

Actually, if we only want to store the attributes, we could just use a `dict`. Maybe next time we can figure out what kind of methods (functions) we could put in the Book class.

In [7]:
# The same collection again, this time with a plain dict per match
# instead of a Book instance.
Book_collect = [
    dict(zip(('name', 'juan', 'lu'), found.group(1, 2, 3)))
    for found in pattern_object.finditer(text)
]

Book_collect

[{'juan': '一', 'lu': None, 'name': '異物志'},
 {'juan': '一', 'lu': None, 'name': '南州異物志'},
 {'juan': '一', 'lu': None, 'name': '蜀志'},
 {'juan': '一', 'lu': None, 'name': '發蒙記'},
 {'juan': '一百四十九', 'lu': '一', 'name': '地理書'},
 {'juan': '二', 'lu': None, 'name': '三輔故事'},
 {'juan': '二', 'lu': None, 'name': '湘州記'}]

# Easier case

From the official Python tutorial: https://docs.python.org/3/tutorial/classes.html

In [8]:
class Dog:
    '''A dog with a name, an owner and its own list of tricks.'''

    # Class attribute: defined once on the class and visible from every dog.
    kind = 'not the cat'

    def __init__(self, name, owner):
        # Instance attributes: assigned on self, so they belong to one dog.
        self.name, self.owner = name, owner

        # A fresh list is created for each dog, so tricks are never shared.
        self.tricks = list()

    def add_trick(self, trick):
        '''Append one trick to this dog's own list.'''
        self.tricks.append(trick)
    

In [9]:
# initialize two instances
d = Dog('Fido', 'Bob')
e = Dog('Buddy', 'Alice')
d.add_trick('roll over')
e.add_trick('play dead')

In [10]:
d.tricks

['roll over']

In [11]:
e.tricks

['play dead']

In [12]:
d.add_trick('jump')

In [13]:
d.tricks

['roll over', 'jump']

# Case study: hand-crafted Set class

In [14]:
# from Book: Data science from scratch, by Joel Grus

class Set:
    '''A minimal hand-rolled set that stores its members as dict keys.'''

    def __init__(self, values=None):
        '''Constructor: runs whenever a new Set is created.

        By convention ``self`` names the particular Set object being used.
        If ``values`` is given, each of its items is added to the new Set.
        '''
        # Membership is represented by being a key of self.dict;
        # the stored True is only a placeholder.
        self.dict = {}

        for item in (() if values is None else values):
            self.add(item)

    def __repr__(self):
        '''String representation of a Set object.'''
        return "Set: {}".format(self.dict.keys())

    def add(self, value):
        '''Make value a member; re-adding an existing member changes nothing.'''
        self.dict[value] = True

    def contains(self, value):
        '''Return True if value is currently a key of the dictionary.'''
        return value in self.dict.keys()

    def remove(self, value):
        '''Drop value from the Set; raises KeyError when it is not a member.'''
        self.dict.pop(value)

In [15]:
s = Set([1, 2, 3])
s.add(4)
s

Set: dict_keys([1, 2, 3, 4])

In [16]:
s.contains(4)

True

In [17]:
s.remove(3)

In [18]:
s

Set: dict_keys([1, 2, 4])

# Case Study: Complex Number

In [19]:
# what a complex number should look like
complex_numer = complex(1, 2)
complex_numer

(1+2j)

Now we can make a hand-crafted complex number class.

In [20]:
class Complex:
    '''A small hand-written complex number with a real and an imaginary part.'''

    def __init__(self, real, imag):
        self.real, self.imag = real, imag

    def __repr__(self):
        # The sign is printed on its own and the imaginary part as a
        # magnitude, so a negative part reads "3 - 4j" and not "3 + -4j".
        sign = '+' if self.imag >= 0 else '-'
        magnitude = abs(self.imag)
        return '({} {} {}j)'.format(self.real, sign, magnitude)

    def __add__(self, that):
        '''self + that: add the real parts and the imaginary parts.'''
        real_sum = self.real + that.real
        imag_sum = self.imag + that.imag
        return Complex(real_sum, imag_sum)

    def __sub__(self, that):
        '''self - that: subtract the real parts and the imaginary parts.'''
        real_diff = self.real - that.real
        imag_diff = self.imag - that.imag
        return Complex(real_diff, imag_diff)

    def __abs__(self):
        '''abs(self): square root of real squared plus imag squared.'''
        squared_norm = self.real ** 2 + self.imag ** 2
        return squared_norm ** 0.5

    def conjugate(self):
        '''Return a new Complex with the sign of the imaginary part flipped.'''
        return Complex(self.real, -self.imag)

In [21]:
# repr
hand_made_complex_number = Complex(3, 4)
hand_made_complex_number

(3 + 4j)

In [22]:
# add
hand_made_complex_number_2 = Complex(2, 1)
hand_made_complex_number + hand_made_complex_number_2

(5 + 5j)

In [23]:
# sub
hand_made_complex_number - hand_made_complex_number_2

(1 + 3j)

In [24]:
# absolute
abs(hand_made_complex_number)

5.0

In [25]:
# instance method
hand_made_complex_number.conjugate()

(3 - 4j)

# Combinations: recursive 

In [26]:
def all_possible_combinations(combinations, sub_set_len, index_list):
    '''Recursively collect every distinct sub-list of a given length.

    Each recursion step removes one element from ``index_list`` and calls
    the function again on the shorter list, so every sub-list (with the
    original element order kept) is eventually reached.

    Args:
        combinations: list the results are appended to. It is modified
            in place; pass ``[]`` to start a fresh search.
        sub_set_len: length of the sub-lists to collect.
        index_list: list of elements to choose from.

    Returns:
        The same ``combinations`` list that was passed in.
    '''
    if len(index_list) <= sub_set_len:
        # Store the sub-list if it has exactly the requested length and
        # has not been stored already.
        if (len(index_list) == sub_set_len
           ) and (index_list not in combinations):
            combinations.append(index_list)
        # Removing more elements can only produce shorter lists, which can
        # never match sub_set_len, so the recursion stops here.
        return combinations

    # Drop each element in turn and recurse on the remaining elements.
    for i in range(len(index_list)):
        new_index_list = index_list[:i] + index_list[i + 1:]
        all_possible_combinations(
            combinations, sub_set_len, new_index_list)

    return combinations

$$C^3_2 = \frac{3 \times 2}{2 \times 1} = 3$$


In [27]:
all_possible_combinations([], 2, list(range(3)))

[[1, 2], [0, 2], [0, 1]]

In [28]:
len(all_possible_combinations([], 2, list(range(3))))

3

$$
C^5_3 = \frac{5 \times 4 \times 3}{3 \times 2 \times 1} = 10
$$

In [29]:
all_possible_combinations([], 3, list(range(5)))

[[2, 3, 4],
 [1, 3, 4],
 [1, 2, 4],
 [1, 2, 3],
 [0, 3, 4],
 [0, 2, 4],
 [0, 2, 3],
 [0, 1, 4],
 [0, 1, 3],
 [0, 1, 2]]

In [30]:
len(all_possible_combinations([], 3, list(range(5))))

10

# Another recursive example

In [31]:
# other recursive example
def factorial(n):
    '''Return n! (n factorial), computed recursively.

    Args:
        n: a non-negative integer.

    Returns:
        1 for n == 0, otherwise n * (n - 1) * ... * 1.

    Raises:
        ValueError: if n is negative.
    '''
    if n < 0:
        # Without this guard the recursion would never reach the base case.
        raise ValueError('factorial() is not defined for negative values')

    # Base case: 0! == 1 ends the recursion.
    if n == 0:
        return 1

    return n * factorial(n - 1)

factorial(3) # 3! = 6

6

# All combinations, regardless of length of subset

In [32]:
def all_possible_combinations_regardless_len(combinations, index_list):
    '''Recursively collect every distinct non-empty sub-list of index_list.

    ``combinations`` is appended to in place and is also the return value.
    '''
    # Keep the current list unless it is already stored or is empty.
    already_stored = index_list in combinations
    if not (already_stored or index_list == []):
        combinations.append(index_list)

    # Drop one element at a time and explore what is left; every call
    # appends to the same combinations list.
    for position in range(len(index_list)):
        shorter = index_list[:position] + index_list[position + 1:]
        all_possible_combinations_regardless_len(combinations, shorter)

    return combinations

In [33]:
all_possible_combinations_regardless_len([], list(range(4)))

[[0, 1, 2, 3],
 [1, 2, 3],
 [2, 3],
 [3],
 [2],
 [1, 3],
 [1],
 [1, 2],
 [0, 2, 3],
 [0, 3],
 [0],
 [0, 2],
 [0, 1, 3],
 [0, 1],
 [0, 1, 2]]