# 《python数据结构与算法分析》笔记

## 第3章 基本数据结构

### 冒泡排序

In [1]:
def maopao(l):
    n = len(l)
    if n < 2:
        return l
    for i in range(n-1):
        for j in range(n-1-i):
            if l[j] > l[j+1]:
                l[j],l[j+1] = l[j+1],l[j]
    return l

In [4]:
maopao([2,5,4,4,8,7,3,5,9,6])

[2, 3, 4, 4, 5, 5, 6, 7, 8, 9]

### 线性数据结构
#### 栈

In [32]:
class Stack():
    def __init__(self):
        self.s = []
        
    def isempty(self):
        return len(self.s)==0
        
    def pop(self):
        if not self.isempty():
            return self.s.pop()
        else:
            print("error1: stack is empty.")
    
    def push(self,a):
        self.s.append(a)
        
    def size(self):
        return len(self.s)
        
    def peek(self):
        if not self.isempty():
            return self.s[-1]
        else:
            print("error2: stack is empty.")

In [19]:
s = Stack()

In [20]:
s.push(1); s.push(3); s.pop(); s.push(1); 

In [24]:
s.pop()

error1: stack is empty.


#### 匹配括号

In [34]:
# (5+6)*(7+8)/(4+3)
st = "(5+6)*(7+8)/(4+3)"

def match(st):
    s1 = Stack()

    for s in st:
        if s == "(":
            s1.push(s)
        elif s == ")":
            s1.pop()
            
    return s1.isempty()

match(st)

True

In [36]:
string = "{[1+(3*8)/6]*(3-5)}"

def match2(st):
    s1 = Stack()

    for s in st:
        if s in ["(","[","{"]:
            s1.push(s)
        elif s == ")":
            if s1.pop() != "(":
                return False
        elif s == "]":
            if s1.pop() != "[":
                return False
        elif s == "}":
            if s1.pop() != "{":
                return False
            
    return s1.isempty()

match2(string)

True

In [41]:
# 十进制转n进制
def divide(num,n):
    digits = "0123456789ABCDEF"
    s = Stack()
    while num > 0:
        s.push(num % n)
        num //= n
    
    res = ""
    while not s.isempty():
        res += digits[s.pop()]
        
    return res

divide(233,16)

'E9'

In [45]:
# 中序表达式（完全括号表达式）转后序表达式
# (((A+B)*C)-((D-E)*(F+G)))
def revert(string):
    temp = Stack()
    for s in string:
        if s != ")":
            temp.push(s)
        else:
            num2 = temp.pop()
            label = temp.pop()
            num1 = temp.pop()
            temp.pop()
            t = num1+num2+label
            temp.push(t)
    return temp.pop()

string = "(((A+B)*C)-((D-E)*(F+G)))"
revert(string)

'AB+C*DE-FG+*-'

In [51]:
# 计算后序表达式
# "AB+C*DE-FG+*-"
def compute(string):
    stack = Stack()
    for s in string:
        if s not in ['+','-','*','/']:
            stack.push(s)
        else:
            num2 = int(stack.pop())
            num1 = int(stack.pop())
            if s == '+':
                temp = num1 + num2
            elif s == '-':
                temp = num1 - num2
            elif s == '*':
                temp = num1 * num2
            elif s == '/':
                temp = num1 / num2
            stack.push(temp)
            
    return stack.pop()

string = '23+1*54-64+*-'
compute(string)

-5

In [52]:
(((2+3)*1)-((5-4)*(6+4)))

-5

### 队列

In [1]:
class Queue():
    def __init__(self):
        self.q = []
        
    def isempty(self):
        return self.q == []
    
    def enqueue(self,item):
        self.q.insert(0,item)
        
    def dequeue(self):
        return self.q.pop()
    
    def size(self):
        return len(self.q)

In [2]:
q = Queue()
q.enqueue(3); q.enqueue(4); q.dequeue(); q.enqueue(5)

In [3]:
q.dequeue()

4

In [4]:
q.dequeue()

5

#### 约瑟夫问题/传土豆

In [9]:
def yousefu(n,m):
    l = list(range(n))
    while len(l) > 1:
        for i in range(m):
            l.append(l.pop(0))
        l.pop(0)
    return l[0]

In [10]:
yousefu(6,7)

2

### 双端队列

In [4]:
class Deque():
    def __init__(self):
        self.d = []
        
    def isempty(self):
        return self.d == []
    
    def addRear(self,item):
        self.d.append(item)
        
    def addFront(self,item):
        self.d.insert(0,item)
        
    def removeRear(self):
        return self.d.pop()
    
    def removeFront(self):
        return self.d.pop(0)
    
    def size(self):
        return len(self.d)

#### 回文检测器

In [7]:
# 接受一个字符串并判断其是否为回文串
def isRe(string):
    d = Deque()
    for s in string:
        d.addRear(s)
        
    while d.size() > 1:
        if d.removeRear() != d.removeFront():
            return False
    
    return True

In [10]:
isRe('abcdcba'), isRe('abcddcba'), isRe('abddcba')

(True, True, False)

### 列表

#### 链表实现有序列表

In [11]:
# 链表 Node节点
class Node():
    def __init__(self,value):
        self.value = value
        self.next = None
        
    def getValue(self):
        return self.value
    
    def getNext(self):
        return self.next
    
    def setValue(self,newValue):
        self.value = newValue
    
    def setValue(self,newNext):
        self.next = newNext

In [44]:
# 有序列表
class orderedList():
    def __init__(self):
        self.head = Node(-1)
        
    def add(self,item):
        node = Node(item)
#         state = False
        
        if self.head.next == None:
            self.head.next = node
#             state = True
            return
            
        pre = self.head; cur = self.head.next
        while cur != None:
            if cur.value >= item:
                pre.next = node
                node.next = cur
#                 state = True
                break
            else:
                pre = pre.next
                cur = cur.next
        # 只有当while循环正常结束的情况下，才执行else块中的语句.
        # 当while 块遇到break强制跳出的时候，else 块中的语句不被执行
        else:
#       if state == False:       
            pre.next = node
                
    def remove(self,item):
        # 假设 item 已存在
        if self.head.next == None:
            return False
            
        pre = self.head; cur = self.head.next
        while cur != None:
            if cur.value > item:
                return False
            elif cur.value < item:
                pre = pre.next
                cur = cur.next
            else:
                pre.next = cur.next
                break
                
    def search(self,item):
        if self.head.next == None:
            return False
            
        cur = self.head.next
        while cur != None:
            if cur.value == item:
                return True
            else:
                cur = cur.next
        return False
    
    def isEmpty(self):
        return self.head.next == None
    
    def length(self):
        if self.head.next == None:
            return 0
            
        cur = self.head.next; count = 0
        while cur != None:
            count += 1
            cur = cur.next
            
        return count
    
    def index(self,item):
        # 假设 item 已存在
        if self.head.next == None:
            return False
            
        cur = self.head.next; index = 0
        while cur != None:
            if cur.value == item:
                return index
            else:
                index += 1
                cur = cur.next
            
        return False
    
    def pop(self):
        if self.head.next == None:
            return False
            
        pre = self.head; cur = self.head.next
        while cur.next != None:
            pre = pre.next
            cur = cur.next
        
        pre.next = None
        return cur.value
    
    def printL(self):
        if self.head.next == None:
            print("NULL")
            
        cur = self.head.next
        while cur != None:
            print(cur.value,end=' ')
            cur = cur.next

In [45]:
ol = orderedList(); ol.add(15); ol.add(23); ol.add(17); ol.add(9); ol.add(35); 
ol.printL()

9 15 17 23 35 

In [46]:
ol.search(17), ol.search(9), ol.search(19) 

(True, True, False)

In [47]:
ol.remove(17)
ol.printL()

9 15 23 35 

In [48]:
ol.index(23)

2

In [49]:
ol.pop()

35

## 第4章 递归

#### 十进制转换为任一进制（2～16）

In [50]:
def convert(n,m):
    s = '0123456789ABCDEF'
    if n < m:
        return s[n]
    else:
        return convert(n//m,m) + s[n%m]
    
convert(100,10)

'100'

In [52]:
convert(233,16), convert(10,2)

('E9', '1010')

#### 汉诺塔问题

In [58]:
def hanoi(n,A,B,C):
    if n == 1:
        print('move from ',A,' to ',C)
    else:
        hanoi(n-1,A,C,B)
        print('move from ',A,' to ',C)
        hanoi(n-1,B,A,C)

In [59]:
hanoi(3,'A','B','C')

move from  A  to  C
move from  A  to  B
move from  C  to  B
move from  A  to  C
move from  B  to  A
move from  B  to  C
move from  A  to  C


In [60]:
hanoi(5,'A','B','C')

move from  A  to  C
move from  A  to  B
move from  C  to  B
move from  A  to  C
move from  B  to  A
move from  B  to  C
move from  A  to  C
move from  A  to  B
move from  C  to  B
move from  C  to  A
move from  B  to  A
move from  C  to  B
move from  A  to  C
move from  A  to  B
move from  C  to  B
move from  A  to  C
move from  B  to  A
move from  B  to  C
move from  A  to  C
move from  B  to  A
move from  C  to  B
move from  C  to  A
move from  B  to  A
move from  B  to  C
move from  A  to  C
move from  A  to  B
move from  C  to  B
move from  A  to  C
move from  B  to  A
move from  B  to  C
move from  A  to  C


### 动态规划

#### 找零钱

In [67]:
# 一共有 1，5，10，25 四种面值的硬币
# 给定需找零钱 n，求至少需要多少枚硬币
# 贪婪算法有时会出现问题，如在找零 63，存在硬币面值 1，5，10，21，25 时。
def charge(n):
    minN = n
    if n == 1 or n == 5 or \
    n == 10 or n == 25:
        return 1
    else:
        for i in [c for c in [1,5,10,25] if c <= n]:
            temp = 1 + charge(n-i)
            if temp < minN:
                minN = temp
        return minN
#         return min(1 + charge(n - 1), 1 + charge(n - 5), \
#                    1 + charge(n - 10), 1 + charge(n - 25))

In [68]:
charge(37)

4

In [4]:
def charge2(coinValueList,n):
    minN = n
    if n in coinValueList:
        return 1
    else:
        for i in [c for c in coinValueList if c <= n]:
            temp = 1 + charge2(coinValueList,n-i)
            if temp < minN:
                minN = temp
        return minN

In [5]:
charge2([1,5,10,25],37)

4

In [9]:
# 上述递归中存在大量重复运算，
# 解决方法是用字典存储已经计算的值
def charge3(coinValueList,n,resDict):
    minN = n
    if n in coinValueList:
        resDict[n] = 1
        return 1
    elif n in resDict.keys():
        return resDict[n]
    else:
        for i in [c for c in coinValueList if c <= n]:
            temp = 1 + charge3(coinValueList,n-i,resDict)
            if temp < minN:
                minN = temp
        return minN

In [10]:
charge3([1,5,10,25],37,{})

4

In [1]:
# 动态规划解决找零钱问题
def dpCharge(coinValueList,n,minCoinsList):
    for i in range(n+1):
        coinsCount = i
        for j in [c for c in coinValueList if c <= n]:
            if 1 + minCoinsList[n-j] < coinsCount:
                coinsCount = 1 + minCoinsList[n-j]
        minCoinsList[i] = coinsCount
        print('i',i,'  ',minCoinsList[i])
    return minCoinsList[n]

In [2]:
dpCharge([1,5,10,25],37,[0,1,2,3,4,1])

IndexError: list index out of range

In [9]:
# 书中版本
def dpMakeChange(coinValueList, change, minCoins):
    for cents in range(change+1):
        coinCount = cents
        for j in [c for c in coinValueList if c <= cents]:
            if minCoins[cents-j] + 1 < coinCount:
                coinCount = minCoins[cents -j]+1
        minCoins[cents] = coinCount
    return minCoins[change]

In [10]:
dpMakeChange([1,5,10,25],37,[])

IndexError: list assignment index out of range

In [5]:
# 修改后的dp算法，存储所找零钱的信息
def dpMakeChange2(coinValueList, change, minCoins, coinsUsed):
    for cents in range(change+1):
        coinCount = cents
        newCoin = 1
        for j in [c for c in coinValueList if c <= cents]:
            if minCoins[cents-j] + 1 < coinCount:
                coinCount = minCoins[cents -j]+1
                newCoin = j
        minCoins[cents] = coinCount
        coinsUsed[cents] = newCoin
    return minCoins[change]

def printCoins(coinsUsed, change):
    coin = change
    while coin > 0:
        thisCoin = coinsUsed[coin]
        print(thisCoin)
        coin = coin - thisCoin

In [6]:
cl = [1, 5, 10, 21, 25]
coinsUsed = [0]*64
coinCount = [0]*64
dpMakeChange2(cl, 63, coinCount, coinsUsed) 

3

In [7]:
printCoins(coinsUsed, 63)

21
21
21


In [8]:
printCoins(coinsUsed, 52)

10
21
21


### 课后题

#### 阶乘计算

In [13]:
def jiecheng(n):
    if n == 0 or n == 1:
        return 1
    else:
        return n * jiecheng(n-1)

In [14]:
jiecheng(6)

720

#### 反转列表

In [17]:
# 循环
def reverse(l):
    n = len(l)
    if n < 2:
        return l
    for i in range(n//2):
        l[i],l[n-i-1] = l[n-i-1],l[i]
    return l

In [19]:
reverse([1,2,3,4,5,6,7,8,9,0]),reverse([1,2,3,4,5,6,7,8,9])

([0, 9, 8, 7, 6, 5, 4, 3, 2, 1], [9, 8, 7, 6, 5, 4, 3, 2, 1])

In [28]:
# 递归
def reverse2(l):
    if len(l) == 1:
        return l
    else:
        return l[-1:]+reverse2(l[:-1])

In [29]:
reverse2([1,2,3,4,5,6,7,8,9,0]),reverse2([1,2,3,4,5,6,7,8,9])

([0, 9, 8, 7, 6, 5, 4, 3, 2, 1], [9, 8, 7, 6, 5, 4, 3, 2, 1])

### 课外拓展

#### 斐波那契数列

In [5]:
def Fibnaci(n):
    if n == 1 or n == 2:
        return 1
    else:
        return Fibnaci(n-1) + Fibnaci(n-2)

In [6]:
Fibnaci(10)

55

In [27]:
# 上述递归中存在大量重复运算
# 优化方法：用字典存储中间值
def Fibnaci2(n,tempDict):
    if n == 1 or n == 2:
        return 1
    if n in tempDict.keys():
        return tempDict[n]
    tempDict[n-1] = Fibnaci2(n-1,tempDict)
    tempDict[n-2] = Fibnaci2(n-2,tempDict)
    return tempDict[n-1] + tempDict[n-2]

In [29]:
Fibnaci2(10,{})

55

#### 小青蛙跳台阶

In [9]:
# 小青蛙一次可以跳1阶，也可以跳两阶（只能向上），
# 问爬n阶台阶共多少种方法
def floor(n):
    if n <= 2:
        return n
    return floor(n-1) + floor(n-2)

In [10]:
floor(10)

89

In [11]:
# 拓展 青蛙一次可以跳1阶，也可以跳两阶（可以向上，也可以向下）
# 但是同一个台阶不能踩两次，问爬n阶台阶共多少种方法
def floor2(n):
    if n <= 3:
        return n
    else:
        return floor2(n-1) + floor2(n-2) + floor2(n-3)

In [14]:
floor2(30)

45152016

#### 反转单链表

In [11]:
# 结点定义
class Node:
    def __init__(self,value=0,next=None):
        self.value = value
        self.next = next
        
class linkList:
    def __init__(self):
        self.head = None

In [20]:
l = linkList()

In [21]:
l.head = Node(1); l.head.next = Node(2);
l.head.next.next = Node(3); l.head.next.next.next = Node(4); l.head.next.next.next.next = Node(5);

In [22]:
def printL(l):
    cur = l.head
    while cur != None:
        print(cur.value,end=' ')
        cur = cur.next
        
printL(l)

1 2 3 4 5 

In [23]:
# 反转链表(递归)
def reverseList(l):
    head = l.head
    if head == None or head.next == None:
        return head
    else:
        temp = reverseList(head.next)
        tail = head.next
        tail.next = head
        head.next = None
        return temp

In [24]:
l1 = reverseList(l)

AttributeError: 'Node' object has no attribute 'head'

In [10]:
printL(l)

## 第5章 搜索与排序

### 搜索

#### 顺序搜索

In [36]:
# 无序列表的顺序搜索
def orderedSearch(l,item):
    for i in range(len(l)):
        if l[i] == item:
            return True
    return False

In [44]:
l = [2,5,7,8,6,9,2,3]
orderedSearch(l,6), orderedSearch(l,1)

(True, False)

In [38]:
# 有序列表的顺序搜索
def orderedSearch2(l,item):
    for i in range(len(l)):
        if l[i] > item:
            return False
        if l[i] == item:
            return True
    return False

In [47]:
l2 = sorted(l)
orderedSearch2(l,6), orderedSearch2(l,1)

(True, False)

#### 二分搜索 - 分治策略

In [57]:
def binarySearch(l,target):
    if len(l) == 0:
        return False
    left = 0; right = len(l) # 左闭右开
    while left <= right:
        mid = (left + right) // 2
        if l[mid] == target:
            return mid
        elif l[mid] < target:
            left = mid + 1
        else:
            right = mid
            
    return False

In [58]:
l2

[2, 2, 3, 5, 6, 7, 8, 9]

In [59]:
binarySearch(l2,5), binarySearch(l2,2), binarySearch(l2,6)

(3, 1, 4)

In [60]:
def binarySearch2(l,target):
    if len(l) == 0:
        return False
    left = 0; right = len(l)-1 # 左闭右闭
    while left <= right:
        mid = (left + right) // 2
        if l[mid] == target:
            return mid
        elif l[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
            
    return False

In [61]:
binarySearch2(l2,5), binarySearch2(l2,2), binarySearch2(l2,6)

(3, 1, 4)

In [73]:
# 递归方法
def binarySearchRe(l,target,start,end):
    if len(l) == 0:
        return False
    mid = (start + end) // 2
    if l[mid] == target:
        return mid
    elif l[mid] < target:
        return binarySearchRe(l,target,mid + 1,end)
    else:
        return binarySearchRe(l,target,start,mid)

In [74]:
binarySearchRe(l2,5,0,len(l2)), binarySearchRe(l2,2,0,len(l2)), binarySearchRe(l2,6,0,len(l2))

(3, 1, 4)

In [18]:
def binarySearchRe2(l,target):
    if len(l) == 0:
        return False
    mid = len(l) // 2
    if l[mid] == target:
        return mid
    elif l[mid] < target:
        return binarySearchRe2(l[mid + 1:],target)
    else:
        return binarySearchRe2(l[:mid],target)

In [19]:
l = [2,5,7,8,6,9,2,3]
l2 = sorted(l)
l2

[2, 2, 3, 5, 6, 7, 8, 9]

In [20]:
# fixme: binarySearchRe2(l2,5)结果不对
binarySearchRe2(l2,5), binarySearchRe2(l2,2), binarySearchRe2(l2,6)

(0, 1, 4)

#### 散列

散列表（Hash table，也叫哈希表），是根据关键码值(Key value)而直接进行访问的数据结构。
也就是说，它通过把关键码值映射到表中一个位置来访问记录，以加快查找的速度。
这个映射函数叫做散列函数，存放记录的数组叫做散列表。 --百度百科

散列函数将关键字映射到0到数组大小N-1范围，作为下标并将关键字存入数组对应位置，数组称为散列表。
理想散列表（哈希表）是一个包含关键字的具有固定大小的数组，它能够**以常数时间执行插入，删除和查找操作**。

In [21]:
# 选择取余函数作为散列函数
def hash(valueList,n):
    res = [None] * n
    for v in valueList:
        res[v%n] = v
    return res

In [22]:
valueList = [54,26,93,17,77,31]
hash(valueList,11)

[77, None, None, None, 26, 93, 17, None, None, 31, 54]

若两个散列值映射到同一位置，则产生**冲突**

**处理冲突：**

**开放定址法 线性探测**：从起初的散列值开始，顺序遍历散列表，直到找到一个空槽，放入散列值。（将散列表看成环）  
缺点：出现聚集现象。
解决方法：扩展线性探测、平方探测等。  
**链接法**：用链表存储同一个槽中的散列值。

In [37]:
def hash2(valueList,n):
    res = [None] * n
    for v in valueList:
        if res[v%n] == None:
            res[v%n] = v
        else:
            # ！改进： 环的循环可通过 对数组size取余 实现，如 hash3（）所示
            flag = 0
            for m in range(v%n,n):
                if res[m] == None:
                    res[m] = v
                    flag = 1
                    break
            if flag == 0: 
                for m in range(v%n):
                    if res[m] == None:
                        res[m] = v
                        flag = 1
                        break
    if flag == 0:
        print("有溢出")
    return res

In [42]:
valueList2 = [54,26,93,17,77,31,42,53,64]
hash2(valueList2,11)

[77, 42, 53, 64, 26, 93, 17, None, None, 31, 54]

In [52]:
def hash3(valueList,n):
    res = [None] * n
    for v in valueList:
        slot = v % n
        while res[slot] != None:
            slot = (slot + 1) % n
            
        if res[slot] == None:
            res[slot] = v

    return res

In [53]:
valueList3 = [54,26,93,17,77,31,42,53,64]
hash3(valueList2,11)

[77, 42, 53, 64, 26, 93, 17, None, None, 31, 54]

#### 实现映射抽象数据类型（字典）

In [44]:
# 使用 两个列表 实现映射抽象数据类型
# 一个列表存储 键 ，一个列表存储 值 。
class HashTable():
    def __init__(self):
        self.size = 11
        self.slots = [None] * self.size
        self.data = [None] * self.size
        
    def put(self, key, data):
        hashvalue = self.hashfunction(key, len(self.slots))
        if self.slots[hashvalue] == None:
            self.slots[hashvalue] = key
            self.data[hashvalue] = data
        else:
            if self.slots[hashvalue] == key:
                self.data[hashvalue] = data # 替换
            else:
                nextslot = self.rehash(hashvalue, len(self.slots))
                while self.slots[nextslot] != None and \
                self.slots[nextslot] != key:
                    nextslot = self.rehash(nextslot, len(self.slots))
                    
                if self.slots[nextslot] == None:
                    self.slots[nextslot] = key
                    self.data[nextslot] = data
                else:
                    self.data[nextslot] = data # 替换
                        
    def hashfunction(self, key, size):
        return key%size
    
    def rehash(self, oldhash, size):
        return (oldhash + 1)%size
    
    def get(self, key):
        startslot = self.hashfunction(key, len(self.slots))
        
        data = None
        stop = False
        found = False
        position = startslot
        
        while self.slots[position] != None and \
        not found and not stop:
            if self.slots[position] == key:
                found = True
                data = self.data[position]
            else:
                position=self.rehash(position, len(self.slots))
                if position == startslot:
                    stop = True
        return data
                
    def __getitem__(self, key):
        return self.get(key)
    
    def __setitem__(self, key, data):
        self.put(key, data)


In [45]:
H = HashTable()
H[54] = "cat"
H[26] = "dog"
H[93] = "lion"
H[17] = "tiger"
H[77] = "bird"
H[31] = "cow"
H[44] = "goat"
H[55] = "pig"
H[20] = "chicken" 

In [46]:
H.slots

[77, 20, None, None, 26, 93, 17, None, None, 31, 54]

In [47]:
H.data

['chicken',
 'chicken',
 None,
 None,
 'dog',
 'lion',
 'tiger',
 None,
 None,
 'cow',
 'cat']

In [48]:
H[20]

'chicken'

In [50]:
H[20] = 'duck'
H[20]

'duck'

In [51]:
H.data

['duck', 'duck', None, None, 'dog', 'lion', 'tiger', None, None, 'cow', 'cat']

### 排序

#### 冒泡排序

In [69]:
def bubbleSort(l):
    for i in range(len(l)):
        for j in range(len(l)-i-1):
            if l[j] > l[j+1]:
                l[j], l[j+1] = l[j+1], l[j]
    return l

In [70]:
l = [2,7,5,8,9,4,5,7,1]
bubbleSort(l)

[1, 2, 4, 5, 5, 7, 7, 8, 9]

冒泡排序一般认为效率很低，因为在未确定最终位置时需要频繁交换。  
但是，**如果在一次循环中没有交换，说明已经排好序，可以停止循环。**  
可利用这一特性提前终止。

In [71]:
def bubbleSort2(l):
    for i in range(len(l)):
        flag = True
        for j in range(len(l)-i-1):
            flag = False
            if l[j] > l[j+1]:
                flag = True
                l[j], l[j+1] = l[j+1], l[j]
        if flag == False:
            break
    return l

In [72]:
l = [2,7,5,8,9,4,5,7,1]
bubbleSort2(l)

[1, 2, 4, 5, 5, 7, 7, 8, 9]

#### 选择排序

选择排序是对冒泡排序的改进，在一次循环中只进行一次交换。  
具体来说，先循环找到最大值，然后放在合适的位置上。

In [73]:
def selectSort(l):
    n = len(l)
    for i in range(n):
        maxNum = -1
        maxIndex = -1
        for j in range(n-i):
            if l[j] > maxNum:
                maxNum = l[j]
                maxIndex = j
        l[maxIndex], l[n-i-1] = l[n-i-1], l[maxIndex]
    return l

In [74]:
l = [2,7,5,8,9,4,5,7,1]
selectSort(l)

[1, 2, 4, 5, 5, 7, 7, 8, 9]

#### 插入排序

插入排序在列表的左端维护一个有序子列表，  
依次遍历列表，将值按照大小顺序插入有序子列表中。

In [91]:
def insertSort(l):
    n = len(l)
    if n < 2:
        return l
    for i in range(1,n):
        # 从前到后遍历子列表
        for j in range(i):
            if l[j] > l[i]:
                l.insert(j,l.pop(i))
    return l

In [92]:
l = [2,7,5,8,9,4,5,7,1]
insertSort(l)

[1, 2, 4, 5, 5, 7, 7, 8, 9]

In [103]:
# 可在插入时从后向前遍历子列表，这样可省去pop()、insert()
def insertSort2(l):
    n = len(l)
    if n < 2:
        return l
    for i in range(1,n):
        # 从后向前遍历子列表，依次交换
        for j in range(i,0,-1):
            if l[j] < l[j-1]:
                l[j], l[j-1] = l[j-1], l[j]
            else:
                break
    return l

In [104]:
l = [2,7,5,8,9,4,5,7,1]
insertSort2(l)

[1, 2, 4, 5, 5, 7, 7, 8, 9]

#### 希尔排序

希尔排序也称“递减增量排序”，它对插入排序做了改进，将列表分成数个子列表，并对每一个子列表应用插入排序。  
如何切分列表是希尔排序的关键——并不是连续切分，而是使用增量i（有时称作步长）选取所有间隔为i的元素组成子列表。

In [7]:
# FIXME
def shellSort(l):
    n = len(l)
    gap = n // 2
    while gap > 0:
        for i in range(gap):
            if l[i] > l[gap+i]:
                l[i], l[gap+i] = l[gap+i], l[i]
        gap //= 2
    return l

In [8]:
l = [2,7,5,8,9,4,5,7,1]
shellSort(l)

[2, 4, 5, 7, 9, 7, 5, 8, 1]

In [18]:
# 书中示例
def shellSort2(alist):
    sublistcount = len(alist) // 2
    
    while sublistcount > 0:
        for startposition in range(sublistcount):
            gapInsertionSort(alist, startposition, sublistcount)
            
        print("After increments of size", sublistcount,
              "The list is", alist)
        
        sublistcount = sublistcount // 2

def gapInsertionSort(alist, start, gap):
    for i in range(start+gap, len(alist), gap):
        currentvalue = alist[i]
        position = i
        
        while position >= gap and \
        alist[position-gap] > currentvalue:
            alist[position] = alist[position-gap]
            position = position - gap
            
        alist[position] = currentvalue

In [19]:
l = [2,7,5,8,9,4,5,7,1]
shellSort2(l)

After increments of size 4 The list is [1, 4, 5, 7, 2, 7, 5, 8, 9]
After increments of size 2 The list is [1, 4, 2, 7, 5, 7, 5, 8, 9]
After increments of size 1 The list is [1, 2, 4, 5, 5, 7, 7, 8, 9]


#### 归并排序

归并排序，是递归算法，每次将一个列表一分为二。如果列表为空或只有一个元素，那么从定义上来说它就是有序的（基本情况）。  
如果列表不止一个元素，就将列表一分为二，并对两部分都递归调用归并排序。当两部分都有序后，就进行归并这一基本操作。  
归并是指将两个较小的有序列表归并为一个有序列表的过程。

In [4]:
def mergeSort(alist):  
    print("Splitting ", alist)
    if len(alist) > 1:
        mid = len(alist) // 2
        lefthalf = alist[:mid]
        righthalf = alist[mid:]

        mergeSort(lefthalf)
        mergeSort(righthalf)

        i = 0
        j = 0
        k = 0
        while i < len(lefthalf) and j < len(righthalf):
            if lefthalf[i] < righthalf[j]:
                alist[k] = lefthalf[i]
                i = i + 1
            else:
                alist[k] = righthalf[j]
                j = j + 1
            k = k + 1
                
        while i < len(lefthalf):
            alist[k] = lefthalf[i]
            i = i + 1
            k = k + 1

        while j < len(righthalf):
            alist[k] = righthalf[j]
            j = j + 1
            k = k + 1
    print("Merging ", alist)

In [5]:
l = [2,7,5,8,9,4,5,7,1]
mergeSort(l)

Splitting  [2, 7, 5, 8, 9, 4, 5, 7, 1]
Splitting  [2, 7, 5, 8]
Splitting  [2, 7]
Splitting  [2]
Merging  [2]
Splitting  [7]
Merging  [7]
Merging  [2, 7]
Splitting  [5, 8]
Splitting  [5]
Merging  [5]
Splitting  [8]
Merging  [8]
Merging  [5, 8]
Merging  [2, 5, 7, 8]
Splitting  [9, 4, 5, 7, 1]
Splitting  [9, 4]
Splitting  [9]
Merging  [9]
Splitting  [4]
Merging  [4]
Merging  [4, 9]
Splitting  [5, 7, 1]
Splitting  [5]
Merging  [5]
Splitting  [7, 1]
Splitting  [7]
Merging  [7]
Splitting  [1]
Merging  [1]
Merging  [1, 7]
Merging  [1, 5, 7]
Merging  [1, 4, 5, 7, 9]
Merging  [1, 2, 4, 5, 5, 7, 7, 8, 9]


#### 快速排序

和归并排序一样，快速排序也采用分治策略，但不使用额外的存储空间。不过，代价是列表可能不会被一分为二。

In [7]:
def quickSort(alist):
    quickSortHelper(alist, 0, len(alist)-1)
    
def quickSortHelper(alist, first, last):
    if first < last:
        splitpoint = partition(alist, first, last)
        quickSortHelper(alist, first, splitpoint-1)
        quickSortHelper(alist, splitpoint+1, last)
        
def partition(alist, first, last):
    pivotvalue = alist[first]
    leftmark = first + 1
    rightmark = last
    done = False
    while not done:
        while leftmark <= rightmark and \
        alist[leftmark] <= pivotvalue:
            leftmark = leftmark + 1
            
        while alist[rightmark] >= pivotvalue and \
        rightmark >= leftmark:
            rightmark = rightmark - 1
            
        if rightmark < leftmark:
            done = True
        else:
            temp = alist[leftmark]
            alist[leftmark] = alist[rightmark]
            alist[rightmark] = temp
            
    temp = alist[first]
    alist[first] = alist[rightmark]
    alist[rightmark] = temp
    return rightmark

In [9]:
l = [2,7,5,8,9,4,5,7,1]
quickSort(l)
l

[1, 2, 4, 5, 5, 7, 7, 8, 9]

## 第6章 树

### 实现

python提供了两种存储树的方法，分别为“列表之列表”、“节点与引用。”

#### 列表之列表

In [11]:
# 用列表以 [根结点, [左子树], [右子树]] 存储树结构。
# 列表函数 binaryTree
def binaryTree(r):
    return [r,[],[]]

# 插入左子树(插入后将原来的左子树作为插入节点的左子树)
def insertLeft(root, newBranch):
    t = root.pop(1)
    if len(t) > 1:
        root.insert(1, [newBranch, t, []])
    else:
        root.insert(1, [newBranch, [], []])
    return root

# 插入右子树
def insertRight(root, newBranch):
    t = root.pop(2)
    if len(t) > 1:
        root.insert(2, [newBranch, [], t])
    else:
        root.insert(2, [newBranch, [], []])
    return root

# 树的访问函数
def getRootVal(root):
    return root[0]

def setRootVal(root, newVal):
    root[0] = newVal
    
def getLeftChild(root):
    return root[1]

def getRightChild(root):
    return root[2]

In [13]:
r = binaryTree(3)
r

[3, [], []]

In [15]:
insertLeft(r,4)
r

[3, [4, [], []], []]

In [16]:
insertLeft(r,5)
r

[3, [5, [4, [], []], []], []]

**Problems: 插入时只能对根结点的左右子树进行插入，访问时也不方便。**

#### 节点与引用

面向对象编程，定义一个类，其中有根结点与左右子树的属性。

In [38]:
class Node:
    def __init__(self,value):
        self.key = value
        self.leftChild = None
        self.rightChild = None

class binaryTree:
    def __init__(self,root):
        self.root = Node(root)
        
    def insertLeft(self,newBranch):
        left = Node(newBranch)
        oldLeftTree = self.root.leftChild
        left.leftChild = oldLeftTree
        self.root.leftChild = left
        
    def insertRight(self,newBranch):
        right = Node(newBranch)
        oldRightTree = self.root.rightChild
        right.rightChild = oldRightTree
        self.root.rightChild = right
        
    def getRootVal(self):
        return self.root.key
    
    def setRootVal(self,newVal):
        self.root.key = newVal
    
    def getLeftChild(self):
        return self.root.leftChild

    def getRightChild(self):
        return self.root.rightChild
    
    def printT(self):
        queue = []
        if self.root != None:
            queue.append(self.root)
        while queue:
            temp = queue.pop(0)
            
            if temp.rightChild:
                queue.append(temp.rightChild)
            if temp.leftChild:
                queue.append(temp.leftChild)
                
            print(temp.key,end=' ')

In [39]:
t = binaryTree(3)
t.printT()

3 

In [40]:
t.insertLeft(4)
t.printT()

3 4 

In [41]:
t.insertLeft(5)
t.printT()

3 5 4 

In [42]:
t.insertRight(6); t.insertRight(7)
t.printT()

3 7 5 6 4 

In [1]:
# 书中递归定义方法
class BinaryTree:
    def __init__(self, rootObj):
        self.key = rootObj
        self.leftChild = None
        self.rightChild = None
        
    def insertLeft(self, newNode):
        if self.leftChild == None:
            self.leftChild = BinaryTree(newNode)
        else:
            t = BinaryTree(newNode)
            t.left = self.leftChild
            self.leftChild = t

    def insertRight(self, newNode):
        if self.rightChild == None:
            self.rightChild = BinaryTree(newNode)
        else:
            t = BinaryTree(newNode)
            t.right = self.rightChild
            self.rightChild = t
    
    def getRightChild(self):
        return self.rightChild
    
    def getLeftChild(self):
        return self.leftChild
    
    def setRootVal(self, obj):
        self.key = obj
        
    def getRootVal(self):
        return self.key

In [44]:
r = BinaryTree('a')
r.getRootVal()

'a'

In [45]:
print(r.getLeftChild())

None


### 二叉树的应用 - 解析树

主要有三类问题：  
*如何根据完全括号表达式构建解析树；  
如何计算解析树中的表达式；  
如何将解析树还原成最初的数学表达式。*

#### 构建解析树

构建解析树的第一步是将表达式字符串拆分成标记列表。  
需要考虑 4 种标记：左括号、右括号、运算符和操作数。  
(1) 如果当前标记是(，就为当前节点添加一个左子节点，并下沉至该子节点；  
(2) 如果当前标记在列表['+', '-', '/', '*']中，将当前节点的值设为当前标记对应的运算符；为当前节点添加一个右子节点，并下沉至该子节点；  
(3) 如果当前标记是数字，就将当前节点的值设为这个数并返回至父节点；  
(4) 如果当前标记是)，就跳到当前节点的父节点。 

In [48]:
# FIXME : 用指针存储当前节点的父节点 不可行，因为只能存储上一层的父节点。目前想到的只有类似栈的存法
def generateParseTree(string):
    r = BinaryTree('')
    cur = r; par = r
    for s in string:
        if s == '(':
            cur.insertLeft('') 
            par = cur
            cur = cur.getLeftChild()
        elif s not in '+-*/)':
            cur.setRootVal(eval(i)) # eval('1') => 1
            

In [17]:
from pythonds.basic import Stack
from pythonds.trees import BinaryTree

def buildParseTree(fpexp):
    fplist = fpexp.split()
    # 用栈存储当前节点的父节点，下沉时先入栈，返回时出栈
    pStack = Stack()
    eTree = BinaryTree('')
    pStack.push(eTree)
    currentTree = eTree
    
    for i in fplist:
        if i == '(':
            currentTree.insertLeft('')
            pStack.push(currentTree)
            currentTree = currentTree.getLeftChild()
        elif i not in '+-*/)':
            currentTree.setRootVal(eval(i))
            parent = pStack.pop()
            currentTree = parent
        elif i in '+-*/':
            currentTree.setRootVal(i)
            currentTree.insertRight('')
            pStack.push(currentTree)
            currentTree = currentTree.getRightChild()
        elif i == ')':
            currentTree = pStack.pop()
        else:
            raise ValueError("Unknown Operator: " + i)
    
    return eTree

In [18]:
eTree = buildParseTree('(3+(4*5))')

#### 计算解析树

In [5]:
def evaluate(eTree):
    if eTree.leftChild == None and eTree.rightChild == None:
        return eTree.key
    else:
        if eTree.key == '+':
            return evaluate(eTree.leftChild) + evaluate(eTree.rightChild)
        elif eTree.key == '-':
            return evaluate(eTree.leftChild) - evaluate(eTree.rightChild)
        elif eTree.key == '*':
            return evaluate(eTree.leftChild) * evaluate(eTree.rightChild)
        else:
            return evaluate(eTree.leftChild) / evaluate(eTree.rightChild)

In [6]:
evaluate(eTree)

23

**思考：如何优化else部分的判断？**

In [9]:
# 书中示例，利用字典存储运算符与操作函数的对应关系以实现优化
import operator

def evaluate2(parseTree):
    opers = {'+':operator.add, '-':operator.sub,'*':operator.mul, '/':operator.truediv}
    leftC = parseTree.getLeftChild()
    rightC = parseTree.getRightChild()
    
    if leftC and rightC:
        fn = opers[parseTree.getRootVal()]
        return fn(evaluate(leftC), evaluate(rightC))
    else:
        return parseTree.getRootVal()

In [10]:
evaluate2(eTree)

23

### 树的遍历

对所有节点的访问称为“遍历”，共有 3 种遍历方式，分别为前序遍历、中序遍历和后序遍历。  
**前序遍历：** 先访问根节点，然后递归地前序遍历左子树，最后递归地前序遍历右子树。  
**中序遍历：** 先递归地中序遍历左子树，然后访问根节点，最后递归地中序遍历右子树。  
**后序遍历：** 先递归地后序遍历右子树，然后递归地后序遍历左子树，最后访问根节点。

In [11]:
# 前序遍历（递归写法）
def preOrder(tree):
    if tree:
        print(tree.key)
        preOrder(tree.leftChild())
        preOrder(tree.rightChild())

In [13]:
# 后序遍历（递归写法）
def postOrder(tree):
    if tree:
        preOrder(tree.leftChild())
        preOrder(tree.rightChild())
        print(tree.key)

**如前所示，后序遍历可以用于计算解析树**

In [14]:
# 中序遍历（递归写法）
def inOrder(tree):
    if tree:
        preOrder(tree.leftChild())
        print(tree.key)
        preOrder(tree.rightChild())

通过中序遍历解析树，可以还原不带括号的表达式。接下来修改中序遍历算法，以得到完全括号表达式。  
唯一要做的修改是：在递归调用左子树前打印一个左括号，在递归调用右子树后打印一个右括号。

In [15]:
def printexp(tree):
    sVal = ""
    if tree:
        sVal = '(' + printexp(tree.getLeftChild())
        sVal = sVal + str(tree.getRootVal())
        sVal = sVal + printexp(tree.getRightChild()) + ')'
    return sVal

In [19]:
printexp(eTree)

'(23)'

### 利用二叉堆实现优先级队列

### 二叉搜索树

### 平衡二叉搜索树