In [None]:
## 启发式搜索

- 贪婪最佳优先搜索 Greedy best-first search
  - 扩展最接近目标的节点的搜索算法
    - 由启发式函数heuristic function h(n)来估计距离终点的接近程度
    - 曼哈顿距离 Manhattan distance：$|x_1-x_2|+|y_1-y_2|$
  - 缺点：局部最优，整体不一定最优
- 改进：A*搜索
  - 扩展g(n)+h(n)值最小的节点的搜索算法
  - g(n) = 起始节点到当前节点已经产生的代价/成本
  - h(n) =当前节点到目标节点的预估代价/成本

## 对抗搜索 Adversarial Search

- 假设对手与智能体目标相反
- S0:初始状态
- Player(s): 返回状态s下能够行动的选手
- Actions(s): 返回状态s的所有合法行动
- Result(s, a): 返回状态s下执行行动a后到达的新状态
- Terminal(s): 确认状态s是否为结束状态(Terminal state)
- Utility(s): 结束状态(terminal state) s的最终数值表示


In [None]:
class Node:
    """记录营销策略"""
    def __init__(self, strategy):
        self.strategy = strategy
        self.left = None
        self.right = None

class Graph:
    """搜索"""
    def __init__(self, start):
        self.root = Node(start)
        self.paths = []

    def addNode(self, strategy, parent_strategy, is_left=True):
        """搜索过程中创建节点"""
        parent_node = self.findNode(self.root, parent_strategy)
        if not parent_node:
            raise ValueError(f"Parent strategy {parent_strategy} not found")
        
        new_node = Node(strategy)
        if is_left:
            parent_node.left = new_node
        else:
            parent_node.right = new_node

    def findNode(self, node, strategy):
        """在树中查找节点"""
        if not node:
            return None
        if node.strategy == strategy:
            return node
        left_result = self.findNode(node.left, strategy)
        if left_result:
            return left_result
        return self.findNode(node.right, strategy)

    def solve(self):
        """搜索所有的解"""
        self.dfs(self.root, [])
        return self.paths

    def dfs(self, node, path):
        """深度优先搜索"""
        if not node:
            return
        path.append(node.strategy)
        if not node.left and not node.right:
            self.paths.append(path.copy())
        self.dfs(node.left, path)
        self.dfs(node.right, path)
        path.pop()

# 储存营销策略图的信息
advertising = {
    'Email Campaign': [('Discount offer', True), ('Content Marketing', True)],  # Corrected key name
    'Discount offer': [('Follow-up email', False), ('Phone call', False)],
    'Content Marketing': [('Social media Ads', False)]
}

# 创建图并添加节点
g = Graph('Email Campaign')
g.addNode('Email Campaign', 'Email Campaign')  # Add root node explicitly
for parent, children in advertising.items():
    for child, is_left in children:
        g.addNode(child, parent, is_left)

# 解决并输出所有路径
solutions = g.solve()
print(solutions)

In [8]:
class Bid:
    def __init__(self, payoff):
        """记录需要的信息"""
        self.payoff = payoff

    def actions(self, previous_action=None):
        """每次出价可以竞标的价格"""
        if previous_action is None:
            return ['H', 'M', 'L']
        if previous_action == 'H':
            return ['H']
        if previous_action == 'M':
            return ['H', 'M']
        if previous_action == 'L':
            return ['H', 'M', 'L']

    def A_value(self, round, A_action, B_action):
        """让 A 收益最大化策略时 A，B 的收益"""
        return self.payoff[(A_action, B_action)][0]

    def B_value(self, round, A_action, B_action):
        """让 B 收益最大化策略时 A，B 的收益"""
        return self.payoff[(A_action, B_action)][1]

    def minimax(self, round, isA, A_action, B_action):
        """返回每轮出价价格"""
        if round == 5:
            return self.payoff[(A_action, B_action)][0 if isA else 1]

        if isA:
            max_eval = float('-inf')
            best_action = None
            for action in self.actions(A_action):
                eval = self.minimax(round + 1, False, action, B_action)
                if eval > max_eval:
                    max_eval = eval
                    best_action = action
            return best_action if round == 1 else max_eval
        else:
            min_eval = float('inf')
            best_action = None
            for action in self.actions(B_action):
                eval = self.minimax(round + 1, True, A_action, action)
                if eval < min_eval:
                    min_eval = eval
                    best_action = action
            return best_action if round == 2 else min_eval

# 竞价收益
gain = {
    ('H', 'H'): (80000, 20000),
    ('H', 'M'): (70000, 30000),
    ('H', 'L'): (60000, 40000),
    ('M', 'H'): (30000, 70000),
    ('M', 'M'): (50000, 50000),
    ('M', 'L'): (40000, 60000),
    ('L', 'H'): (20000, 80000),
    ('L', 'M'): (30000, 70000),
    ('L', 'L'): (50000, 50000)
}

# 每次最优出价策略
b = Bid(gain)
print(b.minimax(1, True, None, None))  # A: L
print(b.minimax(2, False, 'L', None))  # B: L
print(b.minimax(3, True, 'L', 'L'))    # A: H
print(b.minimax(4, False, 'H', 'L'))   # B: L

TypeError: '>' not supported between instances of 'str' and 'float'

In [2]:
class Bid:
    def __init__(self, payoff):
        """记录需要的信息"""
        self.payoff = payoff

    def actions(self, previous_action=None):
        """每次出价可以竞标的价格"""
        if previous_action is None:
            return ['H', 'M', 'L']
        if previous_action == 'H':
            return ['H']
        if previous_action == 'M':
            return ['H', 'M']
        if previous_action == 'L':
            return ['H', 'M', 'L']

    def A_value(self, round, A_action, B_action):
        """让 A 收益最大化策略时 A，B 的收益"""
        return self.payoff[(A_action, B_action)][0]

    def B_value(self, round, A_action, B_action):
        """让 B 收益最大化策略时 A，B 的收益"""
        return self.payoff[(A_action, B_action)][1]

    def minimax(self, round, isA, A_action, B_action, return_action=False):
        """返回每轮出价价格"""
        if round == 4:
            return self.payoff[(A_action, B_action)][0 if isA else 1]

        if isA:
            max_eval = float('-inf')
            best_action = None
            for action in self.actions(A_action):
                eval = self.minimax(round + 1, False, action, B_action)
                if eval > max_eval:
                    max_eval = eval
                    best_action = action
            return best_action if return_action else max_eval
        else:
            min_eval = float('inf')
            best_action = None
            for action in self.actions(B_action):
                eval = self.minimax(round + 1, True, A_action, action)
                if eval < min_eval:
                    min_eval = eval
                    best_action = action
            return best_action if return_action else min_eval

# 竞价收益
gain = {
    ('H', 'H'): (80000, 20000),
    ('H', 'M'): (70000, 30000),
    ('H', 'L'): (60000, 40000),
    ('M', 'H'): (30000, 70000),
    ('M', 'M'): (50000, 50000),
    ('M', 'L'): (40000, 60000),
    ('L', 'H'): (20000, 80000),
    ('L', 'M'): (30000, 70000),
    ('L', 'L'): (50000, 50000)
}

# 每次最优出价策略
b = Bid(gain)
print(b.minimax(1, True, None, None, True))  # A: L
print(b.minimax(2, False, 'L', None, True))  # B: L
print(b.minimax(3, True, 'L', 'L', True))    # A: H
print(b.minimax(4, False, 'H', 'L', True))   # B: L

L
L
M
40000


In [3]:
import math
import numpy as np


class Bid():
    def __init__(self, payoff):
        """记录需要的信息"""
        self.payoff=gain
        self.solution_A=[]
        self.solution_B=[]

    def actions(self,previous_action=None):
        """每次出价可以竞标的价格"""
        action=['L','M','H']

        if previous_action == 'M':
            action.remove('L')
        elif previous_action == 'H':
            action=['H']

        return action

    def A_value(self,round,A_action,B_action):
        """让A收益最大化策略时A，B的收益"""
        if round==5:
            return self.payoff[(A_action,B_action)]

        va = -np.inf
        vb = -np.inf
        A_action = self.actions(A_action)
        for a in A_action:
            temp=self.B_value(round+1,a,B_action)
            if temp[0]>va:
                va=temp[0]
                vb=temp[1]
        return va,vb # 最重要的区别，A和B的Value是两个数字，而非一个数字，所以需要分别记录两者的收益，以及两者都是最大化收益

    def B_value(self,round,A_action,B_action):
        """让B收益最大化策略时A，B的收益"""

        if round==5:
            return self.payoff[(A_action,B_action)]

        va = -np.inf
        vb = -np.inf
        B_action = self.actions(B_action)
        for b in B_action:
            temp=self.A_value(round+1,A_action,b)
            if temp[1] > vb:
                va = temp[0]
                vb = temp[1]
        return va, vb

    def minimax(self,round,isA,A_action,B_action):
        """返回每轮出价策略"""

        if round == 5:
            return

        if isA:
            A_action = self.actions(A_action)
            v = -np.inf
            for a in A_action:
                temp = self.B_value(round+1,a,B_action)[0]
                if temp > v:
                    v = temp
                    solution = a
            return solution

        else:

            B_action = self.actions(B_action)
            v = -np.inf
            for b in B_action:
                temp = self.A_value(round+1,A_action,b)[1]
                if temp > v:
                    v = temp
                    solution = b
            return solution
#竞价收益
gain={
    ('H','H'):[8,2],
    ('H','M'):[7,3],
    ('H','L'):[6,4],
    ('M','H'):[3,7],
    ('M','M'):[5,5],
    ('M','L'):[4,6],
    ('L','H'):[2,8],
    ('L','M'):[3,7],
    ('L','L'):[5,5]
}

#每次最优出价策略
b=Bid(gain)
b.minimax(1,True,None,None) #A:L
b.minimax(2,False,'L',None)#B:L
b.minimax(3,True,'L','L') #A:H
b.minimax(4,False,'H','L')#B:L



'L'