# Tuning Model

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Regularisation strengths passed to the solver cells further down
l0, l2 = 0.01, 0

# Read the synthetic design matrix (x) and response (y) from their CSV files
x_file = 'synthetic_data/batch_1/x_gen_syn_n3_p10_corr0.5_snr5_seed2022_0.csv'
y_file = 'synthetic_data/batch_1/y_gen_syn_n3_p10_corr0.5_snr5_seed2022_0.csv'
x, y = (np.loadtxt(csv_path, delimiter=",") for csv_path in (x_file, y_file))

from sklearn.linear_model import LinearRegression

# Ordinary least squares gives a first estimate of the coefficients (betas);
# fit() returns the fitted estimator, so it can be chained onto the constructor
linear_model = LinearRegression().fit(x, y)

# Largest coefficient magnitude among the fitted betas
max_abs_beta = np.abs(linear_model.coef_).max()

# M is taken as that magnitude scaled up by a factor of 1.5
optimal_M = max_abs_beta * 1.5

print(f"Optimal M: {optimal_M}")

Optimal M: 1.3422286033108315


# Retro Branching

In [2]:
import networkx as nx
import matplotlib.pyplot as plt

### Rough Visualization of Tree Structure

def hierarchy_pos(G, root=None, width=1., vert_gap = 0.1, vert_loc = 0, xcenter = 0.5):
    """Compute top-down drawing positions for the tree ``G``.

    Args:
        G: networkx graph that is a tree.
        root: node to place at the top. When omitted, a root is inferred:
            the first node in topological order for a directed graph,
            otherwise the first node of ``G.nodes``.
        width: horizontal space shared out among the root's subtree.
        vert_gap: vertical distance between consecutive levels.
        vert_loc: y coordinate of the root.
        xcenter: x coordinate of the root.

    Returns:
        dict mapping each reachable node to its ``(x, y)`` position.

    Raises:
        ValueError: if ``root`` is omitted and ``G`` has no nodes.
    """
    if root is None:
        # The declared default used to be forwarded straight to
        # G.neighbors(None), which always raises; pick a sensible root instead.
        if G.number_of_nodes() == 0:
            raise ValueError("cannot infer a root for an empty graph")
        if isinstance(G, nx.DiGraph):
            root = next(iter(nx.topological_sort(G)))
        else:
            root = next(iter(G.nodes))

    return _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter)

def _hierarchy_pos(G, root, width=2., vert_gap = 0.1, vert_loc = 0, xcenter = 0.5, pos = None, parent = None, parsed = None):
    """Recursively assign (x, y) positions to ``root`` and everything below it.

    Each node is placed at ``(xcenter, vert_loc)``. Its children split the
    available ``width`` into equal slots, are centred inside their slot, and
    sit ``vert_gap`` lower than their parent.

    Args:
        G: networkx graph being laid out.
        root: node at the top of the subtree handled by this call.
        width: horizontal space available to this subtree.
        vert_gap: vertical distance between a node and its children.
        vert_loc: y coordinate for ``root``.
        xcenter: x coordinate for ``root``.
        pos: dict of positions accumulated so far; created on the first call.
        parent: node this call was reached from (``None`` for the top call).
        parsed: never read; accepted and forwarded only so existing callers
            that pass it keep working. Defaults to ``None`` rather than a
            shared mutable list.

    Returns:
        The ``pos`` dict, updated in place with this subtree's positions.
    """
    if pos is None:
        pos = {root: (xcenter, vert_loc)}
    else:
        pos[root] = (xcenter, vert_loc)

    children = list(G.neighbors(root))
    # In an undirected graph the parent shows up as a neighbour; drop it so
    # the recursion only walks downwards.
    if not isinstance(G, nx.DiGraph) and parent is not None:
        children.remove(parent)

    if len(children) != 0:
        dx = width / len(children)
        # Start half a slot left of the subtree's left edge so that the first
        # "+= dx" lands on the centre of the first slot.
        nextx = xcenter - width/2 - dx/2
        for child in children:
            nextx += dx
            pos = _hierarchy_pos(G, child, width = dx, vert_gap = vert_gap, vert_loc = vert_loc-vert_gap, xcenter=nextx, pos=pos, parent=root, parsed=parsed)

    return pos

def visualize_tree(root):
    """Draw the binary tree hanging off ``root`` with networkx/matplotlib.

    Nodes are expected to expose ``node_key``, ``left`` and ``right``; each
    ``node_key`` becomes a vertex label and each parent/child link an edge.

    Args:
        root: top node of the tree. A falsy value draws nothing.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    if not root:
        return

    tree_graph = nx.DiGraph()
    # Register the root explicitly: a tree with a childless root adds no
    # edges, which would leave the graph empty and make the layout lookup of
    # the root fail.
    tree_graph.add_node(root.node_key)

    def add_nodes_edges(node):
        """Add an edge for each existing child of ``node``, then recurse into it."""
        if node.left:
            tree_graph.add_edge(node.node_key, node.left.node_key)
            add_nodes_edges(node.left)
        if node.right:
            tree_graph.add_edge(node.node_key, node.right.node_key)
            add_nodes_edges(node.right)

    add_nodes_edges(root)

    pos = hierarchy_pos(tree_graph, root.node_key)
    nx.draw(tree_graph, pos=pos, with_labels=True, node_size=100, node_color="skyblue", font_size=6, font_weight='bold')
    plt.title("Binary Tree Visualization")
    plt.show()

In [3]:
import newTree
# Solve using Branch and Bound
# Wrap the data and penalties in a newTree.Problem, build a tree for it, and
# run its branch_and_bound() method.
# NOTE(review): m is hard-coded to 1.5 here although the first cell computes
# optimal_M from the data -- confirm which value is intended.
# NOTE(review): the saved output of this cell is
# "TypeError: '<' not supported between instances of 'int' and 'NoneType'",
# so the cell did not finish on its last run -- the cause is not visible here.
newProblem = newTree.Problem(x,y,l0,l2, m = 1.5)
newTree2 = newTree.tree(newProblem)
newTree2.branch_and_bound()
# i, reward = tree.branch_and_bound(x,y,l0,l2)

# def count_nodes(node):
#     """Count the number of nodes in the tree."""
#     if not node:
#         return 0
#     return 1 + count_nodes(node.left) + count_nodes(node.right)

# print(f'Number of iterations: {i}')
# print(f'Number of Nodes in Tree: {count_nodes(test_tree.root)}')
# pairs = tree.get_state_pairs(test_tree.root)
# print(f'Number of Edges/State Pairs: {len(pairs)}')



OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


TypeError: '<' not supported between instances of 'int' and 'NoneType'

In [None]:
import random
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import deque, namedtuple
import tree 
from tree import get_state_pairs
from settings import MAX_ITERS, EPSILON_START, \
     EPSILON_END, EPSILON_DECAY, BATCH_SIZE, INT_EPS, GAMMA, TARGET_UPDATE


# One replay-memory record: the state before a step, the state after it,
# and the reward attached to that step
Transition = namedtuple(
    'Transition',
    ['prev_state', 'state', 'reward'],
)

# Deep Q Network
class DQN(nn.Module):
    """Small fully connected network mapping a state vector to one scalar value.

    Layout: ``input_size -> 32 -> 16 -> 1`` with ReLU after the two hidden
    layers and a linear (unactivated) output.
    """

    def __init__(self, input_size):
        """Create the three linear layers.

        Args:
            input_size: number of features in each input state vector.
        """
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        """Return the value estimate for each row of ``x``.

        Args:
            x: float tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the output layer: a ReLU here would clamp every
        # estimate to >= 0 (so negative targets could never be fitted) and
        # would pass no gradient whenever the pre-activation is negative.
        return self.fc3(hidden)

# Memory for our agent
class Memory(object):
    """Fixed-capacity replay memory holding ``Transition`` records."""

    def __init__(self, capacity):
        """Create an empty memory that keeps at most ``capacity`` records."""
        # A deque with maxlen discards its oldest item when a new one is
        # appended to a full buffer.
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        """Build a ``Transition`` from ``args`` and append it to the memory."""
        record = Transition(*args)
        self.memory.append(record)

    def sample(self, batch_size):
        """Return ``batch_size`` stored records drawn without replacement."""
        drawn = random.sample(self.memory, batch_size)
        return drawn

    def __len__(self):
        """Number of records currently stored."""
        stored = self.memory
        return len(stored)

# Agent that performs, remembers and learns actions
class Agent():
    """Epsilon-greedy DQN agent that chooses branching decisions and learns from replay.

    The agent keeps a policy network (used to score candidate decisions and
    trained in ``replay_memory``), a target network (used to build training
    targets and periodically overwritten with the policy weights), and a
    replay memory of ``Transition`` records.
    """

    def __init__(self, input_size=32, memory_capacity=10000):
        """Set up networks, optimizer, replay memory and exploration schedule.

        Args:
            input_size: length of the state vectors fed to both networks.
                Defaults to 32, the value that used to be hard-coded.
            memory_capacity: maximum number of transitions kept in replay
                memory. Defaults to 10000, the value that used to be
                hard-coded.
        """
        self.policy_net = DQN(input_size)
        self.target_net = DQN(input_size)
        # Start the target network as a copy of the policy network so the two
        # agree even if a replay step runs before the first scheduled sync in
        # retrobranch().
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.optimizer = optim.RMSprop(self.policy_net.parameters())
        self.memory = Memory(memory_capacity)
        self.episodes_played = 0
        self.epsilon = EPSILON_START
        self.epsilon_decay = EPSILON_DECAY
        self.epsilon_end = EPSILON_END

    def _q_value(self, state_vector):
        """Return the policy network's estimate for one state vector as a float."""
        state = torch.tensor(np.array([state_vector]), dtype=torch.float)
        # Scoring candidates is pure inference; skip autograd bookkeeping.
        with torch.no_grad():
            return self.policy_net(state).item()

    def retrobranch(self, tree):
        """Fill in missing node states, store the tree's state pairs, sync the target net.

        Note that the parameter name ``tree`` shadows the imported ``tree``
        module inside this method.

        Args:
            tree: solved search tree exposing ``all_nodes``, ``root`` and
                ``get_state(node_key, j)``.

        Returns:
            Sum of the rewards of all state pairs produced by
            ``get_state_pairs(tree.root)``.
        """
        # Complete Tree -- Get (Non-Optimal) States for Leaf Nodes
        for node in tree.all_nodes.values():
            if node.state is not None:
                continue

            support = node.support
            if len(support) == 0:
                best_j = 0
            # Select an action according to an epsilon greedy approach
            elif random.random() < self.epsilon:
                # Explore: pick the support entry whose z value is furthest
                # from both 0 and 1.
                z = node.z
                diff = [min(1 - z[i], z[i]) for i in range(len(support))]
                best_j = support[np.argmax(diff)]
            else:
                # Exploit: pick the support entry the policy network scores highest.
                best_val = -math.inf
                best_j = 0
                for j in support:
                    val = self._q_value(tree.get_state(node.node_key, j))
                    if val > best_val:
                        best_val = val
                        best_j = j

            node.state = tree.get_state(node.node_key, best_j)

        # Set rewards
        total_reward = 0

        # Call tree function to create all state to state pairs
        state_pairs = get_state_pairs(tree.root)
        for prev, curr, r in state_pairs:
            total_reward += r

            # Add state pairs and reward to memory
            self.memory.push(torch.tensor(np.array([prev]), dtype=torch.float),
                             torch.tensor(np.array([curr]), dtype=torch.float),
                             torch.tensor([r], dtype=torch.float))

        # Update target network
        if self.episodes_played % TARGET_UPDATE == 0:
            self.target_net.load_state_dict(self.policy_net.state_dict())

        return total_reward

    def select_action(self, T):
        """Choose the next (node, variable) to branch on, epsilon-greedily.

        Args:
            T: search tree exposing ``max_frac_branch()``, ``active_nodes``
                and ``get_state(node_key, j)``.

        Returns:
            Tuple ``(node_key, j)``. On the greedy path this is ``(None, 0)``
            when no candidate passes the ``INT_EPS`` filter.
        """
        # Select an action according to an epsilon greedy approach
        if random.random() < self.epsilon:
            # max fraction branching
            return T.max_frac_branch()

        # calculate estimated value for all nodes
        best_val = -math.inf
        best_node_key = None
        best_j = 0

        for node_key in T.active_nodes:
            node = T.active_nodes[node_key]
            for i, j in enumerate(node.support):
                # Skip entries whose z value is within INT_EPS of 0 or 1.
                if (node.z[i] < INT_EPS) or (node.z[i] > 1 - INT_EPS):
                    continue
                # Agent estimates using policy network
                val = self._q_value(T.get_state(node_key, j))
                if val > best_val:
                    best_val = val
                    best_node_key = node_key
                    best_j = j

        return (best_node_key, best_j)

    def replay_memory(self):
        """Run one optimisation step on a random batch from replay memory.

        Does nothing until at least ``BATCH_SIZE`` transitions are stored.
        Each executed step also decays ``epsilon`` by ``epsilon_decay`` while
        it is above ``epsilon_end``.
        """
        # Only Replay Memory if enough entries in Memory
        if len(self.memory) < BATCH_SIZE:
            return

        # Sample from our memory
        transitions = self.memory.sample(BATCH_SIZE)
        batch = Transition(*zip(*transitions))

        # Concatenate our tensors for previous states
        prev_state_batch = torch.cat(batch.prev_state)
        state_batch = torch.cat(batch.state)
        reward_batch = torch.cat(batch.reward)

        # Predict Q-values for the previous states
        pred_q_values = self.policy_net(prev_state_batch)
        pred_q_values = pred_q_values.squeeze(1) # Match shape of targets

        # Compute expected Q-values based on next states and rewards
        with torch.no_grad():
            max_next_q_values = torch.flatten(self.target_net(state_batch))
            targets = reward_batch + GAMMA * max_next_q_values

        # Compute loss
        loss_f = nn.MSELoss()
        loss = loss_f(pred_q_values, targets)

        # Optimization
        self.optimizer.zero_grad()
        loss.backward()

        # Clamp every gradient element into [-1, 1] before the update.
        nn.utils.clip_grad_value_(self.policy_net.parameters(), 1)

        # Update Parameters
        self.optimizer.step()

        if self.epsilon > self.epsilon_end:
            self.epsilon *= self.epsilon_decay

def RL_solve(agent, x, y, l0, l2):
    """Solve one instance, letting ``agent`` pick every branching decision.

    Args:
        agent: object providing ``select_action``, ``replay_memory``,
            ``retrobranch`` and an ``episodes_played`` counter.
        x, y, l0, l2: forwarded unchanged to ``tree.tree``.

    Returns:
        Tuple ``(iterations, total_reward, len(best_beta))`` where
        ``total_reward`` is whatever ``agent.retrobranch`` returns and
        ``best_beta`` is read from the finished tree.
    """
    search_tree = tree.tree(x, y, l0, l2)
    done = search_tree.start_root(None)
    steps = 0

    # The equality test against False is kept as-is: only a value equal to
    # False keeps the loop going. The loop also stops at MAX_ITERS steps.
    while (done == False) and (steps < MAX_ITERS):
        # Ask the agent where to branch, then apply that decision; the two
        # gap values returned by step() are not used here
        branch_node, branch_var = agent.select_action(search_tree)
        done, _, _ = search_tree.step(branch_node, branch_var)

        # One learning step from replay memory per tree step
        agent.replay_memory()

        steps += 1

    # Hand the finished tree to the agent; it returns the episode's total reward
    episode_reward = agent.retrobranch(search_tree)

    # Count this episode as played
    agent.episodes_played += 1

    return (steps, episode_reward, len(search_tree.best_beta))





OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [None]:
# Create a fresh agent and let it solve a single episode
agent = Agent()
iters, tot_reward, nnz = RL_solve(agent, x, y, l0, l2)

# Episode summary, printed one line per entry
episode_report = (
    f'Iterations This Episode: {iters}',
    f'Total Reward this Episode: {tot_reward}',
    f'Number of Items in Memory: {len(agent.memory.memory)}',
    "-----------------------------------------",
    f'Episodes Played: {agent.episodes_played}',
    f'Epsilon: {round(agent.epsilon,4)}',
)
print("\n".join(episode_report))

In [None]:
# nnz is the third value returned by RL_solve in the previous cell,
# i.e. len(T.best_beta) of the tree solved there.
print(f'Number of Non-Zero Coeffs: {nnz}')

In [None]:
import matplotlib.pyplot as plt

# Initialize New Agent
# Agent and RL_solve are the definitions from the earlier cell of this
# notebook. The previous `model.Agent` / `model.RL_solve` spelling relied on
# a `model` name that is never imported here, so it fails on a fresh kernel.
agent = Agent()

total_rewards = []
iterations_per_episode = []
epsilons = []

# Play through 100 episodes
for episode in range(100):
    # RL_solve returns (iterations, total reward, len(best_beta)); the third
    # value is a count, not a tree, and is not used in the plots below.
    iters, tot_reward, episode_nnz = RL_solve(agent, x, y, l0, l2)
    total_rewards.append(tot_reward)
    iterations_per_episode.append(iters)
    epsilons.append(agent.epsilon)
    if episode % 10 == 0:
        print(f"Episode {episode} - Total Reward: {tot_reward}, Iterations: {iters}")

# Plotting the results: one row with one panel per tracked series
fig, axes = plt.subplots(1, 3, figsize=(18, 4))
panels = [
    (total_rewards, 'Total Rewards per Episode', 'Total Reward'),
    (iterations_per_episode, 'Iterations per Episode', 'Iterations'),
    (epsilons, 'Epsilon per Episode', 'Epsilon'),
]
for ax, (series, title, ylabel) in zip(axes, panels):
    ax.plot(series)
    ax.set_title(title)
    ax.set_xlabel('Episode')
    ax.set_ylabel(ylabel)

fig.tight_layout()
plt.show()