# PoC: MARL for Infrastructure Migration Planning

## Introduction

This notebook presents a proof-of-concept for using multi-agent reinforcement learning (MARL) to plan infrastructure migrations for app modernization. We simulate a scenario in which multiple agents collaborate to plan the migration of an application's components to a new infrastructure.

## Setup

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(42)
np.random.seed(42)

class MigrationAgent(nn.Module):
    """Policy network mapping a component's features to resource probabilities.

    The network is a three-layer perceptron: two ReLU-activated hidden layers
    of width ``hidden_dim`` followed by a linear layer whose outputs are
    normalised with a softmax over the last dimension.

    Args:
        input_dim: Size of the feature vector fed to the first layer.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of entries in the returned probability vector.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in fc1 -> fc2 -> fc3 order so that seeded
        # parameter initialisation stays reproducible.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.fc3 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return a probability distribution over the last dimension of ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        logits = self.fc3(hidden)
        return F.softmax(logits, dim=-1)

class InfrastructureMigrationEnvironment:
    """Single-step toy environment for assigning components to resources.

    Every component is described by a random feature vector of length
    ``num_resources`` and has one target resource index. Both are re-sampled
    on each ``reset``; the target is drawn independently of the features.

    Args:
        num_components: Number of application components in the problem.
        num_resources: Number of candidate resources (and feature length).
        num_agents: Number of agents; sets the length of the reward vector.
    """

    def __init__(self, num_components, num_resources, num_agents):
        self.num_components = num_components
        self.num_resources = num_resources
        self.num_agents = num_agents
        # Sample an initial problem so the instance is usable immediately.
        self.reset()

    def reset(self):
        """Sample new features and targets and return a copy of the features."""
        feature_shape = (self.num_components, self.num_resources)
        # Features are drawn before targets; keep this order for seeded runs.
        self.components = torch.rand(feature_shape)
        self.optimal_allocation = torch.randint(
            low=0, high=self.num_resources, size=(self.num_components,)
        )
        return self.components.clone()

    def step(self, actions):
        """Score one action per agent against the target allocation.

        Agent ``i`` is matched with component ``i % num_components``. Its
        reward is +1 when the argmax of its action equals that component's
        target resource, and -1 otherwise.

        Returns:
            A tuple ``(components_copy, rewards, done)`` where ``done`` is
            always ``True`` because an episode lasts a single step.
        """
        rewards = torch.zeros(self.num_agents)
        for agent_idx, action in enumerate(actions):
            target = self.optimal_allocation[agent_idx % self.num_components]
            chose_target = bool(torch.argmax(action) == target)
            rewards[agent_idx] = 1 if chose_target else -1

        # In this simplified version, the episode ends after one step.
        done = True
        return self.components.clone(), rewards, done

# Hyperparameters shared by the environment, the agents and the later cells.
num_components = 10  # components per sampled migration problem
num_resources = 4    # candidate resources; also each component's feature length
num_agents = 5       # fewer agents than components, so only components 0-4 get an agent
hidden_dim = 64      # width of both hidden layers in every agent network

# The environment is built first: its constructor calls reset(), which draws
# from the seeded torch RNG, and the agents' layer initialisation draws from
# it afterwards. Swapping the two lines would change seeded results.
env = InfrastructureMigrationEnvironment(num_components, num_resources, num_agents)
# One independent policy network per agent; num_resources is both the input
# size (a component's feature vector) and the output size (one score per resource).
agents = [MigrationAgent(num_resources, hidden_dim, num_resources) for _ in range(num_agents)]

## Training Loop

In [None]:
def train_agents(num_episodes):
    """Train every agent with a single-step REINFORCE update per episode.

    For each episode the environment is reset, every agent samples a resource
    from its own policy for the component it is paired with
    (``i % num_components``), and the environment scores the choices. Each
    agent is then updated independently with the policy-gradient loss
    ``-log_prob(sampled_action) * reward``.

    Uses the module-level ``env``, ``agents`` and ``num_components``.

    Args:
        num_episodes: Number of single-step episodes to run.

    Returns:
        A list with the summed reward of all agents for each episode.
    """
    optimizers = [optim.Adam(agent.parameters(), lr=0.001) for agent in agents]
    episode_rewards = []

    for episode in range(num_episodes):
        state = env.reset()

        actions = []
        log_probs = []
        for i, agent in enumerate(agents):
            probs = agent(state[i % num_components])
            policy = torch.distributions.Categorical(probs=probs)
            choice = policy.sample()
            # Keep the log-probability: it is the only term that connects the
            # loss to the agent's parameters.
            log_probs.append(policy.log_prob(choice))
            # env.step() reads the argmax of each action, so hand it a one-hot
            # vector whose argmax is the sampled resource.
            one_hot = torch.zeros(probs.shape[-1])
            one_hot[choice] = 1.0
            actions.append(one_hot)

        _, rewards, _ = env.step(actions)

        episode_reward = rewards.sum().item()
        episode_rewards.append(episode_reward)

        # The rewards tensor carries no gradient, so calling backward() on it
        # directly fails; it only scales the log-probability term.
        for optimizer, log_prob, reward in zip(optimizers, log_probs, rewards):
            loss = -log_prob * reward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if episode % 100 == 0:
            print(f"Episode {episode}, Avg Reward: {np.mean(episode_rewards[-100:]):.2f}")

    return episode_rewards

# Run training; `rewards` holds one summed reward (over all agents) per episode.
num_episodes = 5000
rewards = train_agents(num_episodes)

# Plot the raw per-episode totals (no smoothing); each agent contributes
# +1 or -1 to an episode's total.
plt.plot(rewards)
plt.title("Learning Curve")
plt.xlabel("Episode")
plt.ylabel("Total Reward")
plt.show()

## Evaluation

In [None]:
def evaluate_agents():
    """Run one evaluation episode on a freshly sampled problem and print it.

    Each agent is queried once for the component it is paired with
    (``i % num_components``); the environment scores the argmax of every
    agent's output. Prints the target allocation, the agents' greedy
    decisions and the per-agent rewards.

    Uses the module-level ``env``, ``agents`` and ``num_components``.
    """
    state = env.reset()
    # Inference only: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        actions = [agent(state[i % num_components]) for i, agent in enumerate(agents)]
    _, rewards, _ = env.step(actions)

    print("Optimal Allocation:", env.optimal_allocation)
    print("Agent Decisions:", [torch.argmax(a).item() for a in actions])
    print("Rewards:", rewards)

# Run a single evaluation episode and print the targets, decisions and rewards.
evaluate_agents()

## Conclusion

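This PoC demonstrates how MARL can be applied to infrastructure migration planning for app modernization. Each agent is trained to allocate application components to appropriate resources in the new infrastructure. Note that the environment is simplified: every episode lasts a single step, and the target allocation is re-sampled at random on each reset, so the learning curve is best read as a smoke test of the training pipeline rather than as evidence of a learned policy. In a real-world scenario, this could be extended to handle more complex infrastructure setups and dependencies between components.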