# PoC: MARL for Codebase Analysis in App Modernization

## Introduction

This notebook demonstrates a proof-of-concept for using Multi-Agent Reinforcement Learning (MARL) in codebase analysis for app modernization. We'll simulate a scenario where multiple agents analyze different parts of a codebase to identify modernization opportunities.

## Setup

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(42)
np.random.seed(42)

class CodeAnalysisAgent(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(CodeAnalysisAgent, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)  # Output logits for BCEWithLogitsLoss

class CodebaseEnvironment:
    def __init__(self, num_modules, features_per_module, num_agents):
        self.num_modules = num_modules
        self.features_per_module = features_per_module
        self.num_agents = num_agents
        self.reset()
        
    def reset(self):
        self.codebase = torch.rand(self.num_modules, self.features_per_module)
        self.modernization_opportunities = (torch.rand(self.num_modules) > 0.7).float()
        return self.codebase.clone()
        
    def step(self, actions):
        rewards = torch.zeros(self.num_agents)
        for i, action in enumerate(actions):
            module_idx = i % self.num_modules
            prediction = (torch.sigmoid(action) > 0.5).float()
            if prediction == self.modernization_opportunities[module_idx]:
                rewards[i] = 1
            else:
                rewards[i] = -1
            
        done = True  # In this simplified version, we're done after one step
        return self.codebase.clone(), rewards, done

# Hyperparameters
num_modules = 10
features_per_module = 5
num_agents = 5
hidden_dim = 64

env = CodebaseEnvironment(num_modules, features_per_module, num_agents)
agents = [CodeAnalysisAgent(features_per_module, hidden_dim, 1) for _ in range(num_agents)]

## Training Loop

In [4]:
def train_agents(num_episodes):
    optimizers = [optim.Adam(agent.parameters(), lr=0.001) for agent in agents]
    episode_rewards = []
    loss_fn = nn.BCEWithLogitsLoss()
    
    for episode in range(num_episodes):
        state = env.reset()
        
        actions = []
        targets = []
        for i, agent in enumerate(agents):
            module_idx = i % env.num_modules
            module_features = state[module_idx].unsqueeze(0)  # Shape: [1, features_per_module]
            action = agent(module_features)
            actions.append(action)
            target = env.modernization_opportunities[module_idx].unsqueeze(0)
            targets.append(target)
        
        # Take a step in the environment
        next_state, rewards, done = env.step(actions)
        episode_rewards.append(rewards.sum().item())
        
        # Update agents
        for i, agent in enumerate(agents):
            optimizer = optimizers[i]
            optimizer.zero_grad()
            
            loss = loss_fn(actions[i], targets[i])
            loss.backward()
            optimizer.step()
        
        if episode % 100 == 0:
            avg_reward = sum(episode_rewards[-100:]) / 100
            print(f"Episode {episode}, Average Reward: {avg_reward}")
    
    return episode_rewards

# Train the agents
num_episodes = 5000
rewards = train_agents(num_episodes)

# Plot the learning curve
plt.plot(rewards)
plt.title("Learning Curve")
plt.xlabel("Episode")
plt.ylabel("Total Reward")
plt.show()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

## Evaluation

In [None]:
def evaluate_agents():
    state = env.reset()
    actions = [agent(state[i % num_modules].unsqueeze(0)).squeeze() for i, agent in enumerate(agents)]
    _, rewards, _ = env.step(actions)
    
    print("Modernization Opportunities:", env.modernization_opportunities)
    print("Agent Decisions:", [a > 0.5 for a in actions])
    print("Rewards:", rewards)

evaluate_agents()

## Conclusion

This PoC demonstrates how MARL can be applied to codebase analysis for app modernization. The agents learn to identify modernization opportunities in different modules of the codebase. In a real-world scenario, this could be extended to more complex code analysis tasks and larger codebases.