# In [1]:
import torch 

# In [2]:
# Ask cuDNN to benchmark its candidate kernels and reuse the fastest one it
# finds for each input configuration.
# NOTE(review): the networks defined in this file are built only from
# nn.Linear layers, so this flag may have no measurable effect here - confirm.
torch.backends.cudnn.benchmark = True

# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import random

# Deep Neural Network for Burst Time Classification
class BurstTimeClassifier(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    Args:
        input_size: Width of the input feature vector.
        hidden_size: Width shared by both hidden layers.
        output_size: Number of raw scores produced by the last layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # The attribute names double as state_dict keys, so they stay
        # fc1 / fc2 / fc3 and are created in that order.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the raw output scores for ``x``; no softmax is applied."""
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = torch.relu(hidden_layer(activations))
        return self.fc3(activations)

# Deep Q-Network
class DQN(nn.Module):
    """Q-network mapping a state vector to one value per action.

    The network has two hidden layers of 100 units, each followed by ReLU,
    and a linear output layer of width ``action_size``.

    Args:
        state_size: Length of the state vector fed to the network.
        action_size: Number of actions, i.e. the width of the output.
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 100
        # Names are kept as fc1 / fc2 / fc3 because they are the state_dict keys.
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Return the per-action values for the state(s) in ``x``."""
        first_hidden = torch.relu(self.fc1(x))
        second_hidden = torch.relu(self.fc2(first_hidden))
        q_values = self.fc3(second_hidden)
        return q_values

# Job Scheduling Environment
class JobSchedulingEnv:
    """Step-based job scheduling simulation with one FIFO backlog queue per action.

    A job is a dict with the keys ``'burst_time'`` (a non-zero number) and
    ``'resources'`` (a length-``k`` array of per-resource demand). An action
    selects a queue; the job at the head of that queue is scheduled when its
    demand still fits, i.e. when no resource would exceed a usage of 1.

    The state vector is the ``k`` resource usages followed by the ``N`` queue
    lengths.

    NOTE(review): resource usage only ever grows inside an episode - nothing
    in this class releases the resources of a scheduled job.

    Args:
        k: Number of resource types.
        N: Maximum burst time, which is also the number of backlog queues.
        max_steps: Number of steps after which an episode is cut off.
    """

    def __init__(self, k, N, max_steps=1000):
        self.k = k  # number of resource types
        self.N = N  # maximum burst time
        self.max_steps = max_steps  # hard cap on episode length
        # reset() creates resources, backlog and current_time.
        self.reset()

    def reset(self):
        """Clear usage, queues and the clock; return the initial state vector."""
        self.resources = np.zeros(self.k)
        self.backlog = [[] for _ in range(self.N)]
        self.current_time = 0
        return self._get_state()

    def add_job(self, burst_time, resources):
        """Append a job to the backlog queue that matches its burst time.

        The job goes to queue ``burst_time - 1``. That is the convention
        implied by step(), which adds ``action + 1`` to the current time when
        it computes a completion time.

        Args:
            burst_time: Integer burst time in ``1..N``.
            resources: Sequence of ``k`` per-resource demands.

        Raises:
            ValueError: If ``burst_time`` is outside ``1..N`` or ``resources``
                does not have exactly ``k`` entries.
        """
        if not 1 <= burst_time <= self.N:
            raise ValueError(f"burst_time must be in 1..{self.N}, got {burst_time}")
        demand = np.asarray(resources, dtype=float)
        if demand.shape != (self.k,):
            raise ValueError(
                f"resources must have shape ({self.k},), got {demand.shape}"
            )
        self.backlog[burst_time - 1].append(
            {'burst_time': burst_time, 'resources': demand}
        )

    def step(self, action):
        """Try to schedule the head job of queue ``action`` and advance time by one.

        Args:
            action: Index of the backlog queue to serve, ``0 <= action < N``.

        Returns:
            A ``(state, reward, done)`` tuple. ``reward`` is the negated
            slowdown (completion time divided by burst time) of the scheduled
            job, or ``-10`` when the queue is empty or its head job does not
            fit. ``done`` is true once ``max_steps`` is reached or every queue
            is empty.

        Raises:
            IndexError: If ``action`` is not a valid queue index. Negative
                values are rejected rather than wrapping around to another
                queue.
        """
        if not 0 <= action < self.N:
            raise IndexError(
                f"action {action} is outside the valid range [0, {self.N})"
            )

        queue = self.backlog[action]
        if not queue:
            reward = -10  # Penalty for selecting empty backlog
        else:
            # Peek instead of pop: a job that does not fit must keep its place
            # at the head of the queue rather than be moved to the tail.
            job = queue[0]
            if np.all(self.resources + job['resources'] <= 1):
                queue.pop(0)
                self.resources += job['resources']
                completion_time = self.current_time + action + 1
                reward = -(completion_time / job['burst_time'])
            else:
                reward = -10  # Penalty for invalid action

        self.current_time += 1
        done = self.current_time >= self.max_steps or not any(self.backlog)
        return self._get_state(), reward, done

    def _get_state(self):
        """Return resource usages followed by queue lengths as one float array."""
        queue_lengths = np.array([len(queue) for queue in self.backlog], dtype=float)
        return np.concatenate([self.resources, queue_lengths])

# DQN Agent
class DQNAgent:
    """Epsilon-greedy DQN agent with a replay memory and a target network.

    Transitions are stored in a bounded deque. replay() takes one optimiser
    step per sampled transition (not one step per batch), regressing the
    online network's value for the taken action towards
    ``reward + gamma * max(target_model(next_state))``.

    Args:
        state_size: Length of the state vector.
        action_size: Number of discrete actions.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=10000)
        self.gamma = 1.0  # discount factor
        self.epsilon = 1.0  # current exploration probability
        self.epsilon_min = 0.01  # decay never takes epsilon below this
        self.epsilon_decay = 0.995  # multiplicative decay applied per replay()
        self.learning_rate = 0.0001
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = DQN(state_size, action_size).to(self.device)
        self.target_model = DQN(state_size, action_size).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        # Start the target network as an exact copy of the online network;
        # otherwise the first bootstrap targets come from unrelated random
        # weights until the first explicit sync.
        self.update_target_model()
        self.target_model.eval()

    def _to_tensor(self, state):
        """Convert one state to a float32 tensor of shape (1, n) on the agent's device."""
        return torch.as_tensor(
            state, dtype=torch.float32, device=self.device
        ).unsqueeze(0)

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Returns:
            A plain ``int`` action index: uniformly random with probability
            ``epsilon``, otherwise the argmax of the online network's output.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # Inference only, so no autograd graph is needed.
        with torch.no_grad():
            q_values = self.model(self._to_tensor(state))
        return int(q_values.argmax(dim=1).item())

    def replay(self, batch_size):
        """Learn from ``batch_size`` randomly sampled transitions.

        Does nothing when the memory holds fewer than ``batch_size``
        transitions. After a learning pass, epsilon is decayed once and
        clamped at ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return

        criterion = nn.MSELoss()
        for state, action, reward, next_state, done in random.sample(self.memory, batch_size):
            target = reward
            if not done:
                # The bootstrap value is a constant with respect to the loss.
                with torch.no_grad():
                    next_q = self.target_model(self._to_tensor(next_state))
                target = reward + self.gamma * next_q.max().item()

            predicted = self.model(self._to_tensor(state))
            # The regression target equals the prediction everywhere except
            # the taken action, so only that entry contributes to the loss.
            # Detaching keeps the target out of the autograd graph and avoids
            # a second forward pass.
            expected = predicted.detach().clone()
            expected[0, action] = target

            loss = criterion(predicted, expected)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

# Function to generate job signatures
def generate_job_signature():
    """Return a placeholder job signature of 10 floats drawn uniformly from [0, 1).

    This is a stand-in only: a real signature would be generated from the
    job's ELF file.
    """
    feature_count = 10  # Assuming 10 features in the job signature
    return np.random.random_sample(size=feature_count)

# Function to train the Burst Time Classifier
def train_burst_time_classifier(classifier, num_samples=100000):
    """Train ``classifier`` in place, one synthetic sample per optimiser step.

    Each iteration draws a placeholder signature from
    generate_job_signature() and, independently of it, a uniformly random
    burst time in 1..10. It then takes one Adam step (lr=0.0001) on the
    cross-entropy loss.

    Args:
        classifier: Module mapping a signature tensor to per-class scores.
        num_samples: Number of single-sample training steps to run.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(classifier.parameters(), lr=0.0001)

    for _step in range(num_samples):
        # Draw the signature first, then the label, so the RNG is consumed in
        # a fixed order.
        features = torch.FloatTensor(generate_job_signature())
        # Assuming 10 burst time classes; the loss expects zero-based labels.
        label = torch.LongTensor([np.random.randint(1, 11) - 1])

        adam.zero_grad()
        scores = classifier(features).unsqueeze(0)  # add a batch dimension
        loss_fn(scores, label).backward()
        adam.step()

# Main training loop
def train():
    """Train the burst-time classifier and the DQN scheduler, then save both.

    Builds a 2-resource, 10-queue JobSchedulingEnv and a matching DQNAgent,
    trains a BurstTimeClassifier on synthetic data, and runs 1000 episodes.
    The target network is synced at the end of every episode. The weights
    are written to ``job_scheduler_dqn.pth`` and
    ``burst_time_classifier.pth`` in the working directory.

    NOTE(review): env.reset() leaves every backlog queue empty and nothing in
    this function enqueues jobs, so env.step() reports ``done`` after the
    first action of each episode.
    """
    k = 2  # number of resource types
    N = 10  # maximum burst time

    # Construction order is kept: environment, agent, then classifier.
    env = JobSchedulingEnv(k, N)
    agent = DQNAgent(k + N, N)  # state = k usages + N queue lengths; N actions

    # Assuming 10 features in job signature
    burst_time_classifier = BurstTimeClassifier(10, 100, N)
    train_burst_time_classifier(burst_time_classifier)

    episodes = 1000
    batch_size = 32

    for e in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False

        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            total_reward += reward
            state = next_state

            # Learn only once the memory holds more than one batch.
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

        # End of episode: sync the target network and report.
        agent.update_target_model()
        print(f"Episode: {e+1}/{episodes}, Total Reward: {total_reward}")

    torch.save(agent.model.state_dict(), "job_scheduler_dqn.pth")
    torch.save(burst_time_classifier.state_dict(), "burst_time_classifier.pth")

# Run the training pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    train()