In [22]:
import numpy as np
import time
import copy
import random

In [1]:
import gym
import numpy.ma as ma

import torch
import torch.nn as nn

import cv2
import gym.spaces
import collections

import argparse
import time
import torch.optim as optim

from tensorboardX import SummaryWriter

In [2]:
nSamples = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
machineNames = ['m0','m1','m2','m3','m4']

In [3]:
# p - persistance
# r - ready time

# Cj oznacza moment zakończenia wykonywania zadania Jj w uszeregowaniu

# p r 
# r  >= c - p

# Game - Generate

In [160]:
class ptszGenerator(gym.Env):
    def __init__(self, k, N):
        self.action_space = k
        self.observation_space = N
        
        self.machines = None
        self.tasks = None
        self.observations = None
        
        self.schedule = []
        for i in range(self.action_space):
            self.schedule.append([])
        
        self.taskCounter = 0
    
    def seed(self, seed=None):
        print("seed not implemented")
        pass        
    
    
    def step(self, actions):
        if (actions == self.observation_space).all(): #if action is do nothing then increase current time
            self.observations[-1] += 1
            return self.observations, 0, False, ""
        
        #preprocess_action = self.preprocess_action(actions)
        #print(preprocess_action)
        #for i,a in enumerate(preprocess_action):
        for i,a in enumerate(actions):
            if a == self.observation_space: # machine 'a' does nothing
                continue
            self.observations[self.action_space*2 + 3*a + 2] = 1 # mark tasks as done

            current_time = self.observations[-1]
            p,r,_ = self.observations[self.action_space*2 + 3*a: self.action_space*2 + 3*(a+1)] # take r,p,done of selected task
            
            speed = self.observations[2*i]
            self.observations[2*i+1] = current_time + p / speed #ready_time of the current mashine
            
            #print(f"m_{i} took task {a}")
            self.schedule[i].append(a)
            self.taskCounter += 1
            
        done = False
        reward = 0
        if self.taskCounter == self.observation_space:
            done = True
            reward = self.calculateCost()
            
        
        return self.observations, reward, done, ""
    
    def reset(self):
        # returns: observation: machines, tasks, current_time
        #                       [speed, ready_time]*k, [r, p, done]*N, current_time
        # shape: (1,2*k+3*N+1)
        self.taskCounter = 0
        self.machines = None
        self.tasks = None
        self.observations = None
        
        self.schedule = []
        for i in range(self.action_space):
            self.schedule.append([])
        
        timeCounter = 0
        tasks = []
        for _ in range(self.observation_space):
            p = np.random.randint(1, 100)
            r = np.random.randint(timeCounter - 5 if timeCounter - 5 > 0 else 0 ,timeCounter + 2)
            timeCounter += int(p/3)
            
            #ready, persistance, doneBoolean
            tasks.append([p,r,0])
        #random.shuffle(tasks) ///////////////////////////////////////////// to uncomment
        self.tasks = np.array(tasks)
        
        m0 = np.random.randint(50,100)
        m1 = np.random.randint(50,100)
        m2 = np.random.randint(50,60)
        m3 = np.random.randint(50,100)
        m4 = np.random.randint(99,100)
        machines = np.array([ [m0,0], [m1,0], [m2,0] ,[m3,0] ,[m4,0]], dtype="float")
        machines[:,0] =  machines[:,0] /max(machines[:,0])

        index = range(5)
        indicies = np.random.choice( index, 5, replace=False )
        self.machines = machines[indicies]

        mechineTasks = np.concatenate((machines,tasks), axis=None)
        self.observations = np.concatenate((mechineTasks, 0), axis=None)
        
        return self.observations
    
        
    def get_valid_tasks(self):
        readyToGo = np.ones(self.observation_space)
        timeCounter = self.observations[-1]
        for i,obs in enumerate(range(self.action_space*2, self.action_space*2+self.observation_space*3, 3)):
            p,r,done = self.observations[[obs,obs+1,obs+2]]
            if r <= timeCounter and done==0: # task is ready and not done
                readyToGo[i] = 0
                                 
        return readyToGo
                               
    
    def get_valid_machines(self):
        readyMachines = []
        timeCounter = self.observations[-1]
        for i in range(self.action_space):
            speed = self.observations[2*i]
            readyTime = self.observations[2*i+1]
            
            if readyTime <= timeCounter:
                readyMachines.append(i)
        return np.array(readyMachines)
    
    
    def _get_obs(self):
        return self.observations
    
    
    def preprocess_action(self, action):
        # input: actions in shape (n,k)
        #output: actions in shape (n), where each number means id of task to take od sleep for number== -1
        filteredAction = []
        validT = self.get_valid_tasks()
        mask = np.array((*validT, 0)) # 1 means invalid
        
        valid_machines = self.get_valid_machines()        
        for i, j in enumerate(range(0, self.action_space* (self.observation_space+1) , self.observation_space+1 )):            
            a = action[j: j+ (self.observation_space+1)]
            if i not in valid_machines:
                filteredAction.append(self.observation_space)
                continue
            mask_arr = ma.masked_array(a, mask=mask) 
            argmax = mask_arr.argmax()
            
            if argmax == self.observation_space:
                filteredAction.append(argmax)
                continue
            mask[argmax] = 1
            filteredAction.append(argmax)
        return np.array(filteredAction)
        
        
    def print_observations(self):
        for i in range(self.action_space):
            print(f"machine_{i} speed: {round(self.observations[2*i],2)} readyTime: {round(self.observations[2*i+1],2)}")
        for j in range(self.observation_space):
            print(f"task_{j+1} p: {self.observations[2*(i+1)+j*3]} r: {self.observations[2*(i+1)+j*3+1]} done: {self.observations[2*(i+1)+j*3+2]}")
        
        print(self.observations[-1])
    
    
    def sample(self):
        return self.preprocess_action(np.random.rand(self.action_space*(self.observation_space+1))) #+1 because of sleep behaviour
    
    def calculateCost(self):
        cTime = [0]*self.observation_space
        p,r=0,1
        speed,readyTime = 0,1
        calculatedWeight = 0
        timeCounter = 0
        
        for machine in self.machines:
            machine[readyTime] = 0
        
        for key, tasks in enumerate(self.schedule):
            #print(f"m: {key}")
            timeCounter = 0
            #print(f"machine: {key}, {self.machines[key]}")
            sortedTasks = sorted(tasks, key=lambda i: self.tasks[i][1]) # python is really good
            for task in sortedTasks:
                #print(f"task: {task}, {self.tasks[task]}")
                while self.machines[key][readyTime] > timeCounter or self.tasks[task][r] > timeCounter:
                    timeCounter += 1 
                #print(f"machines[{key}][readyTime]: {self.machines[key][readyTime]}")

                self.machines[key][readyTime] = timeCounter + (self.tasks[task][p] / float(self.machines[key][speed]) )

                #print(f"machines[{key}]['readyTime']: {self.machines[key][readyTime]}")

                cTime[task] = timeCounter + (self.tasks[task][p] / float(self.machines[key][speed]) ) - self.tasks[task][r]
                calculatedWeight += cTime[task]

                #print(f"cTime[task]: {cTime[task]}")
                #print(f"real p: {(self.tasks[task][p] / float(self.machines[key][speed]) )}")
                #print()
         
        #for key, value in enumerate(self.schedule):
        #    print(f"{key} -> {value}")
        #
        #print(calculatedWeight)
        return calculatedWeight


In [95]:
for i,a in enumerate(range(0,255,51)):
    print(i,a)

0 0
1 51
2 102
3 153
4 204


In [96]:
x = ptszGenerator(5,50)
x.reset()


array([5.35353535e-01, 0.00000000e+00, 5.85858586e-01, 0.00000000e+00,
       5.85858586e-01, 0.00000000e+00, 7.97979798e-01, 0.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 6.00000000e+01, 1.00000000e+00,
       0.00000000e+00, 2.60000000e+01, 2.10000000e+01, 0.00000000e+00,
       6.20000000e+01, 2.60000000e+01, 0.00000000e+00, 7.80000000e+01,
       4.50000000e+01, 0.00000000e+00, 6.70000000e+01, 7.40000000e+01,
       0.00000000e+00, 9.70000000e+01, 9.60000000e+01, 0.00000000e+00,
       4.40000000e+01, 1.23000000e+02, 0.00000000e+00, 6.00000000e+00,
       1.42000000e+02, 0.00000000e+00, 8.10000000e+01, 1.45000000e+02,
       0.00000000e+00, 9.70000000e+01, 1.67000000e+02, 0.00000000e+00,
       9.20000000e+01, 2.01000000e+02, 0.00000000e+00, 6.40000000e+01,
       2.33000000e+02, 0.00000000e+00, 5.40000000e+01, 2.53000000e+02,
       0.00000000e+00, 9.50000000e+01, 2.73000000e+02, 0.00000000e+00,
       2.00000000e+00, 3.00000000e+02, 0.00000000e+00, 3.60000000e+01,
      

In [97]:
x.print_observations()

machine_0 speed: 0.54 readyTime: 0.0
machine_1 speed: 0.59 readyTime: 0.0
machine_2 speed: 0.59 readyTime: 0.0
machine_3 speed: 0.8 readyTime: 0.0
machine_4 speed: 1.0 readyTime: 0.0
task_1 p: 60.0 r: 1.0 done: 0.0
task_2 p: 26.0 r: 21.0 done: 0.0
task_3 p: 62.0 r: 26.0 done: 0.0
task_4 p: 78.0 r: 45.0 done: 0.0
task_5 p: 67.0 r: 74.0 done: 0.0
task_6 p: 97.0 r: 96.0 done: 0.0
task_7 p: 44.0 r: 123.0 done: 0.0
task_8 p: 6.0 r: 142.0 done: 0.0
task_9 p: 81.0 r: 145.0 done: 0.0
task_10 p: 97.0 r: 167.0 done: 0.0
task_11 p: 92.0 r: 201.0 done: 0.0
task_12 p: 64.0 r: 233.0 done: 0.0
task_13 p: 54.0 r: 253.0 done: 0.0
task_14 p: 95.0 r: 273.0 done: 0.0
task_15 p: 2.0 r: 300.0 done: 0.0
task_16 p: 36.0 r: 301.0 done: 0.0
task_17 p: 18.0 r: 312.0 done: 0.0
task_18 p: 75.0 r: 317.0 done: 0.0
task_19 p: 99.0 r: 343.0 done: 0.0
task_20 p: 42.0 r: 380.0 done: 0.0
task_21 p: 59.0 r: 392.0 done: 0.0
task_22 p: 16.0 r: 408.0 done: 0.0
task_23 p: 35.0 r: 412.0 done: 0.0
task_24 p: 9.0 r: 423.0 done: 

In [98]:
o, r , done, _ = x.step(x.sample())
while not done:
    o, r , done, _ = x.step(x.sample())
print(r)

[50 50 50 50 50]
[ 0 50 50 50 50]
m_0 took task 0
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50  1 50 50 50]
m_1 took task 1
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50  2 50 50]
m_2 took task 2
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50  3 50]
m_3 took task 3
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[

[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 35 50 50 50]
m_1 took task 35
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 36]
m_4 took task 36
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 37 50 50]
m_2 took task 37
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 39 50]
m_3 took task 39
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 50]
[50 50 50 50 5

In [91]:
x.reset()

array([5.45454545e-01, 0.00000000e+00, 7.37373737e-01, 0.00000000e+00,
       5.15151515e-01, 0.00000000e+00, 9.49494949e-01, 0.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 2.80000000e+01, 0.00000000e+00,
       0.00000000e+00, 4.00000000e+00, 8.00000000e+00, 0.00000000e+00,
       6.90000000e+01, 7.00000000e+00, 0.00000000e+00, 1.10000000e+01,
       2.80000000e+01, 0.00000000e+00, 3.30000000e+01, 3.30000000e+01,
       0.00000000e+00, 9.60000000e+01, 4.80000000e+01, 0.00000000e+00,
       1.50000000e+01, 7.80000000e+01, 0.00000000e+00, 8.10000000e+01,
       8.20000000e+01, 0.00000000e+00, 8.10000000e+01, 1.09000000e+02,
       0.00000000e+00, 5.50000000e+01, 1.37000000e+02, 0.00000000e+00,
       4.20000000e+01, 1.55000000e+02, 0.00000000e+00, 2.30000000e+01,
       1.68000000e+02, 0.00000000e+00, 4.40000000e+01, 1.72000000e+02,
       0.00000000e+00, 8.80000000e+01, 1.91000000e+02, 0.00000000e+00,
       3.50000000e+01, 2.21000000e+02, 0.00000000e+00, 3.00000000e+01,
      

In [69]:
x.sample()

i 0
j 0
255 255
 self.observation_space+1 51
[0.67324886 0.75800769 0.92883131 0.36812887 0.38286408 0.79052108
 0.17240238 0.66419625 0.1951614  0.25282911 0.36007897 0.97220855
 0.47120424 0.19028733 0.50323816 0.67455203 0.63110715 0.70705853
 0.30835771 0.96252748 0.49778206 0.27257921 0.97428333 0.46590919
 0.95827768 0.43536684 0.23792548 0.86009044 0.58425116 0.39750746
 0.13437513 0.26535386 0.56458639 0.06910875 0.95498814 0.16195388
 0.39980614 0.38049173 0.75287467 0.10689929 0.80057248 0.41813235
 0.92274992 0.5627994  0.95722944 0.7157407  0.81893873 0.48564819
 0.0323756  0.04280291 0.51465768]
i 1
j 51
255 255
 self.observation_space+1 51
[0.04755325 0.56622643 0.11526842 0.47885836 0.83869797 0.06154628
 0.29497451 0.5784386  0.98698227 0.28202399 0.05670812 0.56396049
 0.41413634 0.97797379 0.13107873 0.44033693 0.76627177 0.50483284
 0.30291343 0.71872469 0.67965463 0.63384386 0.56936423 0.81883581
 0.09209816 0.01751341 0.6185353  0.41403424 0.65003903 0.87701772
 0.

array([ 0, 50, 50, 50, 50])

# QGN neural network model

In [106]:
class DQN(nn.Module):
    def __init__(self, input_shape, n_actions):
        super(DQN, self).__init__()

        # self.conv = nn.Sequential(
        #     nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
        #     nn.ReLU(),
        #     nn.Conv2d(32, 64, kernel_size=4, stride=2),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1),
        #     nn.ReLU()
        # )

        self.fc = nn.Sequential(
            nn.Linear(input_shape[0], 512),
            nn.ReLU(),
            nn.Linear(512, n_actions)
        )

    def forward(self, x):
        #conv_out = self.conv(x).view(x.size()[0], -1)
        return self.fc(x.float())

    # def __init__(self, input_shape, n_actions):
    #     super(DQN, self).__init__()

    #     self.conv = nn.Sequential(
    #         nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
    #         nn.ReLU(),
    #         nn.Conv2d(32, 64, kernel_size=4, stride=2),
    #         nn.ReLU(),
    #         nn.Conv2d(64, 64, kernel_size=3, stride=1),
    #         nn.ReLU()
    #     )

    #     conv_out_size = self._get_conv_out(input_shape)
    #     self.fc = nn.Sequential(
    #         nn.Linear(conv_out_size, 512),
    #         nn.ReLU(),
    #         nn.Linear(512, n_actions)
    #     )

    # def _get_conv_out(self, shape):
    #     o = self.conv(torch.zeros(1, *shape))
    #     return int(np.prod(o.size()))

    # def forward(self, x):
    #     conv_out = self.conv(x).view(x.size()[0], -1)
    #     return self.fc(conv_out)

# DQN

In [158]:
DEVICE = False #  True if'cuda'
DEFAULT_ENV_NAME = "TaskScheduler"

MEAN_REWARD_BOUND = -2000

GAMMA = 0.99
BATCH_SIZE = 128
REPLAY_SIZE = 10000
LEARNING_RATE = 1e-4
SYNC_TARGET_FRAMES = 1000
REPLAY_START_SIZE = 10000

EPSILON_DECAY_LAST_FRAME = 300000
EPSILON_START = 1.0
EPSILON_FINAL = 0.01


Experience = collections.namedtuple(
    'Experience', field_names=['state', 'action', 'reward',
                               'done', 'new_state'])

In [35]:
class ExperienceBuffer:
    def __init__(self, capacity):
        self.buffer = collections.deque(maxlen=capacity)

    def __len__(self):
        return len(self.buffer)

    def append(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        indices = np.random.choice(len(self.buffer), batch_size,
                                   replace=False)
        states, actions, rewards, dones, next_states = \
            zip(*[self.buffer[idx] for idx in indices])
        return np.array(states), np.array(actions), \
               np.array(rewards, dtype=np.float32), \
               np.array(dones, dtype=np.uint8), \
               np.array(next_states)

In [121]:
class Agent:
    def __init__(self, env, exp_buffer):
        self.env = env
        self.exp_buffer = exp_buffer
        self._reset()

    def _reset(self):
        self.state = env.reset()
        self.total_reward = 0.0

    @torch.no_grad()
    def play_step(self, net, epsilon=0.0, device="cpu"):
        done_reward = None

        if np.random.random() < epsilon:
            action = env.sample()
        else:
            state_a = np.array([self.state], copy=False)
            state_v = torch.tensor(state_a).to(device)
            q_vals_v = net(state_v)
            action = env.preprocess_action(q_vals_v[0])
            
            #_, act_v = torch.max(q_vals_v, dim=1)
            #action = int(act_v.item())

        # do step in the environment
        new_state, reward, is_done, _ = self.env.step(action)
        self.total_reward += reward

        exp = Experience(self.state, action, reward,
                         is_done, new_state)
        self.exp_buffer.append(exp)
        self.state = new_state
        if is_done:
            done_reward = self.total_reward
            self._reset()
        return done_reward


In [119]:
def calc_loss(batch, net, tgt_net, device="cpu"):
    states, actions, rewards, dones, next_states = batch

    states_v = torch.tensor(np.array(
        states, copy=False)).to(device)
    next_states_v = torch.tensor(np.array(
        next_states, copy=False)).to(device)
    actions_v = torch.tensor(actions).to(device)
    rewards_v = torch.tensor(rewards).to(device)
    done_mask = torch.BoolTensor(dones).to(device)

    state_action_values = net(states_v).gather(
        1, actions_v.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        next_state_values = tgt_net(next_states_v).max(1)[0]
        next_state_values[done_mask] = 0.0
        next_state_values = next_state_values.detach()

    expected_state_action_values = next_state_values * GAMMA + \
                                   rewards_v
    return nn.MSELoss()(state_action_values,
                        expected_state_action_values)

In [None]:
MACHINES = 5
TASKS = 50
action_space = MACHINES* (TASKS+1)
observation_space = (MACHINES * 2 + TASKS * 3 + 1,)

device = torch.device("cuda" if DEVICE else "cpu")

env = ptszGenerator(MACHINES,TASKS)

net = DQN(observation_space,
                    action_space).to(device) # TODO: take it from the env
tgt_net = DQN(observation_space,
                        action_space).to(device)
writer = SummaryWriter(comment="-" + DEFAULT_ENV_NAME)
print(net)

buffer = ExperienceBuffer(REPLAY_SIZE)
agent = Agent(env, buffer)
epsilon = EPSILON_START

optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)
total_rewards = []
frame_idx = 0
ts_frame = 0
ts = time.time()
best_m_reward = None

while True:
    frame_idx += 1
    epsilon = max(EPSILON_FINAL, EPSILON_START -
                  frame_idx / EPSILON_DECAY_LAST_FRAME)

    reward = agent.play_step(net, epsilon, device=device)
    if reward is not None:
        total_rewards.append(reward)
        speed = (frame_idx - ts_frame) / (time.time() - ts)
        ts_frame = frame_idx
        ts = time.time()
        m_reward = np.mean(total_rewards[-100:])
        print("%d: done %d games, reward %.3f, "
              "eps %.2f, speed %.2f f/s" % (
            frame_idx, len(total_rewards), m_reward, epsilon,
            speed
        ))
        writer.add_scalar("epsilon", epsilon, frame_idx)
        writer.add_scalar("speed", speed, frame_idx)
        writer.add_scalar("reward_100", m_reward, frame_idx)
        writer.add_scalar("reward", reward, frame_idx)
        if best_m_reward is None or best_m_reward > m_reward:
            torch.save(net.state_dict(), DEFAULT_ENV_NAME +
                       "-best_%.0f.dat" % m_reward)
            if best_m_reward is not None:
                print("Best reward updated %.3f -> %.3f" % (
                    best_m_reward, m_reward))
            best_m_reward = m_reward
        if m_reward < MEAN_REWARD_BOUND:
            print("Solved in %d frames!" % frame_idx)
            break

    if len(buffer) < REPLAY_START_SIZE:
        continue

    if frame_idx % SYNC_TARGET_FRAMES == 0:
        tgt_net.load_state_dict(net.state_dict())

    optimizer.zero_grad()
    batch = buffer.sample(BATCH_SIZE)
    states, actions, rewards, dones, next_states = batch
    loss_t = calc_loss(batch, net, tgt_net, device=device)
    loss_t.backward()
    optimizer.step()
writer.close()

DQN(
  (fc): Sequential(
    (0): Linear(in_features=161, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=255, bias=True)
  )
)
905: done 1 games, reward 3903.701, eps 1.00, speed 2728.79 f/s
1721: done 2 games, reward 4669.502, eps 0.99, speed 3386.15 f/s
2577: done 3 games, reward 4254.012, eps 0.99, speed 2955.93 f/s
3528: done 4 games, reward 4172.416, eps 0.99, speed 3121.78 f/s
4333: done 5 games, reward 4607.143, eps 0.99, speed 3612.46 f/s
5116: done 6 games, reward 4528.426, eps 0.98, speed 2863.95 f/s
6033: done 7 games, reward 4674.481, eps 0.98, speed 3154.69 f/s
6949: done 8 games, reward 4569.398, eps 0.98, speed 3143.04 f/s
7738: done 9 games, reward 4450.568, eps 0.97, speed 2887.32 f/s
8670: done 10 games, reward 4455.764, eps 0.97, speed 2846.48 f/s
9532: done 11 games, reward 4491.928, eps 0.97, speed 2963.20 f/s
10417: done 12 games, reward 4547.906, eps 0.97, speed 227.13 f/s
11195: done 13 games, reward 4439.653, eps 0.96

106393: done 124 games, reward 5036.875, eps 0.65, speed 92.86 f/s
107306: done 125 games, reward 5056.926, eps 0.64, speed 88.42 f/s
108103: done 126 games, reward 5033.237, eps 0.64, speed 80.94 f/s
108917: done 127 games, reward 5055.250, eps 0.64, speed 94.89 f/s
109847: done 128 games, reward 5027.330, eps 0.63, speed 95.31 f/s
110687: done 129 games, reward 5012.880, eps 0.63, speed 87.19 f/s
111566: done 130 games, reward 4967.504, eps 0.63, speed 91.33 f/s
112397: done 131 games, reward 4974.605, eps 0.63, speed 92.11 f/s
113141: done 132 games, reward 4977.281, eps 0.62, speed 91.08 f/s
114001: done 133 games, reward 4967.426, eps 0.62, speed 89.64 f/s
114961: done 134 games, reward 4971.935, eps 0.62, speed 88.90 f/s
115865: done 135 games, reward 4971.061, eps 0.61, speed 84.09 f/s
116804: done 136 games, reward 4974.893, eps 0.61, speed 82.46 f/s
117550: done 137 games, reward 4977.314, eps 0.61, speed 63.77 f/s
118430: done 138 games, reward 4975.620, eps 0.61, speed 65.43

In [155]:
def calc_loss(batch, net, tgt_net, device="cpu"):
    states, actions, rewards, dones, next_states = batch

    states_v = torch.tensor(np.array(
        states, copy=False)).to(device)
    next_states_v = torch.tensor(np.array(
        next_states, copy=False)).to(device)
    actions_v = torch.tensor(actions).to(device)
    rewards_v = torch.tensor(rewards).to(device)
    done_mask = torch.BoolTensor(dones).to(device)


    state_action_values = net(states_v).gather(
        1, actions_v).sum(1)
    
    with torch.no_grad():
        tgt_state_values = tgt_net(next_states_v)#.max(1)[0]
        for i, j in enumerate(range(0, MACHINES* (TASKS+1) , TASKS+1 )):    
            if i==0:
                next_state_values = tgt_state_values[:, j: j+(TASKS+1)].max(1)[0]
            else:
                next_state_values += tgt_state_values[:, j: j+(TASKS+1)].max(1)[0]
        #print(f"next_state_values= {next_state_values}")
        next_state_values[done_mask] = 0.0
        next_state_values = next_state_values.detach()

    expected_state_action_values = next_state_values * GAMMA + \
                                   rewards_v
    return nn.MSELoss()(state_action_values,
                        expected_state_action_values)

In [105]:
for x in nSamples:
    timeCounter = 0
    fileData = []
    fileData.append(x)
    for _ in range(x):
        p = np.random.randint(1, 100)
        r = np.random.randint(timeCounter - 5 if timeCounter - 5 > 0 else 0 ,timeCounter + 2)
        timeCounter += int(p/3)
        
        fileData.append([p,r])
        
    with open(f'PTSZ-instancje/136723/in136723_{x}.txt', "w") as f:
        f.write(f"{str(fileData[0])}\n")
        
        m0 = np.random.randint(50,100)
        m1 = np.random.randint(50,100)
        m2 = np.random.randint(50,60)
        m3 = np.random.randint(50,100)
        m4 = np.random.randint(99,100)
        machines = np.array([m0,m1,m2,m3,m4])
        machines =  machines /max(machines)
        machines = np.random.choice( machines, 5, replace=False )
        f.write(f"{' '.join([str(x) for x in machines])}\n")
        
        instances = fileData[1:]
        random.shuffle(instances)
        for line in instances:            
            f.write(f"{' '.join([str(x) for x in line])}\n")

# Verify

In [5]:
x = 50

In [101]:
data

{1: {'p': 89, 'r': 588, 'c': 1123.578947368421},
 2: {'p': 78, 'r': 131, 'c': 519.4736842105262},
 3: {'p': 36, 'r': 405, 'c': 1369.5263157894738},
 4: {'p': 52, 'r': 363, 'c': 399.0},
 5: {'p': 64, 'r': 441, 'c': 419.0},
 6: {'p': 34, 'r': 8, 'c': 788.0},
 7: {'p': 60, 'r': 685, 'c': 87.35294117647061},
 8: {'p': 79, 'r': 299, 'c': 175.01470588235293},
 9: {'p': 38, 'r': 263, 'c': 772.0},
 10: {'p': 23, 'r': 210, 'c': 1155.0},
 11: {'p': 21, 'r': 638, 'c': 240.57352941176475},
 12: {'p': 24, 'r': 421, 'c': 563.9411764705883},
 13: {'p': 39, 'r': 245, 'c': 857.7368421052631},
 14: {'p': 4, 'r': 495, 'c': 5.012658227848078},
 15: {'p': 23, 'r': 3, 'c': 1339.0},
 16: {'p': 89, 'r': 496, 'c': 847.578947368421},
 17: {'p': 49, 'r': 374, 'c': 465.1052631578947},
 18: {'p': 5, 'r': 351, 'c': 933.0},
 19: {'p': 12, 'r': 550, 'c': 827.0},
 20: {'p': 11, 'r': 342, 'c': 16.014705882352928},
 21: {'p': 61, 'r': 667, 'c': 406.8088235294117},
 22: {'p': 31, 'r': 50, 'c': 364.0},
 23: {'p': 40, 'r':

In [68]:
taskQueue

{'m0': [28,
  22,
  42,
  43,
  4,
  6,
  5,
  35,
  24,
  23,
  47,
  37,
  50,
  44,
  18,
  27,
  15,
  10,
  19],
 'm1': [20, 8, 7, 30, 11, 29, 32, 12, 21, 26],
 'm2': [34],
 'm3': [45, 25, 36, 2, 41, 17, 33, 9, 13, 31, 16, 48, 40, 1, 3, 46, 49],
 'm4': [14, 39, 38]}

In [69]:
machines

{'m0': {'speed': '0.9583333333333334', 'readyTime': 0},
 'm1': {'speed': '0.5729166666666666', 'readyTime': 0},
 'm2': {'speed': '0.010416666666666666', 'readyTime': 0},
 'm3': {'speed': '1.0', 'readyTime': 0},
 'm4': {'speed': '0.20833333333333334', 'readyTime': 0}}

In [546]:
def calculateCost(data, taskQueue, machines):
    calculatedWeight = 0
    for key, tasks in taskQueue.items():
        print(f"m: {key}")
        timeCounter = 0
        for task in tasks:
            print(f"task: {task}, {data[task]}")
            while machines[key]['readyTime'] > timeCounter or data[task]['r'] > timeCounter:
                timeCounter += 1 
            print(f"timeCounter: {timeCounter}")
            print(f"machines[key]['readyTime']: {machines[key]['readyTime']}")
            
            machines[key]['readyTime'] = timeCounter + (data[task]['p'] / float(machines[key]['speed']) )
            
            print(f"machines[key]['readyTime']: {machines[key]['readyTime']}")
            
            data[task]['c'] = timeCounter + (data[task]['p'] / float(machines[key]['speed']) ) - data[task]['r']
            
            
            print(f"data[task]['c']: {data[task]['c']}")
            print(f"realy p: {(data[task]['p'] / float(machines[key]['speed']) )}")
            print()
        #    if timeCounter < data[task]['r']:
        #        timeCounter = data[task]['r']
            # add p time to current time
        #    timeCounter += data[task]['p']
      #      c = timeCounter
      #      data[task]['c'] = c
    
    for key, value in data.items():
        print(f"{key} -> {value}")
    # check correctness
    if not data[task]["r"] <= data[task]["c"] - data[task]["p"]: 
        print("Error")

    # verify cost
    if data[task]["c"] > data[task]["d"]:
        calculatedWeight += data[task]["w"]
    #print(calculatedWeight)
    return calculatedWeight

In [547]:
motherFolder = "out" # "PTSZ-instancje"
data = {}
indicies = ['136723'] #["136805", "136792", "136683", "132231", "136730", "136682", "136764", "136782", "136723", "136778", "136309", "136718", "136315", "136759"]
for index  in indicies:
    print(index)
    for samples in nSamples:
        taskQueue = {}
        with open(f'{motherFolder}/{index}/out{index}_{samples}.txt', "r") as result:
            res = result.read().split('\n')
            weight = int(res[0])
            for i, name in enumerate(machineNames):
                gen = (t for t in res[i+1].split(" ") if t.isdigit())
                taskQueue[name] = [int(t) for t in gen]
    
        with open(f'PTSZ-instancje/{index}/in{index}_{samples}.txt', "r") as instance:
            d = instance.read().split('\n')
            n = int(d[0])
            machines = {}
            for name, speed in zip(machineNames, d[1].split(' ')):
                machines[name] = {'speed':speed, 'readyTime':0}
            for i,line in enumerate(d[2:]):
                if line == '':
                    continue
                p,r = line.split(' ')
                p,r = int(p), int(r)
                data[i+1] = {"p": p, "r": r, "c": 0 }
        
        calculatedWeight = calculateCost(data, taskQueue, machines)
        print(calculatedWeight)
        #if calculatedWeight != weight:
        #    print(f"wrong weight. Calculated: {calculatedWeight}, read: {weight}")
        #else:
        #    print(f"Weight: {calculatedWeight} confirmed")
    break
    print()
    print()
        
        

136723
m: m0
task: 21, {'p': 31, 'r': 83, 'c': 0}
timeCounter: 83
machines[key]['readyTime']: 0
machines[key]['readyTime']: 143.1764705882353
data[task]['c']: 60.176470588235304
realy p: 60.1764705882353

task: 2, {'p': 67, 'r': 137, 'c': 0}
timeCounter: 144
machines[key]['readyTime']: 143.1764705882353
machines[key]['readyTime']: 274.05882352941177
data[task]['c']: 137.05882352941177
realy p: 130.05882352941177

task: 29, {'p': 65, 'r': 274, 'c': 0}
timeCounter: 275
machines[key]['readyTime']: 274.05882352941177
machines[key]['readyTime']: 401.1764705882353
data[task]['c']: 127.1764705882353
realy p: 126.1764705882353

task: 38, {'p': 37, 'r': 402, 'c': 0}
timeCounter: 402
machines[key]['readyTime']: 401.1764705882353
machines[key]['readyTime']: 473.8235294117647
data[task]['c']: 71.8235294117647
realy p: 71.82352941176471

task: 24, {'p': 33, 'r': 480, 'c': 0}
timeCounter: 480
machines[key]['readyTime']: 473.8235294117647
machines[key]['readyTime']: 544.0588235294117
data[task]['c']:

KeyError: 'd'

In [66]:
machines

{'m0': {'speed': '0.9583333333333334', 'readyTime': 0},
 'm1': {'speed': '0.5729166666666666', 'readyTime': 0},
 'm2': {'speed': '0.010416666666666666', 'readyTime': 0},
 'm3': {'speed': '1.0', 'readyTime': 0},
 'm4': {'speed': '0.20833333333333334', 'readyTime': 0}}

In [11]:
# p - persistance
# r - ready time
# d - oczekiwany termin zakończenia
# w - weight
# Cj oznacza moment zakończenia wykonywania zadania Jj w uszeregowaniu

# p r d w
# r  >= c - p

# TaskScheduler

In [106]:
a,b,c,d = (1,1,1,1)
totalCost = 0
data = {}

indicies = ['136723']#["136805", "136792", "136683", "132231", "136730", "136682", "136764", "136782", "136723", "136778", "136309", "136718", "136315", "136759"]
for index  in indicies:
    for samples in nSamples:
        with open(f'PTSZ-instancje/{index}/in{index}_{samples}.txt', "r") as instance:
            d = instance.read().split('\n')
            n = int(d[0])
            machines = {}
            for name, speed in zip(machineNames, d[1].split(' ')):
                machines[name] = {'speed':speed, 'readyTime':0, 'gain':0}
            for i,line in enumerate(d[2:]):
                if line == '':
                    continue
                p,r = line.split(' ')
                p,r = int(p), int(r)
                data[i+1] = {"p": p, "r": r, "c": 0, "penalty": 0 }
            dataCopy = copy.deepcopy(data)
        
        calculatedWeight = 0
        timeCounter = 0
        outSchedule = {}
        for name in machineNames:
            outSchedule[name] = []
        
        start = time.time()
        while len(data) > 0:
            readyToGo = {}
            for key, task in data.items():
                if task['r'] <= timeCounter: # add penalty ( (timeCounter+p-d)*w )
                    endTime = timeCounter+task['p']
                    penaltyEndTask = ( endTime if endTime > 0 else 0 ) * 1
                    penaltyPersistance = task['p']
                    task['penalty'] = a*penaltyEndTask + c*penaltyPersistance
                    readyToGo[key] = task
                
            if len(readyToGo) == 0: # currently there is no task to go
                #print("upsi readyToGo")
                timeCounter += 1
                continue
            #print(f"readyToGo: {readyToGo}")

            # select task
            readyToGoSorted = sorted(readyToGo.items(), key=lambda y: (y[1]['penalty']), reverse=True)
            selectedTask = readyToGoSorted[0]
            
            # select machine
            readyMachines = [d for d in zip(machines.keys(), machines.values() ) if d[1]['readyTime'] <= timeCounter]
            if len(readyMachines) == 0: # currently there is free machine
                #print("upsi readyMachines")
                timeCounter += 1
                continue
            
            
            for name, machine in machines.values():
                howLongItWillBeDisabled = 0 ##########################################
                speed = machines[name]['speed']
                
                machines[name]['gain'] = speed
            selectedMachine = max(readyMachines, key=lambda y: (y[1]['speed']))
            
            print(f"timeCounter: {timeCounter}")
            print(f"selectedTask: {selectedTask}")
            
            # remove task from list active tasks, add time, append outschedule
            data.pop(selectedTask[0])
            
            print(f"selectedMachine: {selectedMachine}")
            #timeCounter += 1
            machines[selectedMachine[0]]['readyTime'] = timeCounter + (selectedTask[1]['p'] / float(selectedMachine[1]['speed']) )
            print(machines)
            
            print()
            
            outSchedule[selectedMachine[0]].append(selectedTask[0])    
        
        end = time.time()
        #print(end-start)
        
        #cost = calculateCost(dataCopy, outSchedule)
        cost = 5
        #print(cost)
        totalCost += cost
        
        print(outSchedule)
        
        with open(f'out/{index}/out{index}_{samples}.txt', "w") as f:
            f.write(f"{cost}\n")         
                #f.write(f"{' '.join([str(x) for x in line])}\n")
            for name in machineNames:
                f.write(f"{' '.join([str(x) for x in outSchedule[name]])}\n")
        
        break
        
print(totalCost)

timeCounter: 0
selectedTask: (28, {'p': 89, 'r': 0, 'c': 0, 'penalty': 178})
selectedMachine: ('m4', {'speed': '1.0', 'readyTime': 0})
{'m0': {'speed': '0.5151515151515151', 'readyTime': 0}, 'm1': {'speed': '0.5252525252525253', 'readyTime': 0}, 'm2': {'speed': '0.7777777777777778', 'readyTime': 0}, 'm3': {'speed': '0.797979797979798', 'readyTime': 0}, 'm4': {'speed': '1.0', 'readyTime': 89.0}}

timeCounter: 29
selectedTask: (13, {'p': 71, 'r': 29, 'c': 0, 'penalty': 171})
selectedMachine: ('m3', {'speed': '0.797979797979798', 'readyTime': 0})
{'m0': {'speed': '0.5151515151515151', 'readyTime': 0}, 'm1': {'speed': '0.5252525252525253', 'readyTime': 0}, 'm2': {'speed': '0.7777777777777778', 'readyTime': 0}, 'm3': {'speed': '0.797979797979798', 'readyTime': 117.9746835443038}, 'm4': {'speed': '1.0', 'readyTime': 89.0}}

timeCounter: 49
selectedTask: (31, {'p': 50, 'r': 49, 'c': 0, 'penalty': 149})
selectedMachine: ('m2', {'speed': '0.7777777777777778', 'readyTime': 0})
{'m0': {'speed': '

In [33]:
x = {'m0':{'speed':2, 'readyTime':0},'m1':{'speed':3, 'readyTime':0},'m2':{'speed':1, 'readyTime':5}, }

In [18]:
zip(machines.keys(), machines.values() )

<zip at 0x7f9a01101588>

In [37]:
[d for d in x.values() if d['readyTime'] <= 0]

[{'speed': 2, 'readyTime': 0}, {'speed': 3, 'readyTime': 0}]

In [42]:
readyMachines = [d for d in x.values() if d['readyTime'] <= 0]
readyMachines = max(readyMachines, key=lambda y: (y['speed']))
readyMachines

{'speed': 3, 'readyTime': 0}

In [9]:
from scipy.optimize import minimize

In [10]:
x0 = (1, 1, 1, 1)

In [17]:
def costFunction(x):
    a,b,c,d = x
    totalCost = 0
    data = {}
    indicies = ["136805", "136792", "136683", "132231", "136730", "136682", "136764", "136782", "136723", "136778", "136309", "136718", "136315", "136759"]
    for index  in indicies:
        for samples in nSamples:
            with open(f'PTSZ-instancje/{index}/in{index}_{samples}.txt', "r") as instance:
                d = instance.read().split('\n')
                n = int(d[0])                
                for i,line in enumerate(d[1:]):
                    if line == '':
                        continue
                    p,r,d,w = line.split(' ')
                    p,r,d,w = int(p), int(r), int(d), int(w)
                    data[i+1] = {"p": p, "r": r, "d": d, "w": w, "c": 0, "penalty": 0 }
                dataCopy = copy.deepcopy(data)


            calculatedWeight = 0
            timeCounter = 0
            outSchedule = []

            start = time.time()
            while len(data) > 0:
                readyToGo = {}
                for key, task in data.items():
                    if task['r'] <= timeCounter: # add penalty ( (timeCounter+p-d)*w )
                        endTime = timeCounter+task['p']-task['d']
                        penaltyEndTask = ( endTime if endTime > 0 else 0 ) * task['w']
                        penaltyWeight = task['w']
                        penaltyPersistance = task['p']
                        penaltyDue = task['d'] - timeCounter
                        task['penalty'] = a*penaltyEndTask + b*penaltyWeight + c*penaltyPersistance - d*penaltyDue
                        readyToGo[key] = task

                if len(readyToGo) == 0: # currently there is no task to go
                    #print("upsi")
                    timeCounter += 1
                    continue
                #print(f"readyToGo: {readyToGo}")

                readyToGoSorted = sorted(readyToGo.items(), key=lambda y: (y[1]['penalty'], y[1]['w']), reverse=True)
                selectedTask = readyToGoSorted.pop(0)
                #print(timeCounter)
                #print(f"selectedTask: {selectedTask}")

                data.pop(selectedTask[0])
                timeCounter += selectedTask[1]['p']
                outSchedule.append(selectedTask[0])    

            end = time.time()
          #  print(int((end-start)*1000) if int((end-start)*1000) != 0 else 1)

            cost = calculateCost(dataCopy, outSchedule)
            #print(cost)
            totalCost += cost
    print(f"[{totalCost},{a},{b},{c},{d}],")
    return totalCost

In [18]:
options = {"maxiter":40, "disp":True}

In [23]:
res = minimize(costFunction, x0, method='TNC', options=options, tol=1e-2)

[499007,1.0,1.0,1.0,1183],
[499007,1.0,1.0,1.0,1183],
[499012,1.00000001,1.0,1.0,1183],
[499007,1.0,1.00000001,1.0,1183],
[499030,1.0,1.0,1.00000001,1183],
[499007,1.0,1.0,1.0,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,1.0000000036691032,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.00000001,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999980877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,1.0000000036691032,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.00000001,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999980877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999968345517,1.0,0.9999999854389375,1183],
[499007,0.9999999968345517,1.0,0.9999999854389375,1183],
[499007,1.0000000

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

In [None]:
x = [1,2,3]

In [None]:
plt.plot(x)
plt.title("Minimalizacja funkcji celu")
plt.xaxis("iteracja")
plt.yaxis("Koszt")

In [94]:
costFunction(x)

1
7
13
25
17
19
26
33
38
61
1
4
10
17
29
42
64
85
99
140
1
4
11
19
30
45
63
152
111
133
1
3
15
16
25
32
48
61
82
88
1
1
6
13
30
36
48
64
94
108
1
3
6
11
19
27
38
52
62
79
1
3
8
22
29
38
56
71
105
124
1
4
10
17
26
39
53
69
105
107
1
1
1
2
2
4
7
6
9
10
1
3
6
10
16
26
33
54
57
68
1
3
7
15
22
45
45
56
75
108
1
1
1
1
2
4
5
7
8
11
1
4
11
21
32
47
74
87
110
146
1
3
5
8
14
28
34
39
54
84


385570

In [85]:
x = (-140036.8041378972, 2.5486165590751364, -0.1998934445512417, 1183)