In [1]:
import numpy as np
import time
import copy
import random

In [2]:
import gym
import numpy.ma as ma

import torch
import torch.nn as nn

import cv2
import gym.spaces
import collections

import argparse
import time
import torch.optim as optim

from tensorboardX import SummaryWriter

In [3]:
nSamples = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
machineNames = ['m0','m1','m2','m3','m4']

In [4]:
# p - persistance
# r - ready time

# Cj oznacza moment zakończenia wykonywania zadania Jj w uszeregowaniu

# p r 
# r  >= c - p

# Game - Generate

In [211]:
class ptszGenerator(gym.Env):
    def __init__(self, k, N):
        self.action_space = k
        self.observation_space = N
        
        self.machines = None
        self.tasks = None
        self.observations = None
        
        self.schedule = []
        for i in range(self.action_space):
            self.schedule.append([])
        
        self.taskCounter = 0
    
    def seed(self, seed=None):
        print("seed not implemented")
        pass        
    
    
    def step(self, actions):
        if (actions == self.observation_space).all(): #if action is do nothing then increase current time to allow new machine or new task
            #self.current_time
            #if there is no active task 
            i = self.action_space - 1
            theClosestTaskTime = np.inf
            thereIsActiveTask = False
            current_time = self.observations[-1]
            for j in range(self.observation_space):
                r = self.observations[2*(i+1)+j*3+1]
                done = self.observations[2*(i+1)+j*3+2]
                if not done and r < current_time:
                    #print("There is active task")
                    thereIsActiveTask = True
                    break # there is active task
                elif not done:
                    if r < theClosestTaskTime:
                        theClosestTaskTime = r
            if not thereIsActiveTask:
                self.observations[-1] += theClosestTaskTime
                return self.observations, 0, False, ""
            #else there is active task but there is no active machine
        
            readyTime = []
            for i in range(self.action_space):
                readyTime.append(self.observations[2*i+1])
            #print(f"readyTime: {readyTime} theClosestTaskTime: {theClosestTaskTime} currentTime: {current_time}")
            readyTime = np.array(readyTime)
            try:
                minNextMachine = np.min(readyTime[readyTime > current_time])
            except:
                print("except")
                return self.observations, 0, False, ""
            #print(f"minNextMachine: {minNextMachine}")
          #  if len(readyTime[readyTime > current_time]) != self.action_space:
          #      print(f"len: {len(readyTime[readyTime > current_time])}")
          #      print("ocho we wait to next machine here")
          #      print(f"readyTime: {readyTime} theClosestTaskTime: {theClosestTaskTime} currentTime: {current_time}")
          #      print(f"minNextMachine: {minNextMachine}")
            self.observations[-1] = minNextMachine
            return self.observations, 0, False, ""
        
        #preprocess_action = self.preprocess_action(actions)
        #print(preprocess_action)
        #for i,a in enumerate(preprocess_action):
        for i,a in enumerate(actions):
            if a == self.observation_space: # machine 'a' does nothing
                continue
            self.observations[self.action_space*2 + 3*a + 2] = 1 # mark tasks as done

            current_time = self.observations[-1]
            p,r,_ = self.observations[self.action_space*2 + 3*a: self.action_space*2 + 3*(a+1)] # take r,p,done of selected task
            
            speed = self.observations[2*i]
            self.observations[2*i+1] = current_time + p / speed #ready_time of the current mashine
            
            #print(f"m_{i} took task {a}")
            self.schedule[i].append(a)
            self.taskCounter += 1
            
        done = False
        reward = 0
        if self.taskCounter == self.observation_space:
            done = True
            reward = self.calculateCost()
            
        
        return self.observations, reward, done, ""
    
    def reset(self):
        # returns: observation: machines, tasks, current_time
        #                       [speed, ready_time]*k, [r, p, done]*N, current_time
        # shape: (1,2*k+3*N+1)
        self.taskCounter = 0
        self.machines = None
        self.tasks = None
        self.observations = None
        
        self.schedule = []
        for i in range(self.action_space):
            self.schedule.append([])
        
        timeCounter = 0
        tasks = []
        for _ in range(self.observation_space):
            p = np.random.randint(1, 100)
            r = np.random.randint(timeCounter - 5 if timeCounter - 5 > 0 else 0 ,timeCounter + 2)
            timeCounter += int(p/3)
            
            #ready, persistance, doneBoolean
            tasks.append([p,r,0])
        #random.shuffle(tasks) ///////////////////////////////////////////// to uncomment
        self.tasks = np.array(tasks)
        
        m0 = np.random.randint(30,100)
        m1 = np.random.randint(30,100)
        m2 = np.random.randint(30,35)
        m3 = np.random.randint(30,100)
        m4 = np.random.randint(99,100)
        machines = np.array([ [m0,0], [m1,0], [m2,0] ,[m3,0] ,[m4,0]], dtype="float")
        machines[:,0] =  machines[:,0] /max(machines[:,0])

        index = range(5)
        indicies = np.random.choice( index, 5, replace=False )
        self.machines = machines[indicies]

        mechineTasks = np.concatenate((machines,tasks), axis=None)
        self.observations = np.concatenate((mechineTasks, 0), axis=None)
        
        return self.observations
    
        
    def get_valid_tasks(self):
        readyToGo = np.ones(self.observation_space)
        timeCounter = self.observations[-1]
        for i,obs in enumerate(range(self.action_space*2, self.action_space*2+self.observation_space*3, 3)):
            p,r,done = self.observations[[obs,obs+1,obs+2]]
            if r <= timeCounter and done==0: # task is ready and not done
                readyToGo[i] = 0
                                 
        return readyToGo
                               
    
    def get_valid_machines(self):
        readyMachines = []
        timeCounter = self.observations[-1]
        for i in range(self.action_space):
            speed = self.observations[2*i]
            readyTime = self.observations[2*i+1]
            
            if readyTime <= timeCounter:
                readyMachines.append(i)
        return np.array(readyMachines)
    
    
    def _get_obs(self):
        return self.observations
    
    
    def preprocess_action(self, action):
        # input: actions in shape (n,k)
        #output: actions in shape (n), where each number means id of task to take od sleep for number== -1
        filteredAction = []
        validT = self.get_valid_tasks()
        mask = np.array((*validT, 0)) # 1 means invalid
        
        valid_machines = self.get_valid_machines()        
        for i, j in enumerate(range(0, self.action_space* (self.observation_space+1) , self.observation_space+1 )):            
            a = action[j: j+ (self.observation_space+1)]
            if i not in valid_machines:
                filteredAction.append(self.observation_space)
                continue
            mask_arr = ma.masked_array(a, mask=mask) 
            argmin = mask_arr.argmin()
            
            if argmin == self.observation_space:
                filteredAction.append(argmin)
                continue
            mask[argmin] = 1
            filteredAction.append(argmin)
        return np.array(filteredAction)
        
        
    def print_observations(self):
        for i in range(self.action_space):
            print(f"machine_{i} speed: {round(self.observations[2*i],2)} readyTime: {round(self.observations[2*i+1],2)}")
        for j in range(self.observation_space):
            print(f"task_{j+1} p: {self.observations[2*(i+1)+j*3]} r: {self.observations[2*(i+1)+j*3+1]} done: {self.observations[2*(i+1)+j*3+2]}")
        
        print(self.observations[-1])
    
    
    def sample(self):
        return self.preprocess_action(np.random.rand(self.action_space*(self.observation_space+1))) #+1 because of sleep behaviour
    
    def calculateCost(self):
        cTime = [0]*self.observation_space
        p,r=0,1
        speed,readyTime = 0,1
        calculatedWeight = 0
        timeCounter = 0
        
        for machine in self.machines:
            machine[readyTime] = 0
        
        for key, tasks in enumerate(self.schedule):
            #print(f"m: {key}")
            timeCounter = 0
            #print(f"machine: {key}, {self.machines[key]}")
            sortedTasks = sorted(tasks, key=lambda i: self.tasks[i][1]) # python is really good
            for task in sortedTasks:
                #print(f"task: {task}, {self.tasks[task]}")
                if self.machines[key][readyTime] > timeCounter or self.tasks[task][r] > timeCounter:
                    timeCounter = max(self.machines[key][readyTime],  self.tasks[task][r])
                #print(f"machines[{key}][readyTime]: {self.machines[key][readyTime]}")

                self.machines[key][readyTime] = timeCounter + (self.tasks[task][p] / float(self.machines[key][speed]) )

                #print(f"machines[{key}]['readyTime']: {self.machines[key][readyTime]}")

                cTime[task] = timeCounter + (self.tasks[task][p] / float(self.machines[key][speed]) ) - self.tasks[task][r]
                calculatedWeight += cTime[task]

                #print(f"cTime[task]: {cTime[task]}")
                #print(f"real p: {(self.tasks[task][p] / float(self.machines[key][speed]) )}")
                #print()
         
        #for key, value in enumerate(self.schedule):
        #    print(f"{key} -> {value}")
        #
        #print(calculatedWeight)
        return calculatedWeight / self.observation_space


In [212]:
for i,a in enumerate(range(0,255,51)):
    print(i,a)

0 0
1 51
2 102
3 153
4 204


In [213]:
x = ptszGenerator(5,50)
x.reset()


array([6.86868687e-01, 0.00000000e+00, 9.49494949e-01, 0.00000000e+00,
       3.13131313e-01, 0.00000000e+00, 7.77777778e-01, 0.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 5.90000000e+01, 0.00000000e+00,
       0.00000000e+00, 1.00000000e+01, 1.60000000e+01, 0.00000000e+00,
       3.00000000e+00, 2.30000000e+01, 0.00000000e+00, 1.30000000e+01,
       2.30000000e+01, 0.00000000e+00, 5.20000000e+01, 2.40000000e+01,
       0.00000000e+00, 3.30000000e+01, 4.20000000e+01, 0.00000000e+00,
       1.10000000e+01, 5.40000000e+01, 0.00000000e+00, 7.50000000e+01,
       5.30000000e+01, 0.00000000e+00, 7.50000000e+01, 8.10000000e+01,
       0.00000000e+00, 6.00000000e+00, 1.05000000e+02, 0.00000000e+00,
       8.30000000e+01, 1.06000000e+02, 0.00000000e+00, 4.70000000e+01,
       1.35000000e+02, 0.00000000e+00, 1.30000000e+01, 1.51000000e+02,
       0.00000000e+00, 3.20000000e+01, 1.53000000e+02, 0.00000000e+00,
       8.80000000e+01, 1.62000000e+02, 0.00000000e+00, 8.50000000e+01,
      

In [220]:
x.print_observations()

machine_0 speed: 0.64 readyTime: 0.0
machine_1 speed: 0.89 readyTime: 0.0
machine_2 speed: 0.34 readyTime: 0.0
machine_3 speed: 0.41 readyTime: 0.0
machine_4 speed: 1.0 readyTime: 0.0
task_1 p: 9.0 r: 1.0 done: 0.0
task_2 p: 92.0 r: 0.0 done: 0.0
task_3 p: 59.0 r: 33.0 done: 0.0
task_4 p: 40.0 r: 48.0 done: 0.0
task_5 p: 3.0 r: 66.0 done: 0.0
task_6 p: 2.0 r: 62.0 done: 0.0
task_7 p: 80.0 r: 61.0 done: 0.0
task_8 p: 60.0 r: 90.0 done: 0.0
task_9 p: 14.0 r: 110.0 done: 0.0
task_10 p: 70.0 r: 113.0 done: 0.0
task_11 p: 38.0 r: 134.0 done: 0.0
task_12 p: 32.0 r: 146.0 done: 0.0
task_13 p: 63.0 r: 158.0 done: 0.0
task_14 p: 28.0 r: 177.0 done: 0.0
task_15 p: 59.0 r: 190.0 done: 0.0
task_16 p: 83.0 r: 206.0 done: 0.0
task_17 p: 67.0 r: 232.0 done: 0.0
task_18 p: 37.0 r: 259.0 done: 0.0
task_19 p: 7.0 r: 271.0 done: 0.0
task_20 p: 84.0 r: 273.0 done: 0.0
task_21 p: 9.0 r: 301.0 done: 0.0
task_22 p: 41.0 r: 304.0 done: 0.0
task_23 p: 15.0 r: 313.0 done: 0.0
task_24 p: 91.0 r: 319.0 done: 0.0


In [215]:
o, r , done, _ = x.step(x.sample())
while not done:
    o, r , done, _ = x.step(x.sample())
print(r)

198.2916484891829


In [216]:
x.reset()

array([6.36363636e-01, 0.00000000e+00, 8.88888889e-01, 0.00000000e+00,
       3.43434343e-01, 0.00000000e+00, 4.14141414e-01, 0.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 9.00000000e+00, 1.00000000e+00,
       0.00000000e+00, 9.20000000e+01, 0.00000000e+00, 0.00000000e+00,
       5.90000000e+01, 3.30000000e+01, 0.00000000e+00, 4.00000000e+01,
       4.80000000e+01, 0.00000000e+00, 3.00000000e+00, 6.60000000e+01,
       0.00000000e+00, 2.00000000e+00, 6.20000000e+01, 0.00000000e+00,
       8.00000000e+01, 6.10000000e+01, 0.00000000e+00, 6.00000000e+01,
       9.00000000e+01, 0.00000000e+00, 1.40000000e+01, 1.10000000e+02,
       0.00000000e+00, 7.00000000e+01, 1.13000000e+02, 0.00000000e+00,
       3.80000000e+01, 1.34000000e+02, 0.00000000e+00, 3.20000000e+01,
       1.46000000e+02, 0.00000000e+00, 6.30000000e+01, 1.58000000e+02,
       0.00000000e+00, 2.80000000e+01, 1.77000000e+02, 0.00000000e+00,
       5.90000000e+01, 1.90000000e+02, 0.00000000e+00, 8.30000000e+01,
      

In [217]:
x.sample()

array([50, 50, 50,  1, 50])

In [218]:
x.observations[-1] = 1

In [219]:
x.sample()

array([ 1,  0, 50, 50, 50])

# QGN neural network model

In [223]:
class DQN(nn.Module):
    def __init__(self, input_shape, n_actions):
        super(DQN, self).__init__()

        # self.conv = nn.Sequential(
        #     nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
        #     nn.ReLU(),
        #     nn.Conv2d(32, 64, kernel_size=4, stride=2),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1),
        #     nn.ReLU()
        # )

        self.fc = nn.Sequential(
            nn.Linear(input_shape[0], 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, n_actions)
        )

    def forward(self, x):
        #conv_out = self.conv(x).view(x.size()[0], -1)
        return self.fc(x.float())

    # def __init__(self, input_shape, n_actions):
    #     super(DQN, self).__init__()

    #     self.conv = nn.Sequential(
    #         nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
    #         nn.ReLU(),
    #         nn.Conv2d(32, 64, kernel_size=4, stride=2),
    #         nn.ReLU(),
    #         nn.Conv2d(64, 64, kernel_size=3, stride=1),
    #         nn.ReLU()
    #     )

    #     conv_out_size = self._get_conv_out(input_shape)
    #     self.fc = nn.Sequential(
    #         nn.Linear(conv_out_size, 512),
    #         nn.ReLU(),
    #         nn.Linear(512, n_actions)
    #     )

    # def _get_conv_out(self, shape):
    #     o = self.conv(torch.zeros(1, *shape))
    #     return int(np.prod(o.size()))

    # def forward(self, x):
    #     conv_out = self.conv(x).view(x.size()[0], -1)
    #     return self.fc(conv_out)

# DQN

In [205]:
DEVICE = False #  True if'cuda'
DEFAULT_ENV_NAME = "TaskScheduler"

MEAN_REWARD_BOUND = 150

GAMMA = 0.99
BATCH_SIZE = 128
REPLAY_SIZE = 10000
LEARNING_RATE = 1e-4
SYNC_TARGET_FRAMES = 1000
REPLAY_START_SIZE = 1000

EPSILON_DECAY_LAST_FRAME = 100000
EPSILON_START = 1.0
EPSILON_FINAL = 0.01


Experience = collections.namedtuple(
    'Experience', field_names=['state', 'action', 'reward',
                               'done', 'new_state'])

In [206]:
class ExperienceBuffer:
    def __init__(self, capacity):
        self.buffer = collections.deque(maxlen=capacity)

    def __len__(self):
        return len(self.buffer)

    def append(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        indices = np.random.choice(len(self.buffer), batch_size,
                                   replace=False)
        states, actions, rewards, dones, next_states = \
            zip(*[self.buffer[idx] for idx in indices])
        return np.array(states), np.array(actions), \
               np.array(rewards, dtype=np.float32), \
               np.array(dones, dtype=np.uint8), \
               np.array(next_states)

In [207]:
class Agent:
    def __init__(self, env, exp_buffer):
        self.env = env
        self.exp_buffer = exp_buffer
        self._reset()

    def _reset(self):
        self.state = env.reset()
        self.total_reward = 0.0

    @torch.no_grad()
    def play_step(self, net, epsilon=0.0, device="cpu"):
        done_reward = None

        if np.random.random() < epsilon:
            action = env.sample()
        else:
            state_a = np.array([self.state], copy=False)
            state_v = torch.tensor(state_a).to(device)
            q_vals_v = net(state_v)
            action = env.preprocess_action(q_vals_v[0])
            
            #_, act_v = torch.max(q_vals_v, dim=1)
            #action = int(act_v.item())

        # do step in the environment
        new_state, reward, is_done, _ = self.env.step(action)
        self.total_reward += reward

        exp = Experience(self.state, action, reward,
                         is_done, new_state)
        self.exp_buffer.append(exp)
        self.state = new_state
        if is_done:
            done_reward = self.total_reward
            self._reset()
        return done_reward


In [208]:
def calc_loss(batch, net, tgt_net, device="cpu"):
    states, actions, rewards, dones, next_states = batch

    states_v = torch.tensor(np.array(
        states, copy=False)).to(device)
    next_states_v = torch.tensor(np.array(
        next_states, copy=False)).to(device)
    actions_v = torch.tensor(actions).to(device)
    rewards_v = torch.tensor(rewards).to(device)
    done_mask = torch.BoolTensor(dones).to(device)


    state_action_values = net(states_v).gather(
        1, actions_v).sum(1)
    
    with torch.no_grad():
        tgt_state_values = tgt_net(next_states_v)#.max(1)[0]
        for i, j in enumerate(range(0, MACHINES* (TASKS+1) , TASKS+1 )):    
            if i==0:
                next_state_values = tgt_state_values[:, j: j+(TASKS+1)].max(1)[0]
            else:
                next_state_values += tgt_state_values[:, j: j+(TASKS+1)].max(1)[0]
        #print(f"next_state_values= {next_state_values}")
        next_state_values[done_mask] = 0.0
        next_state_values = next_state_values.detach()

    expected_state_action_values = next_state_values * GAMMA + \
                                   rewards_v
    return nn.MSELoss()(state_action_values,
                        expected_state_action_values)

In [None]:
MACHINES = 5
TASKS = 50
action_space = MACHINES* (TASKS+1)
observation_space = (MACHINES * 2 + TASKS * 3 + 1,)

device = torch.device("cuda" if DEVICE else "cpu")

env = ptszGenerator(MACHINES,TASKS)

net = DQN(observation_space,
                    action_space).to(device) # TODO: take it from the env
tgt_net = DQN(observation_space,
                        action_space).to(device)
writer = SummaryWriter(comment="-" + DEFAULT_ENV_NAME)
print(net)

buffer = ExperienceBuffer(REPLAY_SIZE)
agent = Agent(env, buffer)
epsilon = EPSILON_START

optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)
total_rewards = []
frame_idx = 0
ts_frame = 0
ts = time.time()
best_m_reward = None

while True:
    frame_idx += 1
    epsilon = max(EPSILON_FINAL, EPSILON_START -
                  frame_idx / EPSILON_DECAY_LAST_FRAME)

    reward = agent.play_step(net, epsilon, device=device)
    if reward is not None:
        total_rewards.append(reward)
        speed = (frame_idx - ts_frame) / (time.time() - ts)
        ts_frame = frame_idx
        ts = time.time()
        m_reward = np.mean(total_rewards[-100:])
        print("%d: done %d games, reward %.3f, "
              "eps %.2f, speed %.2f f/s" % (
            frame_idx, len(total_rewards), m_reward, epsilon,
            speed
        ))
        writer.add_scalar("epsilon", epsilon, frame_idx)
        writer.add_scalar("speed", speed, frame_idx)
        writer.add_scalar("reward_100", m_reward, frame_idx)
        writer.add_scalar("reward", reward, frame_idx)
        if best_m_reward is None or best_m_reward > m_reward:
            torch.save(net.state_dict(), DEFAULT_ENV_NAME +
                       "-best_%.0f.dat" % m_reward)
            if best_m_reward is not None:
                print("Best reward updated %.3f -> %.3f" % (
                    best_m_reward, m_reward))
            best_m_reward = m_reward
        if m_reward < MEAN_REWARD_BOUND:
            print("Solved in %d frames!" % frame_idx)
            break

    if len(buffer) < REPLAY_START_SIZE:
        continue

    if frame_idx % SYNC_TARGET_FRAMES == 0:
        tgt_net.load_state_dict(net.state_dict())

    optimizer.zero_grad()
    batch = buffer.sample(BATCH_SIZE)
    states, actions, rewards, dones, next_states = batch
    loss_t = calc_loss(batch, net, tgt_net, device=device)
    loss_t.backward()
    optimizer.step()
writer.close()

DQN(
  (fc): Sequential(
    (0): Linear(in_features=161, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=255, bias=True)
  )
)
88: done 1 games, reward 184.073, eps 1.00, speed 1234.34 f/s
168: done 2 games, reward 247.674, eps 1.00, speed 1364.71 f/s
265: done 3 games, reward 352.808, eps 1.00, speed 1622.37 f/s
343: done 4 games, reward 285.954, eps 1.00, speed 2324.66 f/s
425: done 5 games, reward 272.259, eps 1.00, speed 2293.16 f/s
514: done 6 games, reward 268.287, eps 0.99, speed 2529.45 f/s
606: done 7 games, reward 302.661, eps 0.99, speed 2869.03 f/s
694: done 8 games, reward 304.955, eps 0.99, speed 2921.40 f/s
787: done 9 games, reward 323.106, eps 0.99, speed 3045.50 f/s
869: done 10 games, reward 327.571, eps 0.99, speed 2349.57 f/s
952: done 11 games, reward 327.173, eps 0.99, speed 2529.44 f/s
1034: done 12 games, reward 332.066, eps 0.99, speed 239.

10803: done 127 games, reward 281.891, eps 0.89, speed 87.75 f/s
10889: done 128 games, reward 280.854, eps 0.89, speed 82.23 f/s
10981: done 129 games, reward 279.646, eps 0.89, speed 79.17 f/s
11059: done 130 games, reward 276.429, eps 0.89, speed 86.04 f/s
11152: done 131 games, reward 276.856, eps 0.89, speed 85.34 f/s
11245: done 132 games, reward 279.467, eps 0.89, speed 87.19 f/s
11334: done 133 games, reward 278.916, eps 0.89, speed 84.94 f/s
11417: done 134 games, reward 276.427, eps 0.89, speed 82.99 f/s
11509: done 135 games, reward 275.406, eps 0.88, speed 84.23 f/s
11595: done 136 games, reward 276.669, eps 0.88, speed 83.17 f/s
11681: done 137 games, reward 278.594, eps 0.88, speed 88.69 f/s
11767: done 138 games, reward 276.597, eps 0.88, speed 84.09 f/s
11848: done 139 games, reward 273.421, eps 0.88, speed 81.73 f/s
11931: done 140 games, reward 272.567, eps 0.88, speed 87.27 f/s
12020: done 141 games, reward 270.348, eps 0.88, speed 82.92 f/s
12096: done 142 games, re

21621: done 253 games, reward 268.100, eps 0.78, speed 82.73 f/s
21705: done 254 games, reward 268.900, eps 0.78, speed 83.71 f/s
21782: done 255 games, reward 269.405, eps 0.78, speed 72.23 f/s
21862: done 256 games, reward 269.153, eps 0.78, speed 82.07 f/s
21953: done 257 games, reward 272.799, eps 0.78, speed 84.44 f/s
22032: done 258 games, reward 272.215, eps 0.78, speed 84.86 f/s
22124: done 259 games, reward 273.406, eps 0.78, speed 82.49 f/s
22216: done 260 games, reward 271.356, eps 0.78, speed 82.63 f/s
22302: done 261 games, reward 272.677, eps 0.78, speed 85.64 f/s
22392: done 262 games, reward 273.222, eps 0.78, speed 86.40 f/s
22471: done 263 games, reward 273.334, eps 0.78, speed 84.70 f/s
22552: done 264 games, reward 275.746, eps 0.77, speed 85.66 f/s
22634: done 265 games, reward 276.455, eps 0.77, speed 81.35 f/s
22718: done 266 games, reward 278.648, eps 0.77, speed 84.29 f/s
22804: done 267 games, reward 274.936, eps 0.77, speed 77.51 f/s
22884: done 268 games, re

32443: done 380 games, reward 249.374, eps 0.68, speed 90.71 f/s
32526: done 381 games, reward 248.947, eps 0.67, speed 81.71 f/s
32607: done 382 games, reward 249.895, eps 0.67, speed 94.43 f/s
32694: done 383 games, reward 250.637, eps 0.67, speed 96.98 f/s
32782: done 384 games, reward 250.299, eps 0.67, speed 80.30 f/s
32866: done 385 games, reward 249.914, eps 0.67, speed 91.34 f/s
32956: done 386 games, reward 252.309, eps 0.67, speed 101.25 f/s
33032: done 387 games, reward 248.435, eps 0.67, speed 96.56 f/s
33107: done 388 games, reward 247.838, eps 0.67, speed 94.89 f/s
33195: done 389 games, reward 247.749, eps 0.67, speed 88.79 f/s
33283: done 390 games, reward 249.051, eps 0.67, speed 85.30 f/s
33374: done 391 games, reward 248.834, eps 0.67, speed 97.01 f/s
33461: done 392 games, reward 249.194, eps 0.67, speed 83.66 f/s
33549: done 393 games, reward 247.364, eps 0.66, speed 88.27 f/s
33645: done 394 games, reward 250.875, eps 0.66, speed 98.38 f/s
33724: done 395 games, r

43446: done 507 games, reward 242.796, eps 0.57, speed 99.37 f/s
43527: done 508 games, reward 241.058, eps 0.56, speed 85.04 f/s
43616: done 509 games, reward 240.927, eps 0.56, speed 93.43 f/s
43696: done 510 games, reward 240.473, eps 0.56, speed 95.46 f/s
43789: done 511 games, reward 241.689, eps 0.56, speed 73.16 f/s
43885: done 512 games, reward 242.567, eps 0.56, speed 90.91 f/s
43974: done 513 games, reward 240.307, eps 0.56, speed 72.09 f/s
44050: done 514 games, reward 237.859, eps 0.56, speed 73.03 f/s
44127: done 515 games, reward 237.116, eps 0.56, speed 90.96 f/s
44210: done 516 games, reward 236.151, eps 0.56, speed 89.81 f/s
44298: done 517 games, reward 236.236, eps 0.56, speed 83.85 f/s
44378: done 518 games, reward 237.115, eps 0.56, speed 88.20 f/s
44462: done 519 games, reward 238.696, eps 0.56, speed 94.23 f/s
44551: done 520 games, reward 238.971, eps 0.55, speed 88.84 f/s
44641: done 521 games, reward 239.097, eps 0.55, speed 87.30 f/s
44727: done 522 games, re

54348: done 633 games, reward 282.395, eps 0.46, speed 90.43 f/s
54440: done 634 games, reward 283.761, eps 0.46, speed 93.72 f/s
54526: done 635 games, reward 283.482, eps 0.45, speed 93.21 f/s
54613: done 636 games, reward 283.220, eps 0.45, speed 93.19 f/s
54696: done 637 games, reward 283.328, eps 0.45, speed 84.10 f/s
54784: done 638 games, reward 283.928, eps 0.45, speed 73.87 f/s
54871: done 639 games, reward 284.949, eps 0.45, speed 89.03 f/s
54962: done 640 games, reward 287.636, eps 0.45, speed 86.85 f/s
55045: done 641 games, reward 285.379, eps 0.45, speed 81.74 f/s
55126: done 642 games, reward 284.943, eps 0.45, speed 89.01 f/s
55210: done 643 games, reward 283.550, eps 0.45, speed 82.56 f/s
55298: done 644 games, reward 284.349, eps 0.45, speed 90.63 f/s
55375: done 645 games, reward 279.750, eps 0.45, speed 92.94 f/s
55457: done 646 games, reward 279.675, eps 0.45, speed 83.11 f/s
55542: done 647 games, reward 280.638, eps 0.44, speed 92.07 f/s
55633: done 648 games, re

65086: done 756 games, reward 255.346, eps 0.35, speed 96.14 f/s
65170: done 757 games, reward 254.639, eps 0.35, speed 85.88 f/s
65265: done 758 games, reward 256.077, eps 0.35, speed 85.47 f/s
65353: done 759 games, reward 255.446, eps 0.35, speed 79.73 f/s
65439: done 760 games, reward 256.306, eps 0.35, speed 79.04 f/s
65523: done 761 games, reward 256.232, eps 0.34, speed 84.15 f/s
65610: done 762 games, reward 256.092, eps 0.34, speed 85.92 f/s
65712: done 763 games, reward 254.511, eps 0.34, speed 89.91 f/s
65800: done 764 games, reward 254.415, eps 0.34, speed 85.33 f/s
65886: done 765 games, reward 253.960, eps 0.34, speed 88.74 f/s
65973: done 766 games, reward 253.183, eps 0.34, speed 92.11 f/s
66052: done 767 games, reward 254.488, eps 0.34, speed 95.50 f/s
66135: done 768 games, reward 255.598, eps 0.34, speed 84.09 f/s
66217: done 769 games, reward 254.732, eps 0.34, speed 85.81 f/s
except
66311: done 770 games, reward 256.380, eps 0.34, speed 90.55 f/s
66389: done 771 ga

76062: done 879 games, reward 267.849, eps 0.24, speed 80.04 f/s
76144: done 880 games, reward 267.699, eps 0.24, speed 76.52 f/s
76229: done 881 games, reward 266.954, eps 0.24, speed 73.66 f/s
76316: done 882 games, reward 264.280, eps 0.24, speed 77.28 f/s
76408: done 883 games, reward 266.518, eps 0.24, speed 79.41 f/s
76496: done 884 games, reward 267.306, eps 0.24, speed 86.66 f/s
except
except
except
except
except
except
except
except
76593: done 885 games, reward 267.575, eps 0.23, speed 85.80 f/s
76679: done 886 games, reward 266.616, eps 0.23, speed 78.79 f/s
except
except
76777: done 887 games, reward 265.443, eps 0.23, speed 80.06 f/s
76867: done 888 games, reward 264.837, eps 0.23, speed 84.66 f/s
76948: done 889 games, reward 264.131, eps 0.23, speed 83.75 f/s
77035: done 890 games, reward 267.945, eps 0.23, speed 82.85 f/s
77121: done 891 games, reward 266.984, eps 0.23, speed 85.51 f/s
77217: done 892 games, reward 267.535, eps 0.23, speed 77.97 f/s
77301: done 893 game

86969: done 999 games, reward 274.081, eps 0.13, speed 76.11 f/s
87064: done 1000 games, reward 273.615, eps 0.13, speed 78.68 f/s
87154: done 1001 games, reward 273.561, eps 0.13, speed 75.68 f/s
except
87249: done 1002 games, reward 272.936, eps 0.13, speed 70.98 f/s
87340: done 1003 games, reward 274.506, eps 0.13, speed 74.72 f/s
87431: done 1004 games, reward 276.201, eps 0.13, speed 76.68 f/s
87523: done 1005 games, reward 276.120, eps 0.12, speed 74.08 f/s
87615: done 1006 games, reward 277.639, eps 0.12, speed 68.68 f/s
87702: done 1007 games, reward 277.818, eps 0.12, speed 70.55 f/s
87803: done 1008 games, reward 276.918, eps 0.12, speed 78.89 f/s
87893: done 1009 games, reward 275.316, eps 0.12, speed 79.00 f/s
87979: done 1010 games, reward 275.568, eps 0.12, speed 71.26 f/s
except
except
except
except
except
except
except
except
except
88084: done 1011 games, reward 275.417, eps 0.12, speed 68.50 f/s
88181: done 1012 games, reward 276.894, eps 0.12, speed 74.06 f/s
except


96598: done 1097 games, reward 304.364, eps 0.03, speed 67.83 f/s
96692: done 1098 games, reward 304.374, eps 0.03, speed 64.18 f/s
96789: done 1099 games, reward 305.778, eps 0.03, speed 67.81 f/s
96886: done 1100 games, reward 307.133, eps 0.03, speed 66.44 f/s
96983: done 1101 games, reward 304.508, eps 0.03, speed 67.70 f/s
97080: done 1102 games, reward 306.657, eps 0.03, speed 67.93 f/s
except
except
except
except
except
except
except
except
except
except
except
97188: done 1103 games, reward 307.051, eps 0.03, speed 67.72 f/s
97280: done 1104 games, reward 308.549, eps 0.03, speed 64.74 f/s
97372: done 1105 games, reward 309.285, eps 0.03, speed 66.93 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
97502: done 1106 games, reward 307.501, eps 0.02, speed 66.85 f/s
except
97599: done 1107 games, r

except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
101107: done 1135 games, reward 334.805, eps 0.01, speed 58.06 f/s
101199: done 1136 games, reward 335.956, eps 0.01, speed 59.70 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
excep

105362: done 1169 games, reward 348.939, eps 0.01, speed 52.06 f/s
105458: done 1170 games, reward 348.407, eps 0.01, speed 51.98 f/s
105560: done 1171 games, reward 346.383, eps 0.01, speed 52.05 f/s
105698: done 1172 games, reward 347.782, eps 0.01, speed 50.07 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
105807: done 1173 games, reward 347.994, eps 0.01, speed 52.17 f/s
105967: done 1174 games, reward 349.249, eps 0.01, speed 50.10 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
exc

112145: done 1221 games, reward 345.574, eps 0.01, speed 49.52 f/s
112233: done 1222 games, reward 346.690, eps 0.01, speed 49.41 f/s
except
except
except
except
except
except
except
except
except
112338: done 1223 games, reward 345.168, eps 0.01, speed 49.19 f/s
except
except
except
except
except
except
except
except
except
112439: done 1224 games, reward 343.622, eps 0.01, speed 48.73 f/s
112740: done 1225 games, reward 343.198, eps 0.01, speed 47.46 f/s
112834: done 1226 games, reward 342.570, eps 0.01, speed 50.17 f/s
112926: done 1227 games, reward 339.607, eps 0.01, speed 50.10 f/s
113029: done 1228 games, reward 338.737, eps 0.01, speed 49.03 f/s
except
except
except
except
except
except
except
except
except
except
except
except
113135: done 1229 games, reward 336.722, eps 0.01, speed 49.36 f/s
113228: done 1230 games, reward 335.057, eps 0.01, speed 48.74 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
e

118319: done 1268 games, reward 314.436, eps 0.01, speed 46.24 f/s
118413: done 1269 games, reward 311.957, eps 0.01, speed 46.27 f/s
118511: done 1270 games, reward 313.405, eps 0.01, speed 45.57 f/s
118605: done 1271 games, reward 314.559, eps 0.01, speed 46.33 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
118757: done 1272 games, reward 314.771, eps 0.01, speed 42.70 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
118871: done 1273 games, reward 318.538, eps 0.01, speed 42.53 f/s
118963: done 1274 games, reward 317.385, eps 0.01, speed 47.33 f/s

except
except
124167: done 1309 games, reward 318.011, eps 0.01, speed 45.07 f/s
124369: done 1310 games, reward 316.943, eps 0.01, speed 46.25 f/s
124681: done 1311 games, reward 314.196, eps 0.01, speed 45.72 f/s
125004: done 1312 games, reward 316.184, eps 0.01, speed 43.90 f/s
except
except
except
except
except
except
except
except
except
except
except
125113: done 1313 games, reward 312.172, eps 0.01, speed 43.91 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
125248: done 1314 games, reward 314.917, eps 0.01, speed 44.70 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
125359: done 1315 games, reward 317.427, eps 0.01, speed 43.89 f/s
except
except
except
except
except
except
except

132609: done 1357 games, reward 369.736, eps 0.01, speed 39.12 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
132732: done 1358 games, reward 368.559, eps 0.01, speed 45.12 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
132847: done 1359 games, reward 369.574, eps 0.01, speed 44.92 f/s
133050: done 1360 games, reward 364.702, eps 0.01, speed 44.73 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
133165: done 1361 games, reward 366.367, eps 0.01, speed 43.99 f/s
except
except
except
except
except
except
except
except
except
except
except
except
133272: done 1362 games, reward 366.247, eps 0.01

except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
138833: done 1400 games, reward 360.110, eps 0.01, speed 47.48 f/s
138934: done 1401 games, reward 361.669, eps 0.01, speed 49.62 f/s
except
except
except
except
except
except
except
except
139036: done 1402 games, reward 365.568, eps 0.01, speed 48.41 f/s
139197: done 1403 games, reward 365.412, eps 0.01, speed 48.54 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
exce

147833: done 1488 games, reward 320.067, eps 0.01, speed 47.18 f/s
147927: done 1489 games, reward 318.193, eps 0.01, speed 46.52 f/s
148016: done 1490 games, reward 315.526, eps 0.01, speed 46.62 f/s
148108: done 1491 games, reward 316.396, eps 0.01, speed 44.51 f/s
148194: done 1492 games, reward 320.049, eps 0.01, speed 47.63 f/s
148288: done 1493 games, reward 316.944, eps 0.01, speed 46.80 f/s
148382: done 1494 games, reward 316.515, eps 0.01, speed 47.02 f/s
148470: done 1495 games, reward 315.546, eps 0.01, speed 47.97 f/s
148565: done 1496 games, reward 310.352, eps 0.01, speed 45.22 f/s
148657: done 1497 games, reward 306.149, eps 0.01, speed 47.71 f/s
148750: done 1498 games, reward 305.732, eps 0.01, speed 47.05 f/s
148845: done 1499 games, reward 306.416, eps 0.01, speed 46.95 f/s
148937: done 1500 games, reward 309.130, eps 0.01, speed 47.47 f/s
149025: done 1501 games, reward 308.883, eps 0.01, speed 45.47 f/s
149121: done 1502 games, reward 304.477, eps 0.01, speed 47.04

156253: done 1555 games, reward 302.689, eps 0.01, speed 40.49 f/s
156347: done 1556 games, reward 304.843, eps 0.01, speed 44.23 f/s
156439: done 1557 games, reward 309.171, eps 0.01, speed 44.88 f/s
156531: done 1558 games, reward 311.498, eps 0.01, speed 46.77 f/s
156628: done 1559 games, reward 308.480, eps 0.01, speed 44.51 f/s
156725: done 1560 games, reward 308.802, eps 0.01, speed 44.96 f/s
156818: done 1561 games, reward 308.040, eps 0.01, speed 47.14 f/s
156911: done 1562 games, reward 309.471, eps 0.01, speed 44.39 f/s
157003: done 1563 games, reward 310.253, eps 0.01, speed 45.99 f/s
except
except
except
except
except
except
except
except
except
except
157108: done 1564 games, reward 311.594, eps 0.01, speed 45.27 f/s
157195: done 1565 games, reward 312.823, eps 0.01, speed 46.00 f/s
157288: done 1566 games, reward 317.071, eps 0.01, speed 44.22 f/s
157383: done 1567 games, reward 317.595, eps 0.01, speed 46.36 f/s
157476: done 1568 games, reward 319.356, eps 0.01, speed 46

except
except
except
except
163877: done 1625 games, reward 306.911, eps 0.01, speed 45.33 f/s
163970: done 1626 games, reward 306.030, eps 0.01, speed 45.75 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
164176: done 1627 games, reward 3

except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
169624: done 1671 games, reward 318.052, eps 0.01, speed 43.89 f/s
169720: done 1672 games, reward 319.198, eps 0.01, speed 44.04 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
169846: done 1673 games, reward 319.387, eps 0.01, speed 45.09 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
170046: done 1674 games, reward 315.885, eps 0.01, speed 43.75 f/s
170315: done 1675 games, reward 317.658, eps 0.01, speed 44.12 f/s
except
except
except
except
except
except
except
except
170417: done 1676 games, reward 319.598, eps 0.01, speed 45.12 f/s
except
except
except
except
except
exc

except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
173257: done 1693 games, reward 307.723, eps 0.01, speed 41.83 f/s
except
except
173352: done 1694 games, reward 307.574, eps 0.01, speed 41.16 f/s
173450: done 1695 games, reward 313.214, eps 0.01, speed 43.98 f/s
173541: done 1696 games, reward 311.173, eps 0.01, speed 44.79 f/s
173633: done 1697 games, reward 312.728, eps 0.01, speed 44.55 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except


except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
178650: done 1742 games, reward 331.141, eps 0.01, speed 38.75 f/s
178747: done 1743 games, reward 328.778, eps 0.01, speed 41.97 f/s
178836: done 1744 games, reward 325.800, eps 0.01, speed 42.22 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
179006: done 1745 games, reward 322.644, eps 0.01, speed 41.85 f/s
179100: done 1746 games, 

except
except
except
except
except
except
except
except
187071: done 1813 games, reward 328.706, eps 0.01, speed 42.12 f/s
187164: done 1814 games, reward 326.711, eps 0.01, speed 42.10 f/s
187259: done 1815 games, reward 326.602, eps 0.01, speed 39.42 f/s
187358: done 1816 games, reward 324.890, eps 0.01, speed 40.99 f/s
187446: done 1817 games, reward 322.305, eps 0.01, speed 42.39 f/s
187544: done 1818 games, reward 329.470, eps 0.01, speed 42.09 f/s
187638: done 1819 games, reward 329.166, eps 0.01, speed 42.28 f/s
187738: done 1820 games, reward 328.052, eps 0.01, speed 42.46 f/s
187888: done 1821 games, reward 322.793, eps 0.01, speed 41.74 f/s
188014: done 1822 games, reward 322.717, eps 0.01, speed 42.19 f/s
except
except
except
except
except
188108: done 1823 games, reward 323.245, eps 0.01, speed 42.81 f/s
188197: done 1824 games, reward 324.092, eps 0.01, speed 43.20 f/s
188294: done 1825 games, reward 324.419, eps 0.01, speed 42.28 f/s
188450: done 1826 games, reward 321.81

except
except
except
except
except
196147: done 1886 games, reward 301.684, eps 0.01, speed 41.96 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
196262: done 1887 games, reward 301.241, eps 0.01, speed 42.21 f/s
196514: done 1888 games, reward 303.504, eps 0.01, speed 41.97 f/s
196606: done 1889 games, reward 304.874, eps 0.01, speed 41.68 f/s
196696: done 1890 games, reward 301.676, eps 0.01, speed 43.47 f/s
except
except
except
except
except
except
except
except
except
196797: done 1891 games, reward 305.092, eps 0.01, speed 41.89 f/s
except
except
except
except
except
except
except
except
except
except
except
except
196902: done 1892 games, reward 302.476, eps 0.01, speed 41.99 f/s
197407: done 1893 games, reward 301.809, eps 0.01, speed 41.27 f/s
except
except
except
except
except
except
197505: done 1894 games, reward 299.044, eps 0.01, speed 42.23 f/s
197596: done 1895 games, reward 2

208166: done 1990 games, reward 343.780, eps 0.01, speed 41.38 f/s
except
except
208267: done 1991 games, reward 339.313, eps 0.01, speed 39.56 f/s
208363: done 1992 games, reward 337.622, eps 0.01, speed 41.19 f/s
208461: done 1993 games, reward 339.475, eps 0.01, speed 37.40 f/s
208554: done 1994 games, reward 342.671, eps 0.01, speed 42.66 f/s
208649: done 1995 games, reward 343.602, eps 0.01, speed 42.00 f/s
208743: done 1996 games, reward 344.091, eps 0.01, speed 42.05 f/s
208836: done 1997 games, reward 348.358, eps 0.01, speed 40.59 f/s
208929: done 1998 games, reward 347.808, eps 0.01, speed 41.02 f/s
209014: done 1999 games, reward 347.880, eps 0.01, speed 41.13 f/s
209109: done 2000 games, reward 344.974, eps 0.01, speed 41.90 f/s
209202: done 2001 games, reward 344.612, eps 0.01, speed 40.14 f/s
209295: done 2002 games, reward 345.246, eps 0.01, speed 41.69 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
exc

except
except
except
except
217620: done 2082 games, reward 321.511, eps 0.01, speed 40.66 f/s
217712: done 2083 games, reward 320.640, eps 0.01, speed 40.06 f/s
except
except
except
except
except
except
except
except
except
217811: done 2084 games, reward 320.361, eps 0.01, speed 40.77 f/s
217908: done 2085 games, reward 320.611, eps 0.01, speed 40.74 f/s
218017: done 2086 games, reward 319.011, eps 0.01, speed 40.44 f/s
218134: done 2087 games, reward 317.952, eps 0.01, speed 40.59 f/s
218229: done 2088 games, reward 312.017, eps 0.01, speed 37.94 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
218340: done 2089 games, reward 314.559, eps 0.01, speed 40.81 f/s
except
except
except
except
except
except
except
except
218442: done 2090 games, reward 314.428, eps 0.01, speed 41.30 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
excep

222631: done 2121 games, reward 320.366, eps 0.01, speed 41.01 f/s
222959: done 2122 games, reward 320.661, eps 0.01, speed 40.57 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
223077: done 2123 games, reward 321.435, eps 0.01, speed 37.82 f/s
223338: done 2124 games, reward 323.998, eps 0.01, speed 40.29 f/s
223429: done 2125 games, reward 324.386, eps 0.01, speed 40.39 f/s
223578: done 2126 games, reward 324.001, eps 0.01, speed 40.35 f/s
223670: done 2127 games, reward 324.879, eps 0.01, speed 40.08 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
223790: done 2128 games, reward 323.323, eps 0.01, speed 40.91 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
ex

except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except

233369: done 2198 games, reward 312.786, eps 0.01, speed 40.07 f/s
except
except
except
except
except
except
except
except
except
except
except
233468: done 2199 games, reward 313.693, eps 0.01, speed 39.95 f/s
233591: done 2200 games, reward 311.167, eps 0.01, speed 38.64 f/s
233784: done 2201 games, reward 309.918, eps 0.01, speed 38.40 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
233902: done 2202 games, reward 310.867, eps 0.01, speed 40.10 f/s
234005: done 2203 games, reward 305.365, eps 0.01, speed 39.65 f/s
234098: done 2204 games, reward 307.387, eps 0.01, speed 39.86 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
234205: done 2205 games, reward 303.130, eps 0.01, speed 39.53 f/s
234309: done 2206 games, reward 302.797, eps 0.01, speed 38.67 f/s
234404: done 2207 games, r

240055: done 2244 games, reward 300.223, eps 0.01, speed 40.00 f/s
240199: done 2245 games, reward 299.943, eps 0.01, speed 39.96 f/s
except
except
except
except
except
except
except
240300: done 2246 games, reward 301.600, eps 0.01, speed 40.39 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
240411: done 2247 games, reward 303.963, eps 0.01, speed 40.05 f/s
240831: done 2248 games, reward 303.928, eps 0.01, speed 38.74 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except


except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
249259: done 2308 games, reward 318.632, eps 0.01, speed 39.11 f/s
except
249356: done 2309 games, reward 320.048, eps 0.01, speed 40.11 f/s
249466: done 2310 games, reward 321.692, eps 0.01, speed 40.50 f/s
249585: done 2311 games, reward 321.667, eps 0.01, speed 39.24 f/s
249766: done 2312 games, reward 324.954, eps 0.01, speed 38.96 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
249886: done 2313 games, reward 328.616, eps 0.01, speed 39.68 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
250003: done 2314 games, reward 328.812, eps 0.01, speed 38.70 f/s
except
except
250097: done 2315 ga

261313: done 2395 games, reward 327.237, eps 0.01, speed 39.73 f/s
261405: done 2396 games, reward 328.116, eps 0.01, speed 39.59 f/s
261501: done 2397 games, reward 332.705, eps 0.01, speed 38.23 f/s
261596: done 2398 games, reward 332.382, eps 0.01, speed 38.95 f/s
261841: done 2399 games, reward 332.733, eps 0.01, speed 38.94 f/s
261933: done 2400 games, reward 330.880, eps 0.01, speed 40.11 f/s
262024: done 2401 games, reward 330.057, eps 0.01, speed 36.22 f/s
except
262157: done 2402 games, reward 331.395, eps 0.01, speed 39.33 f/s
except
except
except
except
except
except
except
except
except
262258: done 2403 games, reward 329.966, eps 0.01, speed 38.81 f/s
262389: done 2404 games, reward 333.977, eps 0.01, speed 39.05 f/s
except
except
except
except
except
except
except
except
except
except
262487: done 2405 games, reward 333.875, eps 0.01, speed 37.91 f/s
except
except
except
except
except
except
except
except
except
262581: done 2406 games, reward 333.072, eps 0.01, speed 38.

except
272327: done 2500 games, reward 281.827, eps 0.01, speed 33.52 f/s
except
except
272423: done 2501 games, reward 282.005, eps 0.01, speed 31.08 f/s
272512: done 2502 games, reward 279.925, eps 0.01, speed 37.33 f/s
272677: done 2503 games, reward 281.765, eps 0.01, speed 38.79 f/s
272899: done 2504 games, reward 279.493, eps 0.01, speed 38.69 f/s
273013: done 2505 games, reward 279.021, eps 0.01, speed 38.11 f/s
273470: done 2506 games, reward 282.285, eps 0.01, speed 38.62 f/s
except
except
except
except
except
273568: done 2507 games, reward 280.111, eps 0.01, speed 37.20 f/s
273658: done 2508 games, reward 279.638, eps 0.01, speed 38.76 f/s
273752: done 2509 games, reward 280.879, eps 0.01, speed 38.82 f/s
273904: done 2510 games, reward 280.585, eps 0.01, speed 38.61 f/s
except
except
except
except
except
except
except
except
except
except
274006: done 2511 games, reward 280.346, eps 0.01, speed 39.10 f/s
274282: done 2512 games, reward 282.608, eps 0.01, speed 38.55 f/s
274

285827: done 2584 games, reward 274.427, eps 0.01, speed 30.51 f/s
285993: done 2585 games, reward 273.353, eps 0.01, speed 38.86 f/s
286112: done 2586 games, reward 274.011, eps 0.01, speed 33.03 f/s
except
except
except
except
except
except
except
except
except
except
except
286216: done 2587 games, reward 275.372, eps 0.01, speed 35.21 f/s
except
286310: done 2588 games, reward 279.551, eps 0.01, speed 37.42 f/s
286437: done 2589 games, reward 283.861, eps 0.01, speed 37.20 f/s
286643: done 2590 games, reward 283.898, eps 0.01, speed 37.20 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
286761: done 2591 games, reward 284.026, eps 0.01, speed 28.07 f/s
286857: done 2592 games, reward 284.074, eps 0.01, speed 37.89 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
excep

except
except
except
except
except
except
except
except
except
296232: done 2660 games, reward 277.239, eps 0.01, speed 37.66 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
296342: done 2661 games, reward 278.235, eps 0.01, speed 36.64 f/s
296655: done 2662 games, reward 278.813, eps 0.01, speed 38.59 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
296771: done 2663 games, reward 277.967, eps 0.01, speed 37.04 f/s
except
except
except
except
except
except
296862: done 2664 games, reward 277.303, eps 0.01, speed 38.58 f/s
297017: done 2665 games, reward 277.064, eps 0.01, speed 33.25 f/s
297110: done 2666 games, reward 276.684, eps 0.01, speed 27.55 f/s
297243: done 2667 games, reward 275.604, eps 0.01, speed 37.13 f/s
297335: done 2668 games, reward 274.986, eps 0.01, 

except
306591: done 2749 games, reward 270.467, eps 0.01, speed 38.54 f/s
306683: done 2750 games, reward 270.933, eps 0.01, speed 39.28 f/s
307014: done 2751 games, reward 270.570, eps 0.01, speed 39.96 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
307120: done 2752 games, reward 271.726, eps 0.01, speed 39.87 f/s
307308: done 2753 games, reward 270.344, eps 0.01, speed 39.33 f/s
307396: done 2754 games, reward 270.239, eps 0.01, speed 38.55 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
307532: done 2755 games, reward 269.333, eps 0.01, speed 39.76 f/s
307648: done 2756 games, reward 273.222, eps 0.01, speed 40.03

313786: done 2804 games, reward 296.906, eps 0.01, speed 35.63 f/s
313879: done 2805 games, reward 297.628, eps 0.01, speed 35.80 f/s
313975: done 2806 games, reward 298.622, eps 0.01, speed 35.02 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
314116: done 2807 games, reward 299.073, eps 0.01, speed 37.70 f/s
314213: done 2808 games, reward 298.132, eps 0.01, speed 34.07 f/s
except
except
except
314309: done 2809 games, reward 298.004, eps 0.01, speed 27.73 f/s
314400: done 2810 games, reward 298.280, eps 0.01, speed 32.98 f/s
314493: done 2811 games, reward 301.820, eps 0.01, speed 37.88 f/s
314589: done 2812 games, reward 302.885, eps 0.01, speed 35.92 f/s
314680: done 2813 games, reward 301.753,

except
except
except
321037: done 2862 games, reward 296.855, eps 0.01, speed 30.63 f/s
321379: done 2863 games, reward 295.945, eps 0.01, speed 33.31 f/s
321472: done 2864 games, reward 297.312, eps 0.01, speed 35.89 f/s
except
321563: done 2865 games, reward 297.824, eps 0.01, speed 37.11 f/s
except
except
except
except
except
except
except
except
except
except
321657: done 2866 games, reward 296.548, eps 0.01, speed 36.59 f/s
321755: done 2867 games, reward 298.585, eps 0.01, speed 38.82 f/s
321935: done 2868 games, reward 299.401, eps 0.01, speed 38.36 f/s
322090: done 2869 games, reward 297.706, eps 0.01, speed 37.40 f/s
322230: done 2870 games, reward 298.801, eps 0.01, speed 38.79 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
322344: done 2871 games, reward 295.409, eps 0.01, speed 35.18 f/s
except
except
except
except
except
except
except
except
except
except
except
e

except
except
except
except
except
except
except
328787: done 2918 games, reward 281.769, eps 0.01, speed 38.82 f/s
328896: done 2919 games, reward 282.086, eps 0.01, speed 38.20 f/s
except
except
except
except
except
except
except
except
except
except
except
except
329002: done 2920 games, reward 284.847, eps 0.01, speed 39.87 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
329188: done 2921 games, rewa

except
336146: done 2975 games, reward 287.780, eps 0.01, speed 36.43 f/s
336239: done 2976 games, reward 287.217, eps 0.01, speed 36.77 f/s
336333: done 2977 games, reward 288.829, eps 0.01, speed 36.81 f/s
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
except
e

343557: done 3033 games, reward 280.241, eps 0.01, speed 38.44 f/s
343650: done 3034 games, reward 281.661, eps 0.01, speed 35.56 f/s
343743: done 3035 games, reward 282.024, eps 0.01, speed 38.19 f/s
343840: done 3036 games, reward 283.980, eps 0.01, speed 37.63 f/s
343930: done 3037 games, reward 286.355, eps 0.01, speed 38.75 f/s
344019: done 3038 games, reward 288.305, eps 0.01, speed 38.02 f/s
344112: done 3039 games, reward 288.654, eps 0.01, speed 37.39 f/s
344204: done 3040 games, reward 287.139, eps 0.01, speed 39.05 f/s
344294: done 3041 games, reward 287.314, eps 0.01, speed 37.69 f/s
344388: done 3042 games, reward 286.282, eps 0.01, speed 37.98 f/s
344484: done 3043 games, reward 284.386, eps 0.01, speed 37.43 f/s
344576: done 3044 games, reward 283.586, eps 0.01, speed 36.99 f/s
344669: done 3045 games, reward 284.560, eps 0.01, speed 38.11 f/s
344761: done 3046 games, reward 282.481, eps 0.01, speed 36.67 f/s
344857: done 3047 games, reward 282.248, eps 0.01, speed 38.14

351236: done 3111 games, reward 283.917, eps 0.01, speed 38.18 f/s
351327: done 3112 games, reward 283.022, eps 0.01, speed 38.34 f/s
351421: done 3113 games, reward 283.109, eps 0.01, speed 39.17 f/s
351516: done 3114 games, reward 283.013, eps 0.01, speed 38.25 f/s
351606: done 3115 games, reward 281.890, eps 0.01, speed 38.20 f/s
351694: done 3116 games, reward 284.823, eps 0.01, speed 38.72 f/s
351789: done 3117 games, reward 282.913, eps 0.01, speed 38.13 f/s
351881: done 3118 games, reward 284.272, eps 0.01, speed 39.48 f/s
351977: done 3119 games, reward 285.652, eps 0.01, speed 39.17 f/s
352071: done 3120 games, reward 285.820, eps 0.01, speed 37.95 f/s
352165: done 3121 games, reward 285.273, eps 0.01, speed 39.43 f/s
352260: done 3122 games, reward 286.374, eps 0.01, speed 39.32 f/s
352421: done 3123 games, reward 284.783, eps 0.01, speed 38.88 f/s
352520: done 3124 games, reward 285.313, eps 0.01, speed 37.91 f/s
352617: done 3125 games, reward 288.333, eps 0.01, speed 39.31

In [155]:
def calc_loss(batch, net, tgt_net, device="cpu"):
    states, actions, rewards, dones, next_states = batch

    states_v = torch.tensor(np.array(
        states, copy=False)).to(device)
    next_states_v = torch.tensor(np.array(
        next_states, copy=False)).to(device)
    actions_v = torch.tensor(actions).to(device)
    rewards_v = torch.tensor(rewards).to(device)
    done_mask = torch.BoolTensor(dones).to(device)


    state_action_values = net(states_v).gather(
        1, actions_v).sum(1)
    
    with torch.no_grad():
        tgt_state_values = tgt_net(next_states_v)#.max(1)[0]
        for i, j in enumerate(range(0, MACHINES* (TASKS+1) , TASKS+1 )):    
            if i==0:
                next_state_values = tgt_state_values[:, j: j+(TASKS+1)].max(1)[0]
            else:
                next_state_values += tgt_state_values[:, j: j+(TASKS+1)].max(1)[0]
        #print(f"next_state_values= {next_state_values}")
        next_state_values[done_mask] = 0.0
        next_state_values = next_state_values.detach()

    expected_state_action_values = next_state_values * GAMMA + \
                                   rewards_v
    return nn.MSELoss()(state_action_values,
                        expected_state_action_values)

In [105]:
for x in nSamples:
    timeCounter = 0
    fileData = []
    fileData.append(x)
    for _ in range(x):
        p = np.random.randint(1, 100)
        r = np.random.randint(timeCounter - 5 if timeCounter - 5 > 0 else 0 ,timeCounter + 2)
        timeCounter += int(p/3)
        
        fileData.append([p,r])
        
    with open(f'PTSZ-instancje/136723/in136723_{x}.txt', "w") as f:
        f.write(f"{str(fileData[0])}\n")
        
        m0 = np.random.randint(50,100)
        m1 = np.random.randint(50,100)
        m2 = np.random.randint(50,60)
        m3 = np.random.randint(50,100)
        m4 = np.random.randint(99,100)
        machines = np.array([m0,m1,m2,m3,m4])
        machines =  machines /max(machines)
        machines = np.random.choice( machines, 5, replace=False )
        f.write(f"{' '.join([str(x) for x in machines])}\n")
        
        instances = fileData[1:]
        random.shuffle(instances)
        for line in instances:            
            f.write(f"{' '.join([str(x) for x in line])}\n")

# Verify

In [5]:
x = 50

In [101]:
data

{1: {'p': 89, 'r': 588, 'c': 1123.578947368421},
 2: {'p': 78, 'r': 131, 'c': 519.4736842105262},
 3: {'p': 36, 'r': 405, 'c': 1369.5263157894738},
 4: {'p': 52, 'r': 363, 'c': 399.0},
 5: {'p': 64, 'r': 441, 'c': 419.0},
 6: {'p': 34, 'r': 8, 'c': 788.0},
 7: {'p': 60, 'r': 685, 'c': 87.35294117647061},
 8: {'p': 79, 'r': 299, 'c': 175.01470588235293},
 9: {'p': 38, 'r': 263, 'c': 772.0},
 10: {'p': 23, 'r': 210, 'c': 1155.0},
 11: {'p': 21, 'r': 638, 'c': 240.57352941176475},
 12: {'p': 24, 'r': 421, 'c': 563.9411764705883},
 13: {'p': 39, 'r': 245, 'c': 857.7368421052631},
 14: {'p': 4, 'r': 495, 'c': 5.012658227848078},
 15: {'p': 23, 'r': 3, 'c': 1339.0},
 16: {'p': 89, 'r': 496, 'c': 847.578947368421},
 17: {'p': 49, 'r': 374, 'c': 465.1052631578947},
 18: {'p': 5, 'r': 351, 'c': 933.0},
 19: {'p': 12, 'r': 550, 'c': 827.0},
 20: {'p': 11, 'r': 342, 'c': 16.014705882352928},
 21: {'p': 61, 'r': 667, 'c': 406.8088235294117},
 22: {'p': 31, 'r': 50, 'c': 364.0},
 23: {'p': 40, 'r':

In [68]:
taskQueue

{'m0': [28,
  22,
  42,
  43,
  4,
  6,
  5,
  35,
  24,
  23,
  47,
  37,
  50,
  44,
  18,
  27,
  15,
  10,
  19],
 'm1': [20, 8, 7, 30, 11, 29, 32, 12, 21, 26],
 'm2': [34],
 'm3': [45, 25, 36, 2, 41, 17, 33, 9, 13, 31, 16, 48, 40, 1, 3, 46, 49],
 'm4': [14, 39, 38]}

In [69]:
machines

{'m0': {'speed': '0.9583333333333334', 'readyTime': 0},
 'm1': {'speed': '0.5729166666666666', 'readyTime': 0},
 'm2': {'speed': '0.010416666666666666', 'readyTime': 0},
 'm3': {'speed': '1.0', 'readyTime': 0},
 'm4': {'speed': '0.20833333333333334', 'readyTime': 0}}

In [546]:
def calculateCost(data, taskQueue, machines):
    calculatedWeight = 0
    for key, tasks in taskQueue.items():
        print(f"m: {key}")
        timeCounter = 0
        for task in tasks:
            print(f"task: {task}, {data[task]}")
            while machines[key]['readyTime'] > timeCounter or data[task]['r'] > timeCounter:
                timeCounter += 1 
            print(f"timeCounter: {timeCounter}")
            print(f"machines[key]['readyTime']: {machines[key]['readyTime']}")
            
            machines[key]['readyTime'] = timeCounter + (data[task]['p'] / float(machines[key]['speed']) )
            
            print(f"machines[key]['readyTime']: {machines[key]['readyTime']}")
            
            data[task]['c'] = timeCounter + (data[task]['p'] / float(machines[key]['speed']) ) - data[task]['r']
            
            
            print(f"data[task]['c']: {data[task]['c']}")
            print(f"realy p: {(data[task]['p'] / float(machines[key]['speed']) )}")
            print()
        #    if timeCounter < data[task]['r']:
        #        timeCounter = data[task]['r']
            # add p time to current time
        #    timeCounter += data[task]['p']
      #      c = timeCounter
      #      data[task]['c'] = c
    
    for key, value in data.items():
        print(f"{key} -> {value}")
    # check correctness
    if not data[task]["r"] <= data[task]["c"] - data[task]["p"]: 
        print("Error")

    # verify cost
    if data[task]["c"] > data[task]["d"]:
        calculatedWeight += data[task]["w"]
    #print(calculatedWeight)
    return calculatedWeight

In [547]:
motherFolder = "out" # "PTSZ-instancje"
data = {}
indicies = ['136723'] #["136805", "136792", "136683", "132231", "136730", "136682", "136764", "136782", "136723", "136778", "136309", "136718", "136315", "136759"]
for index  in indicies:
    print(index)
    for samples in nSamples:
        taskQueue = {}
        with open(f'{motherFolder}/{index}/out{index}_{samples}.txt', "r") as result:
            res = result.read().split('\n')
            weight = int(res[0])
            for i, name in enumerate(machineNames):
                gen = (t for t in res[i+1].split(" ") if t.isdigit())
                taskQueue[name] = [int(t) for t in gen]
    
        with open(f'PTSZ-instancje/{index}/in{index}_{samples}.txt', "r") as instance:
            d = instance.read().split('\n')
            n = int(d[0])
            machines = {}
            for name, speed in zip(machineNames, d[1].split(' ')):
                machines[name] = {'speed':speed, 'readyTime':0}
            for i,line in enumerate(d[2:]):
                if line == '':
                    continue
                p,r = line.split(' ')
                p,r = int(p), int(r)
                data[i+1] = {"p": p, "r": r, "c": 0 }
        
        calculatedWeight = calculateCost(data, taskQueue, machines)
        print(calculatedWeight)
        #if calculatedWeight != weight:
        #    print(f"wrong weight. Calculated: {calculatedWeight}, read: {weight}")
        #else:
        #    print(f"Weight: {calculatedWeight} confirmed")
    break
    print()
    print()
        
        

136723
m: m0
task: 21, {'p': 31, 'r': 83, 'c': 0}
timeCounter: 83
machines[key]['readyTime']: 0
machines[key]['readyTime']: 143.1764705882353
data[task]['c']: 60.176470588235304
realy p: 60.1764705882353

task: 2, {'p': 67, 'r': 137, 'c': 0}
timeCounter: 144
machines[key]['readyTime']: 143.1764705882353
machines[key]['readyTime']: 274.05882352941177
data[task]['c']: 137.05882352941177
realy p: 130.05882352941177

task: 29, {'p': 65, 'r': 274, 'c': 0}
timeCounter: 275
machines[key]['readyTime']: 274.05882352941177
machines[key]['readyTime']: 401.1764705882353
data[task]['c']: 127.1764705882353
realy p: 126.1764705882353

task: 38, {'p': 37, 'r': 402, 'c': 0}
timeCounter: 402
machines[key]['readyTime']: 401.1764705882353
machines[key]['readyTime']: 473.8235294117647
data[task]['c']: 71.8235294117647
realy p: 71.82352941176471

task: 24, {'p': 33, 'r': 480, 'c': 0}
timeCounter: 480
machines[key]['readyTime']: 473.8235294117647
machines[key]['readyTime']: 544.0588235294117
data[task]['c']:

KeyError: 'd'

In [66]:
machines

{'m0': {'speed': '0.9583333333333334', 'readyTime': 0},
 'm1': {'speed': '0.5729166666666666', 'readyTime': 0},
 'm2': {'speed': '0.010416666666666666', 'readyTime': 0},
 'm3': {'speed': '1.0', 'readyTime': 0},
 'm4': {'speed': '0.20833333333333334', 'readyTime': 0}}

In [11]:
# p - persistance
# r - ready time
# d - oczekiwany termin zakończenia
# w - weight
# Cj oznacza moment zakończenia wykonywania zadania Jj w uszeregowaniu

# p r d w
# r  >= c - p

# TaskScheduler

In [106]:
a,b,c,d = (1,1,1,1)
totalCost = 0
data = {}

indicies = ['136723']#["136805", "136792", "136683", "132231", "136730", "136682", "136764", "136782", "136723", "136778", "136309", "136718", "136315", "136759"]
for index  in indicies:
    for samples in nSamples:
        with open(f'PTSZ-instancje/{index}/in{index}_{samples}.txt', "r") as instance:
            d = instance.read().split('\n')
            n = int(d[0])
            machines = {}
            for name, speed in zip(machineNames, d[1].split(' ')):
                machines[name] = {'speed':speed, 'readyTime':0, 'gain':0}
            for i,line in enumerate(d[2:]):
                if line == '':
                    continue
                p,r = line.split(' ')
                p,r = int(p), int(r)
                data[i+1] = {"p": p, "r": r, "c": 0, "penalty": 0 }
            dataCopy = copy.deepcopy(data)
        
        calculatedWeight = 0
        timeCounter = 0
        outSchedule = {}
        for name in machineNames:
            outSchedule[name] = []
        
        start = time.time()
        while len(data) > 0:
            readyToGo = {}
            for key, task in data.items():
                if task['r'] <= timeCounter: # add penalty ( (timeCounter+p-d)*w )
                    endTime = timeCounter+task['p']
                    penaltyEndTask = ( endTime if endTime > 0 else 0 ) * 1
                    penaltyPersistance = task['p']
                    task['penalty'] = a*penaltyEndTask + c*penaltyPersistance
                    readyToGo[key] = task
                
            if len(readyToGo) == 0: # currently there is no task to go
                #print("upsi readyToGo")
                timeCounter += 1
                continue
            #print(f"readyToGo: {readyToGo}")

            # select task
            readyToGoSorted = sorted(readyToGo.items(), key=lambda y: (y[1]['penalty']), reverse=True)
            selectedTask = readyToGoSorted[0]
            
            # select machine
            readyMachines = [d for d in zip(machines.keys(), machines.values() ) if d[1]['readyTime'] <= timeCounter]
            if len(readyMachines) == 0: # currently there is free machine
                #print("upsi readyMachines")
                timeCounter += 1
                continue
            
            
            for name, machine in machines.values():
                howLongItWillBeDisabled = 0 ##########################################
                speed = machines[name]['speed']
                
                machines[name]['gain'] = speed
            selectedMachine = max(readyMachines, key=lambda y: (y[1]['speed']))
            
            print(f"timeCounter: {timeCounter}")
            print(f"selectedTask: {selectedTask}")
            
            # remove task from list active tasks, add time, append outschedule
            data.pop(selectedTask[0])
            
            print(f"selectedMachine: {selectedMachine}")
            #timeCounter += 1
            machines[selectedMachine[0]]['readyTime'] = timeCounter + (selectedTask[1]['p'] / float(selectedMachine[1]['speed']) )
            print(machines)
            
            print()
            
            outSchedule[selectedMachine[0]].append(selectedTask[0])    
        
        end = time.time()
        #print(end-start)
        
        #cost = calculateCost(dataCopy, outSchedule)
        cost = 5
        #print(cost)
        totalCost += cost
        
        print(outSchedule)
        
        with open(f'out/{index}/out{index}_{samples}.txt', "w") as f:
            f.write(f"{cost}\n")         
                #f.write(f"{' '.join([str(x) for x in line])}\n")
            for name in machineNames:
                f.write(f"{' '.join([str(x) for x in outSchedule[name]])}\n")
        
        break
        
print(totalCost)

timeCounter: 0
selectedTask: (28, {'p': 89, 'r': 0, 'c': 0, 'penalty': 178})
selectedMachine: ('m4', {'speed': '1.0', 'readyTime': 0})
{'m0': {'speed': '0.5151515151515151', 'readyTime': 0}, 'm1': {'speed': '0.5252525252525253', 'readyTime': 0}, 'm2': {'speed': '0.7777777777777778', 'readyTime': 0}, 'm3': {'speed': '0.797979797979798', 'readyTime': 0}, 'm4': {'speed': '1.0', 'readyTime': 89.0}}

timeCounter: 29
selectedTask: (13, {'p': 71, 'r': 29, 'c': 0, 'penalty': 171})
selectedMachine: ('m3', {'speed': '0.797979797979798', 'readyTime': 0})
{'m0': {'speed': '0.5151515151515151', 'readyTime': 0}, 'm1': {'speed': '0.5252525252525253', 'readyTime': 0}, 'm2': {'speed': '0.7777777777777778', 'readyTime': 0}, 'm3': {'speed': '0.797979797979798', 'readyTime': 117.9746835443038}, 'm4': {'speed': '1.0', 'readyTime': 89.0}}

timeCounter: 49
selectedTask: (31, {'p': 50, 'r': 49, 'c': 0, 'penalty': 149})
selectedMachine: ('m2', {'speed': '0.7777777777777778', 'readyTime': 0})
{'m0': {'speed': '

In [33]:
x = {'m0':{'speed':2, 'readyTime':0},'m1':{'speed':3, 'readyTime':0},'m2':{'speed':1, 'readyTime':5}, }

In [18]:
zip(machines.keys(), machines.values() )

<zip at 0x7f9a01101588>

In [37]:
[d for d in x.values() if d['readyTime'] <= 0]

[{'speed': 2, 'readyTime': 0}, {'speed': 3, 'readyTime': 0}]

In [42]:
readyMachines = [d for d in x.values() if d['readyTime'] <= 0]
readyMachines = max(readyMachines, key=lambda y: (y['speed']))
readyMachines

{'speed': 3, 'readyTime': 0}

In [9]:
from scipy.optimize import minimize

In [10]:
x0 = (1, 1, 1, 1)

In [17]:
def costFunction(x):
    a,b,c,d = x
    totalCost = 0
    data = {}
    indicies = ["136805", "136792", "136683", "132231", "136730", "136682", "136764", "136782", "136723", "136778", "136309", "136718", "136315", "136759"]
    for index  in indicies:
        for samples in nSamples:
            with open(f'PTSZ-instancje/{index}/in{index}_{samples}.txt', "r") as instance:
                d = instance.read().split('\n')
                n = int(d[0])                
                for i,line in enumerate(d[1:]):
                    if line == '':
                        continue
                    p,r,d,w = line.split(' ')
                    p,r,d,w = int(p), int(r), int(d), int(w)
                    data[i+1] = {"p": p, "r": r, "d": d, "w": w, "c": 0, "penalty": 0 }
                dataCopy = copy.deepcopy(data)


            calculatedWeight = 0
            timeCounter = 0
            outSchedule = []

            start = time.time()
            while len(data) > 0:
                readyToGo = {}
                for key, task in data.items():
                    if task['r'] <= timeCounter: # add penalty ( (timeCounter+p-d)*w )
                        endTime = timeCounter+task['p']-task['d']
                        penaltyEndTask = ( endTime if endTime > 0 else 0 ) * task['w']
                        penaltyWeight = task['w']
                        penaltyPersistance = task['p']
                        penaltyDue = task['d'] - timeCounter
                        task['penalty'] = a*penaltyEndTask + b*penaltyWeight + c*penaltyPersistance - d*penaltyDue
                        readyToGo[key] = task

                if len(readyToGo) == 0: # currently there is no task to go
                    #print("upsi")
                    timeCounter += 1
                    continue
                #print(f"readyToGo: {readyToGo}")

                readyToGoSorted = sorted(readyToGo.items(), key=lambda y: (y[1]['penalty'], y[1]['w']), reverse=True)
                selectedTask = readyToGoSorted.pop(0)
                #print(timeCounter)
                #print(f"selectedTask: {selectedTask}")

                data.pop(selectedTask[0])
                timeCounter += selectedTask[1]['p']
                outSchedule.append(selectedTask[0])    

            end = time.time()
          #  print(int((end-start)*1000) if int((end-start)*1000) != 0 else 1)

            cost = calculateCost(dataCopy, outSchedule)
            #print(cost)
            totalCost += cost
    print(f"[{totalCost},{a},{b},{c},{d}],")
    return totalCost

In [18]:
options = {"maxiter":40, "disp":True}

In [23]:
res = minimize(costFunction, x0, method='TNC', options=options, tol=1e-2)

[499007,1.0,1.0,1.0,1183],
[499007,1.0,1.0,1.0,1183],
[499012,1.00000001,1.0,1.0,1183],
[499007,1.0,1.00000001,1.0,1183],
[499030,1.0,1.0,1.00000001,1183],
[499007,1.0,1.0,1.0,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,1.0000000036691032,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.00000001,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999980877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,1.0000000036691032,1.0,0.999999970877875,1183],
[499007,0.9999999936691033,1.00000001,0.999999970877875,1183],
[499007,0.9999999936691033,1.0,0.999999980877875,1183],
[499007,0.9999999936691033,1.0,0.999999970877875,1183],
[499007,0.9999999968345517,1.0,0.9999999854389375,1183],
[499007,0.9999999968345517,1.0,0.9999999854389375,1183],
[499007,1.0000000

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

In [None]:
x = [1,2,3]

In [None]:
plt.plot(x)
plt.title("Minimalizacja funkcji celu")
plt.xaxis("iteracja")
plt.yaxis("Koszt")

In [94]:
costFunction(x)

1
7
13
25
17
19
26
33
38
61
1
4
10
17
29
42
64
85
99
140
1
4
11
19
30
45
63
152
111
133
1
3
15
16
25
32
48
61
82
88
1
1
6
13
30
36
48
64
94
108
1
3
6
11
19
27
38
52
62
79
1
3
8
22
29
38
56
71
105
124
1
4
10
17
26
39
53
69
105
107
1
1
1
2
2
4
7
6
9
10
1
3
6
10
16
26
33
54
57
68
1
3
7
15
22
45
45
56
75
108
1
1
1
1
2
4
5
7
8
11
1
4
11
21
32
47
74
87
110
146
1
3
5
8
14
28
34
39
54
84


385570

In [85]:
x = (-140036.8041378972, 2.5486165590751364, -0.1998934445512417, 1183)