In [1]:


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import gym
import collections
import random
from simulator_DFJSP import *

learning_rate = 0.0005 
gamma = 0.99
buffer_limit = 50000
batch_size = 32

class ReplayBuffer():        #buffer class
    def __init__(self):
        self.buffer = collections.deque(maxlen=buffer_limit);
    def put(self, transition):
        self.buffer.append(transition)
    def sample(self, n):
        mini_batch = random.sample(self.buffer, n)
        s_lst, a_lst, r_lst, s_prime_lst, done_mask_lst = [],[],[],[],[]
        
        for transition in mini_batch:
            s, a, r, s_prime, done_mask = transition
            s_lst.append(s)
            a_lst.append([a])
            r_lst.append([r])
            s_prime_lst.append(s_prime)
            done_mask_lst.append([done_mask])
            
        return torch.tensor(s_lst, dtype=torch. float),torch.tensor(a_lst), torch.tensor(r_lst), torch.tensor(s_prime_lst, dtype=torch. float), torch.tensor(done_mask_lst)
    
    def size(self):
        return len(self.buffer)

class Qnet(nn.Module):        #Qnet
    def __init__(self):
        super(Qnet, self).__init__()
        self.fc1 = nn.Linear(12,64)
        self.fc2 = nn.Linear(64,32)
        self.fc3 = nn.Linear(32,11)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def sample_action(self, obs, epsilon):
        out = self.forward(obs)
        coin = random.random()
        if coin < epsilon:
            return random.randint(0, 10)
        else:
            return out.argmax().item()
        
    def select_action(self, obs, epsilon):
        out = self.forward(obs)
        return out.argmax().item(),out
        
def train(q, q_target, memory, optimizer):
    for i in range(10):
        s,a,r,s_prime,done_mask = memory.sample(batch_size)
        #q.number_of_time_list[a] += 1    
        q_out = q(s)
        q_a = q_out.gather(1,a)
        max_q_prime = q_target(s_prime).max (1)[0].unsqueeze(1)
        #print(max_q_prime.shape)
        target = r + gamma * max_q_prime * done_mask
        loss = F.smooth_l1_loss(q_a, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
            
def main():
    env = FJSP_simulator('C:/Users/user/main_pro/duedate_DQN/data/FJSP_Sim_10_zero.csv','C:/Users/user/main_pro/duedate_DQN/data/FJSP_Set_10.csv',
                          "C:/Users/user/main_pro/duedate_DQN/data/FJSP_Q_time_10_0.4.csv","C:/Users/user/main_pro/duedate_DQN/data/FJSP_rd_time_10_10,60.csv",i)
    q = Qnet()
    q_target = Qnet()
    q_target.load_state_dict(q.state_dict())
    memory = ReplayBuffer()
    print_interval = 1
    q_load = 10
    score = 0.0
    optimizer = optim.Adam(q.parameters(), lr=learning_rate)
    r_list = []
    makespan_list = []
    q_over_list = []
    q_over_op = []
    
    for n_epi in range(2000):
        #여기는 sample_action 구간
        epsilon = max(0.01 , 0.08 - 0.02*(n_epi/200))
        s = env.reset()
        done = False
        score = 0.0
        while not done:
            a = q.sample_action(torch.from_numpy(s). float(), epsilon)
            s_prime, r, done = env.step(a)
            done_mask =0.0 if done else 1.0
            if done == False:
                memory.put((s,a,r,s_prime,done_mask))
                s = s_prime
                score += r
            if done:
                break
            
        #학습구간    
        if memory.size() > 1000:
            train(q, q_target, memory, optimizer)
        
        #결과 및 파라미터 저장    
        if n_epi % print_interval==0 and n_epi!=0:
            #q_target.load_state_dict(q.state_dict())
            params = q.state_dict()
            Flow_time, machine_util, util, makespan, Tardiness_time, Lateness_time, T_max,q_time_true,q_time_false,q_job_t, q_job_f, q_over_time = env.performance_measure()
            r_list.append(score/print_interval)
            makespan_list.append(makespan)
            q_over_list.append(q_over_time)
            q_over_op.append(q_time_true)
            print(q_time_true)
            print("--------------------------------------------------")
            print("flow time: {}, util : {:.3f}, makespan : {}".format(Flow_time, util, makespan))
            print("Tardiness: {}, Lateness : {}, T_max : {}".format(Tardiness_time, Lateness_time, T_max))
            print("q_true_op: {}, q_false_op : {}, q_true_job : {}, , q_false_job : {} , q_over_time : {}".format(q_time_true, q_time_false, q_job_t, q_job_f, q_over_time))
            print("n_episode: {}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%".format(n_epi, score/print_interval,memory.size(),epsilon*100))
            #score=0.0
        
        #여기는 select_action 구간
        s = env.reset()
        done = False
        score = 0.0
        params = q.state_dict()
        torch.save(params, str(n_epi) + "nomorspt2.pt" )
        while not done:
            a, a_list = q.select_action(torch.from_numpy(s). float(), epsilon)
            #print(a_list)
            #print(a)
            s_prime, r, done = env.step(a)
            #print(r)
            s = s_prime
            score += r
            if done:
                break
        Flow_time, machine_util, util, makespan, Tardiness_time, Lateness_time, T_max,q_time_true,q_time_false,q_job_t, q_job_f, q_over_time = env.performance_measure()
        r_list.append(score/print_interval)
        makespan_list.append(makespan)
        q_over_list.append(q_over_time)
        q_over_op.append(q_time_true)
        print("--------------------------------------------------")
        print("flow time: {}, util : {:.3f}, makespan : {}".format(Flow_time, util, makespan))
        print("Tardiness: {}, Lateness : {}, T_max : {}".format(Tardiness_time, Lateness_time, T_max))
        print("q_true_op: {}, q_false_op : {}, q_true_job : {}, , q_false_job : {} , q_over_time : {}".format(q_time_true, q_time_false, q_job_t, q_job_f, q_over_time))
        print("n_episode: {}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%".format(n_epi, score/print_interval,memory.size(),epsilon*100))
        
        
        
        if n_epi % q_load ==0 and n_epi!=0:
            q_target.load_state_dict(q.state_dict())
    
    
    s = env.reset()
    done = False
    score = 0.0
    
    while not done:
        a, a_list = q.select_action(torch.from_numpy(s). float(), epsilon)
        #print(a_list)
        #print(a)
        s_prime, r, done = env.step(a)
        #print(r)
        s = s_prime
        score += r
        if done:
            break
    Flow_time, machine_util, util, makespan, Tardiness_time, Lateness_time, T_max,q_time_true,q_time_false,q_job_t, q_job_f,q_over_time = env.performance_measure()
    env.gannt_chart()
    return Flow_time, machine_util, util, makespan, score,r_list, makespan_list, q_over_list,q_over_op
for i in range(1):
    Flow_time, machine_util, util, makespan, score,r_list, makespan_list, q_over_list,q_over_op = main()
    print("FlowTime:" , Flow_time)
    print("machine_util:" , machine_util)
    print("util:" , util)
    print("makespan:" , makespan)
    print("Score" , score)
    
"""    
params = torch.load("nomorspt.pt")
q = Qnet()
q.load_state_dict(params)
q.eval()
env = FJSP_simulator('C:/Users/parkh/git_tlsgudcks/simulator/data/FJSP_SIM7_all.csv','C:/Users/parkh/FJSP_SETUP_SIM.csv',"C:/Users/parkh/git_tlsgudcks/simulator/data/FJSP_Fab.csv",1) 
s = env.reset()
done = False
score = 0.0
epsilon = max(0.01 , 0.08 - 0.02*(20/200))
while not done:
    a, a_list = q.select_action(torch.from_numpy(s). float(), epsilon)
    #print(a_list)
    #print(a)
    s_prime, r, done = env.step(a)
    #print(r)
    s = s_prime
    score += r
    if done:
        break
Flow_time, machine_util, util, makespan = env.performance_measure()
print("FlowTime:" , Flow_time)
print("machine_util:" , machine_util)
print("util:" , util)
print("makespan:" , makespan)
print("Score" , score)
""" 

--------------------------------------------------
flow time: 10705, util : 0.978, makespan : 545
Tardiness: 4352, Lateness : 3069, T_max : 383
q_true_op: 223, q_false_op : 47, q_true_job : 21, , q_false_job : 29 , q_over_time : 1493
n_episode: 0, score : -4183.0, n_buffer : 321, eps : 8.0%
212
--------------------------------------------------
flow time: 10970, util : 0.986, makespan : 546
Tardiness: 4427, Lateness : 3334, T_max : 411
q_true_op: 212, q_false_op : 58, q_true_job : 22, , q_false_job : 28 , q_over_time : 2046
n_episode: 1, score : -4125.0, n_buffer : 641, eps : 8.0%
--------------------------------------------------
flow time: 10705, util : 0.978, makespan : 545
Tardiness: 4352, Lateness : 3069, T_max : 383
q_true_op: 223, q_false_op : 47, q_true_job : 21, , q_false_job : 29 , q_over_time : 1493
n_episode: 1, score : -4183.0, n_buffer : 641, eps : 8.0%
201
--------------------------------------------------
flow time: 11114, util : 0.974, makespan : 559
Tardiness: 4499, L


Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at  C:\b\abs_bao0hdcrdh\croot\pytorch_1675190257512\work\torch\csrc\utils\tensor_new.cpp:204.)



204
--------------------------------------------------
flow time: 10760, util : 0.976, makespan : 563
Tardiness: 4369, Lateness : 3124, T_max : 376
q_true_op: 204, q_false_op : 66, q_true_job : 17, , q_false_job : 33 , q_over_time : 2299
n_episode: 3, score : -4598.0, n_buffer : 1286, eps : 8.0%
--------------------------------------------------
flow time: 10186, util : 0.974, makespan : 568
Tardiness: 2627, Lateness : 2550, T_max : 128
q_true_op: 210, q_false_op : 60, q_true_job : 10, , q_false_job : 40 , q_over_time : 1719
n_episode: 3, score : -4444.0, n_buffer : 1286, eps : 8.0%
209
--------------------------------------------------
flow time: 10708, util : 0.987, makespan : 559
Tardiness: 3126, Lateness : 3072, T_max : 137
q_true_op: 209, q_false_op : 61, q_true_job : 11, , q_false_job : 39 , q_over_time : 2021
n_episode: 4, score : -4441.0, n_buffer : 1606, eps : 8.0%
--------------------------------------------------
flow time: 10885, util : 0.977, makespan : 581
Tardiness: 3280

KeyboardInterrupt: 