In [1]:
import copy
import json

import gym
from or_gym.utils import assign_env_config

In [2]:
connections = {
    'source_blend':{
        's1': ['j1', 'j2', 'j3', 'j4'],
        's2': ['j1', 'j2', 'j3', 'j4']
    },
    
    # 'source_demand':{
    #     's1': [],
    #     's2': []
    # },
    
    'blend_blend':{
        'j1': ['j5', 'j6', 'j7', 'j8'],
        'j2': ['j5', 'j6', 'j7', 'j8'],
        'j3': ['j5', 'j6', 'j7', 'j8'],
        'j4': ['j5', 'j6', 'j7', 'j8'],
        'j5': [],
        'j6': [],
        'j7': [],
        'j8': []
    },
    
    'blend_demand':{
        'j1': [],
        'j2': [],
        'j3': [],
        'j4': [],
        'j5': ['p1', 'p2'],
        'j6': ['p1', 'p2'],
        'j7': ['p1', 'p2'],
        'j8': ['p1', 'p2']
    }
}

In [12]:
with open("./connections_sample.json" ,"r") as f:
    conn_S = f.readline()
    
connections = json.loads(conn_S)

In [15]:
connections

{'source_blend': {'s1': ['j1', 'j2', 'j3', 'j4'],
  's2': ['j1', 'j2', 'j3', 'j4']},
 'blend_blend': {'j1': ['j5', 'j6', 'j7', 'j8'],
  'j2': ['j5', 'j6', 'j7', 'j8'],
  'j3': ['j5', 'j6', 'j7', 'j8'],
  'j4': ['j5', 'j6', 'j7', 'j8'],
  'j5': [],
  'j6': [],
  'j7': [],
  'j8': []},
 'blend_demand': {'j1': [],
  'j2': [],
  'j3': [],
  'j4': [],
  'j5': ['p1', 'p2'],
  'j6': ['p1', 'p2'],
  'j7': ['p1', 'p2'],
  'j8': ['p1', 'p2']}}

In [1]:
# For reference during implementation

action_sample = {
    'source_blend':{
        's1': {'j1':0, 'j2':0, 'j3':0, 'j4':0}, # From s1 to b1, from s1 to b2 etc...
        's2': {'j1':0, 'j2':0, 'j3':0, 'j4':0},
    },
    
    # 'source_demand':{
    #     's1': {},
    #     's2': {}
    # },
    
    'blend_blend':{
        'j1': {'j5':0, 'j6':0, 'j7':0, 'j8':0},
        'j2': {'j5':0, 'j6':0, 'j7':0, 'j8':0},
        'j3': {'j5':0, 'j6':0, 'j7':0, 'j8':0},
        'j4': {'j5':0, 'j6':0, 'j7':0, 'j8':0},
        'j5': {},
        'j6': {},
        'j7': {},
        'j8': {}
    },
    
    'blend_demand':{
        'j1': {},
        'j2': {},
        'j3': {},
        'j4': {},
        'j5': {'p1':0, 'p2':0},
        'j6': {'p1':0, 'p2':0},
        'j7': {'p1':0, 'p2':0},
        'j8': {'p1':0, 'p2':0}
    },
    
    "tau": {"s1": 0, "s2": 0},
    
    "delta": {"p1": 0, "p2": 0}
}

In [None]:
############ /!\/!\/!\/!\ ############

In [None]:
class BlendEnv(gym.Env):
    def __init__(self, *args, **kwargs):
        super().__init__()
        
        self.M = 100
        self.T = 7
        self.alpha = 0.1
        self.beta = 0.02
        self.sources = ["s1", "s2"]
        self.demands = ["p1", "p2"]
        self.blenders = ["j1", "j2", "j3", "j4", "j5", "j6", "j7", "j8"]
        self.properties = ["q1"]
        
        self.tau   = {'s1': [10, 10, 10, 0, 0, 0, 0], 's2': [30, 30, 30, 0, 0, 0, 0]}
        self.delta = {'p1': [0, 0, 0, 15, 15, 15, 15], 'p2': [0, 0, 0, 15, 15, 15, 15]}
        
        self.sigma = {"s1":{"q1": 0.06}, "s2":{"q1": 0.26}}
        self.sigma_ub = {"p1":{"q1": 0.16}, "p2":{"q1": 1}}
        self.sigma_lb = {"p1":{"q1": 0}, "p2":{"q1": 0}}
        
        self.s_inv_lb = {'s1': 0, 's2': 0}
        self.s_inv_ub = {'s1': 0, 's2': 0}
        self.d_quals_lb = {'p1': 0, 'p2': 0}
        self.d_quals_ub = {'p1': 0.16, 'p2': 0.1}
        self.d_inv_lb = {'p1': 0, 'p2': 0}
        self.d_inv_ub = {'p1': 0, 'p2': 0}
        
        self.betaT_d = {'p1': 2, 'p2': 1}
        self.betaT_s = {'s1': 0, 's2': 0}
        
        self.b_inv_ub = {"j1": 30, "j2": 30, "j3": 30, "j4": 30, "j5": 20, "j6": 20, "j7": 20, "j8": 20}
        self.connections = connections
        
        assign_env_config(self, kwargs)
        
        
        self.start_state = {
            "sources": {s:0 for s in self.sources},
            "blenders": {b:0 for b in self.blenders},
            "demands": {p:0 for p in self.demands},
            "properties": {b: {q:0 for q in self.properties} for b in self.blenders}
        }
        
        self.reset()
    
    def step(self, action):
        if self.t == self.T:
            self.done = True
            return self.state, self.reward, self.done
        
        self.t += 1
        
        prev_blend_invs = self.state["blenders"]
        
        for s in self.sources:
            # How does the model know how much he can buy ?
            self.state["sources"][s] = self.state["sources"][s] \
                                        + action["tau"][s] \
                                        - sum([action["source_blend"][s][j] for j in action[s].keys()])
        
        for j in self.blenders:
            self.state["blenders"][j] = self.state["blenders"][j] \
                                        + sum([action[s][j] for s in action["source_blend"].keys()]) \
                                        + sum([action[jp][j] for jp in action["blend_blend"].keys()]) \
                                        - sum([action[j][jp] for jp in action["blend_blend"][j].keys()]) \
                                        - sum([action[j][p] for p in action["blend_demand"][j].keys()]) \
                                            
        for p in self.demands:
            # How does the model know how much he can sell ?
            self.state["demands"][p] = self.state["demands"][p] \
                                        - action["delta"][p] \
                                        + sum([action[j][p] for j in action["blend_demand"].keys()])
                                        
        for j in self.blenders:
            for q in self.properties:
                self.state["properties"][j][q] = (1/self.state["blenders"][j]) * ( \
                                                    self.state["properties"][j][q] * prev_blend_invs[j] \
                                                    + sum(self.sigma[s][q] * action[s][j] for s in self.sources) \
                                                    + sum(self.state["properties"][jp][q] * action[jp][j] for jp in self.blenders) \
                                                    - sum(self.state["properties"][j][q] * action[j][jp] for jp in self.blenders) \
                                                    - sum(self.state["properties"][j][q] * action[j][p] for p in self.demands)
                                                )
        
        self.update_reward(action)
        
        return self.state, self.reward, self.done
        
    def update_reward(self, action):
        Q_float = Q_bin = R1 = R2 = 0
        
        
        for k in ["source_blend", "blend_blend", "blend_demand"]:
            for tank1 in action[k].keys():
                for tank2 in action[k][tank1].keys():
                    Q_float += action[k][tank1][tank2]
                    Q_bin += 1 if action[k][tank1][tank2] > 0 else 0 
                    
        Q = self.alpha * Q_bin + self.beta * Q_float
        
        R1 = 0
        for p in self.demands:
            R1 += self.betaT_d[p] * min(action["delta"][p], self.state["demands"][p])
        for s in self.sources:
            R1 -= self.betaT_s[s] * action["tau"][s]
            
        R1 = R1 - Q
        
        for j in self.blenders:
            R2 += self._penalty_in_out_flow(j, action)
            for q in self.properties:
                for p in self.demands:
                    R2 += self._penalty_quality(p, q, j, action)
        
        R2 = R2 + R1
        
        self.reward = R2
        
    def _penalty_quality(self, p, q, j, action):
        if self.state["properties"][j][q] < self.sigma_lb[p][q] and action["blend_demand"][j][p] > 0:
            return self.M
        return 0
    
    def _penalty_in_out_flow(self, j, action):
        sum_in = sum_out = 0
        for jp in self.blenders:
            sum_in  += action["blend_blend"][jp][j]
            sum_out += action["blend_blend"][j][jp]
        
        for s in self.sources:
            sum_in  += action["source_blend"][s][j]
        
        for p in self.demands:
            sum_out += action["blend_demand"][j][p]
            
        if sum_in > 0 and sum_out > 0:
            return self.M
        
        return 0
        
    def reset(self):
        self.t = 0
        self.state = self.start_state
        self.reward = 0
        self.done = False

In [None]:
import gym
from gym import error, spaces, utils
from gym.utils import seeding

class TicTac4(gym.Env):
	metadata = {'render.modes': ['human']}


	def __init__(self):
		self.state = []
		for i in range(3):
			self.state += [[]]
			for j in range(3):
				self.state[i] += ["-"]
		self.counter = 0
		self.done = 0
		self.add = [0, 0]
		self.reward = 0

	def check(self):

		if(self.counter<5):
			return 0
		for i in range(3):
			if(self.state[i][0] != "-" and self.state[i][1] == self.state[i][0] and self.state[i][1] == self.state[i][2]):
				if(self.state[i][0] == "o"):
					return 1
				else:
					return 2
			if(self.state[0][i] != "-" and self.state[1][i] == self.state[0][i] and self.state[1][i] == self.state[2][i]):
				if(self.state[0][i] == "o"):
					return 1
				else:
					return 2
		if(self.state[0][0] != "-" and self.state[1][1] == self.state[0][0] and self.state[1][1] == self.state[2][2]):
			if(self.state[0][0] == "o"):
				return 1
			else:
				return 2
		if(self.state[0][2] != "-" and self.state[0][2] == self.state[1][1] and self.state[1][1] == self.state[2][0]):
			if(self.state[1][1] == "o"):
				return 1
			else:
				return 2



	def step(self, target):
		if self.done == 1:
			print("Game Over")
			return [self.state, self.reward, self.done, self.add]
		elif self.state[int(target/3)][target%3] != "-":
			print("Invalid Step")
			return [self.state, self.reward, self.done, self.add]
		else:
			if(self.counter%2 == 0):
				self.state[int(target/3)][target%3] = "o"
			else:
				self.state[int(target/3)][target%3] = "x"
			self.counter += 1
			if(self.counter == 9):
				self.done = 1;
			self.render()

		win = self.check()
		if(win):
			self.done = 1;
			print("Player ", win, " wins.", sep = "", end = "\n")
			self.add[win-1] = 1;
			if win == 1:
				self.reward = 100
			else:
				self.reward = -100

		return [self.state, self.reward, self.done, self.add]

	def reset(self):
		for i in range(3):
			for j in range(3):
				self.state[i][j] = "-"
		self.counter = 0
		self.done = 0
		self.add = [0, 0]
		self.reward = 0
		return self.state

	def render(self):
		for i in range(3):
			for j in range(3):
				print(self.state[i][j], end = " ")
			print("")