PyTorch implementation of our problem.

The following problem will be solved in each node of a dynamic programming problem:

find n Actions, each of which constitutes a local Nash equilibrium

input = Action
loss = stochastic salvo combat model: probability of winning

restart the process after updating the state with each of these Actions.

In [16]:
import torch
import numpy as np
import time

In [3]:
# global variables
# Sizes used by the module-level helper functions and the simple test case.
N_Technologies = 3
N_Capabilities = 6
# Not read by the module-level helpers visible here; TorchGame takes its own Horizon.
Horizon = 5

# Used in TRL calculations
# TechnologyReadiness evaluates 1 / (1 + exp(-State / I + D)) with these two values.
I = 25
D = 0
# The last axis (size 2) indexes the two players: a different technology ->
# capability conversion is assumed for each player, informed by the specific scenario.
CAPABILITYMATRIX = torch.rand(N_Technologies,N_Capabilities,2)



In [4]:
#helper functions 

def Update_State(State,Action):
    """Return the state reached by applying ``Action`` to ``State``.

    The transition is currently deterministic: the action is added
    element-wise to the state and the inputs are left untouched.
    """
    # TODO: pass the action through a randomness model before applying it.
    return torch.add(State, Action)

def TechnologyReadiness(State):
    """Map a state tensor element-wise to a technology readiness level.

    Evaluates the logistic curve ``1 / (1 + exp(-State / I + D))`` using the
    module-level constants ``I`` and ``D``; the result lies between 0 and 1.
    """
    exponent = -State*(1/I)+D
    denominator = 1+torch.exp(exponent)
    return torch.pow(denominator,-1)

def TechToCapa(State):
    """Convert a technology state into a capability table.

    The state is first mapped to readiness levels (which are printed), then
    each of the two columns is multiplied with the matching slice of the
    module-level ``CAPABILITYMATRIX``.

    Returns a tensor of shape ``(N_Capabilities, 2)``.
    """
    readiness = TechnologyReadiness(State)
    print(readiness)

    capability_table = torch.empty((N_Capabilities,2))
    for player in (0, 1):
        player_readiness = torch.transpose(readiness[:,player],0,-1)
        capability_table[:,player] = player_readiness @ CAPABILITYMATRIX[:,:,player]

    return capability_table
 


In [5]:
#should give probabilities that [player 1, player 2] wins the battle. 
def torchBattle(capabilities):
    """Return each column's share of the total capability.

    The capabilities are summed over dim 0 and divided by the sum of all
    entries, so the returned values add up to 1.
    """
    column_totals = capabilities.sum(dim=0)
    grand_total = capabilities.sum()
    return column_totals / grand_total
    

In [38]:
import queue
class TorchGame():
    """Tree search over technology states of a two-player game.

    Starting from a random initial state, ``Main`` repeatedly takes a state,
    asks ``GetActions`` for a small set of candidate actions, records every
    (state, action) pair and schedules the resulting states for expansion
    until the horizon or the time budget is reached.

    ``OptimizeAction`` and ``FilterActions`` are placeholders: they do not yet
    use the battle model or test for convergence.
    """

    def __init__(self, N_Technologies=3, N_Capabilities=6, Horizon=5, N_actions=5, N_actions_startpoint=100, I=25, D=0) -> None:
        """Set up the game parameters and a random initial state.

        Args:
            N_Technologies: number of rows of the state (one per technology).
            N_Capabilities: number of capabilities produced by ``TechToCapa``.
            Horizon: maximum depth of the search performed by ``Main``.
            N_actions: number of actions kept per state by ``FilterActions``.
            N_actions_startpoint: number of random start points per state.
            I: divisor applied to the state in ``TechnologyReadiness``.
            D: offset added inside the exponential in ``TechnologyReadiness``.
        """
        # Seeds torch's global RNG so every instance is built reproducibly.
        torch.manual_seed(1337)

        self.N_Technologies = N_Technologies
        self.N_Capabilities = N_Capabilities
        self.Horizon = Horizon
        self.N_actions_startpoint = N_actions_startpoint
        self.N_actions = N_actions

        # Used in TRL calculations
        self.I = I
        self.D = D  # was hard-coded to 0, which ignored the argument

        # Last axis indexes the two players: a different conversion is assumed
        # for each of them, informed by the specific scenario.
        self.CAPABILITYMATRIX = torch.rand(N_Technologies,N_Capabilities,2)

        # Random initial state, scaled so that each player's column sums to 100.
        st = torch.rand(N_Technologies,2)
        divisor = 0.01*torch.sum(st,0)
        self.InitialState = torch.divide(st,divisor)

        self.History = []  # (state, action) pairs recorded by Main
        self.Q = []        # pending (state, depth) pairs, used as a stack

    def Update_State(self,State,Action):
        """Return ``State + Action`` (deterministic transition)."""
        # TODO: implement stochasticity by perturbing the action first.
        return torch.add(State,Action)

    def TechnologyReadiness(self,State):
        """Apply ``1 / (1 + exp(-State / I + D))`` element-wise to ``State``."""
        return torch.pow(1+torch.exp(-State*(1/self.I)+self.D),-1)

    def TechToCapa(self,State):
        """Convert a technology state into an ``(N_Capabilities, 2)`` tensor.

        Each player's readiness column is multiplied with that player's slice
        of this instance's ``CAPABILITYMATRIX``.
        """
        # Use the instance's readiness curve, sizes and matrix; the module-level
        # ones may have different parameters and shapes.
        readiness = self.TechnologyReadiness(State)

        Capabilities = torch.empty((self.N_Capabilities,2))
        for player in range(2):
            Capabilities[:,player] = readiness[:,player] @ self.CAPABILITYMATRIX[:,:,player]

        return Capabilities

    def Battle(self,Capabilities):
        """Return each column's share of the summed capabilities.

        Intended to be read as the probabilities that [player 1, player 2]
        wins the battle.
        """
        return torch.div(torch.sum(Capabilities,dim=0), torch.sum(Capabilities))

    def OptimizeAction(self, state,Action):
        """Placeholder optimisation step: average ``Action`` with uniform noise.

        ``state`` is currently unused.
        """
        # TODO: this should use the battle function.
        return (Action + torch.rand_like(Action)) * .5

    def FilterActions(self, Actions):
        """Keep the first ``N_actions`` candidates.

        TODO: keep only optimisation trajectories that converged and filter
        out near-duplicates (distance below a tolerance).
        """
        return Actions[:self.N_actions]

    def GetActions(self,State):
        """Return the filtered candidate actions for ``State``.

        Draws ``N_actions_startpoint`` random start points, runs
        ``OptimizeAction`` on each and passes the results to ``FilterActions``.
        """
        ActionStartPoints = torch.rand(self.N_Technologies,2,self.N_actions_startpoint)

        AllActions = [
            self.OptimizeAction(State, ActionStartPoints[:,:,i])
            for i in range(self.N_actions_startpoint)
        ]

        return self.FilterActions(AllActions)

    def Main(self, time_limit=10):
        """Expand the game tree depth-first and return the recorded history.

        Args:
            time_limit: wall-clock budget in seconds; expansion stops once it
                is exceeded, possibly leaving unexpanded states in ``self.Q``.

        Returns:
            ``self.History``, a list of (entering state, exiting action) pairs.
        """
        # Monotonic clock: unaffected by system clock adjustments.
        start = time.monotonic()
        self.Q.append((self.InitialState,0))

        while self.Q and time.monotonic() - start < time_limit:
            st,t = self.Q.pop()  # the state which we are currently examining
            for a in self.GetActions(st):
                # Entering state and exiting action; the reward should
                # probably also be recorded.
                self.History.append((st,a))

                # Only states below the horizon are expanded further, so the
                # successor is computed only when it will be used.
                if t+1 < self.Horizon:
                    self.Q.append((self.Update_State(st,a),t+1))

        return self.History
                
             

            
           
# Run the search with 21 technologies, horizon 5 and 7 actions kept per state.
# If the time budget in Main is not hit, the history holds
# 7 + 7**2 + 7**3 + 7**4 + 7**5 = 19607 (state, action) pairs.
FullGame = TorchGame(N_Technologies=21,Horizon=5,N_actions=7)
hist = FullGame.Main()
print(len(hist))

19607


In [7]:
# Simple test case: one state update followed by one battle evaluation,
# using the module-level helper functions.

# Random initial state, scaled so that each column sums to 100.
State_0 = torch.rand(N_Technologies,2)
divisor = 0.01*torch.sum(State_0,0) # sum to 100
State_0 = torch.divide(State_0,divisor)


# Random action, scaled so that each column sums to 5.
Action_0 = torch.rand(N_Technologies,2)
divisor = 0.2*torch.sum(Action_0,0) # sum to 5
Action_0 = torch.divide(Action_0, divisor)

print(State_0)

# Apply the action to obtain the next state.
State_1 = Update_State(State_0,Action_0)
print(State_1)

# TechToCapa also prints the intermediate readiness levels.
Capabilities_1 = TechToCapa(State_1)
print(Capabilities_1)

# Share of the total capability per player; the two values sum to 1.
results = torchBattle(Capabilities_1)

print(results)

tensor([[ 5.5375, 50.1063],
        [54.0613,  4.8819],
        [40.4012, 45.0118]])
tensor([[ 7.4430, 52.2487],
        [55.5556,  5.0454],
        [42.0014, 47.7059]])
tensor([[0.5739, 0.8899],
        [0.9022, 0.5503],
        [0.8429, 0.8708]])
tensor([[0.5584, 1.7318],
        [1.0899, 1.4199],
        [1.0759, 1.2462],
        [1.0356, 1.3739],
        [1.1839, 1.0753],
        [0.6875, 1.3174]])
tensor([0.4082, 0.5918])


In [8]:
# 
# #import matplotlib.pyplot as plt 

# nRange = range(4,200,20)
# results = np.zeros((len(nRange),2))

# for i,n in enumerate(nRange):
#     bigMatrix = torch.rand(n,n).cuda()

#     start = time.time()

#     torch.linalg.eig(bigMatrix)
#     end = time.time()

#     gpuTime = end-start

#     start = time.time()
#     for _ in range(50):

#         torch.linalg.eig(bigMatrix)
#     end = time.time()
#     cpuTime = end-start
    
#     results[i,:] = [gpuTime,cpuTime]

# print(results)



[[1.31498837e+00 1.40035152e-02]
 [1.99794769e-03 1.99964046e-02]
 [1.00088120e-03 2.80001163e-02]
 [2.00152397e-03 4.89976406e-02]
 [3.00145149e-03 1.20999336e-01]
 [5.00249863e-03 1.90996885e-01]
 [3.99780273e-03 2.85028458e-01]
 [8.00204277e-03 3.63998175e-01]
 [1.50015354e-02 5.90999603e-01]
 [1.59997940e-02 7.02998877e-01]]


In [17]:
# Sanity check of the wall-clock timer: a 1 s sleep should measure roughly 1 s.
start = time.time()
time.sleep(1)
end = time.time()
total = end-start
# Bare expression so that the notebook displays the measured duration.
total

1.0065457820892334