# Machin supervisor or not

In [1]:
# Imports
import numpy as np

# A function that returns a LaTeX matrix
def bmatrix(a):
    """Returns a LaTeX bmatrix

    :a: numpy array
    :returns: LaTeX bmatrix as a string
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    lines = str(a).replace('[', '').replace(']', '').splitlines()
    rv = [r'\begin{bmatrix}']
    rv += ['  ' + ' & '.join(l.split()) + r'\\ ' for l in lines]
    rv +=  [r'\end{bmatrix}']
    return ''.join(rv)

In [2]:
# Transission matrix for unsupervised machine
pu = np.matrix([[0.9,0.1],[0.4,0.6]])
pu

matrix([[ 0.9,  0.1],
        [ 0.4,  0.6]])

In [3]:
# Transision matrix for supervied machine
ps = np.matrix([[0.08,0.9,0.02],[0.05,0.9,0.05],[0.4,0,0.6]])
ps

matrix([[ 0.08,  0.9 ,  0.02],
        [ 0.05,  0.9 ,  0.05],
        [ 0.4 ,  0.  ,  0.6 ]])

# Absorbing markov chain.

In [4]:
psabs = np.matrix([[0.08,0.9,0.02],[0.05,0.9,0.05],[0,0,1]])
psabs

matrix([[ 0.08,  0.9 ,  0.02],
        [ 0.05,  0.9 ,  0.05],
        [ 0.  ,  0.  ,  1.  ]])

In [5]:
qs = np.matrix([[0.08, 0.9],[0.05,0.9]])
qs

matrix([[ 0.08,  0.9 ],
        [ 0.05,  0.9 ]])

In [6]:
rs = np.matrix([[0.02],[0.05]])
rs

matrix([[ 0.02],
        [ 0.05]])

In [7]:
# (I - q)^-1
ns = (np.identity(2) - qs)**(-1)
print(str(ns))
print(bmatrix(ns))
print(bmatrix(ns*np.matrix([[1],[1]])))

[[  2.12765957  19.14893617]
 [  1.06382979  19.57446809]]
\begin{bmatrix}  2.12765957 & 19.14893617\\   1.06382979 & 19.57446809\\ \end{bmatrix}
\begin{bmatrix}  21.27659574\\   20.63829787\\ \end{bmatrix}


# Stationary distrubution
Stationary distrubution asks the question $\pi P = \pi$ 
to solve that we are using the eginvalues and egivectors

In [8]:
egin_of_p=np.linalg.eigvals(ps)
egin_of_p

array([ 0.05104394,  0.52895606,  1.        ])

In [9]:
test1 = np.matrix([1,0,0])*ps**50
test1

matrix([[ 0.08948546,  0.80536913,  0.10514541]])

In [10]:
# Trying stationary distrobution
# Calculating the eginvalue of the transposed matrix
np.linalg.eigvals(np.transpose(ps))

array([ 0.05104394,  1.        ,  0.52895606])

In [11]:
# Calculating the eigenvectors
egivectors=np.linalg.eig(np.transpose(ps))
#print(str(egivectors))
egivectors[1:2]
#print(bmatrix(egivectors[1:2]))


(matrix([[ 0.68559419, -0.10951344,  0.33490392],
         [-0.72681591, -0.98562097, -0.81233918],
         [ 0.04122172, -0.12867829,  0.47743526]]),)

In [12]:
# The secound column in the matrix seams to be all negative is that then an cadidate for the system
# fii we can multiply that with -1 so in this case the sationary distrubution is:
t = 1/(0.10951344 + 0.98562097 + 0.12867829)
test2 = t * np.matrix([0.10951344, 0.98562097, 0.12867829])
print("test1=" + str(test1))
print("test2=" + str(test2))


test1=[[ 0.08948546  0.80536913  0.10514541]]
test2=[[ 0.08948546  0.80536913  0.10514541]]


In [13]:
test1*ps

matrix([[ 0.08948546,  0.80536913,  0.10514541]])

# Long term expected reward

In [14]:
Cr = -70 # cost of machine while its uder reapair.
R  = 120 # Running reward 
Cp = -5  # Cost of the supervisor
print(str(Cr))

-70


In [15]:
vpi=test2.tolist()[0]
print("allert " + str(vpi[0]))
print("tierd " + str(vpi[1]))
print("broken "+ str(vpi[2]))

allert 0.0894854580280136
tierd 0.8053691304233074
broken 0.10514541154867897


In [16]:
#reward= Cp*R*vpi[0] + Cp*R*vpi[1] - Cr*Cp*vpi[2]
reward = (R + Cp)*vpi[0] + (R + Cp)*vpi[1] + (Cr + Cp)*vpi[2]
print("the reward is " + str(reward))

the reward is 95.02237180575099


# Two supervisors

In [17]:
# One supervisor have the alert probability to detect a error of
AlertSup = 0.8
TirerdSup = 0.5

probability of two Alert supevisor is asking the question
P(AuB) = P(A) + P(B) - P(A^B)
Then because A and B is unrelated
P(AuB) = P(A) + P(P) -P(A)P(B)

In [18]:
TwoAlertSup = 2 * AlertSup - AlertSup*AlertSup
TwoTiredSup = 2 * TirerdSup - TirerdSup*TirerdSup
print("Thus 2 allert supervisor got the probability "+ str(TwoAlertSup) + " to discover a error")
print("and to tierd supervisor got the probobily " + str(TwoTiredSup) + " to discover a error")

Thus 2 allert supervisor got the probability 0.96 to discover a error
and to tierd supervisor got the probobily 0.75 to discover a error


In [19]:
P2 = np.matrix([[0.1*0.96, 0.9, 0.1*0.04],[0.1*0.75, 0.9, 0.1*(1-0.75)],[0.4, 0, 0.6]])
print(str(P2))
print(bmatrix(P2))

[[ 0.096  0.9    0.004]
 [ 0.075  0.9    0.025]
 [ 0.4    0.     0.6  ]]
\begin{bmatrix}  0.096 & 0.9 & 0.004\\   0.075 & 0.9 & 0.025\\   0.4 & 0. & 0.6\\ \end{bmatrix}


In [20]:
# Statationary distrubution
longtimeExp = np.matrix([1, 0 ,0])*P2**50
print(str(longtimeExp))
lte=longtimeExp.tolist()[0]
print(bmatrix(longtimeExp))

[[ 0.09458501  0.85126507  0.05414992]]
\begin{bmatrix}  0.09458501 & 0.85126507 & 0.05414992\\ \end{bmatrix}


In [21]:
Reward2 = (R + 2*Cp)*lte[0] + (R + 2*Cp)*lte[1] + (Cr + 2*Cp)*lte[2]
print("The rewart for hering 2 supervisors is " + str(Reward2))

The rewart for hering 2 supervisors is 99.71151572476346
