In [1]:
import numpy as np
import plotly.graph_objects as go
from pathlib import Path
import matplotlib.pyplot as plt

In [2]:
exp_path = Path('videos/experiment')

In [3]:
evals_history = np.load(exp_path/'evals_history.npy')[:, :]
evals_history.shape

(1595, 45)

In [4]:
# get the last positive number from each row of evals_history numpy array
lasts = []
for eval in evals_history:
    # get last positive number from the numpy array
    only_nonzero = eval[~np.isnan(eval)]
    if len(only_nonzero) == 0:
        last = 0
    else:
        last = only_nonzero[-1]
        
    lasts.append(last)
lasts = np.array(lasts)

In [5]:
risks_history = np.load(exp_path/'risks_history.npy')[:, :]
risks_history.shape

(1055, 54)

In [6]:
def rbf1(risks):
    prod_risk = 0
    for i in range(len(risks)):
        prod_risk = prod_risk + (np.power(0.99, i) * (risks[i]/10))

    if prod_risk < 0.01:
        prod_risk = 0.01

    slc = (1 - prod_risk) / ((prod_risk))

    # overriting rbf_delta
    rbf_delta = 1.0

    rbf = rbf_delta * slc

    return rbf

In [7]:
def rbf2(risks):
    prod_risk = 0
    for i in range(len(risks)):
        prod_risk = prod_risk + (np.power(0.99, i) * (risks[i]))

    slc = prod_risk

    # overriting rbf_delta
    rbf_delta = 1.0

    rbf = rbf_delta * slc

    return rbf

In [8]:
def rbf(evals):
    max_risk = 0
    evals = evals[~np.isnan(evals)]
    for i in range(len(evals)):
        max_risk = max_risk + (np.power(0.99, i) * (evals[i]))

    slc = max_risk

    # overriting rbf_delta
    rbf_delta = 0.01

    rbf = 1 + rbf_delta * slc

    return rbf

In [9]:
def ser(risks):
    prod_safety = 1
    risks = risks[~np.isnan(risks)]
    for i in range(len(risks)):
        prod_safety = prod_safety + (1 - (risks[i]))    

    ser = prod_safety

    return ser

In [10]:
# Teste para ver impactos de risco = 0
def ser(risks):

    risks = risks[~np.isnan(risks)]
    risks = risks[(risks != 0) & (risks != 1)]

    prod_safety = 1
    for risk in risks[-5:]:
        if risk < 1:
            prod_safety = prod_safety * (1 - (risk/10))
        #print(prod_safety) 

    #         risco         /  segurança
    ser = (1 - prod_safety) / prod_safety

    return ser

In [11]:
sers = np.apply_along_axis(ser, 1, risks_history)
rbfs = np.apply_along_axis(rbf, 1, evals_history)

# plot sers and rbfs
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(sers)), y=sers, mode='lines', name="SER"))
fig.add_trace(go.Scatter(x=np.arange(len(rbfs)), y=rbfs, mode='lines', name="RBF"))
fig.add_trace(go.Scatter(x=np.arange(len(lasts)), y=lasts, mode='lines', name="E()"))
fig.update_layout(title='SER and RBF over time', xaxis_title='Time', yaxis_title='SER/RBF')
fig.show()

durante a simulacao ele encontra um estado de falha, precisa jogar o risk pra +infinito e remover o histórico automaticamente

In [12]:
#plot all evals
fig = go.Figure()
for i in range(1):
    fig.add_trace(go.Scatter(x=np.arange(len(evals_history[i])), y=evals_history[i]/10, mode='lines'))
fig.update_layout(title='Eval over time', xaxis_title='Time', yaxis_title='Eval')
fig.show()


In [13]:
#plot all risks
fig = go.Figure()
for i in range(100):
    fig.add_trace(go.Scatter(x=np.arange(len(risks_history[i])), y=risks_history[i], mode='lines'))
fig.update_layout(title='Risk over time', xaxis_title='Time', yaxis_title='Risk')
fig.show()

In [14]:
#plot last positives
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(lasts)), y=lasts, mode='lines'))
fig.update_layout(title='Last eval over time', xaxis_title='Time', yaxis_title='E()')
fig.show()

In [15]:
def reward(evals):
    episode_reward = 0

    non_zero_evals = evals[evals != 0]

    for eval in non_zero_evals:
        episode_reward += eval

    episode_reward = episode_reward / len(non_zero_evals)

    return episode_reward

In [16]:
rewards = np.apply_along_axis(reward, 1, evals_history)

In [17]:
# plot rewards sum
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(rewards)), y=rewards, mode='lines'))
fig.update_layout(title='Rewards avg over time', xaxis_title='Time', yaxis_title='Rewards')
fig.show()

# MANIPULATING DATA

In [18]:
fake_evals_history = evals_history.copy()
fake_evals_history[200:600] = -evals_history[600:1000]

# get the last positive number from each row of evals_history numpy array
fake_lasts = []
for eval in fake_evals_history:
    # get last positive number from the numpy array
    only_nonzero = eval[eval != 0]
    if len(only_nonzero) == 0:
        last = 0
    else:
        last = only_nonzero[-1]
        
    fake_lasts.append(last)


# recalculate risks
fake_risks_history = []
for evals in fake_evals_history:
    risks = []
    for local_evaluation in evals:
        if local_evaluation >= 0:
            risk = 0.0
        else:
            risk = abs(local_evaluation)
        risks.append(risk)
    fake_risks_history.append(risks)



In [19]:
sers = np.apply_along_axis(ser, 1, fake_risks_history)
rbfs = np.apply_along_axis(rbf, 1, fake_evals_history)

# plot sers and rbfs
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(sers)), y=sers, mode='lines', name="SER"))
fig.add_trace(go.Scatter(x=np.arange(len(rbfs)), y=rbfs, mode='lines', name="RBF"))
fig.add_trace(go.Scatter(x=np.arange(len(fake_lasts)), y=fake_lasts, mode='lines', name="E()"))
fig.update_layout(title='SER and RBF over time', xaxis_title='Time', yaxis_title='SER/RBF')
fig.show()

- enquanto E for positivo, SER será sempre zerado, entao de nada adianta RBF crescer
- quando E é negativo, entao RBF começa a baixar (comportamento esperado)
- quando E é negativo, SER começa a crescer
- na intersecçao RBF limita SER

# SCORES

In [20]:
scores_0 = np.load(exp_path/'scores_0.npy')[:, :]
scores_1 = np.load(exp_path/'scores_1.npy')[:, :]
scores_0.shape, scores_1.shape

((1055, 54), (1055, 54))

In [21]:
# remove zeros of the list in each scores_0
scores_0_clean = []
for scores in scores_0:
    scores = scores[scores != 0]
    scores_0_clean.append(scores)
scores_0_clean = np.array(scores_0_clean, dtype="object")

# remove zeros of the list in each scores_1
scores_1_clean = []
for scores in scores_1:
    scores = scores[scores != 0]
    scores_1_clean.append(scores)
scores_1_clean = np.array(scores_1_clean, dtype="object")

In [22]:
scores_0_clean = scores_0.copy()
scores_0_clean[scores_0_clean == 0] = np.nan

scores_1_clean = scores_1.copy()
scores_1_clean[scores_1_clean == 0] = np.nan

In [23]:
ADD_STD = True

scores_0_mean = np.apply_along_axis(np.nanmean, 1, scores_0_clean)
scores_0_std = np.apply_along_axis(np.nanstd, 1, scores_0_clean)

scores_1_mean = np.apply_along_axis(np.nanmean, 1, scores_1_clean)
scores_1_std = np.apply_along_axis(np.nanstd, 1, scores_1_clean)

# plot mean of scores_0 as a line and std as two other lines above and below the mean
# the std should have the same color as the mean but with a lower opacity
# fill in the area between the two std lines
fig = go.Figure()
color0 = '0,200,0'
fig.add_trace(go.Scatter(x=np.arange(len(scores_0_mean)), y=scores_0_mean, mode='lines', name="Score_self Mean", line=dict(color=f'rgba({color0},1)')))
if ADD_STD:
    fig.add_trace(go.Scatter(x=np.arange(len(scores_0_mean)), y=scores_0_mean + scores_0_std, fill='tonexty', mode='lines', name="Std", fillcolor=f'rgba({color0},0)', line=dict(color=f'rgba({color0},0.1)')))
    fig.add_trace(go.Scatter(x=np.arange(len(scores_0_mean)), y=scores_0_mean - scores_0_std, fill='tonexty', mode='lines', name="Std", fillcolor=f'rgba({color0},0.2)', line=dict(color=f'rgba({color0},0.1)')))

color1 = '200,0,0'
fig.add_trace(go.Scatter(x=np.arange(len(scores_1_mean)), y=scores_1_mean, mode='lines', name="Score_enemy Mean", line=dict(color=f'rgba({color1},1)')))
if ADD_STD:
    fig.add_trace(go.Scatter(x=np.arange(len(scores_1_mean)), y=scores_1_mean + scores_1_std, fill='tonexty', mode='lines', name="Std", fillcolor=f'rgba({color1},0)', line=dict(color=f'rgba({color1},0.1)')))
    fig.add_trace(go.Scatter(x=np.arange(len(scores_1_mean)), y=scores_1_mean - scores_1_std, fill='tonexty', mode='lines', name="Std", fillcolor=f'rgba({color1},0.2)', line=dict(color=f'rgba({color1},0.1)')))


# draw the line between the two means
center = (scores_0_mean + scores_1_mean) / 2
fig.add_trace(go.Scatter(x=np.arange(len(center)), y=center, mode='lines', name="Center", line=dict(color='rgba(0,0,0,0.3)')))

#add evaluation points to the plot plus the center line
eval_center = center + (lasts*1000)
fig.add_trace(go.Scatter(x=np.arange(len(eval_center)), y=eval_center, mode='lines', name="E() + Center", line=dict(color='purple')))

#add ser point to the plot plus the center line
ser_center = center + (sers*1000)
fig.add_trace(go.Scatter(x=np.arange(len(ser_center)), y=ser_center, mode='lines', name="SER + Center", line=dict(color='orange')))

fig.update_layout(title='Mean and Std of scores_0 over time', xaxis_title='Time', yaxis_title='Scores')
fig.show()



Mean of empty slice


Degrees of freedom <= 0 for slice.



ValueError: operands could not be broadcast together with shapes (1055,) (1595,) 

estou perden

In [None]:
fig = go.Figure()

# plot the ratio between the two means
ratio = scores_0_mean / scores_1_mean
fig.add_trace(go.Scatter(x=np.arange(len(ratio)), y=ratio, mode='lines', name="Ratio self/enemy", line=dict(color='red')))

ratio2 = scores_1_mean / scores_0_mean
fig.add_trace(go.Scatter(x=np.arange(len(ratio2)), y=ratio2, mode='lines', name="Ratio enemy/self", line=dict(color='green')))

fig.update_layout(title='Mean and Std of scores_0 over time', xaxis_title='Time', yaxis_title='Scores')
fig.show()

In [None]:
evals_history.shape

(1745, 44)

In [None]:
evals_history[0]

array([0.00917435, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.01724136, 0.00826442, 0.00826442, 0.00826442,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan])

In [None]:

#   perdendo                    distancia curta
# if (score_0 <= score_1 and abs(ratio_self_enemy - ratio_enemy_self) < 0.5)
# #  ganhando                     distancia longa
# or (score_0 > score_1 and abs(ratio_self_enemy - ratio_enemy_self) > 0.5):
    
#     rbf = 1.0
    
#     #return True # arrisca





In [None]:
import pickle as pkl

with open(exp_path/'vulcan_rewards.pkl', 'rb') as f:
    vulcan_rewards = pkl.load(f)

In [None]:
vulcan_rewards_sum = []

for rewards in vulcan_rewards:
    vulcan_rewards_sum.append(rewards.sum())

In [None]:
# plot one reward
reward0 = vulcan_rewards[0]

fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(reward0)), y=reward0, mode='lines'))
fig.update_layout(title='Reward over time for one simulation', xaxis_title='Time', yaxis_title='Reward')
fig.show()


In [None]:
np.apply_along_axis(np.sum, 1, rewards)

array([0., 0., 0., 0., 0., 0.])

In [None]:
# plot multiple rewards
fig = go.Figure()
for i in range(100):
    fig.add_trace(go.Scatter(x=np.arange(len(vulcan_rewards[i])), y=vulcan_rewards[i], mode='lines'))
fig.update_layout(title='Reward over time for multiple simulations', xaxis_title='Time', yaxis_title='Reward')
fig.show()

In [None]:
# plot rewards sum
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(vulcan_rewards_sum)), y=vulcan_rewards_sum, mode='lines'))
fig.update_layout(title='Rewards sum over time', xaxis_title='Time', yaxis_title='Rewards')
fig.show()

In [None]:
# normalize rewards
vulcan_rewards_sum = np.array(vulcan_rewards_sum)
vulcan_rewards_sum = (vulcan_rewards_sum - vulcan_rewards_sum.min()) / (vulcan_rewards_sum.max() - vulcan_rewards_sum.min())

# plot rewards sum
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(vulcan_rewards_sum)), y=vulcan_rewards_sum, mode='lines'))
fig.update_layout(title='Rewards sum over time', xaxis_title='Time', yaxis_title='Rewards')
fig.show()


In [None]:
# plot E(), ser and rewards sum
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(lasts)), y=lasts, mode='lines', name="E()"))
fig.add_trace(go.Scatter(x=np.arange(len(vulcan_rewards_sum)), y=vulcan_rewards_sum, mode='lines', name="Rewards"))
fig.add_trace(go.Scatter(x=np.arange(len(sers)), y=sers, mode='lines', name="SER"))
fig.update_layout(title='E() and Rewards sum over time', xaxis_title='Time', yaxis_title='E()/Rewards')
fig.show()

In [None]:
raw_rewards = np.load(exp_path/'raw_rewards.npy')[:, :]
raw_rewards.shape

(1745, 6)

In [None]:
raw_rewards_sum = np.apply_along_axis(np.sum, 1, raw_rewards)

# plot rewards sum
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(raw_rewards_sum)), y=raw_rewards_sum, mode='lines'))
fig.update_layout(title='Rewards sum over time', xaxis_title='Time', yaxis_title='Rewards')
fig.show()