In [35]:
from pathlib import Path
import pandas as pd
import altair as alt
alt.data_transformers.enable('data_server')

DataTransformerRegistry.enable('data_server')

In [36]:
Path.cwd()

PosixPath('/Users/ben/Developer/SpaceMissionDES/SpaceMissionDES/analysis')

#### Load the Data

In [66]:
data_dir = Path.cwd().parent.parent / "results" / "Case04-r05__Tx10-Tk02"

mc_results = pd.read_csv(data_dir / "mc.csv")
mc_results.head()

Unnamed: 0,replicant,outcome,duration,anomaly_count,anomaly_time,anomaly_vehicle,anomaly_activity
0,2,False,27.474903,2,12.5,Tanker,Countdown
1,2,False,27.474903,3,22.883643,Tanker,Countdown
2,2,False,27.474903,4,27.474903,Tanker,RPOD
3,0,True,89.480727,2,25.5,Tanker,Ascent
4,0,True,89.480727,3,41.318858,Tanker,Countdown


#### Summary Statistics

In [67]:
N = mc_results.replicant.max()
success_rate = sum(mc_results.groupby("replicant").first().outcome) / N
print(f"The success rate was: {success_rate} = {100*success_rate:.3f} %\n")

The success rate was: 0.8554759126521086 = 85.548 %



#### Mission Duration

In [68]:
df = (
    # mc_results[mc_results.outcome == True]
    mc_results
    .groupby("replicant", as_index=False)
    .first()
    .loc[:, ["replicant", "outcome", "duration"]]
)

df.loc[df.outcome == False, "duration"] = 1e6

alt.Chart(df).mark_bar().encode(
    # alt.X("duration:Q", bin=True),
    alt.X("duration:Q", bin=alt.Bin(extent=[50, 200], step=10)),
    y='count()',
).properties(
    width = 600
)

Compute the Emprical CDF

In [69]:
import numpy as np

# from: https://stackoverflow.com/questions/33345780/empirical-cdf-in-python-similiar-to-matlabs-one
def ecdf(sample):

    # convert sample to a numpy array, if it isn't already
    sample = np.atleast_1d(sample)

    # find the unique values and their corresponding counts
    quantiles, counts = np.unique(sample, return_counts=True)

    # take the cumulative sum of the counts and divide by the sample size to
    # get the cumulative probabilities between 0 and 1
    cumprob = np.cumsum(counts).astype(np.double) / sample.size

    return quantiles, cumprob

def plot_ECDF(df):
    sample = df.duration.values
    qe, pe = ecdf(sample)
    data = pd.DataFrame({"duration": list(qe), "prob": list(pe)})
    fig = alt.Chart(data).mark_line().encode(
        x = alt.X("duration", scale=alt.Scale(domain=(0,200))),
        y = "prob"
    ).properties(width=500, height=250)
    return fig


In [65]:
fig = plot_ECDF(df)
fig.interactive()

In [76]:
fig = plot_ECDF(df)
fig.interactive()

In [58]:
df = (
    # mc_results[mc_results.outcome == True]
    mc_results
    .groupby("replicant", as_index=False)
    .first()
    .loc[:, ["replicant", "outcome", "duration"]]
)

df.loc[df.outcome == False, "duration"] = 1e6

df

Unnamed: 0,replicant,outcome,duration
0,0,False,1000000.000000
1,1,True,98.390595
2,2,True,114.060942
3,3,False,1000000.000000
4,4,True,114.060942
...,...,...,...
5995,5995,False,1000000.000000
5996,5996,False,1000000.000000
5997,5997,True,126.621073
5998,5998,False,1000000.000000


In [28]:
df

sample = df.duration.values
sample

# convert sample to a numpy array, if it isn't already
sample = np.atleast_1d(sample)

# find the unique values and their corresponding counts
quantiles, counts = np.unique(sample, return_counts=True)

# take the cumulative sum of the counts and divide by the sample size to
# get the cumulative probabilities between 0 and 1
cumprob = np.cumsum(counts).astype(np.double) / sample.size

#### Failure Diagnostics (Aggregate)

Which Vehicles Fail

In [9]:
# Relative failure frequency by vehicle
df = (
    mc_results
    .groupby("anomaly_vehicle")
    .count()
    .rename(columns={'replicant': 'n'})
    .loc[:,'n']
)

fail_count = df.sum()
# (df / fail_count).reset_index()

alt.Chart(
    (df / fail_count).reset_index()
).mark_bar().encode(
    y = "anomaly_vehicle",
    x = "n"
)

Which Activities Fail?

In [10]:
# Relative failure frequency by activity
df = (
    mc_results
    .groupby("anomaly_activity")
    .count()
    .rename(columns={'replicant': 'n'})
    .loc[:,'n']
)

fail_count = df.sum()
# (df / fail_count).reset_index()

alt.Chart(
    (df / fail_count).reset_index()
).mark_bar().encode(
    y = "anomaly_activity",
    x = "n"
)

#### Failure Diagnostics (per mission)

Avg ascent failures per mission

In [11]:
mc_results.head()

Unnamed: 0,replicant,outcome,duration,anomaly_count,anomaly_time,anomaly_vehicle,anomaly_activity
0,0,False,75.820468,2,0.0,MTV,Countdown
1,0,False,75.820468,3,49.34668,Tanker,Ascent
2,6,False,66.320468,2,12.5,Tanker,Countdown
3,6,False,66.320468,3,39.84668,Tanker,Ascent
4,1,True,98.390595,2,12.5,Tanker,Countdown


In [12]:
mc_results.query("anomaly_count > 1")
# df = mc_results.query("replicant == 3")
# df

Unnamed: 0,replicant,outcome,duration,anomaly_count,anomaly_time,anomaly_vehicle,anomaly_activity
0,0,False,75.820468,2,0.000000,MTV,Countdown
1,0,False,75.820468,3,49.346680,Tanker,Ascent
2,6,False,66.320468,2,12.500000,Tanker,Countdown
3,6,False,66.320468,3,39.846680,Tanker,Ascent
4,1,True,98.390595,2,12.500000,Tanker,Countdown
...,...,...,...,...,...,...,...
21349,5997,True,126.621073,3,20.346680,Tanker,Countdown
21350,5997,True,126.621073,4,36.722230,Tanker,Countdown
21351,5997,True,126.621073,5,57.507941,Tanker,Countdown
21352,5997,True,126.621073,6,69.767639,Tanker,Countdown


In [13]:
# df.anomaly_activity.value_counts()

In [14]:
df = mc_results.groupby("replicant", as_index=False)["anomaly_activity"].value_counts()
df

Unnamed: 0,replicant,anomaly_activity,count
0,0,Ascent,1
1,0,Countdown,1
2,1,Countdown,4
3,2,Countdown,5
4,3,Countdown,7
...,...,...,...
7255,5997,Countdown,6
7256,5998,RPOD,1
7257,5998,Countdown,1
7258,5999,Countdown,1


In [15]:
# alt.Chart(df.unstack().Ascent.reset_index()).mark_boxplot().encode(x = "Ascent")

In [16]:
df = (
    mc_results
    # mc_results[mc_results.anomaly_activity != "Countdown"]
    .groupby("replicant", as_index=False)
    ["anomaly_activity"]
    .value_counts()
)

alt.Chart(df).mark_boxplot().encode(
    x = "count",
    y = "anomaly_activity"

)

MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation

alt.Chart(...)

In [None]:
df[df.anomaly_activity != "Countdown"]

Unnamed: 0,replicant,anomaly_activity,count
8,7,Orbit Insertion,1
14,13,Return to Base,1
15,13,Orbit Insertion,1
16,14,Ascent,1
18,15,Orbit Insertion,1
...,...,...,...
1105,988,Ascent,1
1110,992,Orbit Insertion,1
1112,994,Ascent,1
1113,995,Return to Base,1


In [None]:
mc_results

Unnamed: 0,replicant,outcome,duration,anomaly_count,anomaly_time,anomaly_vehicle,anomaly_activity
0,0,True,72.127086,2,6.000000,Tanker,Countdown
1,0,True,72.127086,3,37.762713,Tanker,Countdown
2,2,True,73.776831,2,6.000000,Tanker,Countdown
3,2,True,73.776831,3,37.762713,Tanker,Countdown
4,2,True,73.776831,4,45.110132,Tanker,Countdown
...,...,...,...,...,...,...,...
3213,998,True,76.209632,2,19.000000,Tanker,Countdown
3214,998,True,76.209632,3,37.762713,Tanker,Countdown
3215,998,True,76.209632,4,45.110132,Tanker,Countdown
3216,998,True,76.209632,5,46.757138,Tanker,Countdown


---
### Experiments

In [None]:
import pandas as pd
import altair as alt
import numpy as np

# from: https://stackoverflow.com/questions/33345780/empirical-cdf-in-python-similiar-to-matlabs-one
def ecdf(sample):

    # convert sample to a numpy array, if it isn't already
    sample = np.atleast_1d(sample)

    # find the unique values and their corresponding counts
    quantiles, counts = np.unique(sample, return_counts=True)

    # take the cumulative sum of the counts and divide by the sample size to
    # get the cumulative probabilities between 0 and 1
    cumprob = np.cumsum(counts).astype(np.double) / sample.size

    return quantiles, cumprob

sample = [1, 2, 2, 3, 3, 3, 4, 4, 5, 5,]
qe, pe = ecdf(sample)
data = pd.DataFrame({"duration": list(qe), "prob": list(pe)})

alt.Chart(data).mark_line(interpolate='step-after', point=True, color="red").encode(
    x = alt.X("duration", axis=alt.Axis(values=[0, 1, 2, 3, 4, 5]), scale=alt.Scale(domain=(1,5))),
    y = "prob"
).properties(
    width=400, 
    height=400
)