# 9. Dash analysis

Run `python3 09-dashboard.py` fro your command line to start the dash server.

This Notebook brings some numbers in the form of tables to answer the following questions.

1. Launch site with a largest success launch rate and number of successes: **KSC LC-39A**
2. Launch site with a lowest success launch rate and number of failures: **CCAFS LC-40 76.92%**
3. Payload range with highest success rate: **2000 - 5000 kg**
4. Payload range with lowest success rate: **(0-2000 kg) & (5000-7000 kg)**
5. Booster with highest number of launch success: **F9 FT with 66.69%**


## Imports


In [45]:
import pandas as pd
from pathlib import Path

import helpers as hlp  # helper functions

## Setup


In [46]:
INPUT_FILE = hlp.DATA_DIR / Path("spacex_launch_dash.csv")
PAYLOAD_BINS = list(range(0, 10001, 1000))

## Load data


In [47]:
df = pd.read_csv(INPUT_FILE, encoding="utf-8", header=0, index_col=False)

# normalize column names
df.columns = [hlp.normalize_column_name(col) for col in df.columns]
df = df.drop("unnamed:_0", axis=1)
df.head()

Unnamed: 0,flight_number,launch_site,class,payload_mass_kg,booster_version,booster_version_category
0,1,CCAFS LC-40,0,0.0,F9 v1.0 B0003,v1.0
1,2,CCAFS LC-40,0,0.0,F9 v1.0 B0004,v1.0
2,3,CCAFS LC-40,0,525.0,F9 v1.0 B0005,v1.0
3,4,CCAFS LC-40,0,500.0,F9 v1.0 B0006,v1.0
4,5,CCAFS LC-40,0,677.0,F9 v1.0 B0007,v1.0


## Common code


In [48]:
unique_sites = list(df["launch_site"].unique())
unique_booster_versions = df["booster_version"].apply(lambda x: x.split()[1]).unique()

## 1. Launch site with largest / lowest success launch rate


In [49]:
# intermediary lists
launches = []
succeeded = []
failed = []

# populate intermediary lists
for booster_version in unique_sites:
    launches.append(df.loc[(df["launch_site"] == booster_version)]["class"].count())
    succeeded.append(
        df.loc[(df["launch_site"] == booster_version) & (df["class"] == 1)][
            "class"
        ].count()
    )
    failed.append(
        df.loc[(df["launch_site"] == booster_version) & (df["class"] == 0)][
            "class"
        ].count()
    )

# create a new DataFrame to host launches, succeeded and failed launches
success_df = pd.DataFrame(
    data={
        "site": unique_sites,
        "launches": launches,
        "success": succeeded,
        "failure": failed,
    }
)
success_df["success_ratio"] = round(success_df["success"] / success_df["launches"], 4)
success_df["failure_ratio"] = round(success_df["failure"] / success_df["launches"], 4)
success_df

Unnamed: 0,site,launches,success,failure,success_ratio,failure_ratio
0,CCAFS LC-40,26,7,19,0.2692,0.7308
1,VAFB SLC-4E,10,4,6,0.4,0.6
2,KSC LC-39A,13,10,3,0.7692,0.2308
3,CCAFS SLC-40,7,3,4,0.4286,0.5714


## 3. Payload success / failure rate


In [50]:
intervals_df = (
    pd.cut(df["payload_mass_kg"], PAYLOAD_BINS, right=True).value_counts().sort_index()
)
intervals_df = intervals_df.reset_index()
intervals_df

Unnamed: 0,payload_mass_kg,count
0,"(0, 1000]",8
1,"(1000, 2000]",3
2,"(2000, 3000]",10
3,"(3000, 4000]",11
4,"(4000, 5000]",8
5,"(5000, 6000]",5
6,"(6000, 7000]",4
7,"(7000, 8000]",0
8,"(8000, 9000]",0
9,"(9000, 10000]",5


In [None]:
# intermediary lists
unique_intervals = []
launches = []
succeeded = []
failed = []
mins = []
maxs = []

# populate intermediary lists
for row in intervals_df.itertuples():
    _, interval, counts = row
    min_, max_ = [interval.left, interval.right]
    unique_intervals.append([min_, max_])
    mins.append(min_)
    maxs.append(max_)

    payload_condition = (min_ <= df["payload_mass_kg"]) & (df["payload_mass_kg"] < max_)
    launches.append(df.loc[payload_condition]["class"].count())
    succeeded.append(df.loc[(payload_condition) & (df["class"] == 1)]["class"].count())
    failed.append(df.loc[(payload_condition) & (df["class"] == 0)]["class"].count())

# create a new DataFrame to host intervals, succeeded and failed launches
success_df = pd.DataFrame(
    data={
        "interval": unique_intervals,
        "min": mins,
        "max": maxs,
        "launches": launches,
        "success": succeeded,
        "failure": failed,
    }
)
success_df["success_ratio"] = round(success_df["success"] / success_df["launches"], 4)
success_df["failure_ratio"] = round(success_df["failure"] / success_df["launches"], 4)
success_df = success_df.fillna(0)  # set NaNs to 0
success_df

Unnamed: 0,interval,min,max,launches,success,failure,success_ratio,failure_ratio
0,"[0, 1000]",0,1000,10,2,8,0.2,0.8
1,"[1000, 2000]",1000,2000,3,1,2,0.3333,0.6667
2,"[2000, 3000]",2000,3000,10,5,5,0.5,0.5
3,"[3000, 4000]",3000,4000,11,8,3,0.7273,0.2727
4,"[4000, 5000]",4000,5000,8,3,5,0.375,0.625
5,"[5000, 6000]",5000,6000,5,2,3,0.4,0.6
6,"[6000, 7000]",6000,7000,4,0,4,0.0,1.0
7,"[7000, 8000]",7000,8000,0,0,0,0.0,0.0
8,"[8000, 9000]",8000,9000,0,0,0,0.0,0.0
9,"[9000, 10000]",9000,10000,5,3,2,0.6,0.4


In [80]:
study_intervals = [(0, 2000), (2000, 4000), (5000, 7000), (7000, 9000), (9000, 11000)]
print(f"{'INTERVAL':14} | {'LAUNCHES':10} | {'Success %':10} | {'Failed %':10} |")
print("-" * (14 + 10 + 10 + 10 + 11))
for interval in study_intervals:
    min_, max_ = interval
    cond = (success_df["min"] >= min_) & (success_df["max"] <= max_)
    temp_df = success_df.loc[cond]
    launches = temp_df["launches"].sum()
    succeeded = temp_df["success"].sum()
    failed = temp_df["failure"].sum()
    succeeded_pct = 0
    failed_pct = 0
    if launches > 0:
        succeeded_pct = round(succeeded / launches, 2)
        failed_pct = round(failed / launches, 2)

    print(
        f"{str(interval):14} | {launches:10d} | {succeeded_pct:10.2f} | {failed_pct:10.2f}"
    )

INTERVAL       | LAUNCHES   | Success %  | Failed %   |
-------------------------------------------------------
(0, 2000)      |         13 |       0.23 |       0.77
(2000, 4000)   |         21 |       0.62 |       0.38
(5000, 7000)   |          9 |       0.22 |       0.78
(7000, 9000)   |          0 |       0.00 |       0.00
(9000, 11000)  |          5 |       0.60 |       0.40


## 5. Booster with highest number of launches


In [None]:
# add new column to host booster versions
df["version"] = df["booster_version"].apply(lambda x: x.split()[1])

In [None]:
# intermediary lists
launches = []
succeeded = []
failed = []

# populate intermediary lists
for booster_version in unique_booster_versions:
    launches.append(df.loc[(df["version"] == booster_version)]["class"].count())
    succeeded.append(
        df.loc[(df["version"] == booster_version) & (df["class"] == 1)]["class"].count()
    )
    failed.append(
        df.loc[(df["version"] == booster_version) & (df["class"] == 0)]["class"].count()
    )

# create a new DataFrame to host booster versions, succeeded and failed launches
success_df = pd.DataFrame(
    data={
        "booster": unique_booster_versions,
        "launches": launches,
        "success": succeeded,
        "failure": failed,
    }
)
success_df["success_ratio"] = round(success_df["success"] / success_df["launches"], 4)
success_df["failure_ratio"] = round(success_df["failure"] / success_df["launches"], 4)
success_df

Unnamed: 0,booster,launches,success,failure,success_ratio,failure_ratio
0,v1.0,5,0,5,0.0,1.0
1,v1.1,15,1,14,0.0667,0.9333
2,FT,24,16,8,0.6667,0.3333
3,B4,11,6,5,0.5455,0.4545
4,B5,1,1,0,1.0,0.0
