# Setup

In [1]:
# Basic Imports
import datetime
from pathlib import Path

import requests

# Data Wrangling
import numpy as np
import pandas as pd


In [2]:
# Global variables
# URL of the JSON file that get_api_data() downloads.
static_json_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json"

# Accumulators, grouped by the function that appends to them.
# Each name is bound to its own, initially empty, list.

# get_booster_version()
booster_version = []

# get_launch_site()
longitude, latitude, launch_site = [], [], []

# get_payload_data()
payload_mass, orbit = [], []

# get_core_data()
block, reused_count, serial = [], [], []
outcome, flights, grid_fins = [], [], []
reused, legs, landing_pad = [], [], []

In [3]:
# Global Functions
def get_api_data(
    url: str,
    cutoff_date: datetime.date = datetime.date(2020, 11, 13),
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Download launch records and keep single-core, single-payload launches.

    The JSON body found at ``url`` is flattened with ``pd.json_normalize``.
    Rows whose ``cores`` or ``payloads`` entry does not hold exactly one
    element are dropped, and the remaining one-element lists are replaced by
    their only element.

    Parameters
    ----------
    url : str
        Address of the JSON document to download.
    cutoff_date : datetime.date, optional
        Launches dated after this day are dropped. Defaults to 2020-11-13.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout``.

    Returns
    -------
    pd.DataFrame
        Columns ``rocket``, ``payloads``, ``launchpad``, ``cores``,
        ``flight_number``, ``date_utc`` plus ``date`` (the calendar date
        parsed from ``date_utc``). The index of the downloaded frame is kept.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on an HTTP error instead of parsing an error body as launch data.
    response.raise_for_status()
    launches = pd.json_normalize(response.json())

    has_single_core_and_payload = (
        launches["cores"].str.len().eq(1) & launches["payloads"].str.len().eq(1)
    )
    wanted_columns = ["rocket", "payloads", "launchpad", "cores", "flight_number", "date_utc"]

    return (
        launches
        .loc[has_single_core_and_payload, wanted_columns]
        .assign(
            # Unwrap the one-element lists on the already filtered frame.
            cores=lambda _df: _df["cores"].str.get(0),
            payloads=lambda _df: _df["payloads"].str.get(0),
            date=lambda _df: pd.to_datetime(_df["date_utc"]).dt.date,
        )
        .loc[lambda _df: _df["date"].le(cutoff_date), :]
    )


def get_booster_version(data, timeout=30.0):
    """Append the rocket name for every row of ``data["rocket"]`` to ``booster_version``.

    Each distinct rocket id is looked up once at
    ``https://api.spacexdata.com/v4/rockets/<id>``; repeated ids reuse the
    first answer. A missing id (``None``, NaN or empty) appends ``None`` so
    that ``booster_version`` stays aligned, entry for entry, with the rows of
    ``data``.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide an iterable of rocket ids under the key ``"rocket"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If a lookup answers with an error status.
    """
    names_by_rocket_id = {}
    for rocket_id in data["rocket"]:
        if pd.isna(rocket_id) or not rocket_id:
            # Keep one entry per input row even when there is nothing to look up.
            booster_version.append(None)
            continue
        if rocket_id not in names_by_rocket_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/rockets/" + str(rocket_id),
                timeout=timeout,
            )
            response.raise_for_status()
            names_by_rocket_id[rocket_id] = response.json()["name"]
        booster_version.append(names_by_rocket_id[rocket_id])


def get_launch_site(data, timeout=30.0):
    """Append launchpad details for every row of ``data["launchpad"]``.

    For each row one value is appended to each of ``longitude``, ``latitude``
    and ``launch_site`` (the launchpad's ``name``). Each distinct launchpad id
    is looked up once at ``https://api.spacexdata.com/v4/launchpads/<id>``;
    repeated ids reuse the first answer. A missing id (``None``, NaN or empty)
    appends ``None`` to all three lists so they stay aligned with the rows of
    ``data``.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide an iterable of launchpad ids under the key ``"launchpad"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If a lookup answers with an error status.
    """
    details_by_launchpad_id = {}
    for launchpad_id in data["launchpad"]:
        if pd.isna(launchpad_id) or not launchpad_id:
            # Keep one entry per input row even when there is nothing to look up.
            longitude.append(None)
            latitude.append(None)
            launch_site.append(None)
            continue
        if launchpad_id not in details_by_launchpad_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/launchpads/" + str(launchpad_id),
                timeout=timeout,
            )
            response.raise_for_status()
            details_by_launchpad_id[launchpad_id] = response.json()
        details = details_by_launchpad_id[launchpad_id]
        longitude.append(details["longitude"])
        latitude.append(details["latitude"])
        launch_site.append(details["name"])


def get_payload_data(data, timeout=30.0):
    """Append mass and orbit for every row of ``data["payloads"]``.

    Each payload id is looked up at
    ``https://api.spacexdata.com/v4/payloads/<id>``; its ``mass_kg`` goes to
    ``payload_mass`` and its ``orbit`` to ``orbit``. A missing id (``None``,
    NaN or empty) appends ``None`` to both lists so they stay aligned with the
    rows of ``data``.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide an iterable of payload ids under the key ``"payloads"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If a lookup answers with an error status.
    """
    for payload_id in data["payloads"]:
        if pd.isna(payload_id) or not payload_id:
            # Keep one entry per input row even when there is nothing to look up.
            payload_mass.append(None)
            orbit.append(None)
            continue
        response = requests.get(
            "https://api.spacexdata.com/v4/payloads/" + str(payload_id),
            timeout=timeout,
        )
        response.raise_for_status()
        details = response.json()
        payload_mass.append(details["mass_kg"])
        orbit.append(details["orbit"])


def get_core_data(data, timeout=30.0):
    """Append per-launch core information for every entry of ``data["cores"]``.

    Every entry is a mapping with the keys ``core``, ``landing_success``,
    ``landing_type``, ``flight``, ``gridfins``, ``reused``, ``legs`` and
    ``landpad``. When ``core`` is not ``None`` the core is looked up at
    ``https://api.spacexdata.com/v4/cores/<id>`` (once per distinct id) and its
    ``block``, ``reuse_count`` and ``serial`` are appended to ``block``,
    ``reused_count`` and ``serial``; otherwise ``None`` is appended to each of
    those three lists. The remaining fields are copied from the entry itself
    into ``outcome``, ``flights``, ``grid_fins``, ``reused``, ``legs`` and
    ``landing_pad``.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide an iterable of core mappings under the key ``"cores"``.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If a lookup answers with an error status.
    """
    details_by_core_id = {}
    for core in data["cores"]:
        core_id = core["core"]
        if core_id is not None:
            # The same core id can occur on several rows; fetch it only once.
            if core_id not in details_by_core_id:
                response = requests.get(
                    "https://api.spacexdata.com/v4/cores/" + core_id,
                    timeout=timeout,
                )
                response.raise_for_status()
                details_by_core_id[core_id] = response.json()
            details = details_by_core_id[core_id]
            block.append(details["block"])
            reused_count.append(details["reuse_count"])
            serial.append(details["serial"])
        else:
            block.append(None)
            reused_count.append(None)
            serial.append(None)
        # Outcome is "<landing_success> <landing_type>", e.g. "True ASDS" or "None None".
        outcome.append(str(core["landing_success"]) + " " + str(core["landing_type"]))
        flights.append(core["flight"])
        grid_fins.append(core["gridfins"])
        reused.append(core["reused"])
        legs.append(core["legs"])
        landing_pad.append(core["landpad"])


# Collecting Data

In [4]:
# The helper functions append to module-level lists. Empty them first so that
# re-running this cell does not stack a second copy of every value on top of
# the first, which would leave the lists longer than `data`.
for _accumulator in (
    booster_version, payload_mass, orbit, launch_site, outcome,
    flights, grid_fins, reused, legs, landing_pad,
    block, reused_count, serial, longitude, latitude,
):
    _accumulator.clear()

# Download the launch table, then fill the accumulators from the per-id endpoints.
data = get_api_data(static_json_url)
get_booster_version(data)
get_launch_site(data)
get_payload_data(data)
get_core_data(data)

In [5]:
# Column name -> values. The first two columns come straight from `data`;
# the others are the module-level lists filled by the get_* helpers.
launch_dict = dict(
    FlightNumber=list(data["flight_number"]),
    Date=list(data["date"]),
    BoosterVersion=booster_version,
    PayloadMass=payload_mass,
    Orbit=orbit,
    LaunchSite=launch_site,
    Outcome=outcome,
    Flights=flights,
    GridFins=grid_fins,
    Reused=reused,
    Legs=legs,
    LandingPad=landing_pad,
    Block=block,
    ReusedCount=reused_count,
    Serial=serial,
    Longitude=longitude,
    Latitude=latitude,
)


In [6]:
# Build the launch table, keep the rows whose booster is "Falcon 9", then
# replace missing payload masses with the mean payload mass of those rows.
data_falcon9 = (
    pd.DataFrame(launch_dict)
    .pipe(lambda launches: launches[launches["BoosterVersion"] == "Falcon 9"])
    .assign(
        PayloadMass=lambda falcon9: falcon9["PayloadMass"].fillna(
            falcon9["PayloadMass"].mean()
        )
    )
)

In [7]:
# Show the filtered Falcon 9 table in the cell output.
display(data_falcon9)

Unnamed: 0,FlightNumber,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
4,6,2010-06-04,Falcon 9,6123.547647,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857
5,8,2012-05-22,Falcon 9,525.000000,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0005,-80.577366,28.561857
6,10,2013-03-01,Falcon 9,677.000000,ISS,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0007,-80.577366,28.561857
7,11,2013-09-29,Falcon 9,500.000000,PO,VAFB SLC 4E,False Ocean,1,False,False,False,,1.0,0,B1003,-120.610829,34.632093
8,12,2013-12-03,Falcon 9,3170.000000,GTO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B1004,-80.577366,28.561857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,102,2020-09-03,Falcon 9,15600.000000,VLEO,KSC LC 39A,True ASDS,2,True,True,True,5e9e3032383ecb6bb234e7ca,5.0,12,B1060,-80.603956,28.608058
90,103,2020-10-06,Falcon 9,15600.000000,VLEO,KSC LC 39A,True ASDS,3,True,True,True,5e9e3032383ecb6bb234e7ca,5.0,13,B1058,-80.603956,28.608058
91,104,2020-10-18,Falcon 9,15600.000000,VLEO,KSC LC 39A,True ASDS,6,True,True,True,5e9e3032383ecb6bb234e7ca,5.0,12,B1051,-80.603956,28.608058
92,105,2020-10-24,Falcon 9,15600.000000,VLEO,CCSFS SLC 40,True ASDS,3,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,12,B1060,-80.577366,28.561857


# Saving Data

In [8]:
# Write the Falcon 9 table without its index column. The target folder is
# created first because to_csv does not create missing directories.
output_path = Path("../data/processed/dataset_part_1.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
data_falcon9.to_csv(output_path, index=False)