# Gather & process Falcon 9 data

First, import necessary dependencies.

In [95]:
import requests
import pandas as pd
import numpy as np

# Show every column when a dataframe is displayed (no column truncation)
pd.set_option('display.max_columns', None)
# Show the full contents of each cell instead of truncating long values
pd.set_option('display.max_colwidth', None)
# Cap displayed output at 17 rows; longer frames/series are shown truncated
pd.options.display.max_rows = 17

## Define Functions

These functions will be used to make calls to different endpoints for the spacexdata API, allowing us to gather necessary information.

In [96]:
def getBoosterVersion(data, timeout=30):
    """Append the rocket name for every launch to the global ``BoosterVersion`` list.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide ``data['rocket']``, an iterable of rocket ids.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up (default 30).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # Rocket ids repeat across launches, so fetch each distinct id only once.
    names_by_id = {}
    for rocket_id in data['rocket']:
        rocket_id = str(rocket_id)
        if rocket_id not in names_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/rockets/" + rocket_id,
                timeout=timeout,
            )
            # Fail loudly on an HTTP error instead of a confusing KeyError below.
            response.raise_for_status()
            names_by_id[rocket_id] = response.json()['name']
        BoosterVersion.append(names_by_id[rocket_id])

def getLaunchSite(data, timeout=30):
    """Append launchpad details for every launch to the global site lists.

    For each id in ``data['launchpad']`` the launchpad record is fetched and its
    ``longitude``, ``latitude`` and ``name`` are appended to the global
    ``Longitude``, ``Latitude`` and ``LaunchSite`` lists respectively.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide ``data['launchpad']``, an iterable of launchpad ids.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up (default 30).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # Launchpad ids repeat across launches, so fetch each distinct id only once.
    pads_by_id = {}
    for pad_id in data['launchpad']:
        pad_id = str(pad_id)
        if pad_id not in pads_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/launchpads/" + pad_id,
                timeout=timeout,
            )
            # Fail loudly on an HTTP error instead of a confusing KeyError below.
            response.raise_for_status()
            pads_by_id[pad_id] = response.json()
        pad = pads_by_id[pad_id]
        Longitude.append(pad['longitude'])
        Latitude.append(pad['latitude'])
        LaunchSite.append(pad['name'])

def getPayloadData(data, timeout=30):
    """Append payload mass and orbit for every launch to the global payload lists.

    For each id in ``data['payloads']`` the payload record is fetched and its
    ``mass_kg`` and ``orbit`` are appended to the global ``PayloadMass`` and
    ``Orbit`` lists respectively.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide ``data['payloads']``, an iterable of payload id strings.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up (default 30).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    for payload_id in data['payloads']:
        # Plain concatenation is kept on purpose: a non-string id (e.g. a list
        # that was never unpacked) raises TypeError instead of hitting a bogus URL.
        response = requests.get(
            "https://api.spacexdata.com/v4/payloads/" + payload_id,
            timeout=timeout,
        )
        # Fail loudly on an HTTP error instead of a confusing KeyError below.
        response.raise_for_status()
        payload = response.json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

def getCoreData(data, timeout=30):
    """Append core and landing details for every launch to the global core lists.

    Each element of ``data['cores']`` is a dict describing the core used on one
    launch. When its ``'core'`` id is present, the core record is fetched and its
    ``block``, ``reuse_count`` and ``serial`` are appended to the global
    ``Block``, ``ReusedCount`` and ``Serial`` lists; otherwise ``None`` is
    appended to each of the three. The per-launch fields are always appended to
    ``Outcome`` (``"<landing_success> <landing_type>"``), ``Flights``,
    ``GridFins``, ``Reused``, ``Legs`` and ``LandingPad``.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide ``data['cores']``, an iterable of core dicts.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up (default 30).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # The same core id appears on several launches, so fetch each id only once.
    details_by_id = {}
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            if core_id not in details_by_id:
                response = requests.get(
                    "https://api.spacexdata.com/v4/cores/" + core_id,
                    timeout=timeout,
                )
                # Fail loudly on an HTTP error instead of a confusing KeyError below.
                response.raise_for_status()
                details_by_id[core_id] = response.json()
            details = details_by_id[core_id]
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            # No core id: keep the lists aligned with placeholder values.
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        Outcome.append(str(core['landing_success'])+' '+str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])


### Make request to SpaceX Data API

In [100]:
spacex_url = "https://api.spacexdata.com/v4/launches/past"

# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() stops here on an HTTP error rather than letting an
# error body be parsed as launch data in the next cell.
response = requests.get(spacex_url, timeout=30)
response.raise_for_status()

Normalize the response into a format pandas can work with

In [102]:
# Flatten the JSON payload into a dataframe; nested objects become dotted
# column names (e.g. links.patch.small)
data = pd.json_normalize(response.json())

Display tail of response

In [103]:
# Preview the last five launches of the normalized response
data.tail()

Unnamed: 0,static_fire_date_utc,static_fire_date_unix,net,window,rocket,success,failures,details,crew,ships,capsules,payloads,launchpad,flight_number,name,date_utc,date_unix,date_local,date_precision,upcoming,cores,auto_update,tbd,launch_library_id,id,fairings.reused,fairings.recovery_attempt,fairings.recovered,fairings.ships,links.patch.small,links.patch.large,links.reddit.campaign,links.reddit.launch,links.reddit.media,links.reddit.recovery,links.flickr.small,links.flickr.original,links.presskit,links.webcast,links.youtube_id,links.article,links.wikipedia,fairings
151,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],[],[61fc0379e0dc5662b76489b6],5e9e4502f509094188566f88,152,Starlink 4-9 (v1.5),2022-03-03T14:35:00.000Z,1646318100,2022-03-03T09:35:00-05:00,hour,False,"[{'core': '5ef670f10059c33cee4a826c', 'flight': 11, 'gridfins': True, 'legs': True, 'reused': True, 'landing_attempt': True, 'landing_success': True, 'landing_type': 'ASDS', 'landpad': '5e9e3033383ecbb9e534e7cc'}]",True,False,861795c5-e694-4d3e-b22f-a356a31cd5d8,61fc0224e0dc5662b76489ab,,,,[],https://imgur.com/BrW201S.png,https://imgur.com/573IfGk.png,https://www.reddit.com/r/spacex/comments/jhu37i/starlink_general_discussion_and_deployment_thread/,https://www.reddit.com/r/spacex/comments/t5lzm9/rspacex_starlink_49_launch_discussion_and_updates/,,https://www.reddit.com/r/spacex/comments/k2ts1q/rspacex_fleet_updates_discussion_thread/,[],"[https://live.staticflickr.com/65535/51924631989_4e0b26f306_o.jpg, https://live.staticflickr.com/65535/51924934610_296c72bf67_o.jpg, https://live.staticflickr.com/65535/51924933910_9627ae096e_o.jpg]",,https://youtu.be/ypb2sDdUkRo,ypb2sDdUkRo,https://spaceflightnow.com/2022/03/03/after-another-starlink-mission-spacex-on-pace-for-one-launch-per-week-this-year/,https://en.wikipedia.org/wiki/Starlink,
152,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],[],[61fc0382e0dc5662b76489b7],5e9e4501f509094ba4566f84,153,Starlink 4-10 (v1.5),2022-03-09T13:45:00.000Z,1646833500,2022-03-09T08:45:00-05:00,hour,False,"[{'core': '5e9e28a6f359183c413b265d', 'flight': 4, 'gridfins': True, 'legs': True, 'reused': True, 'landing_attempt': True, 'landing_success': True, 'landing_type': 'ASDS', 'landpad': '5e9e3033383ecb075134e7cd'}]",True,False,d8c7fbe0-6a32-42dc-8c24-f1c632adc8b5,61fc0243e0dc5662b76489ae,,,,[],https://imgur.com/BrW201S.png,https://imgur.com/573IfGk.png,https://www.reddit.com/r/spacex/comments/jhu37i/starlink_general_discussion_and_deployment_thread/,https://www.reddit.com/r/spacex/comments/t9la7r/rspacex_starlink_410_launch_discussion_and/,,https://www.reddit.com/r/spacex/comments/k2ts1q/rspacex_fleet_updates_discussion_thread/,[],"[https://live.staticflickr.com/65535/51928220502_1a44139be7_o.jpg, https://live.staticflickr.com/65535/51929288928_46decee5db_o.jpg, https://live.staticflickr.com/65535/51929537589_f03fb8c20a_o.jpg]",,https://youtu.be/uqAppamdGyo,uqAppamdGyo,https://spaceflightnow.com/2022/03/09/spacex-broomstick-launches-40th-starlink-mission/,https://en.wikipedia.org/wiki/Starlink,
153,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],[],[623491e5f051102e1fcedac9],5e9e4501f509094ba4566f84,154,Starlink 4-12 (v1.5),2022-03-19T03:24:00.000Z,1647660240,2022-03-18T23:24:00-04:00,hour,False,"[{'core': '5e9e28a6f35918c0803b265c', 'flight': 12, 'gridfins': True, 'legs': True, 'reused': True, 'landing_attempt': True, 'landing_success': True, 'landing_type': 'ASDS', 'landpad': '5e9e3033383ecbb9e534e7cc'}]",True,False,72188aca-810d-40b9-887d-43040614dd2c,6234908cf051102e1fcedac4,,,,[],https://imgur.com/BrW201S.png,https://imgur.com/573IfGk.png,https://www.reddit.com/r/spacex/comments/jhu37i/starlink_general_discussion_and_deployment_thread/,,,https://www.reddit.com/r/spacex/comments/k2ts1q/rspacex_fleet_updates_discussion_thread/,[],"[https://live.staticflickr.com/65535/51947052831_3b1599cd70_o.jpg, https://live.staticflickr.com/65535/51946071252_b51d6839e9_o.jpg]",,https://youtu.be/0giA6VZOICs,0giA6VZOICs,https://spaceflightnow.com/2022/03/19/spacex-stretches-rocket-reuse-record-with-another-starlink-launch/,https://en.wikipedia.org/wiki/Starlink,
154,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],[],[6243af62af52800c6e919260],5e9e4501f509094ba4566f84,155,Transporter-4,2022-04-01T16:24:00.000Z,1648830240,2022-04-01T12:24:00-04:00,hour,False,"[{'core': '5f57c53d0622a6330279009f', 'flight': 7, 'gridfins': True, 'legs': True, 'reused': True, 'landing_attempt': True, 'landing_success': True, 'landing_type': 'ASDS', 'landpad': '5e9e3033383ecbb9e534e7cc'}]",True,False,335acce9-a35c-436c-9a22-a2505f20957f,6243ad8baf52800c6e919252,,,,[],https://imgur.com/IJWn9pK.png,https://imgur.com/u49XVx4.png,,https://www.reddit.com/r/spacex/comments/tt5n43/rspacex_transporter4_launch_discussion_and/,,,[],"[https://live.staticflickr.com/65535/51981688502_0584ac5658_o.jpg, https://live.staticflickr.com/65535/51982975529_3e1610767a_o.jpg]",,https://youtu.be/4NqSoHnkKEM,4NqSoHnkKEM,https://spaceflightnow.com/2022/04/01/forty-payloads-ride-into-orbit-on-spacex-falcon-9-rocket/,,
155,2022-04-06T19:13:00.000Z,1649272000.0,False,,5e9d0d95eda69973a809d1ec,True,[],"Axiom Mission 1 (or Ax-1) is a planned SpaceX Crew Dragon mission to the International Space Station (ISS), operated by SpaceX on behalf of Axiom Space. The flight will launch no earlier than 31 March 2022 and send four people to the ISS for an eight-day stay","[61eefc9c9eb1064137a1bd77, 61eefcf89eb1064137a1bd79, 61eefd5b9eb1064137a1bd7a, 61eefdbf9eb1064137a1bd7b]",[5ea6ed2e080df4000697c909],[5e9e2c5df359188aba3b2676],[61eefb129eb1064137a1bd74],5e9e4502f509094188566f88,156,Ax-1,2022-04-08T15:17:00.000Z,1649431020,2022-04-08T11:17:00-04:00,hour,False,"[{'core': '5f57c5440622a633027900a0', 'flight': 5, 'gridfins': True, 'legs': True, 'reused': True, 'landing_attempt': True, 'landing_success': True, 'landing_type': 'ASDS', 'landpad': '5e9e3033383ecb075134e7cd'}]",True,False,a3eeb03b-a209-4255-91b5-772dc0d2150e,61eefaa89eb1064137a1bd73,,,,,https://i.imgur.com/lOSw7Q1.png,https://i.imgur.com/QV9W8OJ.png,https://www.reddit.com/r/spacex/comments/t3ez79/axiom1_launch_campaign_thread/,https://www.reddit.com/r/spacex/comments/tyd866/rspacex_axiom1_launch_discussion_and_updates/,,,[],[],,https://youtu.be/5nLk_Vqp7nw,5nLk_Vqp7nw,,https://en.wikipedia.org/wiki/Axiom_Mission_1,


Take a subset of the dataframe to remove unnecessary features and format values as needed

In [104]:
# Keep only the features we want, plus the flight number and date_utc.
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Remove rows with multiple cores because those are falcon rockets with 2 extra rocket boosters and rows that have multiple payloads in a single rocket.
data = data[data['cores'].map(len)==1]
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not write to a slice (avoids SettingWithCopyWarning).
data = data[data['payloads'].map(len)==1].copy()

# Payloads and cores are lists of size 1, so extract the single value in the list and replace the feature.
data['cores'] = data['cores'].map(lambda x : x[0])
data['payloads'] = data['payloads'].map(lambda x : x[0])

# Convert date_utc to a datetime and keep only the calendar date, dropping the time
data['date'] = pd.to_datetime(data['date_utc']).dt.date

In [105]:
# Global accumulator lists, grouped by the helper that appends to them.
# Each helper adds one entry per launch row it processes.

# Filled by getBoosterVersion
BoosterVersion = []
# Filled by getLaunchSite
LaunchSite, Longitude, Latitude = [], [], []
# Filled by getPayloadData
PayloadMass, Orbit = [], []
# Filled by getCoreData (details looked up from the cores endpoint)
Block, ReusedCount, Serial = [], [], []
# Filled by getCoreData (fields taken directly from each launch's core entry)
Outcome, Flights, GridFins = [], [], []
Reused, Legs, LandingPad = [], [], []

### Call functions to alternate API endpoints to obtain additional data

In [107]:
# The helpers append to the global lists, so empty them in place first.
# Otherwise re-running this cell would add a second copy of every value and
# the lists would no longer match the number of rows in `data`.
for _accumulator in (BoosterVersion, PayloadMass, Orbit, LaunchSite, Outcome,
                     Flights, GridFins, Reused, Legs, LandingPad, Block,
                     ReusedCount, Serial, Longitude, Latitude):
    _accumulator.clear()

getBoosterVersion(data)
getLaunchSite(data)
getPayloadData(data)
getCoreData(data)

Create a dictionary to hold the further data obtained from the other API endpoints.

In [112]:
# Column name -> values. The first two columns come straight from `data`;
# the rest are the lists filled by the API helper functions.
launch_dict = dict(
    FlightNumber=list(data['flight_number']),
    Date=list(data['date']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    Orbit=Orbit,
    LaunchSite=LaunchSite,
    Outcome=Outcome,
    Flights=Flights,
    GridFins=GridFins,
    Reused=Reused,
    Legs=Legs,
    LandingPad=LandingPad,
    Block=Block,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Longitude=Longitude,
    Latitude=Latitude,
)

Create a pandas dataframe from the dictionary

In [113]:
# Create a dataframe from launch_dict: one column per key, one row per launch
df = pd.DataFrame(launch_dict)

In [114]:
# Preview the last five rows of the assembled launch table
df.tail()

Unnamed: 0,FlightNumber,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
138,152,2022-03-03,Falcon 9,13260.0,VLEO,KSC LC 39A,True ASDS,11,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,10,B1060,-80.603956,28.608058
139,153,2022-03-09,Falcon 9,13260.0,VLEO,CCSFS SLC 40,True ASDS,4,True,True,True,5e9e3033383ecb075134e7cd,5.0,3,B1052,-80.577366,28.561857
140,154,2022-03-19,Falcon 9,13260.0,VLEO,CCSFS SLC 40,True ASDS,12,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,11,B1051,-80.577366,28.561857
141,155,2022-04-01,Falcon 9,,SSO,CCSFS SLC 40,True ASDS,7,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,6,B1061,-80.577366,28.561857
142,156,2022-04-08,Falcon 9,,ISS,KSC LC 39A,True ASDS,5,True,True,True,5e9e3033383ecb075134e7cd,5.0,4,B1062,-80.603956,28.608058


Create a new dataframe called data_falcon9 which only contains data of Falcon 9 boosters.

In [115]:
# Keep only the rows whose booster is a Falcon 9; copy so later edits
# do not touch df
data_falcon9 = df.loc[df['BoosterVersion'].eq('Falcon 9')].copy()

Now that all non-Falcon 9 launches have been filtered out, renumber the remaining flights sequentially starting from 1.

In [116]:
# Renumber the remaining flights 1..N in their current row order
flight_count = len(data_falcon9)
data_falcon9.loc[:, 'FlightNumber'] = list(range(1, flight_count + 1))
data_falcon9

Unnamed: 0,FlightNumber,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
4,1,2010-06-04,Falcon 9,,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857
5,2,2012-05-22,Falcon 9,525.0,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0005,-80.577366,28.561857
6,3,2013-03-01,Falcon 9,677.0,ISS,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0007,-80.577366,28.561857
7,4,2013-09-29,Falcon 9,500.0,PO,VAFB SLC 4E,False Ocean,1,False,False,False,,1.0,0,B1003,-120.610829,34.632093
8,5,2013-12-03,Falcon 9,3170.0,GTO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B1004,-80.577366,28.561857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,135,2022-03-03,Falcon 9,13260.0,VLEO,KSC LC 39A,True ASDS,11,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,10,B1060,-80.603956,28.608058
139,136,2022-03-09,Falcon 9,13260.0,VLEO,CCSFS SLC 40,True ASDS,4,True,True,True,5e9e3033383ecb075134e7cd,5.0,3,B1052,-80.577366,28.561857
140,137,2022-03-19,Falcon 9,13260.0,VLEO,CCSFS SLC 40,True ASDS,12,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,11,B1051,-80.577366,28.561857
141,138,2022-04-01,Falcon 9,,SSO,CCSFS SLC 40,True ASDS,7,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,6,B1061,-80.577366,28.561857


### Data Validation

Now that we have data from the API, let's check it for null values and missing entries.

In [117]:
# Count the null values in each column
data_falcon9.isnull().sum()

FlightNumber       0
Date               0
BoosterVersion     0
PayloadMass       17
Orbit              1
                  ..
Block              0
ReusedCount        0
Serial             0
Longitude          0
Latitude           0
Length: 17, dtype: int64

We can see that there are 17 null PayloadMass entries and 1 null Orbit type. 

We also have 17 null LandingPad entries, but this is expected as those denote launches where landing pads were not used.

Let's replace missing PayloadMass values with the mean of the PayloadMass column.

In [118]:
# Mean of the known payload masses (missing values are ignored by mean())
pay_mean = data_falcon9['PayloadMass'].mean()
print("Pay Mean is: ", pay_mean)
# Fill the missing payload masses with that mean
data_falcon9['PayloadMass'] = data_falcon9['PayloadMass'].fillna(pay_mean)
data_falcon9.isnull().sum()

Pay Mean is:  7647.061885245901


FlightNumber      0
Date              0
BoosterVersion    0
PayloadMass       0
Orbit             1
                 ..
Block             0
ReusedCount       0
Serial            0
Longitude         0
Latitude          0
Length: 17, dtype: int64

For missing Orbit types, it's more appropriate to drop that entry from the table.

In [119]:
# Drop rows where orbit type is null.
# NOTE(review): FlightNumber was assigned before this drop, so the sequence
# can now contain a gap - confirm whether it should be renumbered afterwards.
data_falcon9 = data_falcon9.dropna(subset=['Orbit'])

Checking the data for null values once more

In [120]:
# Re-count nulls per column after the fill and drop steps
data_falcon9.isnull().sum()

FlightNumber      0
Date              0
BoosterVersion    0
PayloadMass       0
Orbit             0
                 ..
Block             0
ReusedCount       0
Serial            0
Longitude         0
Latitude          0
Length: 17, dtype: int64

### Analyzing and Formatting Launch Outcomes

Now we want to determine whether each booster landing was successful. For our purposes, this means a boolean value - success or failure.

In [123]:
# Count how many launches fall under each distinct Outcome label
# (labels have the form "<landing_success> <landing_type>")
landing_outcomes = data_falcon9['Outcome'].value_counts()
landing_outcomes

True ASDS      83
None None      19
True RTLS      19
False ASDS      7
True Ocean      5
False Ocean     2
None ASDS       2
False RTLS      1
Name: Outcome, dtype: int64

As you can see, there are several different options for the landing outcome, some denoting types of successes and others failures. 

- **True ASDS**:   Successful landing on drone ship
- **None None**:   Failure to land
- **True RTLS**:   Successful landing on ground pad
- **False ASDS**:   Unsuccessful landing on drone ship
- **True Ocean**:   Successful landing to specific region of the ocean
- **False Ocean**:   Unsuccessful landing to specific region of the ocean
- **None ASDS**:   Failure to land (Drone ship)
- **False RTLS**:   Unsuccessful landing on ground pad


Again, we want to simplify these into a boolean-like success or failure attribute

Let's enumerate the landing_outcomes so we can view the keys and use this information to create a set of bad outcomes

In [124]:
# Print each outcome label next to its position in the value_counts ordering
for position, label in enumerate(landing_outcomes.index):
    print(position, label)

0 True ASDS
1 None None
2 True RTLS
3 False ASDS
4 True Ocean
5 False Ocean
6 None ASDS
7 False RTLS


Create a set for outcomes where landing was unsuccessful

In [125]:
# Outcome labels are "<landing_success> <landing_type>", so only labels that
# start with "True " are successful landings. Selecting by label rather than by
# position in the value_counts ordering keeps this set correct when the counts
# (and therefore the ordering) change as new launches are added.
bad_outcomes = {outcome for outcome in landing_outcomes.keys()
                if not outcome.startswith('True ')}
bad_outcomes

{'False ASDS', 'False Ocean', 'False RTLS', 'None ASDS', 'None None'}

Now we can create a landing_class list which records whether each landing was successful. '0' will denote failure and '1' will denote success.

In [126]:
# One label per row of data_falcon9, in row order:
# 0 when the Outcome is one of the bad outcomes, 1 otherwise
landing_class = [
    0 if outcome in bad_outcomes else 1
    for outcome in data_falcon9['Outcome']
]

Fill Class column in data_falcon9 with landing_class data

In [None]:
# landing_class was built by iterating data_falcon9['Outcome'] in row order,
# so it lines up with the dataframe row for row
data_falcon9['Class'] = landing_class

### Export data

Export data_falcon9 to .csv for future use within analysis and feature engineering notebook

In [128]:
# As of here, data_falcon9 is formatted; write it to CSV without the index column
data_falcon9.to_csv("falcon9_data.csv", index = False)