In [None]:
# Importing Libraries

## "requests" allows us to make HTTP requests which we will use to get data from an API
import requests
# "pandas" is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
# "numpy" is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# "datetime" is a library that allows us to represent dates
import datetime

# Display settings: show every column of a DataFrame ('display.max_columns')
# and never truncate the contents of a cell ('display.max_colwidth').
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [None]:
# Defining Helper functions that will help us use the API to Extract Information using identification numbers in the launch data

## From the "rocket" column, we want to learn the Booster's name

# Takes the dataset and uses the "rocket" column to call the API and append the data to the list
def getBoosterVersion(data, timeout=30):
    """Append the booster name of every row in ``data['rocket']`` to ``BoosterVersion``.

    Parameters
    ----------
    data : DataFrame or mapping
        Must provide a ``'rocket'`` entry that iterates over rocket ids.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Notes
    -----
    Exactly one value is appended to the global ``BoosterVersion`` list per
    row: the ``'name'`` field of the API response, or ``None`` when the row
    has no rocket id. Appending a placeholder keeps the list aligned with the
    rows of ``data`` so it can be combined with the other columns later.
    """
    # The same few rocket ids repeat across launches; fetch each one only once.
    names_by_id = {}
    for x in data['rocket']:
        if not x:
            BoosterVersion.append(None)
            continue
        rocket_id = str(x)
        if rocket_id not in names_by_id:
            response = requests.get("https://api.spacexdata.com/v4/rockets/" + rocket_id, timeout=timeout).json()
            names_by_id[rocket_id] = response['name']
        BoosterVersion.append(names_by_id[rocket_id])

## From the "launchpad" column, we want to know the Name of the Launch Site being used, its Latitude, and its Longitude

# Takes the dataset and uses the "launchpad" column to call the API and append the data to the list
def getLaunchSite(data, timeout=30):
    """Append launch-site name and coordinates for every row in ``data['launchpad']``.

    Parameters
    ----------
    data : DataFrame or mapping
        Must provide a ``'launchpad'`` entry that iterates over launchpad ids.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Notes
    -----
    For each row one value is appended to each of the global lists
    ``Longitude``, ``Latitude`` and ``LaunchSite`` (the ``'longitude'``,
    ``'latitude'`` and ``'name'`` fields of the API response). Rows without a
    launchpad id get ``None`` in all three lists so the lists stay aligned
    with the rows of ``data``.
    """
    # Launchpad ids repeat across launches; fetch each one only once.
    sites_by_id = {}
    for x in data['launchpad']:
        if not x:
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        pad_id = str(x)
        if pad_id not in sites_by_id:
            response = requests.get("https://api.spacexdata.com/v4/launchpads/" + pad_id, timeout=timeout).json()
            sites_by_id[pad_id] = (response['longitude'], response['latitude'], response['name'])
        longitude, latitude, name = sites_by_id[pad_id]
        Longitude.append(longitude)
        Latitude.append(latitude)
        LaunchSite.append(name)
        
## From the "payloads" column, we want to learn the mass of the payload and the orbit that it is going to

# Takes the dataset and uses the payloads column to call the API and append the data to the lists
def getPayloadData(data, timeout=30):
    """Append payload mass and orbit for every row in ``data['payloads']``.

    Parameters
    ----------
    data : DataFrame or mapping
        Must provide a ``'payloads'`` entry that iterates over payload ids.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Notes
    -----
    For each row one value is appended to each of the global lists
    ``PayloadMass`` and ``Orbit`` (the ``'mass_kg'`` and ``'orbit'`` fields of
    the API response). Rows without a payload id get ``None`` in both lists so
    the lists stay aligned with the rows of ``data``.
    """
    for load in data['payloads']:
        if not load:
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        # str() mirrors the other helpers and avoids a TypeError on non-string ids.
        response = requests.get("https://api.spacexdata.com/v4/payloads/" + str(load), timeout=timeout).json()
        PayloadMass.append(response['mass_kg'])
        Orbit.append(response['orbit'])
        
## From the "cores" column, we want to learn:
### - The outcome of the landing
### - The type of the landing 
### - Number of flights with that core
### - Whether gridfins were used
### - Whether the core is reused
### - Whether legs were used
### - The landing pad used
### - The block of the core, which is a number used to separate versions of cores
### - The number of times this specific core has been reused
### - The serial of the core.

# Takes the dataset and uses the "cores" column to call the API and append the data to the lists
def getCoreData(data, timeout=30):
    """Append landing and core details for every row in ``data['cores']``.

    Parameters
    ----------
    data : DataFrame or mapping
        Must provide a ``'cores'`` entry that iterates over dicts with the keys
        ``'core'``, ``'landing_success'``, ``'landing_type'``, ``'flight'``,
        ``'gridfins'``, ``'reused'``, ``'legs'`` and ``'landpad'``.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Notes
    -----
    For each row one value is appended to each of the global lists ``Block``,
    ``ReusedCount``, ``Serial``, ``Outcome``, ``Flights``, ``GridFins``,
    ``Reused``, ``Legs`` and ``LandingPad``. The first three come from the
    cores API (``'block'``, ``'reuse_count'``, ``'serial'``) and are ``None``
    when the row has no core id; the rest are read directly from the row.
    """
    # A reused core appears in several launches; fetch its record only once.
    details_by_id = {}
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            if core_id not in details_by_id:
                response = requests.get("https://api.spacexdata.com/v4/cores/" + core_id, timeout=timeout).json()
                details_by_id[core_id] = (response['block'], response['reuse_count'], response['serial'])
            block, reuse_count, serial = details_by_id[core_id]
        else:
            block = reuse_count = serial = None
        Block.append(block)
        ReusedCount.append(reuse_count)
        Serial.append(serial)
        # Outcome is "<landing_success> <landing_type>", e.g. "True ASDS" or "None None".
        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

In [None]:
# Now, we will start requesting rocket launch data from the SpaceX API with the following URL:

spacex_url="https://api.spacexdata.com/v4/launches/past"
# A timeout keeps the notebook from hanging indefinitely if the API is unreachable.
response = requests.get(spacex_url, timeout=30)

## Note: a Response object cannot be turned into a DataFrame directly; parse the body first,
## e.g. "pd.json_normalize(response.json())".

# Print only the beginning of the raw payload; the full body is too large to be readable as cell output
print(response.content[:500])

In [None]:
# Request and parse the SpaceX launch data using a GET request.
# The static JSON URL below is used to make the requested JSON results more consistent.

static_json_url='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'

# A timeout keeps the notebook from hanging indefinitely if the server is unreachable.
r2 = requests.get(static_json_url, timeout=30)

# Check the response that is actually parsed below: status code "200" means the request was
# successful, and raise_for_status() raises on 4xx/5xx answers instead of failing later on bad JSON.
r2.raise_for_status()
r = r2.json()

# Use the "json_normalize" method to convert the JSON result into a DataFrame
data = pd.json_normalize(r)

In [None]:
# We will now use the API again to get information about the launches using the IDs given for each
# launch. Specifically, we will be using the columns "rocket", "payloads", "launchpad", and "cores".
# NOTE: this cell rebinds `data`; re-run the cell that creates `data` before re-running this one.

# Launches after this date are excluded from the dataset.
LAUNCH_DATE_CUTOFF = datetime.date(2020, 11, 13)

# Take a subset of the dataframe keeping only the features we want plus flight_number and date_utc.
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Remove rows with multiple cores (falcon rockets with 2 extra rocket boosters) and rows that have
# multiple payloads in a single rocket. The explicit copy makes `data` an independent frame, so the
# column assignments below do not trigger SettingWithCopyWarning.
has_single_core = data['cores'].map(len) == 1
has_single_payload = data['payloads'].map(len) == 1
data = data.loc[has_single_core & has_single_payload].copy()

# Since payloads and cores are now lists of size 1, replace each list by its single element.
data['cores'] = data['cores'].map(lambda x: x[0])
data['payloads'] = data['payloads'].map(lambda x: x[0])

# Convert date_utc to a datetime and keep only the date part (dropping the time).
data['date'] = pd.to_datetime(data['date_utc']).dt.date

# Restrict the launches to those on or before the cutoff date.
data = data[data['date'] <= LAUNCH_DATE_CUTOFF]

In [None]:
# The values returned by the API requests are collected in the lists below, which are later
# combined into a new dataframe.

## Global variables, grouped by the helper function that fills them
BoosterVersion = []                                      # getBoosterVersion
LaunchSite, Longitude, Latitude = [], [], []             # getLaunchSite
PayloadMass, Orbit = [], []                              # getPayloadData
Outcome, Flights, GridFins = [], [], []                  # getCoreData (read from the launch row)
Reused, Legs, LandingPad = [], [], []                    # getCoreData (read from the launch row)
Block, ReusedCount, Serial = [], [], []                  # getCoreData (read from the cores API)

## Run each helper over the launch data to populate the lists
for fetch in (getBoosterVersion, getLaunchSite, getPayloadData, getCoreData):
    fetch(data)

In [None]:
# Constructing the dataset using the data obtained - combine the columns into a dictionary

launch_dict = {'FlightNumber': list(data['flight_number']),
'Date': list(data['date']),
'BoosterVersion':BoosterVersion,
'PayloadMass':PayloadMass,
'Orbit':Orbit,
'LaunchSite':LaunchSite,
'Outcome':Outcome,
'Flights':Flights,
'GridFins':GridFins,
'Reused':Reused,
'Legs':Legs,
'LandingPad':LandingPad,
'Block':Block,
'ReusedCount':ReusedCount,
'Serial':Serial,
'Longitude': Longitude,
'Latitude': Latitude}

# Create a pandas DataFrame from the dictionary
ld = pd.DataFrame(launch_dict)

# Keep only the "Falcon 9" launches. The explicit copy makes falc9 an independent frame, so the
# assignments below do not trigger SettingWithCopyWarning.
falc9 = ld[ld["BoosterVersion"]=="Falcon 9"].copy()
# Renumber "FlightNumber" from 1 now that the other boosters have been removed
falc9['FlightNumber'] = list(range(1, falc9.shape[0]+1))

# Missing values per column before imputation (printed because only the last expression of a cell is displayed)
print(falc9.isnull().sum())

# Fill the missing values of "PayloadMass" - and only that column - with the column mean.
# fillna returns a new Series, so the result must be assigned back to take effect.
payload_mass_mean = falc9["PayloadMass"].mean()
falc9["PayloadMass"] = falc9["PayloadMass"].fillna(payload_mass_mean)

# Missing values per column after imputation.
# NOTE(review): the original comment here mentioned exporting the result to a CSV, but nothing is
# written to disk in this cell - add a falc9.to_csv(...) call if an export is required.
falc9.isnull().sum()