In [1]:
# import required libraries
import pandas as pd
import numpy as np
import requests

In [2]:
import datetime

In [3]:
# URL of the SpaceX API (v4) endpoint for past launches.
spacex_url = "https://api.spacexdata.com/v4/launches/past"

In [4]:
# Request the launch data from spacex_url. Without a timeout, requests waits
# indefinitely if the server never answers, so give up after 30 seconds.
response = requests.get(spacex_url, timeout=30)

In [5]:
# Show the HTTP status code of the request that was just made.
print(response.status_code)

200


In [6]:
## Print the raw data that requests.get() retrieved from the SpaceX API.
# print(response.text)
#  (or)
# print(response.json())

In [7]:
# Parse the JSON body of the response and store the result in `data`.
data = response.json()

##### The data collected above is dynamic (it comes from the live API and changes over time), but as a learner I want to work with static data for this project, so I will use the static JSON data provided by Coursera.

In [8]:
# URL of the static JSON copy of the SpaceX API call; this reassigns spacex_url.
spacex_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json"

In [9]:
# Download the static JSON file. Without a timeout, requests waits
# indefinitely if the server never answers, so give up after 30 seconds.
response = requests.get(spacex_url, timeout=30)

In [10]:
# Show the HTTP status code of the request that was just made.
print(response.status_code)

200


In [11]:
# Parse the JSON body of the latest response; this overwrites any earlier `data`.
data = response.json()

In [12]:
# Print the JSON data.
# print(data)

##### The JSON data contains nested structures, so we use the `json_normalize` method to flatten it into a DataFrame and reduce errors when working with it.

In [13]:
# Flatten the nested JSON records in `data` into a DataFrame.
df = pd.json_normalize(data)

In [14]:
# First 5 rows of the DataFrame.
# df.head()

In [15]:
# columns in the data frame.
# df.columns

In [16]:
# data types of each column
# df.dtypes

In [17]:
# shape of the dataset.
# df.shape

##### Filtering the dataset by keeping the required columns for the project.

In [18]:
# Keep only the six columns this project works with.
df = df[[
    'rocket',
    'payloads',
    'launchpad',
    'cores',
    'flight_number',
    'date_utc',
]]

In [19]:
# Keep only launches that have exactly one core (no extra boosters) AND
# exactly one payload. The explicit .copy() makes df an independent DataFrame,
# so adding or replacing columns afterwards does not raise pandas'
# SettingWithCopyWarning.
df = df[(df['cores'].map(len) == 1) & (df['payloads'].map(len) == 1)].copy()

In [20]:
# Both payloads and cores hold lists; replace each list by its first element.
# assign() builds a new DataFrame instead of writing into a filtered slice,
# which avoids pandas' SettingWithCopyWarning. Column order is unchanged.
df = df.assign(
    cores=df['cores'].map(lambda x: x[0]),
    payloads=df['payloads'].map(lambda x: x[0]),
)

In [21]:
# date_utc contains both date and time; store only the calendar date in a new
# 'date' column. assign() returns a new DataFrame, so nothing is written into
# a filtered slice (no SettingWithCopyWarning).
df = df.assign(date=pd.to_datetime(df['date_utc']).dt.date)

In [22]:
# Keep the launches that happened on or before 2020-11-13 (the cutoff date is included).
df = df.loc[df['date'] <= datetime.date(2020, 11, 13)]

In [23]:
# df.head()

#### The dataset stores rockets, payloads, launchpads and cores as API identifiers (IDs) rather than readable values, so we need to convert them into human-readable form.

##### We need to create some helper functions that call the SpaceX API to look up information from the IDs stored in these columns.

In [24]:
# Global accumulators: the helper functions append to these lists, and the
# lists are later used to build a new dataframe.

# Filled by getBoosterVersion.
BoosterVersion = []

# Filled by getLaunchSite.
LaunchSite = []
Longitude = []
Latitude = []

# Filled by getPayLoadData.
PayloadMass = []
Orbit = []

# Filled by getCoreData.
Block = []
ReusedCount = []
Serial = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []

In [25]:
''' HELPER FUNCTIONS'''

# From ROCKET column we can extract booster name.
def getBoosterVersion(data):
    """Append the booster name of every row of ``data['rocket']`` to ``BoosterVersion``.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``'rocket'`` column yields the ids that are appended to
        the ``/v4/rockets/`` endpoint URL.

    Side effects
    ------------
    Appends exactly one entry per row to the global ``BoosterVersion`` list:
    the ``'name'`` field of the API reply, or ``None`` when the id is falsy.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    names_by_id = {}  # each distinct rocket id is requested only once
    for rocket_id in data['rocket']:
        if not rocket_id:
            # Keep a placeholder (like getCoreData does for missing cores) so
            # this list stays aligned with the rows and with the other lists.
            BoosterVersion.append(None)
            continue
        key = str(rocket_id)
        if key not in names_by_id:
            reply = requests.get("https://api.spacexdata.com/v4/rockets/"+key, timeout=30)
            reply.raise_for_status()  # fail loudly instead of parsing an error body
            names_by_id[key] = reply.json()['name']
        BoosterVersion.append(names_by_id[key])

# From LAUNCHPAD column we can extract coordinates and launchsite name.
def getLaunchSite(data):
    """Append coordinates and site name of every row of ``data['launchpad']``.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``'launchpad'`` column yields the ids that are appended
        to the ``/v4/launchpads/`` endpoint URL.

    Side effects
    ------------
    Appends exactly one entry per row to each of the global lists
    ``Longitude``, ``Latitude`` and ``LaunchSite`` (the ``'longitude'``,
    ``'latitude'`` and ``'name'`` fields of the API reply), or ``None`` to all
    three when the id is falsy.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    sites_by_id = {}  # each distinct launchpad id is requested only once
    for pad_id in data['launchpad']:
        if not pad_id:
            # Keep placeholders (like getCoreData does for missing cores) so
            # the lists stay aligned with the rows and with each other.
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        key = str(pad_id)
        if key not in sites_by_id:
            reply = requests.get("https://api.spacexdata.com/v4/launchpads/"+key, timeout=30)
            reply.raise_for_status()  # fail loudly instead of parsing an error body
            pad = reply.json()
            sites_by_id[key] = (pad['longitude'], pad['latitude'], pad['name'])
        longitude, latitude, site_name = sites_by_id[key]
        Longitude.append(longitude)
        Latitude.append(latitude)
        LaunchSite.append(site_name)

# From PAYLOAD column we extract payload mass and orbit name.
def getPayLoadData(data):
    """Append mass and orbit of every row of ``data['payloads']``.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``'payloads'`` column yields the ids that are appended
        to the ``/v4/payloads/`` endpoint URL.

    Side effects
    ------------
    Appends exactly one entry per row to each of the global lists
    ``PayloadMass`` and ``Orbit`` (the ``'mass_kg'`` and ``'orbit'`` fields of
    the API reply), or ``None`` to both when the id is falsy.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    for payload_id in data['payloads']:
        if not payload_id:
            # Keep placeholders (like getCoreData does for missing cores) so
            # the lists stay aligned with the rows and with each other.
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        reply = requests.get("https://api.spacexdata.com/v4/payloads/"+str(payload_id), timeout=30)
        reply.raise_for_status()  # fail loudly instead of parsing an error body
        payload = reply.json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

# From CORES column we can extract various data.
def getCoreData(data):
    """Append the core-related fields of every row of ``data['cores']``.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``'cores'`` column yields one dict per row with the keys
        ``'core'``, ``'landing_success'``, ``'landing_type'``, ``'flight'``,
        ``'gridfins'``, ``'reused'``, ``'legs'`` and ``'landpad'``.

    Side effects
    ------------
    Appends exactly one entry per row to each of the global lists ``Block``,
    ``ReusedCount``, ``Serial``, ``Outcome``, ``Flights``, ``GridFins``,
    ``Reused``, ``Legs`` and ``LandingPad``. ``Block``, ``ReusedCount`` and
    ``Serial`` come from the ``/v4/cores/`` endpoint and are ``None`` when the
    row has no core id; the remaining values are read from the row itself.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    for core in data['cores']:
        if core['core'] is not None:
            reply = requests.get("https://api.spacexdata.com/v4/cores/"+core['core'], timeout=30)
            reply.raise_for_status()  # fail loudly instead of parsing an error body
            details = reply.json()
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            # No core id to look up: keep placeholders so the lists stay aligned.
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        # Outcome combines both landing fields as text, e.g. "<success> <type>".
        Outcome.append(str(core['landing_success'])+' '+str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

In [26]:
# Check whether the BoosterVersion list is empty or not
# (it is expected to be empty here, before getBoosterVersion is called).
BoosterVersion

[]

In [27]:
# Fill the global BoosterVersion list from df['rocket'] (performs API requests).
getBoosterVersion(df)

In [28]:
# BoosterVersion

In [30]:
# Fill the global Longitude, Latitude and LaunchSite lists from df['launchpad'] (performs API requests).
getLaunchSite(df)

In [31]:
# Fill the global PayloadMass and Orbit lists from df['payloads'] (performs API requests).
getPayLoadData(df)

In [32]:
# Fill the global core-related lists (Block, ReusedCount, Serial, Outcome, Flights,
# GridFins, Reused, Legs, LandingPad) from df['cores'] (performs API requests).
getCoreData(df)