In [1]:
# import required libraries
import pandas as pd
import numpy as np
import requests

In [2]:
import datetime

In [None]:
spacex_url = "https://api.spacexdata.com/v4/launches/past"

In [None]:
# Fetch the live launch list. A timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() fails fast on an HTTP error
# instead of letting a bad response flow into the JSON parsing below.
response = requests.get(spacex_url, timeout=30)
response.raise_for_status()

In [None]:
print(response.status_code)

In [None]:
## Print the raw data that requests.get() retrieved from the SpaceX API.
# print(response.text)
#  (or)
# print(response.json())

In [None]:
# assigning Response data to a variable which is in JSON.
data = response.json()

##### The data collected above comes from the live API and changes over time. To keep this learning project reproducible, I will instead use the static JSON snapshot provided by Coursera.

In [3]:
spacex_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json"

In [4]:
# Download the static JSON snapshot. A timeout keeps the cell from hanging
# forever on a stalled connection, and raise_for_status() fails fast on an
# HTTP error instead of letting a bad response reach response.json().
response = requests.get(spacex_url, timeout=30)
response.raise_for_status()

In [5]:
print(response.status_code)

200


In [6]:
data = response.json()

In [7]:
# Print the JSON data.
# print(data)

##### The JSON data has a nested structure, so we use the `pd.json_normalize` method to flatten it into a tabular DataFrame.

In [8]:
df = pd.json_normalize(data)

In [9]:
# First 5 rows of the DataFrame.
# df.head()

In [10]:
# columns in the data frame.
# df.columns

In [11]:
# data types of each column
# df.dtypes

In [12]:
# shape of the dataset.
# df.shape

##### Filtering the dataset by keeping the required columns for the project.

In [13]:
df = df[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

In [14]:
# Keep only launches with exactly one core (drops rockets with extra boosters)
# and exactly one payload.
df = df[df['cores'].map(len) == 1]
# .copy() makes the result an independent frame, so later column assignments
# do not write into a slice of the original (avoids SettingWithCopyWarning).
df = df[df['payloads'].map(len) == 1].copy()

In [15]:
# 'cores' and 'payloads' each hold a list; take the first (single) element.
# assign() builds a new frame instead of writing into the filtered slice,
# which avoids pandas' SettingWithCopyWarning. Column order is unchanged
# because both columns already exist.
df = df.assign(
    cores=df['cores'].map(lambda x: x[0]),
    payloads=df['payloads'].map(lambda x: x[0]),
)

In [16]:
# 'date_utc' contains both date and time; store just the calendar date in a
# new 'date' column. assign() returns a new frame rather than writing into a
# filtered slice, which avoids pandas' SettingWithCopyWarning.
df = df.assign(date=pd.to_datetime(df['date_utc']).dt.date)

In [17]:
# Keep only the launches dated on or before 2020-11-13.
df = df.loc[df['date'] <= datetime.date(2020, 11, 13)]

In [18]:
# df.tail()
df.shape

(94, 7)

#### Several columns in the dataset contain only API identifiers (IDs) rather than readable values; we need to convert them into human-readable form.

##### We need to create some helper functions that use the API to look up the information behind these IDs.

In [19]:
# Global accumulator lists: the helper functions append one entry per launch,
# and the lists later become the columns of the new DataFrame.

# Filled by getBoosterVersion.
BoosterVersion = []

# Filled by getPayLoadData.
PayloadMass = []
Orbit = []

# Filled by getLaunchSite.
LaunchSite = []
Longitude = []
Latitude = []

# Filled by getCoreData.
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []

In [20]:
''' HELPER FUNCTIONS'''

# From ROCKET column we can extract booster name.
def getBoosterVersion(data):
    """Append the booster name for every row of ``data`` to ``BoosterVersion``.

    Each value in ``data['rocket']`` is used as an ID for the SpaceX API
    ``/v4/rockets/<id>`` endpoint, and the ``name`` field of the reply is
    appended to the global ``BoosterVersion`` list.

    Exactly one entry is appended per row: a missing (falsy) ID yields
    ``None`` so the list stays aligned with the rows of ``data``.

    Note: the function only appends, so calling it again without first
    resetting ``BoosterVersion`` duplicates the entries.

    Parameters
    ----------
    data : mapping/DataFrame with a ``'rocket'`` column of rocket IDs.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # Many launches share the same rocket, so fetch each ID only once per call.
    names_by_id = {}
    for rocket_id in data['rocket']:
        if not rocket_id:
            # Keep row alignment instead of silently skipping the launch.
            BoosterVersion.append(None)
            continue
        key = str(rocket_id)
        if key not in names_by_id:
            reply = requests.get("https://api.spacexdata.com/v4/rockets/" + key, timeout=30)
            reply.raise_for_status()
            names_by_id[key] = reply.json()['name']
        BoosterVersion.append(names_by_id[key])

# From LAUNCHPAD column we can extract coordinates and launchsite name.
def getLaunchSite(data):
    """Append launch-site name and coordinates for every row of ``data``.

    Each value in ``data['launchpad']`` is used as an ID for the SpaceX API
    ``/v4/launchpads/<id>`` endpoint. The ``longitude``, ``latitude`` and
    ``name`` fields of the reply are appended to the global ``Longitude``,
    ``Latitude`` and ``LaunchSite`` lists respectively.

    Exactly one entry per list is appended per row: a missing (falsy) ID
    yields ``None`` in all three lists so they stay aligned with ``data``.

    Note: the function only appends, so calling it again without first
    resetting the lists duplicates the entries.

    Parameters
    ----------
    data : mapping/DataFrame with a ``'launchpad'`` column of launchpad IDs.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # Launchpads repeat across launches, so fetch each ID only once per call.
    pad_by_id = {}
    for pad_id in data['launchpad']:
        if not pad_id:
            # Keep row alignment instead of silently skipping the launch.
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        key = str(pad_id)
        if key not in pad_by_id:
            reply = requests.get("https://api.spacexdata.com/v4/launchpads/" + key, timeout=30)
            reply.raise_for_status()
            pad_by_id[key] = reply.json()
        pad = pad_by_id[key]
        Longitude.append(pad['longitude'])
        Latitude.append(pad['latitude'])
        LaunchSite.append(pad['name'])

# From PAYLOAD column we extract payload mass and orbit name.
def getPayLoadData(data):
    """Append payload mass and orbit for every row of ``data``.

    Each value in ``data['payloads']`` is used as an ID for the SpaceX API
    ``/v4/payloads/<id>`` endpoint. The ``mass_kg`` and ``orbit`` fields of
    the reply are appended to the global ``PayloadMass`` and ``Orbit`` lists.

    Exactly one entry per list is appended per row: a missing (falsy) ID
    yields ``None`` in both lists so they stay aligned with ``data``.

    Note: the function only appends, so calling it again without first
    resetting the lists duplicates the entries.

    Parameters
    ----------
    data : mapping/DataFrame with a ``'payloads'`` column of payload IDs.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    for payload_id in data['payloads']:
        if not payload_id:
            # Keep row alignment instead of silently skipping the launch.
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        reply = requests.get("https://api.spacexdata.com/v4/payloads/" + str(payload_id), timeout=30)
        reply.raise_for_status()
        payload = reply.json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

# From CORES column we can extract various data.
def getCoreData(data):
    """Append core and landing details for every row of ``data``.

    Each value in ``data['cores']`` is a dict describing one core. When its
    ``'core'`` entry holds an ID, the SpaceX API ``/v4/cores/<id>`` endpoint
    is queried and the ``block``, ``reuse_count`` and ``serial`` fields of the
    reply are appended to the global ``Block``, ``ReusedCount`` and ``Serial``
    lists; otherwise ``None`` is appended to each of them.

    The remaining values come straight from the dict itself:
    ``Outcome`` gets ``"<landing_success> <landing_type>"``, and ``Flights``,
    ``GridFins``, ``Reused``, ``Legs`` and ``LandingPad`` get the ``flight``,
    ``gridfins``, ``reused``, ``legs`` and ``landpad`` entries.

    Note: the function only appends, so calling it again without first
    resetting the lists duplicates the entries.

    Parameters
    ----------
    data : mapping/DataFrame with a ``'cores'`` column of core dicts.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # The same core can fly on several launches, so fetch each ID only once.
    details_by_id = {}
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            if core_id not in details_by_id:
                reply = requests.get("https://api.spacexdata.com/v4/cores/" + core_id, timeout=30)
                reply.raise_for_status()
                details_by_id[core_id] = reply.json()
            details = details_by_id[core_id]
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            # No core ID for this launch: keep the lists aligned with placeholders.
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

In [21]:
# Check whether the boosterversion list is empty or not.
BoosterVersion

[]

In [22]:
df

Unnamed: 0,rocket,payloads,launchpad,cores,flight_number,date_utc,date
0,5e9d0d95eda69955f709d1eb,5eb0e4b5b6c3bb0006eeb1e1,5e9e4502f5090995de566f86,"{'core': '5e9e289df35918033d3b2623', 'flight':...",1,2006-03-24T22:30:00.000Z,2006-03-24
1,5e9d0d95eda69955f709d1eb,5eb0e4b6b6c3bb0006eeb1e2,5e9e4502f5090995de566f86,"{'core': '5e9e289ef35918416a3b2624', 'flight':...",2,2007-03-21T01:10:00.000Z,2007-03-21
3,5e9d0d95eda69955f709d1eb,5eb0e4b7b6c3bb0006eeb1e5,5e9e4502f5090995de566f86,"{'core': '5e9e289ef3591855dc3b2626', 'flight':...",4,2008-09-28T23:15:00.000Z,2008-09-28
4,5e9d0d95eda69955f709d1eb,5eb0e4b7b6c3bb0006eeb1e6,5e9e4502f5090995de566f86,"{'core': '5e9e289ef359184f103b2627', 'flight':...",5,2009-07-13T03:35:00.000Z,2009-07-13
5,5e9d0d95eda69973a809d1ec,5eb0e4b7b6c3bb0006eeb1e7,5e9e4501f509094ba4566f84,"{'core': '5e9e289ef359185f2b3b2628', 'flight':...",6,2010-06-04T18:45:00.000Z,2010-06-04
...,...,...,...,...,...,...,...
101,5e9d0d95eda69973a809d1ec,5ef6a4600059c33cee4a829e,5e9e4502f509094188566f88,"{'core': '5ef670f10059c33cee4a826c', 'flight':...",102,2020-09-03T12:46:00.000Z,2020-09-03
102,5e9d0d95eda69973a809d1ec,5ef6a48e0059c33cee4a829f,5e9e4502f509094188566f88,"{'core': '5e9e28a7f3591817f23b2663', 'flight':...",103,2020-10-06T11:29:00.000Z,2020-10-06
103,5e9d0d95eda69973a809d1ec,5ef6a4d50059c33cee4a82a1,5e9e4502f509094188566f88,"{'core': '5e9e28a6f35918c0803b265c', 'flight':...",104,2020-10-18T12:25:00.000Z,2020-10-18
104,5e9d0d95eda69973a809d1ec,5ef6a4ea0059c33cee4a82a2,5e9e4501f509094ba4566f84,"{'core': '5ef670f10059c33cee4a826c', 'flight':...",105,2020-10-24T15:31:00.000Z,2020-10-24


In [23]:
# Apply getBoosterVersion function method to dataframe.
getBoosterVersion(df)

In [25]:
# len(BoosterVersion)

In [26]:
# Call getLaunchSite function.
getLaunchSite(df)

In [31]:
# len(Latitude)

In [28]:
# Call getPayloadData function.
getPayLoadData(df)

In [30]:
# len(PayloadMass)

In [32]:
# Call getCoreData function.
getCoreData(df)

#### Create a new DataFrame from the extracted data.

In [34]:
# Collect every column of the new DataFrame in one dictionary:
# two columns come straight from df, the rest from the global lists
# filled by the helper functions.
launch_dict = dict(
    FlightNumber=list(df['flight_number']),
    Date=list(df['date']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    Orbit=Orbit,
    LaunchSite=LaunchSite,
    Outcome=Outcome,
    Flights=Flights,
    GridFins=GridFins,
    Reused=Reused,
    Legs=Legs,
    LandingPad=LandingPad,
    Block=Block,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Longitude=Longitude,
    Latitude=Latitude,
)

In [36]:
# Print each column name with the number of values collected for it;
# all counts must match before the dictionary can become a DataFrame.
ls = list(launch_dict.keys())
for column_name in ls:
    print(column_name, len(launch_dict[column_name]))

FlightNumber 94
Date 94
BoosterVersion 94
PayloadMass 94
Orbit 94
LaunchSite 94
Outcome 94
Flights 94
GridFins 94
Reused 94
Legs 94
LandingPad 94
Block 94
ReusedCount 94
Serial 94
Longitude 94
Latitude 94


In [37]:
# Create a new DataFrame from launch_dict dictionary.
launch_df = pd.DataFrame(launch_dict)

In [38]:
# First 5 rows.
launch_df.head()

Unnamed: 0,FlightNumber,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
0,1,2006-03-24,Falcon 1,20.0,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin1A,167.743129,9.047721
1,2,2007-03-21,Falcon 1,,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin2A,167.743129,9.047721
2,4,2008-09-28,Falcon 1,165.0,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin2C,167.743129,9.047721
3,5,2009-07-13,Falcon 1,200.0,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin3C,167.743129,9.047721
4,6,2010-06-04,Falcon 9,,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857
