In [None]:
import pandas as pd
import requests as rq
import numpy as np
import datetime

In [None]:
#Display every column, and the full contents of every column, when a frame is shown
for display_option in ("display.max_columns", "display.max_colwidth"):
    pd.set_option(display_option, None)

In [None]:
#Every SpaceX endpoint used below shares this v4 base; the resource path is appended to it
spacex_api_base = "https://api.spacexdata.com/v4/"
rockets_url = spacex_api_base + "rockets/"
launchpad_url = spacex_api_base + "launchpads/"
payloads_url = spacex_api_base + "payloads/"
core_url = spacex_api_base + "cores/"
main_url = spacex_api_base + "launches/past"
#NOTE(review): presumably a static snapshot of the launches response; it is not referenced in the cells shown here
static_json_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'

In [None]:
#Function to retrieve booster name from rocket column
def getBoosterVersion(data, timeout=30):
    """Append the booster name of every launch to the global ``BoosterVersion`` list.

    For each id in ``data['rocket']`` the rocket record is fetched from
    ``rockets_url + id`` and its ``'name'`` field is appended.

    Exactly one entry is appended per row: a missing/empty id appends ``None``
    (the same convention getCoreData uses), so the list stays the same length
    as the other columns it is later combined with.

    Parameters
    ----------
    data : object supporting ``data['rocket']`` (e.g. a DataFrame)
        Iterating ``data['rocket']`` must yield one rocket id per launch.
    timeout : number, optional
        Seconds passed to ``rq.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    names_by_id = {}  #each distinct rocket is requested once per call
    for rock in data['rocket']:
        if not rock:
            BoosterVersion.append(None)
            continue
        rocket_id = str(rock)
        if rocket_id not in names_by_id:
            reply = rq.get(rockets_url + rocket_id, timeout=timeout)
            reply.raise_for_status()
            names_by_id[rocket_id] = reply.json()['name']
        BoosterVersion.append(names_by_id[rocket_id])

In [None]:
#Function to retrieve name of launch site using launchpad
def getLaunchSite(data, timeout=30):
    """Append launch-site name and coordinates of every launch to the global lists.

    For each id in ``data['launchpad']`` the launchpad record is fetched from
    ``launchpad_url + id``; its ``'longitude'``, ``'latitude'`` and ``'name'``
    fields are appended to ``Longitude``, ``Latitude`` and ``LaunchSite``.

    Exactly one entry per list is appended per row: a missing/empty id appends
    ``None`` to all three lists so they stay aligned with the other columns.

    Parameters
    ----------
    data : object supporting ``data['launchpad']`` (e.g. a DataFrame)
        Iterating ``data['launchpad']`` must yield one launchpad id per launch.
    timeout : number, optional
        Seconds passed to ``rq.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    site_by_id = {}  #each distinct launchpad is requested once per call
    for x in data['launchpad']:
        if not x:
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        pad_id = str(x)
        if pad_id not in site_by_id:
            reply = rq.get(launchpad_url + pad_id, timeout=timeout)
            reply.raise_for_status()
            site_by_id[pad_id] = reply.json()
        site = site_by_id[pad_id]
        Longitude.append(site['longitude'])
        Latitude.append(site['latitude'])
        LaunchSite.append(site['name'])

In [None]:
#Function to retrieve mass of payload from payload api
def getPayloadData(data, timeout=30):
    """Append payload mass and orbit of every launch to the global lists.

    For each id in ``data['payloads']`` the payload record is fetched from
    ``payloads_url + id``; its ``'mass_kg'`` and ``'orbit'`` fields are
    appended to ``PayloadMass`` and ``Orbit``.

    Exactly one entry per list is appended per row: a missing/empty id appends
    ``None`` to both lists so they stay aligned with the other columns.

    Parameters
    ----------
    data : object supporting ``data['payloads']`` (e.g. a DataFrame)
        Iterating ``data['payloads']`` must yield one payload id per launch.
    timeout : number, optional
        Seconds passed to ``rq.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    for load in data['payloads']:
        if not load:
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        #str() mirrors how the sibling helpers build their URLs
        reply = rq.get(payloads_url + str(load), timeout=timeout)
        reply.raise_for_status()
        payload = reply.json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

In [None]:
#Function to retrieve landing outcome from core dataset
def getCoreData(data, timeout=30):
    """Append core and landing details of every launch to the global lists.

    Each element of ``data['cores']`` is a dict read through the keys
    ``'core'``, ``'landing_success'``, ``'landing_type'``, ``'flight'``,
    ``'gridfins'``, ``'reused'``, ``'legs'`` and ``'landpad'``.

    When ``'core'`` is not ``None`` the core record is fetched from
    ``core_url + id`` and its ``'block'``, ``'reuse_count'`` and ``'serial'``
    fields are appended to ``Block``, ``ReusedCount`` and ``Serial``;
    otherwise ``None`` is appended to those three lists. The remaining lists
    (``Outcome``, ``Flights``, ``GridFins``, ``Reused``, ``Legs``,
    ``LandingPad``) are always filled from the dict itself, with ``Outcome``
    being ``"<landing_success> <landing_type>"``.

    Parameters
    ----------
    data : object supporting ``data['cores']`` (e.g. a DataFrame)
        Iterating ``data['cores']`` must yield one core dict per launch.
    timeout : number, optional
        Seconds passed to ``rq.get`` as its ``timeout``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    details_by_id = {}  #a core that appears in several launches is requested once per call
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            if core_id not in details_by_id:
                reply = rq.get(core_url + core_id, timeout=timeout)
                reply.raise_for_status()
                details_by_id[core_id] = reply.json()
            details = details_by_id[core_id]
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        Outcome.append(str(core['landing_success'])+' '+str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

In [None]:
#Get main spacex data
#The timeout keeps the cell from hanging on a stalled connection; raise_for_status stops here
#on an HTTP error instead of letting an error body be parsed as launch data further down
response = rq.get(main_url, timeout=30)
response.raise_for_status()

In [None]:
#Decode the response body as JSON, flatten it into a data frame and preview the first rows
launches_json = response.json()
norml = pd.json_normalize(launches_json)
norml.head()

In [None]:
#Keep only the columns the rest of the notebook reads
wanted_columns = ['rocket','payloads','launchpad','cores','flight_number','date_utc']
data = norml[wanted_columns]

In [None]:
#Removing rows where cores or payloads are more than 1
data = data[data['cores'].map(len)==1]
data = data[data['payloads'].map(len)==1]
#Unwrap the single-element lists. assign() builds a new frame rather than writing columns
#into the filtered one, which older pandas versions flag with SettingWithCopyWarning
data = data.assign(
    cores=data['cores'].map(lambda cores : cores[0]),
    payloads=data['payloads'].map(lambda payloads : payloads[0]),
)

In [None]:
#convert the date_utc to a datetime datatype and then extracting the date leaving the time
#assign() returns a new frame, so no column is written into a filtered frame
#(which older pandas versions flag with SettingWithCopyWarning)
data = data.assign(date=pd.to_datetime(data['date_utc']).dt.date)

In [None]:
#keep only the launches dated on or before 13 Nov 2020 (the cutoff day itself is included)
cutoff_date = datetime.date(2020, 11, 13)
data = data[data['date'] <= cutoff_date]

In [None]:
#Preview the first rows of the filtered launch table
data.head()

In [None]:
#Empty module-level lists, one per output column. The get* helper functions append to them
#by name, so every name is bound to its own separate list.
BoosterVersion, PayloadMass, Orbit = [], [], []
LaunchSite, Longitude, Latitude = [], [], []
Outcome, Flights, GridFins = [], [], []
Reused, Legs, LandingPad = [], [], []
Block, ReusedCount, Serial = [], [], []

In [None]:
# Fill the global lists: booster, launch site, payload and core details for every launch.
# The helpers only ever append, so the lists are emptied first; otherwise re-running this
# cell would add a second copy of every row.
for _collected in (BoosterVersion, PayloadMass, Orbit, LaunchSite, Outcome, Flights,
                   GridFins, Reused, Legs, LandingPad, Block, ReusedCount, Serial,
                   Longitude, Latitude):
    _collected.clear()
getBoosterVersion(data)
getLaunchSite(data)
getPayloadData(data)
getCoreData(data)

In [None]:
#Column name -> values. Flight number and date are taken from `data`;
#every other column is one of the lists filled by the get* helper functions.
launch_dict = dict(
    FlightNumber=list(data['flight_number']),
    Date=list(data['date']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    Orbit=Orbit,
    LaunchSite=LaunchSite,
    Outcome=Outcome,
    Flights=Flights,
    GridFins=GridFins,
    Reused=Reused,
    Legs=Legs,
    LandingPad=LandingPad,
    Block=Block,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Longitude=Longitude,
    Latitude=Latitude,
)

In [None]:
#Build the launch table: one column per key of launch_dict
launchdf = pd.DataFrame(launch_dict)

In [None]:
#Keep only the rows whose booster is 'Falcon 9'. .copy() gives df its own data, so the column
#writes in later cells are not treated as writes to a slice of launchdf (SettingWithCopyWarning)
df = launchdf[launchdf['BoosterVersion'] == 'Falcon 9'].copy()

In [None]:
#(rows, columns) of the Falcon 9 subset
df.shape

In [None]:
#Renumber the remaining flights 1..N. assign() returns a new frame instead of writing
#into the filtered frame, which older pandas versions flag with SettingWithCopyWarning
df = df.assign(FlightNumber=list(range(1, df.shape[0]+1)))

In [None]:
#Count the missing values in each column
df.isnull().sum()

In [None]:
#Replace missing payload masses with the column mean.
#Calling fillna(..., inplace=True) on df['PayloadMass'] only modifies a temporary Series under
#pandas Copy-on-Write and leaves df unchanged, so the filled column is assigned back explicitly
df = df.assign(PayloadMass=df['PayloadMass'].fillna(df['PayloadMass'].mean()))

In [None]:
#Write the table to dataset_part_1.csv without the index column
df.to_csv('dataset_part_1.csv', index=False)