In [None]:
!pip install requests
!pip install pandas
!pip install numpy

In [None]:
import requests
import pandas as pd
import numpy as np
import datetime

In [None]:
# Lift the display limits so wide frames and long cell values are shown in full
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [None]:
# Endpoint of the SpaceX REST API (v4) that is queried for past launches
spacex_url = 'https://api.spacexdata.com/v4/launches/past'

In [None]:
# Request data from the live API. The timeout keeps the cell from hanging
# indefinitely when the server never answers.
response = requests.get(spacex_url, timeout=30)
response.status_code  # Check if request was successful (200 = OK)

In [None]:
# Load static JSON file for consistent results
static_json_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'
response = requests.get(static_json_url, timeout=30)
# Fail here with a clear HTTP error instead of trying to parse an error body.
response.raise_for_status()
# Flatten the JSON records into a DataFrame (nested objects become dotted columns).
data = pd.json_normalize(response.json())

In [None]:
# Narrow the frame down to the fields used by the rest of the notebook
wanted_columns = ['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']
data = data[wanted_columns]

In [30]:
# Keep only launches that have exactly one core and one payload, unwrap those
# single-element lists, and restrict the data to launches up to 2020-11-13.
# NOTE: this cell overwrites `data`; once 'cores'/'payloads' are unwrapped the
# length filters no longer mean "one element", so re-run the load and
# column-selection cells before running this cell again.
data = data[data['cores'].map(len) == 1]
# .copy() gives an independent frame, so the column assignments below do not
# write into a filtered slice (which triggers SettingWithCopyWarning).
data = data[data['payloads'].map(len) == 1].copy()
data['cores'] = data['cores'].map(lambda x: x[0])
data['payloads'] = data['payloads'].map(lambda x: x[0])
# Reduce the UTC timestamp to a plain calendar date for the cutoff comparison.
data['date'] = pd.to_datetime(data['date_utc']).dt.date
data = data[data['date'] <= datetime.date(2020, 11, 13)]

In [31]:
# Module-level accumulators; each one starts out as its own empty list and is
# appended to by the get* helper functions.

# Rocket and payload details
BoosterVersion = []
PayloadMass = []
Orbit = []

# Launch site and per-core flight details
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []

# Core details
Block = []
ReusedCount = []
Serial = []

# Launch pad coordinates
Longitude = []
Latitude = []

In [32]:
# Define functions to extract information from API
def getBoosterVersion(data):
    """Append one booster name per entry of ``data['rocket']`` to ``BoosterVersion``.

    Each truthy rocket ID is looked up at the SpaceX ``/v4/rockets/{id}``
    endpoint and the ``name`` field of the response is appended to the global
    ``BoosterVersion`` list. An ID that occurs several times is fetched only
    once. A falsy ID appends ``None`` so the list stays the same length as the
    input and remains aligned with the other per-launch lists.

    Parameters:
        data: mapping or DataFrame whose ``'rocket'`` entry is an iterable of
            rocket IDs.

    Returns:
        None. The result is written to the global ``BoosterVersion`` list.

    Raises:
        KeyError: if a response has no ``name`` field.
        requests exceptions: on network failure or timeout.
    """
    names_by_rocket_id = {}  # per-call cache: rocket ID -> name
    for rocket_id in data['rocket']:
        if not rocket_id:
            # Placeholder keeps this list aligned with the launch rows.
            BoosterVersion.append(None)
            continue
        if rocket_id not in names_by_rocket_id:
            # The timeout prevents an unresponsive server from hanging the notebook.
            rocket = requests.get(
                f"https://api.spacexdata.com/v4/rockets/{rocket_id}", timeout=30
            ).json()
            names_by_rocket_id[rocket_id] = rocket['name']
        BoosterVersion.append(names_by_rocket_id[rocket_id])

In [33]:
def getLaunchSite(data):
    """Append launch pad name and coordinates for each entry of ``data['launchpad']``.

    Each truthy launchpad ID is looked up at the SpaceX ``/v4/launchpads/{id}``
    endpoint. The ``longitude``, ``latitude`` and ``name`` fields of the
    response are appended to the global ``Longitude``, ``Latitude`` and
    ``LaunchSite`` lists. An ID that occurs several times is fetched only once.
    A falsy ID appends ``None`` to all three lists so they stay aligned with
    the input rows.

    Parameters:
        data: mapping or DataFrame whose ``'launchpad'`` entry is an iterable
            of launchpad IDs.

    Returns:
        None. Results are written to the three global lists.

    Raises:
        KeyError: if a response lacks one of the expected fields.
        requests exceptions: on network failure or timeout.
    """
    launchpads_by_id = {}  # per-call cache: launchpad ID -> decoded response
    for launchpad_id in data['launchpad']:
        if not launchpad_id:
            # Placeholders keep the three lists aligned with the launch rows.
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        if launchpad_id not in launchpads_by_id:
            # The timeout prevents an unresponsive server from hanging the notebook.
            launchpads_by_id[launchpad_id] = requests.get(
                f"https://api.spacexdata.com/v4/launchpads/{launchpad_id}",
                timeout=30,
            ).json()
        launchpad = launchpads_by_id[launchpad_id]
        Longitude.append(launchpad['longitude'])
        Latitude.append(launchpad['latitude'])
        LaunchSite.append(launchpad['name'])

In [34]:
def getPayloadData(data):
    """Append payload mass and orbit for each entry of ``data['payloads']``.

    Each truthy payload ID is looked up at the SpaceX ``/v4/payloads/{id}``
    endpoint. The ``mass_kg`` and ``orbit`` fields of the response are appended
    to the global ``PayloadMass`` and ``Orbit`` lists. A falsy ID appends
    ``None`` to both lists so they stay aligned with the input rows.

    Parameters:
        data: mapping or DataFrame whose ``'payloads'`` entry is an iterable of
            payload IDs.

    Returns:
        None. Results are written to the two global lists.

    Raises:
        KeyError: if a response lacks ``mass_kg`` or ``orbit``.
        requests exceptions: on network failure or timeout.
    """
    for payload_id in data['payloads']:
        if not payload_id:
            # Placeholders keep both lists aligned with the launch rows.
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        # The timeout prevents an unresponsive server from hanging the notebook.
        payload = requests.get(
            f"https://api.spacexdata.com/v4/payloads/{payload_id}", timeout=30
        ).json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

In [35]:
def getCoreData(data):
    """Append core and landing details for each entry of ``data['cores']``.

    For every core record, the fields read directly from the record are
    appended to the global ``Outcome`` (``"<landing_success> <landing_type>"``),
    ``Flights``, ``GridFins``, ``Reused``, ``Legs`` and ``LandingPad`` lists.
    When the record's ``core`` ID is not ``None`` it is looked up at the SpaceX
    ``/v4/cores/{id}`` endpoint and the ``block``, ``reuse_count`` and
    ``serial`` fields (``None`` when absent) are appended to ``Block``,
    ``ReusedCount`` and ``Serial``. Otherwise ``None`` is appended to those
    three lists.

    Parameters:
        data: mapping or DataFrame whose ``'cores'`` entry is an iterable of
            dict-like core records.

    Returns:
        None. Results are written to the global lists.

    Raises:
        KeyError: if a core record lacks one of the directly read keys.
        requests exceptions: on network failure or timeout.
    """
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            # The timeout prevents an unresponsive server from hanging the notebook.
            details = requests.get(
                f"https://api.spacexdata.com/v4/cores/{core_id}", timeout=30
            ).json()
        else:
            # No core ID: an empty mapping makes every lookup below yield None.
            details = {}
        Block.append(details.get('block', None))
        ReusedCount.append(details.get('reuse_count', None))
        Serial.append(details.get('serial', None))

        # Remaining fields come from the launch's own core record.
        Outcome.append(f"{core['landing_success']!s} {core['landing_type']!s}")
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

In [None]:
# Apply functions to fetch additional data.
# The helpers append to the module-level lists, so empty those first;
# otherwise re-running this cell would add every launch a second time and the
# lists would no longer match the number of rows in `data`.
for collected in (
    BoosterVersion, PayloadMass, Orbit,
    LaunchSite, Outcome, Flights,
    GridFins, Reused, Legs, LandingPad,
    Block, ReusedCount, Serial,
    Longitude, Latitude,
):
    collected.clear()

getBoosterVersion(data)
getLaunchSite(data)
getPayloadData(data)
getCoreData(data)

In [None]:
# Gather the columns of the launch table: two taken straight from `data`,
# the rest from the lists filled by the get* helpers
launch_dict = dict(
    FlightNumber=list(data['flight_number']),
    Date=list(data['date']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    Orbit=Orbit,
    LaunchSite=LaunchSite,
    Outcome=Outcome,
    Flights=Flights,
    GridFins=GridFins,
    Reused=Reused,
    Legs=Legs,
    LandingPad=LandingPad,
    Block=Block,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Longitude=Longitude,
    Latitude=Latitude,
)


In [None]:
# Build the launch table; each key of launch_dict becomes one column
launch_df = pd.DataFrame(data=launch_dict)

In [None]:
# Drop the Falcon 1 launches; every other booster version is kept.
# NOTE(review): the intent appears to be "Falcon 9 only" - confirm that no
# other booster versions are present at this point.
# .copy() makes the result an independent frame, so later cells that modify
# data_falcon9 do not write into a filtered view of launch_df.
data_falcon9 = launch_df[launch_df['BoosterVersion'] != 'Falcon 1'].copy()

In [None]:
# Reset FlightNumber column to 1..N over the remaining launches.
# assign() returns a new frame instead of writing into the filtered slice,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
data_falcon9 = data_falcon9.assign(
    FlightNumber=list(range(1, len(data_falcon9) + 1))
)

In [None]:
# Handle missing values in PayloadMass by replacing them with the column mean.
# The result is assigned back rather than calling fillna(inplace=True) on the
# selected column: that chained form operates on a temporary object and leaves
# data_falcon9 unchanged under pandas Copy-on-Write.
payload_mass_mean = data_falcon9['PayloadMass'].mean()
data_falcon9 = data_falcon9.fillna({'PayloadMass': payload_mass_mean})

In [None]:
# Write the cleaned launch table to disk, leaving out the index column
csv_path = 'dataset_part_1.csv'
data_falcon9.to_csv(csv_path, index=False)

In [None]:
# Preview the top five rows of the cleaned table
data_falcon9.head(n=5)

In [None]:
# Count the missing values remaining in each column (the previous cell already
# previews the first rows, so repeating head() here added nothing).
data_falcon9.isnull().sum()