# SpaceX Falcon 9 First Stage Landing Prediction

## Lab 1: Collecting the Data

In this capstone, we aim to predict whether the Falcon 9 first stage will land successfully. SpaceX advertises Falcon 9 rocket launches at 62 million dollars, significantly less than its competitors, largely because the first stage can be reused. If we can predict whether the first stage will land, we can estimate the cost of a launch, which is useful information for any aerospace company that wants to bid against SpaceX.


### Step 1: Importing Required Libraries

In [None]:
import requests
import pandas as pd
import numpy as np
import datetime

# Show every column and the full contents of each cell when a DataFrame is
# displayed (None removes the limit).
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None


### Step 2: Define Helper Functions for the SpaceX API

In [None]:
# Module-level accumulators that the helper functions append to.
# Each list later becomes one column of the launch table.

# Rocket and payload details
BoosterVersion, PayloadMass, Orbit = [], [], []
# Launch site name and coordinates
LaunchSite, Longitude, Latitude = [], [], []
# Per-launch core details
Outcome, Flights, GridFins, Reused, Legs, LandingPad = [], [], [], [], [], []
# Details looked up from the core record
Block, ReusedCount, Serial = [], [], []

def getBoosterVersion(data):
    """Append the rocket name for every entry of ``data['rocket']``.

    Each rocket ID is looked up at the SpaceX ``/v4/rockets/`` endpoint and
    the ``name`` field of the reply is appended to the module-level
    ``BoosterVersion`` list.

    Exactly one value is appended per entry: a missing (falsy) ID appends
    ``None`` so that ``BoosterVersion`` stays the same length as the other
    column lists, mirroring what ``getCoreData`` does for a missing core.

    Args:
        data: Object whose ``'rocket'`` item is an iterable of rocket IDs
            (for example a DataFrame with a ``rocket`` column).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # Many launches share the same rocket; fetch each ID only once per call.
    names_by_id = {}
    for rocket_id in data['rocket']:
        if not rocket_id:
            # Placeholder keeps this column aligned with the others.
            BoosterVersion.append(None)
            continue
        key = str(rocket_id)
        if key not in names_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/rockets/" + key,
                timeout=30,  # never hang forever on a stalled connection
            )
            response.raise_for_status()
            names_by_id[key] = response.json()['name']
        BoosterVersion.append(names_by_id[key])

def getLaunchSite(data):
    """Append launch site name and coordinates for every ``data['launchpad']`` entry.

    Each launchpad ID is looked up at the SpaceX ``/v4/launchpads/`` endpoint;
    the ``longitude``, ``latitude`` and ``name`` fields of the reply are
    appended to the module-level ``Longitude``, ``Latitude`` and
    ``LaunchSite`` lists.

    Exactly one value per list is appended per entry: a missing (falsy) ID
    appends ``None`` to all three lists so that they stay the same length as
    the other column lists.

    Args:
        data: Object whose ``'launchpad'`` item is an iterable of launchpad
            IDs (for example a DataFrame with a ``launchpad`` column).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # Many launches share the same launchpad; fetch each ID only once per call.
    sites_by_id = {}
    for launchpad_id in data['launchpad']:
        if not launchpad_id:
            # Placeholders keep these columns aligned with the others.
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        key = str(launchpad_id)
        if key not in sites_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/launchpads/" + key,
                timeout=30,  # never hang forever on a stalled connection
            )
            response.raise_for_status()
            sites_by_id[key] = response.json()
        site = sites_by_id[key]
        Longitude.append(site['longitude'])
        Latitude.append(site['latitude'])
        LaunchSite.append(site['name'])

def getPayloadData(data):
    """Append payload mass and orbit for every entry of ``data['payloads']``.

    Each payload ID is looked up at the SpaceX ``/v4/payloads/`` endpoint;
    the ``mass_kg`` and ``orbit`` fields of the reply are appended to the
    module-level ``PayloadMass`` and ``Orbit`` lists.

    Exactly one value per list is appended per entry: a missing (falsy) ID
    appends ``None`` to both lists so that they stay the same length as the
    other column lists.

    Args:
        data: Object whose ``'payloads'`` item is an iterable of payload ID
            strings (one ID per launch).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    for payload_id in data['payloads']:
        if not payload_id:
            # Placeholders keep these columns aligned with the others.
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        response = requests.get(
            "https://api.spacexdata.com/v4/payloads/" + payload_id,
            timeout=30,  # never hang forever on a stalled connection
        )
        response.raise_for_status()
        payload = response.json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

def getCoreData(data):
    """Append core and landing details for every entry of ``data['cores']``.

    For each core entry, the ``block``, ``reuse_count`` and ``serial`` fields
    are fetched from the SpaceX ``/v4/cores/`` endpoint and appended to the
    module-level ``Block``, ``ReusedCount`` and ``Serial`` lists; when the
    entry has no core ID, ``None`` is appended to each instead.

    The remaining fields come straight from the entry itself and are
    appended to ``Outcome`` (``"<landing_success> <landing_type>"``),
    ``Flights``, ``GridFins``, ``Reused``, ``Legs`` and ``LandingPad``.

    Args:
        data: Object whose ``'cores'`` item is an iterable of dicts with the
            keys ``core``, ``landing_success``, ``landing_type``, ``flight``,
            ``gridfins``, ``reused``, ``legs`` and ``landpad``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            response = requests.get(
                "https://api.spacexdata.com/v4/cores/" + core_id,
                timeout=30,  # never hang forever on a stalled connection
            )
            # Surface HTTP failures directly instead of a KeyError below.
            response.raise_for_status()
            details = response.json()
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            # No core record to query; keep the columns aligned.
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)

        # Landing result and type are combined into a single label.
        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])


### Step 3: Load Static JSON Data from URL

In [None]:
# Already imported in Step 1; repeated so this cell also runs by itself.
import pandas as pd
import requests

# Download the static JSON snapshot of the SpaceX launch data.
static_json_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'
response = requests.get(static_json_url, timeout=30)  # do not hang on a stalled download

# Stop here with a clear HTTP error instead of failing later while parsing
# the body or selecting columns.
response.raise_for_status()

# Flatten the nested JSON records into a DataFrame
data = pd.json_normalize(response.json())

# Keep only the columns used in the following steps
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Display the first rows of the DataFrame
data.head()

### Step 4: Cleaning and Preparing Dataset

In [None]:
import datetime  # needed for datetime.date() in the cutoff filter below

# Keep only launches that have exactly one core and exactly one payload.
single_core = data['cores'].map(len) == 1
single_payload = data['payloads'].map(len) == 1

# Take an explicit copy: the column assignments below must modify an
# independent DataFrame, not a filtered slice of the original
# (assigning into a slice triggers pandas' SettingWithCopyWarning).
data = data[single_core & single_payload].copy()

# Unwrap the single-element lists so each cell holds the item itself
data['cores'] = data['cores'].map(lambda x: x[0])
data['payloads'] = data['payloads'].map(lambda x: x[0])

# Convert date_utc to a Python date (the time of day is dropped)
data['date'] = pd.to_datetime(data['date_utc']).dt.date

# Keep launches dated on or before 2020-11-13
data = data[data['date'] <= datetime.date(2020, 11, 13)]

# Optional: preview the result
data.head()

### Step 5: Calling Helper Functions to Fetch Detailed Info

In [None]:
# Empty the accumulators first: the helpers only ever append, so running this
# cell a second time would otherwise duplicate every row and leave the lists
# longer than the DataFrame they are combined with in the next step.
for _column in (BoosterVersion, PayloadMass, Orbit, LaunchSite, Outcome,
                Flights, GridFins, Reused, Legs, LandingPad, Block,
                ReusedCount, Serial, Longitude, Latitude):
    _column.clear()

# Each helper walks one column of `data` and fills its accumulator lists.
getBoosterVersion(data)
getLaunchSite(data)
getPayloadData(data)
getCoreData(data)


### Step 6: Construct the Final Launch Data Dictionary

In [None]:
# Combine the flight number and date columns of `data` with the lists filled
# by the helper functions; each keyword becomes one column of the new table.
launch_dict = dict(
    FlightNumber=list(data['flight_number']),
    Date=list(data['date']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    Orbit=Orbit,
    LaunchSite=LaunchSite,
    Outcome=Outcome,
    Flights=Flights,
    GridFins=GridFins,
    Reused=Reused,
    Legs=Legs,
    LandingPad=LandingPad,
    Block=Block,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Longitude=Longitude,
    Latitude=Latitude,
)

# From here on `data` refers to the assembled launch table.
data = pd.DataFrame(launch_dict)


### Step 7: Filter for Falcon 9 Launches

In [None]:
# Keep every launch whose booster is not a Falcon 1, as an independent copy.
data_falcon9 = data.loc[data['BoosterVersion'] != 'Falcon 1'].copy()

# Renumber the remaining flights consecutively, starting at 1.
data_falcon9.loc[:, 'FlightNumber'] = list(range(1, len(data_falcon9) + 1))


### Step 8: Handle Missing Values

In [None]:
# Replace missing payload masses with the mean of the known masses.
# The mean has to be computed here: it is not defined by any earlier step.
payload_mean = data_falcon9['PayloadMass'].mean()
data_falcon9['PayloadMass'] = data_falcon9['PayloadMass'].fillna(payload_mean)


### Step 9: Save to CSV

In [None]:
# Write the Falcon 9 table to disk, leaving out the DataFrame index column.
data_falcon9.to_csv(path_or_buf='dataset_part_1.csv', index=False)

**Dataset preparation complete for Part 1: Collecting and Wrangling the Data.**