In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load SpaceX data
df = pd.read_json('https://api.spacexdata.com/v4/launches/past')

# Step 1: Normalize nested JSON (extract relevant fields from nested dicts/lists)
df['core'] = df['cores'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else {})
df['landing_success'] = df['core'].apply(lambda x: x.get('landing_success', None))
df['landing_type'] = df['core'].apply(lambda x: x.get('landing_type', None))

# Step 2: Drop rows with missing landing success
df = df.dropna(subset=['landing_success'])

# Step 3: Convert date string to datetime
df['date'] = pd.to_datetime(df['date_utc'])

# Step 4: Select relevant features
features = ['date', 'rocket', 'payloads', 'launchpad', 'success', 'landing_type', 'landing_success']
df = df[features]

# Step 5: One-hot encode categorical features
df = pd.get_dummies(df, columns=['rocket', 'launchpad', 'landing_type'])

# Step 6: Scale numerical features (e.g., using payload mass if available)
# First, enrich data by merging with payloads endpoint if needed
# Here we’ll simulate with dummy numeric column
df['dummy_mass'] = df.index * 10  # Simulated numerical feature for demonstration

scaler = StandardScaler()
df[['dummy_mass']] = scaler.fit_transform(df[['dummy_mass']])

# Final clean DataFrame
df_processed = df.copy()


In [3]:
df.head(300)

Unnamed: 0,date,payloads,success,landing_success,rocket_5e9d0d95eda69973a809d1ec,rocket_5e9d0d95eda69974db09d1ed,launchpad_5e9e4501f509094ba4566f84,launchpad_5e9e4502f509092b78566f87,launchpad_5e9e4502f509094188566f88,landing_type_ASDS,landing_type_Ocean,landing_type_RTLS,dummy_mass
10,2013-09-29 16:00:00+00:00,[5eb0e4bbb6c3bb0006eeb1ee],1.0,False,True,False,False,True,False,False,True,False,-1.926990
13,2014-04-18 19:25:00+00:00,[5eb0e4bbb6c3bb0006eeb1f1],1.0,True,True,False,True,False,False,False,True,False,-1.866313
14,2014-07-14 15:15:00+00:00,[5eb0e4bcb6c3bb0006eeb1f2],1.0,True,True,False,True,False,False,False,True,False,-1.846088
17,2014-09-21 05:52:00+00:00,[5eb0e4bcb6c3bb0006eeb1f5],1.0,False,True,False,True,False,False,False,True,False,-1.785412
18,2015-01-10 09:47:00+00:00,[5eb0e4bdb6c3bb0006eeb1f6],1.0,False,True,False,True,False,False,True,False,False,-1.765186
...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,2022-09-05 02:09:00+00:00,"[631614e9ffc78f3b85670717, 631617fbffc78f3b856...",1.0,True,True,False,True,False,False,True,False,False,1.551782
183,2022-09-11 01:10:00+00:00,"[63161610ffc78f3b85670718, 63161872ffc78f3b856...",1.0,True,True,False,False,False,True,True,False,False,1.572008
184,2022-09-17 01:05:00+00:00,[63161699ffc78f3b85670719],1.0,True,True,False,True,False,False,True,False,False,1.592233
185,2022-09-24 23:30:00+00:00,[631616a7ffc78f3b8567071a],1.0,True,True,False,True,False,False,True,False,False,1.612458
