In [1]:
import requests
import pandas as pd

# Endpoint
url = "https://api.spacexdata.com/v4/launches"

# GET request
response = requests.get(url)
data = response.json()

# Normalizar JSON a DataFrame
df = pd.json_normalize(data)
print(df.shape)
df.head(2)


(205, 43)


Unnamed: 0,static_fire_date_utc,static_fire_date_unix,net,window,rocket,success,failures,details,crew,ships,...,links.reddit.media,links.reddit.recovery,links.flickr.small,links.flickr.original,links.presskit,links.webcast,links.youtube_id,links.article,links.wikipedia,fairings
0,2006-03-17T00:00:00.000Z,1142554000.0,False,0.0,5e9d0d95eda69955f709d1eb,False,"[{'time': 33, 'altitude': None, 'reason': 'mer...",Engine failure at 33 seconds and loss of vehicle,[],[],...,,,[],[],,https://www.youtube.com/watch?v=0a_00nJ_Y88,0a_00nJ_Y88,https://www.space.com/2196-spacex-inaugural-fa...,https://en.wikipedia.org/wiki/DemoSat,
1,,,False,0.0,5e9d0d95eda69955f709d1eb,False,"[{'time': 301, 'altitude': 289, 'reason': 'har...",Successful first stage burn and transition to ...,[],[],...,,,[],[],,https://www.youtube.com/watch?v=Lk4zQ2wP-Nc,Lk4zQ2wP-Nc,https://www.space.com/3590-spacex-falcon-1-roc...,https://en.wikipedia.org/wiki/DemoSat,


In [2]:
# Obtener catálogo de rockets
rockets = requests.get("https://api.spacexdata.com/v4/rockets").json()
rocket_map = {r['id']: r['name'] for r in rockets}

# Mapear al DataFrame
df['rocket_name'] = df['rocket'].map(rocket_map)

# Filtrar Falcon 9
df = df[df['rocket_name'] == 'Falcon 9'].reset_index(drop=True)
print("Lanzamientos Falcon 9:", df.shape[0])


Lanzamientos Falcon 9: 195


In [3]:
# Selección de columnas clave
cols = ['flight_number','date_utc','rocket_name','rocket',
        'payloads','launchpad','cores','success']
df = df[cols]
df['date_utc'] = pd.to_datetime(df['date_utc'])


In [12]:
# Expandir payloads
df_exp = df.explode('payloads').reset_index(drop=True)

# Obtener info de payloads
payloads = requests.get("https://api.spacexdata.com/v4/payloads").json()
payload_map = {p['id']: p for p in payloads}

df_exp['payload_mass_kg'] = df_exp['payloads'].map(
    lambda x: payload_map[x].get('mass_kg') if x in payload_map else None
)

# Expandir la columna 'cores' para manejar las listas de diccionarios
df_exp = df_exp.explode('cores').reset_index(drop=True)

# Convertir los diccionarios de la columna 'cores' en tuplas
df_exp['cores'] = df_exp['cores'].apply(lambda x: tuple(sorted(x.items())) if isinstance(x, dict) else x)

# Agrupar los datos
df_grouped = df_exp.groupby(['flight_number','date_utc','rocket_name','rocket',
                             'launchpad','cores','success'], as_index=False).agg({
    'payload_mass_kg':'sum'
})

print(df_grouped.head())

   flight_number                  date_utc rocket_name  \
0              6 2010-06-04 18:45:00+00:00    Falcon 9   
1              7 2010-12-08 15:43:00+00:00    Falcon 9   
2              8 2012-05-22 07:44:00+00:00    Falcon 9   
3              9 2012-10-08 00:35:00+00:00    Falcon 9   
4             10 2013-03-01 19:10:00+00:00    Falcon 9   

                     rocket                 launchpad  \
0  5e9d0d95eda69973a809d1ec  5e9e4501f509094ba4566f84   
1  5e9d0d95eda69973a809d1ec  5e9e4501f509094ba4566f84   
2  5e9d0d95eda69973a809d1ec  5e9e4501f509094ba4566f84   
3  5e9d0d95eda69973a809d1ec  5e9e4501f509094ba4566f84   
4  5e9d0d95eda69973a809d1ec  5e9e4501f509094ba4566f84   

                                               cores success  payload_mass_kg  
0  ((core, 5e9e289ef359185f2b3b2628), (flight, 1)...    True              0.0  
1  ((core, 5e9e289ef35918187c3b2629), (flight, 1)...    True              0.0  
2  ((core, 5e9e289ef35918f39c3b262a), (flight, 1)...    True        

In [13]:
# Explode cores
df_grouped = df_grouped.explode('cores').reset_index(drop=True)

# Cargar info de cores
cores = requests.get("https://api.spacexdata.com/v4/cores").json()
core_map = {c['id']: c for c in cores}

def extract_landing_success(core):
    if isinstance(core, dict):
        return core.get('landing_success')
    return None

df_grouped['landing_success'] = df_grouped['cores'].apply(extract_landing_success)


In [14]:
# Reemplazar NaN payloads por media
mean_payload = df_grouped['payload_mass_kg'].mean()
df_grouped['payload_mass_kg'] = df_grouped['payload_mass_kg'].fillna(mean_payload)

# Crear target binario: 1 éxito, 0 fallo
df_grouped['landing_success'] = df_grouped['landing_success'].apply(
    lambda x: 1 if x is True else 0 if x is False else None
)

# Guardar
df_grouped.to_csv("spacex_launches_clean_v2.csv", index=False)
print("Archivo guardado: spacex_launches_clean_v2.csv")


Archivo guardado: spacex_launches_clean_v2.csv
