<a href="https://colab.research.google.com/github/dayanidhi116/DATA-SCIENCE-WITH-CAPSTONE/blob/main/spacex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import requests
from datetime import datetime

# API endpoint
spacex_url = "https://api.spacexdata.com/v4/launches/past"

# Fetch data
response = requests.get(spacex_url)

# Check if request was successful
if response.status_code == 200:
    launches = response.json()

    # Print structured data
    for launch in launches:
        name = launch.get('name', 'N/A')
        date_utc = launch.get('date_utc', 'N/A')
        date_local = datetime.strptime(date_utc, "%Y-%m-%dT%H:%M:%S.%fZ")
        success = launch.get('success', 'N/A')

        print(f"Name: {name}")
        print(f"Date (UTC): {date_local.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Success: {success}")
        print("-" * 40)

else:
    print("Failed to retrieve data. Status code:", response.status_code)


Name: FalconSat
Date (UTC): 2006-03-24 22:30:00
Success: False
----------------------------------------
Name: DemoSat
Date (UTC): 2007-03-21 01:10:00
Success: False
----------------------------------------
Name: Trailblazer
Date (UTC): 2008-08-03 03:34:00
Success: False
----------------------------------------
Name: RatSat
Date (UTC): 2008-09-28 23:15:00
Success: True
----------------------------------------
Name: RazakSat
Date (UTC): 2009-07-13 03:35:00
Success: True
----------------------------------------
Name: Falcon 9 Test Flight
Date (UTC): 2010-06-04 18:45:00
Success: True
----------------------------------------
Name: COTS 1
Date (UTC): 2010-12-08 15:43:00
Success: True
----------------------------------------
Name: COTS 2
Date (UTC): 2012-05-22 07:44:00
Success: True
----------------------------------------
Name: CRS-1
Date (UTC): 2012-10-08 00:35:00
Success: True
----------------------------------------
Name: CRS-2
Date (UTC): 2013-03-01 19:10:00
Success: True
---------------

In [12]:
import pandas as pd
import numpy as np
import requests
import datetime

# Static SpaceX launch dataset
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/' \
      'IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'

response = requests.get(url)
data = pd.json_normalize(response.json())

# Select relevant columns
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Filter for single-core and single-payload launches
data = data[data['cores'].map(len) == 1]
data = data[data['payloads'].map(len) == 1]

# Flatten core and payload fields
data['cores'] = data['cores'].apply(lambda x: x[0])
data['payloads'] = data['payloads'].apply(lambda x: x[0])

# Convert date
data['date'] = pd.to_datetime(data['date_utc']).dt.date

# Filter by date
data = data[data['date'] <= datetime.date(2020, 11, 13)]

# Normalize nested core and payload data
core_data = pd.json_normalize(data['cores'])
payload_data = pd.json_normalize(data['payloads'])

# Combine all data into one DataFrame
df = pd.concat([data.reset_index(drop=True), core_data, payload_data], axis=1)

# Build the final DataFrame
launch_dict = {
    'FlightNumber': df['flight_number'],
    'Date': df['date'],
    'BoosterVersion': df['rocket'],  # This is just an ID; enriching it would require an API call
    'PayloadMass': df.get('mass_kg', np.nan),
    'Orbit': df.get('orbit', np.nan),
    'LaunchSite': df['launchpad'],  # Also just an ID
    'Outcome': df.get('landing_success', np.nan),
    'Flights': df.get('flight', np.nan),
    'GridFins': df.get('gridfins', np.nan),
    'Reused': df.get('reused', np.nan),
    'Legs': df.get('legs', np.nan),
    'LandingPad': df.get('landpad', np.nan),
    'Block': df.get('block', np.nan),
    'ReusedCount': df.get('reuse_count', np.nan),
    'Serial': df.get('core_serial', np.nan),
    'Longitude': df.get('longitude', np.nan),
    'Latitude': df.get('latitude', np.nan),
}

launch_df = pd.DataFrame(launch_dict)

# Display the top 5 rows
print(launch_df.head())


   FlightNumber        Date            BoosterVersion  PayloadMass  Orbit  \
0             1  2006-03-24  5e9d0d95eda69955f709d1eb          NaN    NaN   
1             2  2007-03-21  5e9d0d95eda69955f709d1eb          NaN    NaN   
2             4  2008-09-28  5e9d0d95eda69955f709d1eb          NaN    NaN   
3             5  2009-07-13  5e9d0d95eda69955f709d1eb          NaN    NaN   
4             6  2010-06-04  5e9d0d95eda69973a809d1ec          NaN    NaN   

                 LaunchSite Outcome  Flights  GridFins  Reused   Legs  \
0  5e9e4502f5090995de566f86    None        1     False   False  False   
1  5e9e4502f5090995de566f86    None        1     False   False  False   
2  5e9e4502f5090995de566f86    None        1     False   False  False   
3  5e9e4502f5090995de566f86    None        1     False   False  False   
4  5e9e4501f509094ba4566f84    None        1     False   False  False   

  LandingPad  Block  ReusedCount  Serial  Longitude  Latitude  
0       None    NaN          NaN  

In [13]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
# URL of the Wikipedia page containing SpaceX launch data
url = 'https://en.wikipedia.org/wiki/List_of_Falcon_9_and_Falcon_Heavy_launches'

# Send a request and parse the page content
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
# Find all tables on the page
tables = soup.find_all('table', class_='wikitable')

# Display how many tables were found
print(f"Found {len(tables)} tables.")
launch_data = []

for table in tables:
    rows = table.find_all("tr")[1:]  # Skip header row
    for row in rows:
        cols = row.find_all("td")
        if len(cols) > 5:
            date = cols[0].text.strip().split("\n")[0]
            rocket = cols[1].text.strip()
            site = cols[2].text.strip()
            payload = cols[3].text.strip()
            orbit = cols[4].text.strip()
            outcome = cols[5].text.strip()
            booster_landing = cols[-1].text.strip()

            launch_data.append([date, rocket, site, payload, orbit, outcome, booster_landing])
columns = ["Date", "Rocket", "Launch Site", "Payload", "Orbit", "Launch Outcome", "Booster Landing"]
df = pd.DataFrame(launch_data, columns=columns)
df.head()
df.to_csv("spacex_launch_data.csv", index=False)


Found 6 tables.


In [15]:
df = df.replace(r'\[.*?\]', '', regex=True)
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Year'] = df['Date'].dt.year

df['Launch Success'] = df['Launch Outcome'].apply(lambda x: 1 if 'Success' in x or 'Operational' in x else 0)

df['Booster Landing Success'] = df['Booster Landing'].apply(
    lambda x: 1 if 'success' in x.lower() else (0 if 'failure' in x.lower() else None)
)
# Check for missing values
print(df.isnull().sum())

# Drop rows with missing critical values
df = df.dropna(subset=['Date', 'Launch Site', 'Payload', 'Orbit'])
df = df[['Date', 'Year', 'Rocket', 'Launch Site', 'Payload', 'Orbit',
         'Launch Outcome', 'Launch Success', 'Booster Landing', 'Booster Landing Success']]
print(df.head())
print(df.info())


Date                       338
Rocket                       0
Launch Site                  0
Payload                      0
Orbit                        0
Launch Outcome               0
Booster Landing              0
Year                       338
Launch Success               0
Booster Landing Success    102
dtype: int64
          Date    Year Rocket         Launch Site  \
305 2025-06-21  2025.0  F9 B5  Vandenberg, SLC‑4E   
306 2025-06-01  2025.0  F9 B5            TBA (FL)   
307 2025-06-01  2025.0  F9 B5            TBA (FL)   
310 2025-07-01  2025.0  F9 B5            TBA (FL)   
311 2025-07-01  2025.0  F9 B5            TBA (FL)   

                                 Payload      Orbit  \
305  Transporter-14 (smallsat rideshare)        SSO   
306                       Nusantara Lima       GTO?   
307     Bandwagon-4 (smallsat rideshare)        LEO   
310                              Crew-11  LEO (ISS)   
311                 MTG-S1 — Sentinel-4A        GTO   

                   Launch O

  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
  df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
