# Download a list of all rocket launches from KSC or Cape Canaveral

In [None]:
import requests
import ast
import json
import pandas as pd
from datetime import datetime, timedelta
alldfs = []
for launchsite in ['cape', 'kennedy']:
    print(f'Processing {launchsite} launches')

    # --- Configuration ---
    # Define your date range (in ISO format)
    #start_date_str = "2020-01-01T00:00:00Z"  # earliest launch to download
    #end_date_str   = "2023-01-01T00:00:00Z"    # initial (most recent) end date
    startdate='2016-01-01'
    enddate=datetime.today().strftime('%Y-%m-%d')
    current_enddate = enddate

    # Convert date strings to datetime objects (assuming UTC)
    #start_date = datetime.fromisoformat(start_date_str.replace("Z", "+00:00"))
    #current_end_date = datetime.fromisoformat(end_date_str.replace("Z", "+00:00"))



    # Container for all downloaded launches
    all_launches = []

    # --- Loop to Download Batches of 100 Launches ---
    while True:

        # Define parameters for the API request.
        '''
        params = {
            "end_date": current_end_date.isoformat(),  # get launches before this date
            "limit": 100
        }
        
        print(f"Requesting launches before {current_end_date.isoformat()} ...")
        #response = requests.get(url, params=params)
        '''

        # API endpoint URL (use lldev rather than ll if testing)
        url = f"https://ll.thespacedevs.com/2.3.0/launches/previous/?search={launchsite}&window_start__gte={startdate}&window_start__lte={current_enddate}&limit=100" 
        response = requests.get(url)
        
        # Check for a successful API call
        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}")
            print("Response content:", response.text)
            break

        # Parse the JSON response. Adjust the key "launches" to match your API's response structure.
        #data = response.json()
        #launches = data.get("launches", [])
        data = response.json().get('results', [])
        
        # If no launches are returned, then we've reached the end of available data.
        if not data:
            print("No more launches returned by the API.")
            break
        
        # Append the current batch to the list
        all_launches.extend(data)
        df = pd.DataFrame(data).sort_values("window_start")
        df["window_start"] = pd.to_datetime(df["window_start"])
        oldest_date = df['window_start'].min()
        oldest_ymdstr = oldest_date.strftime('%Y-%m-%d')
        print(f"Fetched {len(data)} launches; oldest launch in this batch is at {oldest_ymdstr}")

        # Check if the oldest launch is at or before the start_date.
        if oldest_ymdstr <= startdate:
            print("Reached the start date. Stopping the loop.")
            break

        # Update current_end_date to be one second before the oldest launch date,
        # so that the next query doesn't include it again.
        current_enddate = (oldest_date - timedelta(seconds=1)).strftime('%Y-%m-%d')

    # --- Create DataFrame and Final Filtering ---
    df_launches = pd.DataFrame(all_launches)

    # Convert the "date_utc" column to datetime objects
    df_launches["window_start"] = pd.to_datetime(df_launches["window_start"])
    df_launches["window_end"] = pd.to_datetime(df_launches["window_end"])

    # Filter out any launches that are older than the start_date (if necessary)
    df_launches = df_launches[df_launches["window_start"] >= startdate]

    # Sort the DataFrame in increasing chronological order
    df_launches = df_launches.sort_values("window_start")

    df_launches['SLC']=None
    df_launches['success']=True
    for i, row in df_launches.iterrows():
        print(row['pad'])
        #pad_json = row['pad'].replace("'s ",'s ').replace("'", '"').replace('True','true').replace('False','false').replace('None','null')
        #pad_dict = json.loads(pad_json)
        #pad_name = pad_dict["name"]
        #df_launches.iat[i, df_launches.columns.get_loc('SLC')] = pad_name
        df_launches.iat[i, df_launches.columns.get_loc('SLC')] = row['pad']['name']
        if 'Failure' in row['status']:
            df_launches.iat[i, df_launches.columns.get_loc('status')] = 'Failure'

    print(f"Total launches downloaded: {len(df_launches)}")

    alldfs.append(df_launches)

# Concatenate the DataFrames vertically (stack rows) and reset the index
df_combined = pd.concat(alldfs, ignore_index=True)
df_combined = df_combined.sort_values("window_start")
df_filtered = df_combined[['name', 'slug', 'launch_designator', 'SLC', 'success', 'net', 'window_start', 'window_end']]

#df_filtered["company"] = df_combined["launch_service_provider"].apply(lambda x: ast.literal_eval(x)["name"])

# Optionally, save the concatenated DataFrame to a new CSV file
df_filtered.to_csv('all_florida_launches.csv', index=False)



Processing cape launches
Fetched 71 launches; oldest launch in this batch is at 2024-02-25
No more launches returned by the API.
{'id': 80, 'url': 'https://lldev.thespacedevs.com/2.3.0/pads/80/', 'active': True, 'agencies': [{'response_mode': 'normal', 'id': 121, 'url': 'https://lldev.thespacedevs.com/2.3.0/agencies/121/', 'name': 'SpaceX', 'abbrev': 'SpX', 'type': {'id': 3, 'name': 'Commercial'}, 'featured': True, 'country': [{'id': 2, 'name': 'United States of America', 'alpha_2_code': 'US', 'alpha_3_code': 'USA', 'nationality_name': 'American', 'nationality_name_composed': 'Americano'}], 'description': 'Space Exploration Technologies Corp., known as SpaceX, is an American aerospace manufacturer and space transport services company headquartered in Hawthorne, California. It was founded in 2002 by entrepreneur Elon Musk with the goal of reducing space transportation costs and enabling the colonization of Mars. SpaceX operates from many pads, on the East Coast of the US they operate fr

# Download SpaceX data

In [None]:
import requests
import pandas as pd

# --- Step 1: Get Landing Pad Information ---
# The landing pads endpoint is still on v4.
landpads_url = "https://api.spacexdata.com/v4/landpads"
lp_response = requests.get(landpads_url)

if lp_response.status_code != 200:
    print(f"Error: Received status code {lp_response.status_code} from {landpads_url}")
    print("Response content:", lp_response.text)
    lp_response.raise_for_status()

landpads = lp_response.json()
print(landpads)


# Identify landing pads at Cape Canaveral (filtering based on pad name)
target_pad_ids = []
target_pad_names = {}
for pad in landpads:
    name = pad.get("name", "")
    region = pad.get("region", "")
    # Adjust keywords if necessary
    if "Florida" in region:
        pad_id = pad.get("id")
        target_pad_ids.append(pad_id)
        target_pad_names[pad_id] = name

print("Target landing pad IDs:", target_pad_ids)

# --- Step 2: Get Launch Data from API v5 ---
launches_url = "https://api.spacexdata.com/v5/launches"
launches_response = requests.get(launches_url)
if launches_response.status_code != 200:
    print(f"Error: Received status code {launches_response.status_code} from {launches_url}")
    print("Response content:", launches_response.text)
    launches_response.raise_for_status()

launch_data = launches_response.json()

# --- Step 3: Filter for Booster Landings at Cape Canaveral ---
# In each launch record, the "cores" field is a list of booster attempts.
rows = []
for launch in launch_data:
    launch_name = launch.get("name")
    launch_time = launch.get("date_utc")  
    static_fire_time = launch.get("static_fire_date_utc")


    for core in launch.get("cores", []):
        landing_attempt = core.get("landing_attempt")
        landing_success = core.get("landing_success")
        landing_pad = core.get("landpad")         

        rows.append({
            "Launch Name": launch_name,
            "launch_time": pd.to_datetime(launch_time),
            "static_fire_time": pd.to_datetime(static_fire_time), 
            "landing_attempt": landing_attempt,
            "landing_success": landing_success,
            "LandingPad": target_pad_names.get(landing_pad, ""),
            "Where": core.get("landing_type")
        })

# --- Step 4: Export Data to CSV ---
df = pd.DataFrame(rows)
csv_filename = "spacex_launches_landings.csv"
df.to_csv(csv_filename, index=False)
print(f"CSV file saved as {csv_filename}")

# Optionally, display the first few rows
print(df.head())

# Merge CSV files

In [22]:
import pandas as pd
import os
print(os.getcwd())

# --- Step 1: Read the CSV files ---
df_ksc = pd.read_csv("ksc_launch_events_filtered.csv", parse_dates=["window_start", "window_end"])
#df_ksc = pd.read_csv("all_florida_launches.csv", parse_dates=["window_start", "window_end", "net"])
df_spacex = pd.read_csv("spacex_launches_landings.csv", parse_dates=["launch_time"])

# --- Step 2: Prepare for Cross Join ---
# Create a temporary key to do a cross join.
df_ksc["key"] = 1
df_spacex["key"] = 1

# Cross join the DataFrames so each SpaceX launch is paired with every KSC launch.
df_cross = pd.merge(df_spacex, df_ksc, on="key").drop("key", axis=1)

# --- Step 3: Filter the Merged DataFrame ---
# Condition 1: launch_time between window_start and window_end.
cond_time = (df_cross["launch_time"] >= df_cross["window_start"]) & (df_cross["launch_time"] <= df_cross["window_end"])

# Condition 2: 'Launch Name' is contained within the 'name' column.
# We use a row-wise lambda because this is a substring operation.
cond_name = df_cross.apply(lambda row: row["Launch Name"] in row["name"], axis=1)

# Filter the DataFrame where either condition is true.
df_merged = df_cross[cond_time | cond_name]


# --- Step 4: Merge Results with an Outer (Left) Join ---
# We want to keep all KSC rows; join the matching SpaceX info onto the KSC data.
# Here, we merge on 'launch_designator'. If a KSC row has multiple matching SpaceX rows, it will appear multiple times.
df_merged = df_ksc.merge(df_merged[['name', 'Launch Name', 'launch_time', 'static_fire_time',
       'landing_attempt', 'landing_success', 'LandingPad', 'Where']], on="name", how="left", suffixes=("", ""))
df_merged.drop("key", inplace=True, axis=1)

df_merged = df_merged.rename(columns={"name":"mission", "success":"launch_success", "Launch Name": "spacex_name", "launch_time": "spacex_launchtime", "LandingPad": "spacex_landingpad", "Where": "spacex_landingtype"})
df_merged["SLC"] = df_merged["SLC"].str.replace("Space Launch Complex", "", regex=False).str.strip()
df_merged["SLC"] = df_merged["SLC"].str.replace("Launch Complex", "", regex=False).str.strip()

bool_columns = ["launch_success", "landing_attempt", "landing_success"]  # adjust with the actual boolean columns
df_merged[bool_columns] = df_merged[bool_columns].fillna(False)
# Get columns with dtype 'object' (usually strings)
str_cols = df_merged.select_dtypes(include=['object']).columns

# Replace NaN in these columns with an empty string
df_merged[str_cols] = df_merged[str_cols].fillna("")

# --- Step 4: Save the Merged DataFrame ---
df_merged.to_csv("merged_launches.csv", index=False)
print("Merged CSV saved as merged_launches.csv")

/Volumes/ExtremeSSD1TB/Developer/GitHub/KSCRocketSeismoHydrology/Python/new_workflow
Merged CSV saved as merged_launches.csv


  df_merged[bool_columns] = df_merged[bool_columns].fillna(False)
