In [10]:
import pandas as pd

# Load the CSV files
spacex_df = pd.read_csv("data/raw_spacex_launches.csv")
weather_df = pd.read_csv("data/Global_Weather_Data.csv")

# Select only the required weather columns (first N rows, matching SpaceX data length)
weather_subset = weather_df[['temperature', 'humidity', 'wind_speed']].head(len(spacex_df))

# Concatenate the weather data directly with SpaceX launch data
merged_df = pd.concat([spacex_df.reset_index(drop=True), weather_subset.reset_index(drop=True)], axis=1)

# Save the enriched dataset
merged_df.to_csv("enriched_spacex_launches.csv", index=False)

print("✅ Weather data merged successfully and saved as 'enriched_spacex_launches.csv'")


✅ Weather data merged successfully and saved as 'enriched_spacex_launches.csv'


  weather_df = pd.read_csv("data/Global_Weather_Data.csv")


In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def preprocess_data():
    df = pd.read_csv("data/enriched_spacex_launches.csv")

    # Drop missing target labels
    df = df[df['success'].notna()]

    # Select relevant features
    df = df[[
        'name',
        'rocket',
        'date_utc',
        'success',
        'payloads',
        'launchpad',
        'temperature',
        'humidity',
        'wind_speed'
    ]]

    # Fill missing weather values
    df[['temperature', 'humidity', 'wind_speed']] = df[['temperature', 'humidity', 'wind_speed']].fillna(method='ffill')

    # Convert launch date to datetime features
    df['date_utc'] = pd.to_datetime(df['date_utc'])
    df['year'] = df['date_utc'].dt.year
    df['month'] = df['date_utc'].dt.month
    df['day'] = df['date_utc'].dt.day
    df['hour'] = df['date_utc'].dt.hour

    # Encode categorical variables
    le_rocket = LabelEncoder()
    le_launchpad = LabelEncoder()
    le_payloads = LabelEncoder()

    df['rocket_encoded'] = le_rocket.fit_transform(df['rocket'].astype(str))
    df['launchpad_encoded'] = le_launchpad.fit_transform(df['launchpad'].astype(str))
    df['payload_mass'] = le_payloads.fit_transform(df['payloads'].astype(str))

    # Final feature set
    df_final = df[[
        'rocket_encoded',
        'launchpad_encoded',
        'payload_mass',
        'temperature',
        'humidity',
        'wind_speed',
        'year',
        'month',
        'day',
        'hour',
        'success'
    ]]

    df_final.to_csv("data/processed_spacex_data.csv", index=False)
    print("Processed data saved.")

if __name__ == "__main__":
    preprocess_data()


Processed data saved.


  df[['temperature', 'humidity', 'wind_speed']] = df[['temperature', 'humidity', 'wind_speed']].fillna(method='ffill')
